In [2]:
%matplotlib inline
import os
import sys
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import connectome
project_root = connectome.setup_notebook(use_project_root_as_cwd=True)

sys.path.insert(0, str(project_root))

from utils.model_inspection_funcs import (
    neuron_data_from_image,
    sample_images,
)
from utils.model_inspection_utils import propagate_data_with_steps

cmap = plt.cm.binary

device = torch.device("cpu")
dtype = torch.float32

Changed working directory to /home/eudald/Desktop/doctorat/connectome
Project root: /home/eudald/Desktop/doctorat/connectome


In [3]:
temp = pd.read_csv(
    os.path.join(project_root, "new_data", "connections_random3.csv"),
    dtype={
        "pre_root_id": "string",
        "post_root_id": "string",
        "syn_count": np.int32,
    },
    index_col=0,
)

grouped = temp.groupby(["pre_root_id", "post_root_id"]).sum("syn_count").reset_index()

connections = grouped.sort_values(["pre_root_id", "post_root_id"])

# Nuerons activated after 4 passes depending on connection matrix

## Real connectome

In [5]:
neuron_data = pd.read_csv(
    os.path.join(
        project_root, "new_data", "right_visual_positions_selected_neurons.csv"
    ),
    dtype={"root_id": "string"},
).drop(columns=["x", "y", "z", "PC1", "PC2"])

num_passes = 4
base_dir = os.path.join(project_root, "images", "one_to_ten", "train")
sub_dirs = ["yellow", "blue"]

sampled_images = sample_images(base_dir, sub_dirs, 1)
img = sampled_images[0]
activated_data = neuron_data_from_image(img, neuron_data)

propagation = (
    activated_data[["root_id", "activation"]]
    .fillna(0)
    .rename(columns={"activation": "input"})
)
activation = activated_data[["root_id", "activation"]]
connections["weight"] = 1

for i in range(num_passes):
    activation = propagate_data_with_steps(activation.copy(), connections, i)
    propagation = propagation.merge(activation, on="root_id", how="left").fillna(0)

# find percentage of non-zero values in activation_4
propagation["activation_4"].astype(bool).sum() / len(propagation)

In [6]:
# find percentage of non-zero values in activation_4
propagation["activation_4"].astype(bool).sum() / len(propagation)

0.5658108547286318

## Completely random connections

In [18]:
# reshuffle column post_rood_id of the dataframe connections
shuffled_connections = connections.copy()
shuffled_connections["post_root_id"] = np.random.permutation(
    connections["post_root_id"]
)
propagation = (
    activated_data[["root_id", "activation"]]
    .fillna(0)
    .rename(columns={"activation": "input"})
)
for i in range(num_passes):
    activation = propagate_data_with_steps(activation.copy(), shuffled_connections, i)
    propagation = propagation.merge(activation, on="root_id", how="left").fillna(0)

# find percentage of non-zero values in activation_4
propagation["activation_4"].astype(bool).sum() / len(propagation)

0.7491062723431914

## Random connections with reduced synaptic count

In [None]:
# reshuffle column post_rood_id of the dataframe connections
random_equalized_connections = pd.read_csv(os.path.join(project_root, "new_data", "connections_random.csv"))
propagation = (
    activated_data[["root_id", "activation"]]
    .fillna(0)
    .rename(columns={"activation": "input"})
)
for i in range(num_passes):
    activation = propagate_data_with_steps(activation.copy(), shuffled_connections, i)
    propagation = propagation.merge(activation, on="root_id", how="left").fillna(0)

# find percentage of non-zero values in activation_4
propagation["activation_4"].astype(bool).sum() / len(propagation)

0.7491062723431914

## Random connections within distance bins

In [10]:
# reshuffle column post_rood_id of the dataframe connections
shuffled_connections = pd.read_csv(
    os.path.join(project_root, "new_data", "connections_random3.csv"),
    dtype={
        "pre_root_id": "string",
        "post_root_id": "string",
        "syn_count": np.int32,
        },
)
shuffled_connections["weight"] = 1
propagation = (
    activated_data[["root_id", "activation"]]
    .fillna(0)
    .rename(columns={"activation": "input"})
)
for i in range(num_passes):
    activation = propagate_data_with_steps(activation.copy(), shuffled_connections, i)
    propagation = propagation.merge(activation, on="root_id", how="left").fillna(0)

# find percentage of non-zero values in activation_4
propagation["activation_4"].astype(bool).sum() / len(propagation)

0.5680857978550536

In [None]:
# horrible data stuff
checkpoint = torch.load("models/model_2024-05-20 03:41:43.pth", map_location="cpu")
model.load_state_dict(checkpoint["model"])
model.eval()
connections = (
    pd.read_csv(
        "adult_data/connections.csv",
        dtype={
            "pre_root_id": "string",
            "post_root_id": "string",
            "syn_count": np.int32,
        },
    )
    .groupby(["pre_root_id", "post_root_id"])
    .sum("syn_count")
    .reset_index()
)

connections["weight"] = model.connectome.edge_weight_multiplier.detach()
right_root_ids = data_processor.right_root_ids
all_neurons = (
    pd.read_csv("adult_data/classification_clean.csv")
    .merge(right_root_ids, on="root_id")
    .fillna("Unknown")
)
neuron_data = pd.read_csv(
    "adult_data/right_visual_positions_selected_neurons.csv",
    dtype={"root_id": "string"},
).drop(columns=["x", "y", "z", "PC1", "PC2"])
data_cols = ["x_axis", "y_axis"]
all_coords = pd.read_csv("adult_data/all_coords_clean.csv", dtype={"root_id": "string"})
rational_cell_types = pd.read_csv("adult_data/rational_cell_types.csv")
all_neurons["decision_making"] = np.where(
    all_neurons["cell_type"].isin(rational_cell_types["cell_type"].values.tolist()),
    1,
    0,
)
all_neurons["root_id"] = all_neurons["root_id"].astype("string")