In [1]:
import numpy as np
import pandas as pd
import scipy.sparse as ss

In [2]:
# Load the raw connections table from disk.
df = pd.read_csv(filepath_or_buffer="adult_data/connections.csv")

In [3]:
# Preview the raw table; the rendered output shows the first and last rows with the middle elided.
df

Unnamed: 0,pre_root_id,post_root_id,neuropil,syn_count,nt_type
0,720575940629970489,720575940631267655,AVLP_R,7,GABA
1,720575940605876866,720575940606514878,LAL_R,15,GABA
2,720575940627737365,720575940628914436,AL_L,32,ACH
3,720575940633587552,720575940626452879,SMP_R,15,ACH
4,720575940616871878,720575940621203973,AVLP_L,13,GABA
...,...,...,...,...,...
3871462,720575940621773517,720575940611139733,PVLP_R,1,ACH
3871463,720575940622734576,720575940609171395,ICL_L,2,ACH
3871464,720575940638466367,720575940627036426,LO_L,5,ACH
3871465,720575940629789660,720575940631553603,GOR_R,3,ACH


In [48]:
# There are repeated (pre, post) connections, so collapse each pair into a single
# row whose syn_count is the total over the repeats.
# The column is selected *before* summing: GroupBy.sum()'s first positional
# parameter is `numeric_only`, so the earlier sum("syn_count") did not pick a
# column -- it only worked because a non-empty string is truthy.
df = df.groupby(["pre_root_id", "post_root_id"], as_index=False)["syn_count"].sum()

In [49]:
# Every neuron that appears as a source or a target of at least one connection.
pre_ids = set(df["pre_root_id"])
post_ids = set(df["post_root_id"])
roots = pre_ids.union(post_ids)

# Assign each root id a dense 0..N-1 index, following the set's iteration order.
# NOTE(review): that order is arbitrary (not sorted), and this mapping is not
# written to disk in the cells shown -- confirm it is persisted somewhere if the
# saved matrices must be traced back to root ids.
root_id_to_index = dict(zip(roots, range(len(roots))))

In [50]:
# Replace raw root ids with their dense matrix indices in both endpoint columns.
# Not idempotent: running this cell a second time would look the indices up in
# root_id_to_index as if they were root ids.
for column in ("pre_root_id", "post_root_id"):
    df[column] = df[column].map(root_id_to_index)

Unnamed: 0,pre_root_id,post_root_id,syn_count
count,2701601.0,2701601.0,2701601.0
mean,67046.76,67458.99,12.64836
std,38697.84,38776.33,16.98593
min,0.0,0.0,5.0
25%,33592.0,33784.0,6.0
50%,67040.0,67974.0,8.0
75%,100649.0,101172.0,13.0
max,134190.0,134190.0,2405.0


In [53]:
# Convert to a sparse matrix: rows are presynaptic indices, columns are
# postsynaptic indices, values are synapse counts.
# The shape is passed explicitly; without it coo_matrix infers each dimension
# from the largest index present, which is only square (and aligned with
# root_id_to_index) if the highest-indexed neuron happens to appear on both sides.
n_neurons = len(root_id_to_index)
matrix = ss.coo_matrix(
    (df["syn_count"], (df["pre_root_id"], df["post_root_id"])),
    shape=(n_neurons, n_neurons),
)

In [58]:
# Persist the full connectivity matrix in SciPy's .npz sparse format.
ss.save_npz(file="adult_data/good_synaptic_matrix.npz", matrix=matrix)

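Separate left and right sides: build one connectivity matrix per side, keeping only connections whose pre- and post-synaptic neurons are both classified on that side.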

In [6]:
# Load the per-neuron classification table (provides the `side` and `root_id` columns used below).
cl = pd.read_csv(filepath_or_buffer="adult_data/classification.csv")

In [11]:
# Unique root ids of the neurons labelled "left" and "right" in the classification table.
is_left = cl["side"] == "left"
is_right = cl["side"] == "right"
left_ids = cl.loc[is_left, "root_id"].drop_duplicates()
right_ids = cl.loc[is_right, "root_id"].drop_duplicates()

In [14]:
# By this point `df` stores dense matrix indices in pre_root_id / post_root_id
# (they were remapped through root_id_to_index), whereas left_ids / right_ids
# hold raw root ids from the classification table. Comparing the two directly
# matches nothing, so translate each side's ids into index space first.
# Classified neurons that take part in no connection have no index and are skipped.
left_idx = [root_id_to_index[r] for r in left_ids if r in root_id_to_index]
right_idx = [root_id_to_index[r] for r in right_ids if r in root_id_to_index]

# Keep only connections whose two endpoints are both on the same side.
left_df = df[df["pre_root_id"].isin(left_idx) & df["post_root_id"].isin(left_idx)]
right_df = df[df["pre_root_id"].isin(right_idx) & df["post_root_id"].isin(right_idx)]

In [18]:
# Build one sparse matrix per side in the same index space as the full matrix.
# The shape is given explicitly so that both matrices have identical dimensions
# (one row/column per entry of root_id_to_index); letting coo_matrix infer it
# would size each matrix by the largest index on that side, and would raise if
# a side had no connections at all.
n_neurons = len(root_id_to_index)
side_shape = (n_neurons, n_neurons)
left_matrix = ss.coo_matrix(
    (left_df["syn_count"], (left_df["pre_root_id"], left_df["post_root_id"])),
    shape=side_shape,
)
right_matrix = ss.coo_matrix(
    (right_df["syn_count"], (right_df["pre_root_id"], right_df["post_root_id"])),
    shape=side_shape,
)
# save left and right matrices
ss.save_npz("adult_data/left_synaptic_matrix.npz", left_matrix)
ss.save_npz("adult_data/right_synaptic_matrix.npz", right_matrix)