Calculate inprop (input proportion) for right-hemisphere neurons at the single-neuron level. Optic lobe neurons are not included.

In [1]:
import pandas as pd
import numpy as np
import sys
import matplotlib.pyplot as plt
import time
from scipy.sparse import csc_matrix, coo_matrix, diags
import scipy as sp
from tqdm import tqdm

In [2]:
import seaserpent as ss

# reading data

In [3]:
# Neuron annotation table from the flywire_annotations repository (tab-separated).
annotations_url = 'https://raw.githubusercontent.com/flyconnectome/flywire_annotations/main/supplemental_files/Supplemental_file1_neuron_annotations.tsv'
info = pd.read_csv(annotations_url, sep='\t')
# Show which annotation columns are available.
info.columns

  exec(code_obj, self.user_global_ns, self.user_ns)


Index(['supervoxel_id', 'root_id', 'pos_x', 'pos_y', 'pos_z', 'soma_x',
       'soma_y', 'soma_z', 'nucleus_id', 'flow', 'super_class', 'cell_class',
       'cell_sub_class', 'cell_type', 'hemibrain_type', 'ito_lee_hemilineage',
       'hartenstein_hemilineage', 'morphology_group', 'top_nt', 'top_nt_conf',
       'side', 'nerve', 'vfb_id', 'fbbt_id', 'status'],
      dtype='object')

In [4]:
# Number of neurons per super_class, with missing values counted as their own row.
info['super_class'].value_counts(dropna=False)

optic                 77536
central               32388
sensory               16903
visual_projection      8053
ascending              2362
descending             1303
visual_centrifugal      524
motor                   106
endocrine                80
Name: super_class, dtype: int64

In [5]:
# To keep the matrix small, optic-lobe neurons are dropped; visual projection
# neurons stay in and act as the visual input.
not_optic = ~info['super_class'].isin(['optic'])
# Visual sensory neurons are dropped as well, except the ocellar retinula cells.
non_visual_or_ocellar = info['cell_class'].ne('visual') | info['cell_type'].eq('ocellar retinula cell')
cb = info[not_optic & non_visual_or_ocellar]
cb

Unnamed: 0,supervoxel_id,root_id,pos_x,pos_y,pos_z,soma_x,soma_y,soma_z,nucleus_id,flow,...,ito_lee_hemilineage,hartenstein_hemilineage,morphology_group,top_nt,top_nt_conf,side,nerve,vfb_id,fbbt_id,status
0,78112261444987077,720575940628857210,109306,50491,3960,104904.0,47464.0,5461.0,2453924.0,intrinsic,...,SMPpv2_ventral,CP1_ventral,,acetylcholine,0.917977,left,,fw138205,FBbt_20001935,
1,82475466912542440,720575940626838909,172029,55635,1592,177472.0,56936.0,1429.0,7393349.0,intrinsic,...,VLPl2_medial,BLAv2_medial,VLPl2_medial__0,acetylcholine,0.645442,right,,fw000001,,
2,83038623024880664,720575940626046919,180632,58664,1925,180632.0,58664.0,1925.0,7415038.0,intrinsic,...,,,,acetylcholine,0.844781,right,,fw000002,FBbt_20000538,
3,79801523353604463,720575940630311383,133800,56063,1847,180728.0,61008.0,1630.0,7415013.0,intrinsic,...,putative_primary,putative_primary,,acetylcholine,0.760387,right,,fw000003,FBbt_20000260,
4,83038554439606237,720575940633370649,180496,57448,2989,180496.0,57448.0,2989.0,7415848.0,intrinsic,...,,,,acetylcholine,0.895458,right,,fw000004,FBbt_00051248,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123502,79587599823629322,720575940618417409,130350,14001,3229,,,,,afferent,...,,,,,,left,OCN,fw123599,FBbt_00004242,
123503,79587599823608864,720575940628941553,130046,14747,3205,,,,,afferent,...,,,,serotonin,0.734219,left,OCN,fw123600,FBbt_00004242,
123504,79728680842360672,720575940612567258,132026,19096,3069,,,,,afferent,...,,,,serotonin,0.729409,center,OCN,fw123601,FBbt_00004242,
123505,79798912214529467,720575940630757750,133378,17839,3166,,,,,afferent,...,,,,serotonin,0.544816,center,OCN,fw123602,FBbt_00004242,


In [6]:
# Working label for each neuron: `cell_type` where annotated, otherwise `hemibrain_type`.
# Build the column out-of-place with `assign`. The previous pattern (assigning a column
# onto a filtered slice, then `cb.combined_type.fillna(..., inplace=True)`) is chained
# assignment: it raises SettingWithCopyWarning and, under pandas copy-on-write, the
# in-place fill only touches a temporary Series and never reaches `cb`.
cb = cb.assign(combined_type=cb.cell_type.fillna(cb.hemibrain_type))
# Count neurons per label (rows still missing a label are not counted here).
cb.combined_type.value_counts()

KCg-m       2190
KCab        1643
BM_InOm     1111
LC12         380
TmY14        370
            ... 
DNge149        1
OA-VUMa6       1
DNp72          1
CB1880         1
PLP104         1
Name: combined_type, Length: 7696, dtype: int64

In [7]:
# Neurons with neither cell_type nor hemibrain_type get their own root id (as a string)
# as label, so every neuron ends up with a non-missing `combined_type`.
# Assign the filled column back explicitly: `cb.combined_type.fillna(..., inplace=True)`
# operates on a temporary Series and does not update `cb` under pandas copy-on-write.
cb = cb.assign(combined_type=cb.combined_type.fillna(cb.root_id.astype(str)))

In [9]:
# Lookup tables keyed by neuron (root_id).
typedict = dict(zip(cb['root_id'], cb['combined_type']))
sidedict = dict(zip(cb['root_id'], cb['side']))
# Lookup tables keyed by combined_type; when a type occurs on several rows,
# the value from the last such row is the one that is kept.
type_super_class_dict, type_class_dict, type_sub_class_dict = (
    dict(zip(cb['combined_type'], cb[annotation]))
    for annotation in ('super_class', 'cell_class', 'cell_sub_class')
)

## read connectivity

In [10]:
import platform  # no longer needed for the path below; kept in case another cell relies on it
from pathlib import Path

# Connectivity edge list (feather file), expected in the current user's Downloads folder.
# Resolving the folder from the home directory replaces the two hardcoded, user-specific
# absolute paths, so the cell runs unchanged on any machine/OS where the file was downloaded.
CONN_FILENAME = 'syn_proof_analysis_filtered_consolidated_783.feather'
conn = pd.read_feather(Path.home() / 'Downloads' / CONN_FILENAME)

## get RHS

In [11]:
# Number of neurons per side annotation, with missing values counted as their own row.
cb['side'].value_counts(dropna=False)

right     25264
left      25134
center      173
na           28
NaN           2
Name: side, dtype: int64

In [12]:
# Root ids of neurons whose side is either missing (NaN) or the literal string 'na'.
side_missing = cb['side'].isna() | cb['side'].eq('na')
cb.loc[side_missing, 'root_id'].values

array([720575940631953619, 720575940626411097, 720575940623787445,
       720575940622687880, 720575940619974193, 720575940629043817,
       720575940630331643, 720575940646039075, 720575940630508793,
       720575940624382727, 720575940624227509, 720575940640595840,
       720575940610611758, 720575940619388062, 720575940625368578,
       720575940615171084, 720575940615394463, 720575940621515190,
       720575940629540546, 720575940612137714, 720575940645559662,
       720575940621029615, 720575940627069256, 720575940612844202,
       720575940622091340, 720575940648455545, 720575940625162168,
       720575940628204008, 720575940621138779, 720575940627896634])

These neurons (side missing or 'na') are mostly ORNs, so we include them in the right-hemisphere set.

In [13]:
# "Right" here means every neuron not annotated as left, so rows whose side is
# 'right', 'center', 'na' or missing are all kept.
right = cb[~cb['side'].eq('left')]
right

Unnamed: 0,supervoxel_id,root_id,pos_x,pos_y,pos_z,soma_x,soma_y,soma_z,nucleus_id,flow,...,hartenstein_hemilineage,morphology_group,top_nt,top_nt_conf,side,nerve,vfb_id,fbbt_id,status,combined_type
1,82475466912542440,720575940626838909,172029,55635,1592,177472.0,56936.0,1429.0,7393349.0,intrinsic,...,BLAv2_medial,VLPl2_medial__0,acetylcholine,0.645442,right,,fw000001,,,CB0924
2,83038623024880664,720575940626046919,180632,58664,1925,180632.0,58664.0,1925.0,7415038.0,intrinsic,...,,,acetylcholine,0.844781,right,,fw000002,FBbt_20000538,,AVLP429
3,79801523353604463,720575940630311383,133800,56063,1847,180728.0,61008.0,1630.0,7415013.0,intrinsic,...,putative_primary,,acetylcholine,0.760387,right,,fw000003,FBbt_20000260,,AVLP151
4,83038554439606237,720575940633370649,180496,57448,2989,180496.0,57448.0,2989.0,7415848.0,intrinsic,...,,,acetylcholine,0.895458,right,,fw000004,FBbt_00051248,,LC27
5,83038760597920695,720575940638709326,180165,60936,2866,180608.0,59880.0,2461.0,7415851.0,intrinsic,...,,,glutamate,0.793691,right,,fw000005,,,LTe07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123496,80010155886161191,720575940637206670,136546,19358,3303,,,,,afferent,...,,,serotonin,0.671949,right,OCN,fw123593,FBbt_00004242,,ocellar retinula cell
123497,80010087166777823,720575940633421613,136575,18726,3353,,,,,afferent,...,,,serotonin,0.743321,right,OCN,fw123594,FBbt_00004242,,ocellar retinula cell
123504,79728680842360672,720575940612567258,132026,19096,3069,,,,,afferent,...,,,serotonin,0.729409,center,OCN,fw123601,FBbt_00004242,,ocellar retinula cell
123505,79798912214529467,720575940630757750,133378,17839,3166,,,,,afferent,...,,,serotonin,0.544816,center,OCN,fw123602,FBbt_00004242,,ocellar retinula cell


In [14]:
# Keep only connections whose pre- AND postsynaptic neuron are both in `right`.
right_ids = right.root_id
pre_in_right = conn['pre_pt_root_id'].isin(right_ids)
post_in_right = conn['post_pt_root_id'].isin(right_ids)
conn = conn[pre_in_right & post_in_right]
# Collapse to one row per (pre, post) pair by summing the synapse counts.
pair_counts = conn.groupby(['pre_pt_root_id','post_pt_root_id'])['syn_count'].sum()
conn = pair_counts.reset_index()

## make coo

In [15]:
# Build the adjacency matrix as a sparse matrix straight from the edge list,
# without ever materialising a dense one.

# Give every right-side neuron a matrix index, in ascending root_id order.
nodes = set(right.root_id)
sorted_nodes = sorted(nodes)
nodes_to_idx = dict(zip(sorted_nodes, range(len(sorted_nodes))))

# Translate the neuron-to-neuron edge list from root ids to matrix indices.
conn['pre_idx'] = conn['pre_pt_root_id'].map(nodes_to_idx)
conn['post_idx'] = conn['post_pt_root_id'].map(nodes_to_idx)

# COO matrix: rows are presynaptic neurons, columns are postsynaptic neurons,
# values are synapse counts.
row, col, data = (conn[field].values for field in ('pre_idx', 'post_idx', 'syn_count'))
matrix_size = len(nodes)
coo = coo_matrix((data, (row, col)), shape=(matrix_size, matrix_size))

# Convert to CSC for the column-wise work that follows.
csc = coo.tocsc()

# Memory footprint: bytes held by the three arrays backing the CSC matrix.
csc_size = sum(part.nbytes for part in (csc.data, csc.indices, csc.indptr))
# reported in MB
csc_size/1e6

31.67386

In [16]:
# Dimensions of the adjacency matrix; it was built square, with one row and one column per node.
csc.shape

(25467, 25467)

In [17]:
# Display the sparse matrix summary (shape, dtype, number of stored elements, format).
csc

<25467x25467 sparse matrix of type '<class 'numpy.int64'>'
	with 2630999 stored elements in Compressed Sparse Column format>

In [18]:
# Total input synapse count of every neuron (column sums; columns are postsynaptic).
col_sums = csc.sum(axis=0)
# The sparse sum comes back 2-D; flatten it to a plain 1-D array.
col_totals = np.asarray(col_sums).ravel()
# Reciprocal of each column sum. Columns that sum to zero (neurons without incoming
# synapses) must be skipped to avoid dividing by zero. `where=` alone is not enough:
# without an explicit `out`, the skipped positions are left as uninitialised memory.
# Passing a zero-filled `out` makes their inverse a well-defined 0.
col_sums_with_inversion = np.reciprocal(
    col_totals.astype(float),
    out=np.zeros(col_totals.shape, dtype=float),
    where=col_totals != 0,
)
# Scale each column by the inverse of its sum, so every column with input sums to 1.
inprop = csc.multiply(col_sums_with_inversion)
# Reduce the precision to float32 to save memory.
inprop = inprop.astype(np.float32)

In [19]:
# Write the float32 input-proportion matrix to disk in scipy's .npz sparse format.
# NOTE(review): presumably the 'data/' directory must already exist — confirm before a fresh run.
sp.sparse.save_npz('data/adult_inprop_right_neuron.npz', inprop)

In [20]:
# Metadata table with one row per neuron; `idx` is the neuron's row/column
# position in the matrix.
meta = (
    pd.DataFrame({'root_id': list(nodes_to_idx.keys()), 'idx': list(nodes_to_idx.values())})
    .assign(cell_type=lambda frame: frame['root_id'].map(typedict))
    # The class annotations are looked up through the neuron's type label.
    .assign(
        super_class=lambda frame: frame['cell_type'].map(type_super_class_dict),
        cell_class=lambda frame: frame['cell_type'].map(type_class_dict),
        cell_sub_class=lambda frame: frame['cell_type'].map(type_sub_class_dict),
    )
)
# Adult, right hemisphere, neuron level: write the meta information to CSV.
meta.to_csv('data/adult_right_neuron_meta.csv')