# load packages

In [1]:
from scipy.sparse import coo_matrix
import numpy as np
import scipy as sp
import pandas as pd

To set up an access token, see the [neuprint-python quickstart](https://connectome-neuprint.github.io/neuprint-python/docs/quickstart.html).

In [2]:
# neuprint-python entry points used in this notebook: Client (server connection),
# fetch_adjacencies / fetch_neurons (queries), NeuronCriteria aliased as NC (neuron filter).
from neuprint import Client, fetch_adjacencies,fetch_neurons, NeuronCriteria as NC

# Connect to the neuprint.janelia.org server and select the 'optic-lobe:v1.1' dataset.
# NOTE(review): `c` is never passed explicitly to the fetch_* calls below — presumably
# neuprint uses the most recently created Client as the default; confirm.
c = Client('neuprint.janelia.org', dataset='optic-lobe:v1.1')

# getting data

In [3]:
# Pull the connectivity among all neurons matched by the catch-all type regex '.*',
# used for both the presynaptic (sources) and postsynaptic (targets) side.
# NOTE(review): a regex on `type` presumably only matches neurons that have a type — confirm.
# Returns the neuron table and the per-ROI connection table.
neuron_df, conn_df = fetch_adjacencies(
    sources=NC(type='.*'),
    targets=NC(type='.*'),
)

  0%|          | 0/270 [00:00<?, ?it/s]

# inspect

In [4]:
# Display the neuron table (one row per body: bodyId, type, instance).
neuron_df

Unnamed: 0,bodyId,type,instance
0,10009,CT1,CT1_L
1,10012,VS,VS_R
2,10015,HSN,HSN_R
3,10016,HSE,HSE_R
4,10023,HSS,HSS_R
...,...,...,...
53742,876890618,R1-R6,R1-R6_R
53743,966201979,R1-R6,R1-R6_R
53744,967550825,R7d,R7d_R
53745,983968288,R8d,R8d_R


In [9]:
# Sanity check: is `instance` simply `type` followed by a '_L' / '_R' side tag?
# Strip that trailing tag into a helper column, then tally how often it equals `type`.
side_suffix_removed = neuron_df['instance'].str.replace('_[LR]$','', regex=True)
neuron_df['instance_noside'] = side_suffix_removed
neuron_df['type'].eq(neuron_df['instance_noside']).value_counts()

True    53747
Name: count, dtype: int64

In [11]:
# List the rows whose side-stripped instance differs from the type;
# an empty frame means the two columns carry the same information.
type_differs = neuron_df['type'].ne(neuron_df['instance_noside'])
neuron_df.loc[type_differs]

Unnamed: 0,bodyId,type,instance,instance_noside


In [12]:
# `type` already holds the cell type, so `instance` is only needed for the side:
# take its last character and spell it out ('L' -> 'left', 'R' -> 'right').
side_names = {'L':'left','R':'right'}
neuron_df['side'] = neuron_df['instance'].str[-1].replace(side_names)
neuron_df

Unnamed: 0,bodyId,type,instance,instance_noside,side
0,10009,CT1,CT1_L,CT1,left
1,10012,VS,VS_R,VS,right
2,10015,HSN,HSN_R,HSN,right
3,10016,HSE,HSE_R,HSE,right
4,10023,HSS,HSS_R,HSS,right
...,...,...,...,...,...
53742,876890618,R1-R6,R1-R6_R,R1-R6,right
53743,966201979,R1-R6,R1-R6_R,R1-R6,right
53744,967550825,R7d,R7d_R,R7d,right
53745,983968288,R8d,R8d_R,R8d,right


In [13]:
# Number of neurons per cell type, most common first.
neuron_df['type'].value_counts()

type
R1-R6       2258
Tm3         1037
T3           976
T2a          939
L5           898
            ... 
MeVP53         1
LPi4b          1
aMe_TBD1       1
DNp11          1
PLP247         1
Name: count, Length: 883, dtype: int64

In [14]:
# Count the neurons whose type label contains 'unclear', per label.
has_unclear = neuron_df['type'].str.contains('unclear')
neuron_df.loc[has_unclear, 'type'].value_counts()

type
R8_unclear        321
R7_unclear        304
R7R8_unclear       80
LC10_unclear        7
ME_unclear          4
LO_unclear          3
LA_ME_unclear       3
MeTu4_unclear       2
MeVP6_unclear       2
LOP_unclear         2
LpMe_unclear        2
LOVP_unclear        2
LPC_unclear         2
LLPC_unclear        1
LPLC_unclear        1
LOP_LO_unclear      1
MeVC_unclear        1
Pm_unclear          1
TmY_unclear         1
LO_IB_unclear       1
Cm_unclear          1
Y_unclear           1
T4_unclear          1
ME_LOP_unclear      1
T5a_unclear         1
Tm_unclear          1
Name: count, dtype: int64

In [15]:
# Drop the literal '_unclear' marker so those neurons are grouped under the base type name.
neuron_df['type'] = neuron_df['type'].str.replace('_unclear', '', regex=False)

In [16]:
# Display the raw connection table (columns: bodyId_pre, bodyId_post, roi, weight).
conn_df

Unnamed: 0,bodyId_pre,bodyId_post,roi,weight
0,10009,10015,LO(R),7
1,10009,10015,NotPrimary,2
2,10009,10029,NotPrimary,3
3,10009,10029,LO(R),1
4,10009,10046,ME(R),1
...,...,...,...,...
6755786,1019617211,92214,ME(R),2
6755787,1019617211,99045,ME(R),3
6755788,1019617211,135920,ME(R),2
6755789,1019617211,138595,ME(R),2


In [17]:
# Distinct ROI labels that appear in the connection table.
conn_df['roi'].unique()

array(['LO(R)', 'NotPrimary', 'ME(R)', 'LOP(R)', 'PLP(R)', 'AME(R)',
       'PVLP(R)', 'LA(R)'], dtype=object)

In [18]:
# Collapse the per-ROI rows into one total weight per (pre, post) neuron pair.
pair_keys = ['bodyId_pre', 'bodyId_post']
conn = conn_df.groupby(pair_keys, as_index=False)['weight'].sum()
conn

Unnamed: 0,bodyId_pre,bodyId_post,weight
0,10009,10015,9
1,10009,10029,4
2,10009,10046,1
3,10009,10053,7
4,10009,10072,14
...,...,...,...
6503304,1019617211,92214,2
6503305,1019617211,99045,3
6503306,1019617211,135920,2
6503307,1019617211,138595,2


In [19]:
# Build the neuron-by-neuron weight matrix as a sparse matrix directly from the
# edge list, rather than materialising a dense matrix first.

# Give every neuron a stable matrix index: the position of its bodyId in sorted order.
nodes = set(neuron_df.bodyId)
sorted_nodes = sorted(nodes)  # Convert the set to a sorted list
nodes_to_idx = {node: num for num, node in enumerate(sorted_nodes)}

# Translate each edge's pre/post bodyId into its matrix row/column index
# (this is neuron-to-neuron connectivity, not type-to-type).
conn['pre_idx'] = conn.bodyId_pre.map(nodes_to_idx)
conn['post_idx'] = conn.bodyId_post.map(nodes_to_idx)

# A bodyId that is absent from neuron_df maps to NaN, which would turn the index
# columns into floats and only surface later as an obscure sparse-index error.
# Fail here with a clear message instead.
unmapped = conn[['pre_idx', 'post_idx']].isna().any(axis=1)
assert not unmapped.any(), (
    f'{int(unmapped.sum())} connections reference bodyIds missing from neuron_df')

# Create COO matrix: rows are presynaptic neurons, columns are postsynaptic neurons
row = conn['pre_idx'].values
col = conn['post_idx'].values
data = conn['weight'].values
matrix_size = len(nodes)
coo = coo_matrix((data, (row, col)), shape=(matrix_size, matrix_size))

# then turn it into csc matrix
csc = coo.tocsc()

# Memory footprint of the CSC representation: values + row indices + column pointers
csc_size = csc.data.nbytes  # Size of the data array
csc_size += csc.indices.nbytes  # Size of the indices array
csc_size += csc.indptr.nbytes  # Size of the index pointer array
# number of MB
csc_size/1e6

78.2547

In [20]:
# Dimensions of the sparse connectivity matrix (rows, columns).
csc.shape

(53747, 53747)

In [21]:
# Normalise each column by its sum, so every entry becomes a proportion of that column's total.
col_sums = csc.sum(axis=0)
# Flatten the 1 x N result into a plain 1-D float array (ravel stays 1-D even for N == 1).
col_totals = np.asarray(col_sums).ravel().astype(float)
# Handling division by zero in case some columns have a sum of zero,
# that is, where a neuron doesn't have incoming synapses.
# `where=` skips those entries, but without `out=` NumPy leaves the skipped slots
# uninitialised (arbitrary memory), so write into an explicit zero-filled buffer.
col_sums_with_inversion = np.reciprocal(
    col_totals, out=np.zeros_like(col_totals), where=col_totals != 0)
# Multiply each column by the inverse of its sum
inprop = csc.multiply(col_sums_with_inversion)
# and then reduce the precision to float32 to save memory
inprop = inprop.astype(np.float32)

In [22]:
# Write the column-normalised matrix to disk in SciPy's sparse .npz format.
inprop_path = 'data/neuprint_inprop_optic.npz'
sp.sparse.save_npz(inprop_path, inprop)

In [23]:
# Re-display the neuron table, now including the derived instance_noside and side columns.
neuron_df

Unnamed: 0,bodyId,type,instance,instance_noside,side
0,10009,CT1,CT1_L,CT1,left
1,10012,VS,VS_R,VS,right
2,10015,HSN,HSN_R,HSN,right
3,10016,HSE,HSE_R,HSE,right
4,10023,HSS,HSS_R,HSS,right
...,...,...,...,...,...
53742,876890618,R1-R6,R1-R6_R,R1-R6,right
53743,966201979,R1-R6,R1-R6_R,R1-R6,right
53744,967550825,R7d,R7d_R,R7d,right
53745,983968288,R8d,R8d_R,R8d,right


# Neurotransmitter (NT) predictions

In [24]:
# Fetch the per-neuron property table for every bodyId in neuron_df.
# fetch_neurons returns two objects; only the first is kept, the second is discarded into `_`.
meta, _ = fetch_neurons(neuron_df.bodyId)

In [25]:
# Display the per-neuron property table returned by fetch_neurons.
meta

Unnamed: 0,bodyId,instance,type,pre,post,downstream,upstream,size,status,statusLabel,...,predictedNt,otherNt,celltypeTotalNtPredictions,assignedOlHex1,mancGroup,ntReference,consensusNt,exitNerve,inputRois,outputRois
0,10009,CT1_L,CT1,31713,82800,189641,82800,30164249462,Traced,Roughly traced,...,gaba,,31535,,,Takemura et al 2017,gaba,,"[LAL(R), LO(R), LOP(R), LOP_R_col_02_09, LOP_R...","[LAL(R), LO(R), LOP(R), LOP_R_col_05_19, LOP_R..."
1,10012,VS_R,VS,923,18867,4817,18867,18184684149,Traced,Roughly traced,...,unclear,,2756,,,Zhao et al 2023,acetylcholine,,"[GNG, IPS(R), LOP(R), LOP_R_col_08_02, LOP_R_c...","[GNG, IPS(R), LOP(R), LOP_R_col_12_03, LOP_R_c..."
2,10015,HSN_R,HSN,913,17265,4903,17265,19592174183,Traced,Roughly traced,...,unclear,,487,,,Zhao et al 2023,acetylcholine,,"[GNG, IPS(R), LO(R), LOP(R), LOP_R_col_16_20, ...","[GNG, IPS(R), LO(R), LOP(R), LOP_R_col_17_32, ..."
3,10016,HSE_R,HSE,515,18816,2645,18816,19390346866,Traced,Roughly traced,...,unclear,,35,,,Zhao et al 2023,acetylcholine,,"[GNG, IPS(R), LO(R), LOP(R), LOP_R_col_07_18, ...","[GNG, IPS(R), LOP(R), LOP_R_col_14_12, LOP_R_c..."
4,10023,HSS_R,HSS,886,18241,5382,18241,19387850164,Traced,Roughly traced,...,unclear,,205,,,Zhao et al 2023,acetylcholine,,"[GNG, IPS(R), LOP(R), LOP_R_col_03_08, LOP_R_c...","[GNG, IPS(R), LOP(R), LOP_R_col_05_12, LOP_R_c..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53742,876890618,R1-R6_R,R1-R6,1,0,3,0,65168,Anchor,Primary Anchor,...,unclear,,74921,,,Davis et al 2020,histamine,,[],"[LA(R), OL(R)]"
53743,966201979,R1-R6_R,R1-R6,21,14,91,14,48883331,Orphan,Orphan hotknife,...,unclear,,74921,,,Davis et al 2020,histamine,,"[LA(R), OL(R)]","[LA(R), OL(R)]"
53744,967550825,R7d_R,R7d,40,89,306,89,86894628,Traced,Prelim Roughly traced,...,unclear,,2555,,,,histamine,,"[ME(R), ME_R_col_30_18, ME_R_col_31_17, ME_R_c...","[ME(R), ME_R_col_31_17, ME_R_col_32_18, ME_R_l..."
53745,983968288,R8d_R,R8d,28,88,141,88,127104018,Traced,Prelim Roughly traced,...,unclear,acetylcholine,2407,,,,histamine,,"[LA(R), ME(R), ME_R_col_35_35, ME_R_layer_01, ...","[ME(R), ME_R_col_35_35, ME_R_layer_01, ME_R_la..."


In [26]:
# List every property column available in the fetched table.
meta.columns

Index(['bodyId', 'instance', 'type', 'pre', 'post', 'downstream', 'upstream',
       'size', 'status', 'statusLabel', 'somaLocation', 'roiInfo',
       'somaNeuromere', 'assignedOlHex2', 'locationType', 'totalNtPredictions',
       'itoleeHl', 'flywireType', 'matchingNotes', 'birthtime',
       'celltypePredictedNtConfidence', 'flywireId', 'dimorphism',
       'celltypePredictedNt', 'predictedNtConfidence', 'otherNtReference',
       'entryNerve', 'trumanHl', 'hemilineage', 'mcnsSerial', 'synweight',
       'serialMotif', 'predictedNt', 'otherNt', 'celltypeTotalNtPredictions',
       'assignedOlHex1', 'mancGroup', 'ntReference', 'consensusNt',
       'exitNerve', 'inputRois', 'outputRois'],
      dtype='object')

In [27]:
# Show the neurotransmitter-related columns side by side, next to the cell type.
nt_columns = [
    'type',
    'consensusNt',
    'totalNtPredictions',
    'celltypePredictedNt',
    'otherNtReference',
    'predictedNt',
    'otherNt',
    'ntReference',
    'predictedNtConfidence',
    'celltypePredictedNtConfidence',
    'celltypeTotalNtPredictions',
]
meta[nt_columns]

Unnamed: 0,type,consensusNt,totalNtPredictions,celltypePredictedNt,otherNtReference,predictedNt,otherNt,ntReference,predictedNtConfidence,celltypePredictedNtConfidence,celltypeTotalNtPredictions
0,CT1,gaba,31535,gaba,,gaba,,Takemura et al 2017,0.776037,0.776037,31535
1,VS,acetylcholine,437,dopamine,,unclear,,Zhao et al 2023,0.334622,0.394740,2756
2,HSN,acetylcholine,487,histamine,,unclear,,Zhao et al 2023,0.456840,0.456840,487
3,HSE,acetylcholine,35,unclear,,unclear,,Zhao et al 2023,0.366792,0.366792,35
4,HSS,acetylcholine,205,histamine,,unclear,,Zhao et al 2023,0.468701,0.468701,205
...,...,...,...,...,...,...,...,...,...,...,...
53742,R1-R6,histamine,0,histamine,,unclear,,Davis et al 2020,0.000000,0.949296,74921
53743,R1-R6,histamine,21,histamine,,unclear,,Davis et al 2020,0.960561,0.949296,74921
53744,R7d,histamine,40,histamine,,unclear,,,0.865494,0.730862,2555
53745,R8d,histamine,28,histamine,Davis et al 2020,unclear,acetylcholine,,0.722538,0.751943,2407


In [28]:
# Cross-tabulate the consensus neurotransmitter against the cell-type-level prediction
# (missing values are counted too) to see how often the two agree.
meta.value_counts(subset=['consensusNt', 'celltypePredictedNt'], dropna=False)

consensusNt    celltypePredictedNt
acetylcholine  acetylcholine          32279
glutamate      glutamate              10283
gaba           gaba                    5424
histamine      histamine               3648
unclear        glutamate                892
               dopamine                 563
               unclear                  556
               serotonin                 28
               octopamine                21
octopamine     octopamine                12
acetylcholine  unclear                   10
               dopamine                   9
serotonin      serotonin                  7
glutamate      octopamine                 5
dopamine       dopamine                   4
gaba           dopamine                   2
acetylcholine  octopamine                 2
               histamine                  2
Name: count, dtype: int64

In [29]:
# Label every neuron with its cell-type-level prediction (celltypePredictedNt).
bodyid_to_nt = meta.set_index('bodyId')['celltypePredictedNt'].to_dict()

# Sign convention: for now everything is treated as excitatory (+1) except GABA and
# glutamate (-1).
# Histamine is deliberately +1: per papers such as https://pubmed.ncbi.nlm.nih.gov/7472349/,
# light makes photoreceptors release *less* histamine, and histamine inhibits the
# large monopolar cells. Exciting a photoreceptor therefore excites the large
# monopolar cells through disinhibition, which is modelled here as excitation.
nt_to_sign = {
    **dict.fromkeys(meta['celltypePredictedNt'].unique(), 1),
    'gaba': -1,
    'glutamate': -1,
}

# save meta

In [30]:
# Assemble the per-neuron metadata table that accompanies the saved matrix:
# bodyId, cell type, side, matrix index, neurotransmitter and its sign.
# NOTE: this rebinds `meta`, replacing the property table fetched from neuPrint;
# the cell that builds bodyid_to_nt / nt_to_sign must be run before this one.
meta = (
    neuron_df[['bodyId', 'type', 'side']]
    .rename(columns={'type': 'cell_type'})  # returns a new frame; neuron_df is untouched
)
# Plain column assignment is used for the new columns: creating a column through
# `.loc[:, ['name']]` relies on pandas' enlargement rules, whose resulting dtype
# has differed between pandas versions.
meta['idx'] = meta['bodyId'].map(nodes_to_idx)
meta['nt'] = meta['bodyId'].map(bodyid_to_nt)
meta['sign'] = meta['nt'].map(nt_to_sign)

meta.to_csv('data/neuprint_meta_optic.csv', index=False)
meta

Unnamed: 0,bodyId,cell_type,side,idx,nt,sign
0,10009,CT1,left,0,gaba,-1
1,10012,VS,right,1,dopamine,1
2,10015,HSN,right,2,histamine,1
3,10016,HSE,right,3,unclear,1
4,10023,HSS,right,4,histamine,1
...,...,...,...,...,...,...
53742,876890618,R1-R6,right,53742,histamine,1
53743,966201979,R1-R6,right,53743,histamine,1
53744,967550825,R7d,right,53744,histamine,1
53745,983968288,R8d,right,53745,histamine,1
