In [1]:
## This script generates a list of 4Ns and their properties, traced originally from mlSEZt cells. 
## It's based on a more general script that can find inputs or outputs to any cell.
## Acknowledgement to Barbara Noro (Axel lab) for providing most of the initial code!
## You must supply your own neuPrint token (see the TOKEN setting below); never share or commit a personal token.

# Import modules
import os
import sys
from getpass import getpass

import numpy as np
import pandas as pd

# Install and set up neuprint
!pip install neuprint-python 

from neuprint import Client

# select dataset
DATASET = 'hemibrain:v1.2.1'

# Token to connect to neuPrint.
# SECURITY: a personal token must never be hardcoded in a shared notebook. It is read
# from the NEUPRINT_APPLICATION_CREDENTIALS environment variable when set; otherwise
# you are prompted for it (the typed value is not echoed or stored in the notebook).
# NOTE(review): a real token was previously committed on this line - it should be
# treated as compromised and revoked/regenerated in the neuPrint account page.
TOKEN = os.environ.get("NEUPRINT_APPLICATION_CREDENTIALS") or getpass("neuPrint token: ")

# Creating the client registers the connection used by the fetch_* calls below.
c = Client('neuprint.janelia.org', dataset=DATASET, token=TOKEN)

print("\n##########\n")
print(f"NeuPrint dataset = {DATASET}")

from neuprint import NeuronCriteria as NC
# for all possible NeuronCriteria options, see:
# https://connectome-neuprint.github.io/neuprint-python/docs/neuroncriteria.html

from neuprint import fetch_adjacencies
# for full list of options, see:
# https://connectome-neuprint.github.io/neuprint-python/docs/queries.html#neuprint.queries.fetch_adjacencies

from neuprint.utils import merge_neuron_properties

from neuprint import fetch_roi_hierarchy
from neuprint import NeuronCriteria as NC
from neuprint import fetch_neurons
from neuprint import fetch_adjacencies
from neuprint import merge_neuron_properties


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/

##########

NeuPrint dataset = hemibrain:v1.2.1


In [2]:
# ---------------------------------------------------------------------------
# Search configuration - everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------

# Query neurons: the 3Ns, copied by hand from the 3N identification notebook.
NEURON_ID = [
    420973599, 359892669, 575197482, 421313563,
    421650982, 360236724, 420623873, 574710121,
    604735525, 573346324, 329919036, 390271033,
    393340402, 297519736, 391311186, 359240144,
    485430336, 578521941, 607820937, 298254384,
    5813011119, 608534097, 359926923, 483711811,
    5813040707, 420965117, 328861282, 5813047255,
    329206628, 452689494, 451663172, 5813019955,
]

# Label for the query population.
NEURON_NAME = "3N"

# Minimum weight that was used for the set of neurons serving as inputs.
# (Informational: not referenced by the cells visible in this notebook.)
MIN_INPUT_WEIGHT = 3

# Direction of the search relative to NEURON_ID:
#   "output" -> post-synaptic partners, "input" -> pre-synaptic partners.
SEARCH_TYPE = "output" 

# ROI restriction.
#   ROI_SEARCH is either 'inside' or 'outside' of the ROIs named in LIMIT_ROI
#   (LIMIT_ROI = ['roi1', 'roi2', ...]).
ROI_SEARCH = 'outside'  # i.e. keep every region that is NOT listed in LIMIT_ROI
LIMIT_ROI = []          # empty: no region is excluded

# Partner-type restriction.
#   None  -> no restriction
#   'PN'  -> keep only input/output neurons whose type name contains 'PN'
#   Separate several types with '|' (e.g. 'PN|LN').
LIMIT_TYPE = None

# Minimum synaptic weight for a connection to be considered.
MIN_WEIGHT = 3 

# True prints intermediate outputs; False suppresses them.
VERBOSE = True

In [3]:
# Define directionality based on SEARCH_TYPE (i.e. pre/post).
# TYPE / BODYID name the connection-table columns describing the partner neurons we
# are searching for; TYPE_R / BODYID_R name the columns for the opposite side of the
# connection (the query neurons listed in NEURON_ID).
if SEARCH_TYPE == "input" :
    TYPE = 'type_pre'
    TYPE_R = 'type_post'
    BODYID = 'bodyId_pre'
    BODYID_R = 'bodyId_post'  
elif SEARCH_TYPE == "output" :
    TYPE = 'type_post'
    TYPE_R = 'type_pre'
    BODYID = 'bodyId_post'
    BODYID_R = 'bodyId_pre'
else:
    # Fail here with a clear message instead of a NameError several cells later.
    raise ValueError(f"SEARCH_TYPE must be 'input' or 'output', got {SEARCH_TYPE!r}")

In [4]:
# Build the pre-/post-synaptic selection handed to the adjacency query.
CRITERIA = NC(bodyId=NEURON_ID)

# The query neurons sit on one side of the connection; the other side is left as
# None, which stands for "all neurons".
if SEARCH_TYPE == "input" :
    SOURCES, TARGETS = None, CRITERIA
elif SEARCH_TYPE == "output" :
    SOURCES, TARGETS = CRITERIA, None

# Optionally echo the resolved criteria.
if VERBOSE == True :
    for label, selection in (("Sources", SOURCES), ("Targets", TARGETS)):
        print(f"\n{label} =\n\t", selection)


Sources =
	 NeuronCriteria("n", bodyId=[420973599, 359892669, 575197482, 421313563, 421650982, 360236724, 420623873, 574710121, 604735525, 573346324, 329919036, 390271033, 393340402, 297519736, 391311186, 359240144, 485430336, 578521941, 607820937, 298254384, 5813011119, 608534097, 359926923, 483711811, 5813040707, 420965117, 328861282, 5813047255, 329206628, 452689494, 451663172, 5813019955], label="Segment")

Targets =
	 None


In [5]:
# Find connections between the query neurons and their partners.
# The connection table comes back split by ROI, so a single pre/post pair can
# occupy several rows until it is summed below.
neuron_df, connection_df = fetch_adjacencies(
        sources=SOURCES,
        targets=TARGETS,
        min_total_weight=MIN_WEIGHT)

# Limit by ROI, honoring ROI_SEARCH:
#   'outside' keeps rows whose ROI is NOT in LIMIT_ROI (an empty LIMIT_ROI keeps everything),
#   'inside'  keeps only rows whose ROI IS in LIMIT_ROI.
in_limit_roi = connection_df['roi'].isin(LIMIT_ROI)
if ROI_SEARCH == 'outside':
    connection_df = connection_df[~in_limit_roi]
elif ROI_SEARCH == 'inside':
    connection_df = connection_df[in_limit_roi]
else:
    raise ValueError(f"ROI_SEARCH must be 'inside' or 'outside', got {ROI_SEARCH!r}")

# Group partner neurons that synapse at multiple ROIs
# (sum the per-ROI rows so each pre/post pair has a single total weight).
connection_df = connection_df.groupby(['bodyId_pre', 'bodyId_post'], as_index=False)['weight'].sum()

# add neuron 'type' from neuron_df to connection_df (makes 'type_pre' and 'type_post')
connection_df = merge_neuron_properties(neuron_df, connection_df, properties='type')

# Limit partner types: keep only partners whose type name matches LIMIT_TYPE
# ('|' separates alternatives, e.g. 'PN|LN'). Partners without a type never match.
if LIMIT_TYPE is not None:
    connection_df = connection_df[connection_df[TYPE].str.contains(LIMIT_TYPE, na=False)]

# Now group by partner so we get a list of unique partner cells.
g1 = [BODYID]
g2 = [BODYID,TYPE]

# Count how many query neurons connect to each partner neuron
count_df = connection_df.groupby(g1).size().reset_index(name='count')

# Record which query neurons (and which of their types) connect to each partner.
# NOTE(review): np.unique sorts its input, so this presumably requires every query
# neuron to have a non-null type - confirm before switching SEARCH_TYPE.
which_df = connection_df.groupby(g1)[BODYID_R].apply(lambda x: list(np.unique(x))).reset_index(name='neurons')
types_df = connection_df.groupby(g1)[TYPE_R].apply(lambda x: list(np.unique(x))).reset_index(name='neuron_types')

# Collapse connection_df to one row per partner neuron (total weight from all query neurons).
# NOTE(review): groupby drops rows whose key is null by default, so partners without
# an assigned type are presumably excluded here - confirm this is intended.
connection_df = connection_df.groupby(g2, as_index=False)['weight'].sum()

# calculate percent weight (share of the summed weight over all partners)
connection_df['pct_weight'] = 100. * connection_df['weight'] / connection_df['weight'].sum()

# add count and neuron lists to connection_df
connection_df = (
    connection_df
    .merge(count_df, on=g1)
    .merge(which_df, on=g1)
    .merge(types_df, on=g1)
)

# Sort the list by connection weights, strongest first
connection_df = connection_df.sort_values('weight', ascending=False)

In [6]:
# Display the full partner table (the author's run listed 1743 neurons).
# This is the overall list of all 4Ns; it still includes the mlSEZt cells,
# which are filtered out in the next step.
connection_df

Unnamed: 0,bodyId_post,type_post,weight,pct_weight,count,neurons,neuron_types
43,297580589,SMP548,363,1.394705,16,"[329206628, 329919036, 359240144, 390271033, 3...","[AVLP024, LHAV1e1, SLP015_c, SLP015_e, SLP057,..."
311,360591860,SLP279,354,1.360126,8,"[328861282, 329919036, 390271033, 391311186, 3...","[LHAV1e1, LHCENT1, SLP015_e, SLP018, SLP179_b,..."
776,517506265,LHCENT4,297,1.141123,4,"[328861282, 360236724, 575197482, 5813019955]","[LHAV4l1, LHCENT1, SLP057, SMP389]"
808,542634818,DM1_lPN,247,0.949014,1,[328861282],[LHCENT1]
1554,5813020988,LHMB1,238,0.914435,2,"[328861282, 5813019955]","[LHCENT1, SLP057]"
...,...,...,...,...,...,...,...
1092,668489498,LHAV1d1,3,0.011526,1,[328861282],[LHCENT1]
1097,669878695,CL078,3,0.011526,1,[359892669],[SLP132]
1099,670297553,CRE103,3,0.011526,1,[328861282],[LHCENT1]
1102,671255587,mAL4,3,0.011526,1,[421650982],[aSP-g3B]


In [7]:
# Remove the mlSEZt cells from the 4N list.
# A row is dropped when its neuPrint type is exactly one of the labels below
# (the comparison is case-sensitive); rows without a type are kept.
mlsezt_types = ['mAL3B', 'mAL3A', 'mAL4']
is_mlsezt = connection_df['type_post'].isin(mlsezt_types)
connection2_df = connection_df.loc[~is_mlsezt]

# Display the filtered list (the author's run listed 1720 neurons)
connection2_df

Unnamed: 0,bodyId_post,type_post,weight,pct_weight,count,neurons,neuron_types
43,297580589,SMP548,363,1.394705,16,"[329206628, 329919036, 359240144, 390271033, 3...","[AVLP024, LHAV1e1, SLP015_c, SLP015_e, SLP057,..."
311,360591860,SLP279,354,1.360126,8,"[328861282, 329919036, 390271033, 391311186, 3...","[LHAV1e1, LHCENT1, SLP015_e, SLP018, SLP179_b,..."
776,517506265,LHCENT4,297,1.141123,4,"[328861282, 360236724, 575197482, 5813019955]","[LHAV4l1, LHCENT1, SLP057, SMP389]"
808,542634818,DM1_lPN,247,0.949014,1,[328861282],[LHCENT1]
1554,5813020988,LHMB1,238,0.914435,2,"[328861282, 5813019955]","[LHCENT1, SLP057]"
...,...,...,...,...,...,...,...
1090,668148078,LHAV4g8,3,0.011526,1,[328861282],[LHCENT1]
1092,668489498,LHAV1d1,3,0.011526,1,[328861282],[LHCENT1]
1097,669878695,CL078,3,0.011526,1,[359892669],[SLP132]
1099,670297553,CRE103,3,0.011526,1,[328861282],[LHCENT1]


In [8]:
# Identify the output ROIs of the 4Ns.
# Fetch the neuron properties of every remaining 4N in one query, keep only the
# body ID and output-ROI list, and join them onto the connection table.
ids_4N = connection2_df['bodyId_post'].tolist()
criteria = NC(bodyId=ids_4N)
neuron_4N_df, roi_4N_counts_df = fetch_neurons(criteria)

# Rename bodyId -> bodyId_post so the key matches the connection table.
output_of_4Ns_df = (
    neuron_4N_df.loc[:, ['bodyId', 'outputRois']]
    .rename(columns={'bodyId': 'bodyId_post'})
)

# Default (inner) merge: a 4N only appears if fetch_neurons returned a row for it.
merged_4N_qualities = connection2_df.merge(output_of_4Ns_df, on='bodyId_post')

# Display the merged dataframe
merged_4N_qualities

Unnamed: 0,bodyId_post,type_post,weight,pct_weight,count,neurons,neuron_types,outputRois
0,297580589,SMP548,363,1.394705,16,"[329206628, 329919036, 359240144, 390271033, 3...","[AVLP024, LHAV1e1, SLP015_c, SLP015_e, SLP057,...","[AVLP(R), INP, LH(R), PLP(R), SCL(R), SIP(R), ..."
1,360591860,SLP279,354,1.360126,8,"[328861282, 329919036, 390271033, 391311186, 3...","[LHAV1e1, LHCENT1, SLP015_e, SLP018, SLP179_b,...","[INP, SCL(R), SIP(R), SLP(R), SMP(L), SMP(R), ..."
2,517506265,LHCENT4,297,1.141123,4,"[328861282, 360236724, 575197482, 5813019955]","[LHAV4l1, LHCENT1, SLP057, SMP389]","[AVLP(R), CA(R), INP, LH(R), MB(+ACA)(R), MB(R..."
3,542634818,DM1_lPN,247,0.949014,1,[328861282],[LHCENT1],"[AL(R), AL-DC4(R), AL-DM1(R), AL-DM4(R), AL-DP..."
4,5813020988,LHMB1,238,0.914435,2,"[328861282, 5813019955]","[LHCENT1, SLP057]","[CA(R), CRE(-ROB,-RUB)(R), CRE(R), INP, LH(R),..."
...,...,...,...,...,...,...,...,...
1715,668148078,LHAV4g8,3,0.011526,1,[328861282],[LHCENT1],[LH(R)]
1716,668489498,LHAV1d1,3,0.011526,1,[328861282],[LHCENT1],"[LH(R), PVLP(R), SLP(R), SNP(R), VLNP(R)]"
1717,669878695,CL078,3,0.011526,1,[359892669],[SLP132],"[AVLP(R), IB, ICL(R), INP, LH(R), PLP(R), SCL(..."
1718,670297553,CRE103,3,0.011526,1,[328861282],[LHCENT1],"[CRE(-ROB,-RUB)(R), CRE(-RUB)(L), CRE(L), CRE(..."


In [9]:
# Lastly, calculate the % of total synaptic input each 4N receives from the
# population of top 3Ns, and keep only the 4Ns that receive at least
# MIN_4N_SYNAPSES synapses from 3Ns.
MIN_4N_SYNAPSES = 20
PCT_COLUMN = '%Synaptic Input from 3Ns'

# An empty ID list must not reach the query, so stop early with a clear error.
if merged_4N_qualities.empty:
    raise ValueError("merged_4N_qualities is empty: there are no 4Ns to analyze")

# Fetch the properties of all 4Ns in ONE query instead of one query per neuron.
criteria = NC(bodyId=list(merged_4N_qualities['bodyId_post']))
appended_data, _roi_counts = fetch_neurons(criteria)

# Pair each 4N's total upstream synapse count with its summed weight from the 3Ns.
# Columns are addressed by name ('upstream', 'weight') rather than by position, so
# the result does not depend on the column order of either table.
fraction_upstream_3N_synapses = (
    appended_data[['bodyId', 'upstream']]
    .rename(columns={'bodyId': 'bodyId_post'})
    .merge(merged_4N_qualities[['bodyId_post', 'weight']], on='bodyId_post')
)

# Percentage of each 4N's upstream synapses that come from the 3Ns.
fraction_upstream_3N_synapses[PCT_COLUMN] = (
    100 * fraction_upstream_3N_synapses['weight'] / fraction_upstream_3N_synapses['upstream']
)
fraction_upstream_3N_synapses = fraction_upstream_3N_synapses[['bodyId_post', PCT_COLUMN]]

# Attach the percentage to the 4N table. Dropping a stale copy of the column first
# keeps this cell safe to re-run (otherwise the merge would create _x/_y duplicates).
merged_4N_qualities = (
    merged_4N_qualities
    .drop(columns=[PCT_COLUMN], errors='ignore')
    .merge(fraction_upstream_3N_synapses, on='bodyId_post')
)
top_4N_qualities = merged_4N_qualities[merged_4N_qualities['weight'] >= MIN_4N_SYNAPSES]


# Look at the final table with the % input from 3Ns in the final column.
# (The original run reported 322 top 4Ns meeting the >= 20 synapse criterion.)
top_4N_qualities

Unnamed: 0,bodyId_post,type_post,weight,pct_weight,count,neurons,neuron_types,outputRois,%Synaptic Input from 3Ns
0,297580589,SMP548,363,1.394705,16,"[329206628, 329919036, 359240144, 390271033, 3...","[AVLP024, LHAV1e1, SLP015_c, SLP015_e, SLP057,...","[AVLP(R), INP, LH(R), PLP(R), SCL(R), SIP(R), ...",6.292252
1,360591860,SLP279,354,1.360126,8,"[328861282, 329919036, 390271033, 391311186, 3...","[LHAV1e1, LHCENT1, SLP015_e, SLP018, SLP179_b,...","[INP, SCL(R), SIP(R), SLP(R), SMP(L), SMP(R), ...",8.432587
2,517506265,LHCENT4,297,1.141123,4,"[328861282, 360236724, 575197482, 5813019955]","[LHAV4l1, LHCENT1, SLP057, SMP389]","[AVLP(R), CA(R), INP, LH(R), MB(+ACA)(R), MB(R...",4.553120
3,542634818,DM1_lPN,247,0.949014,1,[328861282],[LHCENT1],"[AL(R), AL-DC4(R), AL-DM1(R), AL-DM4(R), AL-DP...",2.365900
4,5813020988,LHMB1,238,0.914435,2,"[328861282, 5813019955]","[LHCENT1, SLP057]","[CA(R), CRE(-ROB,-RUB)(R), CRE(R), INP, LH(R),...",3.469894
...,...,...,...,...,...,...,...,...,...
317,609548390,AVLP315,20,0.076843,3,"[359892669, 483711811, 5813019955]","[SLP057, SLP132, SLP187]","[AVLP(R), INP, LH(R), PLP(R), PVLP(R), SCL(R),...",0.535189
318,360246734,LHAD3a2_a,20,0.076843,1,[328861282],[LHCENT1],"[LH(R), SIP(R), SLP(R), SNP(R)]",6.269592
319,792364888,Z_vPNml1,20,0.076843,2,"[452689494, 608534097]","[SLP215, SMP550]","[AL(R), AL-V(R), AL-VP5(R), AVLP(R), GNG, INP,...",4.454343
320,643401816,SMP055,20,0.076843,2,"[452689494, 5813011119]","[SLP216, SMP550]","[AOTU(R), ATL(R), CAN(R), CRE(-ROB,-RUB)(R), C...",0.936330


In [11]:
# This code block saves the top_4N_qualities dataframe as a .csv file in your Google Drive.
# (Colab only: google.colab is not available in other Jupyter environments.)

from google.colab import drive

# Mount at an absolute path and build the output path from the same value, so the
# export does not depend on the notebook's current working directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)
top_4N_qualities.to_csv(f'{DRIVE_MOUNT_POINT}/My Drive/top_4N_qualities.csv', encoding='utf-8', index=False)

Mounted at drive
