In [1]:
import pandas as pd
import neuprint as neu
import numpy as np

In [2]:
# ---- Settings: edit this cell before running the rest of the notebook ----
#
# What the notebook does with these values: for every neuron in body_ID_List it asks neuprint
# for that neuron's connection partners in the chosen direction, keeps only the `limit`
# partners with the largest connection weight, and stacks the per-neuron results into one
# table. For example, if the first neuron on the list has 1015 upstream partners and
# limit is 20, only the 20 most heavily connected of them are kept; the same is done for
# every other neuron on the list.
#
# The last three cells of the notebook save that table to disk in three different layouts.

# Body IDs of the starting neurons, separated by commas.
body_ID_List = []

# Synapse-weight cutoff used by the query's `w.weightHP` filter.
weight_min = 1

# Maximum number of partners kept per starting neuron (heaviest connections first).
limit = 20

# Which partners to collect: "up" for upstream neurons, "down" for downstream neurons.
direction = "up"

# Connection to neuprint. Keep the server address as it is and put your own neuprint
# authentication token in the second argument.
client = neu.Client('emdata1.int.janelia.org:11000', '')

In [3]:
def createQueryDataframe(body_ID_List, weight_min, limit, direction=None):
    """Fetch the heaviest connection partners of each body and stack them in one DataFrame.

    One Cypher query per entry of ``body_ID_List`` is sent through the notebook-level
    ``client`` (``client.fetch_custom``). Each query keeps connections whose
    ``weightHP`` is at least ``weight_min``, orders them by weight (descending) and
    returns at most ``limit`` rows. The per-body results are concatenated in the
    order of ``body_ID_List`` with a fresh 0..n-1 index.

    Parameters
    ----------
    body_ID_List : iterable
        Body IDs of the starting neurons. Each entry is converted with ``int()``
        before being placed in the query text, so non-numeric entries raise
        instead of being spliced into the Cypher string.
    weight_min : number
        Inclusive lower bound on ``weightHP`` for a connection to be returned.
    limit : int
        Maximum number of partner rows returned per starting neuron.
    direction : {"up", "down"}, optional
        "up" collects upstream partners, "down" collects downstream partners.
        When omitted, the notebook-level ``direction`` variable is used, which
        keeps existing three-argument calls working.

    Returns
    -------
    pandas.DataFrame
        Columns ``input_bodyID``, ``input_name``, ``weight_HP``, ``output_bodyID``
        and ``output_name``. The starting neuron fills the ``output_*`` columns
        for "up" and the ``input_*`` columns for "down". An empty
        ``body_ID_List`` yields an empty frame with these columns.

    Raises
    ------
    ValueError
        If the direction is neither "up" nor "down".
    """
    if direction is None:
        # Backward compatibility: earlier callers set `direction` as a notebook
        # global instead of passing it in.
        direction = globals().get("direction")

    # n is always the starting neuron, o the partner; only the arrow and the
    # input/output role of each side depend on the direction.
    if direction == "down":
        pattern = '(n:`hemibrain-Neuron`)-[w:ConnectsTo]->(o:`hemibrain-Neuron`)'
        n_role, o_role = "input", "output"
    elif direction == "up":
        pattern = '(n:`hemibrain-Neuron`)<-[w:ConnectsTo]-(o:`hemibrain-Neuron`)'
        n_role, o_role = "output", "input"
    else:
        raise ValueError('direction must be "up" or "down", got ' + repr(direction))

    columns = [n_role + "_bodyID", n_role + "_name", "weight_HP",
               o_role + "_bodyID", o_role + "_name"]

    query_template = (
        "MATCH {pattern}"
        " WHERE n.bodyId = {body} AND w.weightHP >= {weight_min}"
        " RETURN n.bodyId AS {n}_bodyID, n.name AS {n}_name, w.weightHP AS weight_HP,"
        " o.bodyId AS {o}_bodyID, o.name AS {o}_name"
        " ORDER BY weight_HP DESC"
        " LIMIT {limit}"
    )

    # Collect the per-body frames and concatenate once at the end instead of
    # re-copying the growing result on every iteration.
    frames = []
    for body in body_ID_List:
        query = query_template.format(
            pattern=pattern,
            body=int(body),
            weight_min=weight_min,
            n=n_role,
            o=o_role,
            limit=limit,
        )
        frames.append(client.fetch_custom(query))

    if not frames:
        # Nothing was queried: hand back an empty table with the expected columns
        # so the sorting/aggregation cells still have something to work on.
        return pd.DataFrame(columns=columns)

    return pd.concat(frames, ignore_index=True)


In [4]:
# Run the query for every body ID in body_ID_List and display the combined dataframe:
# each starting neuron together with up to `limit` of its most heavily connected partners.
data = createQueryDataframe(body_ID_List, weight_min, limit)
data


Unnamed: 0,output_bodyID,output_name,weight_HP,input_bodyID,input_name
0,612371421,MBON01(y5B'2a)(aSMPB)_R,323,425790257,APL_R(1)
1,612371421,MBON01(y5B'2a)(aSMPB)_R,144,1078693835,MBON09(y3B'1)(aLAL3)_R_001
2,612371421,MBON01(y5B'2a)(aSMPB)_R,131,612371421,MBON01(y5B'2a)(aSMPB)_R
3,612371421,MBON01(y5B'2a)(aSMPB)_R,119,1204452079,DPM(y5)
4,612371421,MBON01(y5B'2a)(aSMPB)_R,104,1048215779,MBON09(y3B'1)(aLAL3)_L_002
5,612371421,MBON01(y5B'2a)(aSMPB)_R,95,1172713521,KCy-d
6,612371421,MBON01(y5B'2a)(aSMPB)_R,95,1016835041,MBON09(y3B'1)(aLAL3)_L_001
7,612371421,MBON01(y5B'2a)(aSMPB)_R,77,799586652,MBON05(y4>y1y2)(igL)_L
8,612371421,MBON01(y5B'2a)(aSMPB)_R,68,695513428,APL_R(4)
9,612371421,MBON01(y5B'2a)(aSMPB)_R,68,510987433,(dlLHT)_R


In [5]:
# Same rows as above, re-ordered by the body ID of the connected (partner) neuron so that
# partners shared by several starting neurons sit on adjacent rows. The partner is in the
# output_* columns for "down" and in the input_* columns for "up".
partner_id_column = {"down": "output_bodyID", "up": "input_bodyID"}.get(direction)
if partner_id_column is None:
    # Fail loudly instead of silently showing an `output_sort` left over from an earlier run.
    raise ValueError('direction must be "up" or "down", got ' + repr(direction))

output_sort = data.sort_values(by=[partner_id_column])
output_sort

Unnamed: 0,output_bodyID,output_name,weight_HP,input_bodyID,input_name
150,613079053,MBON04(B'2mp_bilateral)(aSMPB)_R,510,268731005,(L)SMP-CRE
1056,673702721,MBON15(a'1)(SFS)_R,9,268731005,(L)SMP-CRE
1012,642664141,MBON15(a'1)(SFS)_R,10,268731005,(L)SMP-CRE
941,457175171,MBON15-like(a'1a'2)(SFS)_R,6,268731005,(L)SMP-CRE
1420,423774471,MBON19(a2p3p)(SFS)_R_002,7,298254517,(AMDF)_L
466,768555687,MBON10(B'1)(igL)_R_001,8,298258513,SMP-SIP(agL2)_R
261,5813061512,MBON06(B1>a)(igL)_R,42,298595394,PAM10(B1)(aSMP)_R
1376,517854468,MBON19(a2p3p)(SFS)_R_001,13,298603963,(hLHT1)_R
1415,423774471,MBON19(a2p3p)(SFS)_R_002,8,298603963,(hLHT1)_R
1384,517854468,MBON19(a2p3p)(SFS)_R_001,12,298616588,(AMDF)_R


In [6]:
# Collapse the sorted table so that every connected (partner) body ID appears only once.
# weight_HP becomes the partner's summed weight over all starting neurons it connects to;
# the name column keeps the first name seen for that body ID.
partner_role = {"down": "output", "up": "input"}.get(direction)
if partner_role is None:
    # Fail loudly instead of silently showing an `agg_data` left over from an earlier run.
    raise ValueError('direction must be "up" or "down", got ' + repr(direction))

agg_data = output_sort.groupby(partner_role + '_bodyID').agg({
    'weight_HP': 'sum',
    partner_role + '_name': 'first',
})
agg_data

Unnamed: 0_level_0,weight_HP,input_name
input_bodyID,Unnamed: 1_level_1,Unnamed: 2_level_1
268731005,535,(L)SMP-CRE
298254517,7,(AMDF)_L
298258513,8,SMP-SIP(agL2)_R
298595394,42,PAM10(B1)(aSMP)_R
298603963,21,(hLHT1)_R
298616588,23,(AMDF)_R
299289811,57,SMP-CRE
299613480,27,PAM10(B1)(aSMP)_R
299621755,32,(AMDF)_L
300972942,91,MBON14(a3)_R_002


In [7]:
# Run this to save the first, unfiltered dataframe to disk. The path is relative, so the file
# is written to the kernel's current working directory.
# Columns: starting-neuron body ID and name, the weight of the connection, and the
# connected-neuron body ID and name. Rows are grouped by starting neuron, in the same order
# as the body IDs were listed in body_ID_List. Connected neurons may appear more than once,
# because several starting neurons can connect to the same neuron.
# Note: the file is tab-delimited (sep='\t') even though its name ends in .csv.
data.to_csv('unfiltered_dataframe.csv', sep='\t')

In [8]:
# Run this to save the second, sorted dataframe to disk. The path is relative, so the file
# is written to the kernel's current working directory.
# The content is the same as the unfiltered file, but the rows are ordered by the connected
# neurons' body IDs instead of by the starting body IDs.
# Note: the file is tab-delimited (sep='\t') even though its name ends in .csv.
output_sort.to_csv('sorted_dataframe.csv', sep='\t')

In [9]:
# Run this to save the third, aggregated dataframe to disk. The path is relative, so the file
# is written to the kernel's current working directory.
# This file contains one row per connected neuron, with its weight summed over all starting
# bodies it connects to.
# Note: the file is tab-delimited (sep='\t') even though its name ends in .csv.
agg_data.to_csv('aggregate_dataframe.csv', sep='\t')