### ROI Prioritized Orphan Link
- this code takes data from the file OL_roi_continuation_analysis
- runs the orphan link query based on desired level of completedness, prioritizes bodies in ROIs with high continuation rates
- enter desired level of completeness below in a decimal between 0.0-1.0 (completion_goal)
- enter the predicted merge rate of orphans (merge_rate)
- enter the direction, "up" to get upstream orphans of your body IDs, or "down" to get downstream orphans of your bodies. Format : direction = "up" or direction = "down"
- enter the body ID list in the format body_id_list = [1, 2, 3, 4]

In [1]:
completion_goal = .45

merge_rate = .60

direction = "up"

body_id_list = [5813063239, 603785283, 850717220]

Enter your neuprint token here, in the format: token = 'abcde.12345'

get your 'auth token' here for most recent data: https://neuprint-test.janelia.org/account

In [2]:
token = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJlbWFpbCI6ImVtaWx5Lm0uam95Y2UxQGdtYWlsLmNvbSIsImxldmVsIjoicmVhZHdyaXRlIiwiaW1hZ2UtdXJsIjoiaHR0cHM6Ly9saDUuZ29vZ2xldXNlcmNvbnRlbnQuY29tLy1rQ3BqVXpRc3BuNC9BQUFBQUFBQUFBSS9BQUFBQUFBQUFBQS9BTVp1dWNrOEhwVlhrUHV4My1HZXRldjcwbXd1TFdqMVBnL3Bob3RvLmpwZz9zej01MD9zej01MCIsImV4cCI6MTc4MDI3NzU3M30.WaXfZwra0QId3alTewbxTqkvklJ8wQf9lkFXGuV4rCM'


In [3]:
prioritized_rois = ["AL(R)","MB(+ACA)(R)","VLNP(R)"]

In [4]:
import pandas as pd
import numpy as np
import neuprint as neu
import json

from neuprint import Client, fetch_custom
c = Client('neuprint-test.janelia.org', dataset='hemibrain', token = token, verify=True)

In [5]:
# configure the arrow direction for upstream or downstream
def direction_arrow(direction):
    if direction.lower() == "up" or "upstream":
        direction = "-[w:ConnectsTo]->"
    elif direction.lower() == "down" or "downstream":
        direction = "<-[w:ConnectsTo]-"
    else:
        raise TypeError("direction must be 'up' or 'down', you have entered " + direction)
    return direction
direction = direction_arrow(direction)   

## Completion percent query
- calculates the up/downstream completion percent, (total "traced" weight / total downstream weight)

In [6]:
def completion_percent(body):
    
    # find the total downstream neurons
    total_q = fetch_custom("""
    MATCH (a:Neuron)""" + direction + """(b:Segment) 
    WHERE a.bodyId = """ + str(body) + """ 
    RETURN sum(w.weight)""")
    
    total_weight = total_q.iloc[0,0]
    
    # find the downstream neurons with a status of traced or leaves 
    # alternatively could do no no statuses, but could be assign
    status_q = fetch_custom("""
    MATCH (a:Neuron)""" + direction + """(b:Segment)
    WHERE a.bodyId = """ + str(body) + """ AND (b.status CONTAINS "raced" OR b.status CONTAINS "eaves") 
    RETURN sum(w.weight)
    """) 
    
    total_completed_weight = status_q.iloc[0,0]
    
    
    completion_rate = total_completed_weight/total_weight
    
    return({'completion_rate':completion_rate, 'total_weight':total_weight, 'total_completed_weight':total_completed_weight})
    
completion_percent(5813063239)


{'completion_rate': 0.26527805069758303,
 'total_weight': 10178,
 'total_completed_weight': 2700}

## Orphans Query
- returns all the orphans up/downstream of your body and the ROIs they innervate

In [7]:
def orphan_query(body):
    
    orphans = fetch_custom("""
    MATCH (a:Neuron)""" + direction + """(b:Segment)
    WHERE a.bodyId = """ + str(body) + """ AND (b.status IS NULL OR b.status CONTAINS "ssign") 
    RETURN b.bodyId, (b.pre+b.post), b.roiInfo, b.size
    ORDER BY (b.pre+b.post) DESC, b.size desc
    """)
    
    return orphans

orphan_query(5813063239)

Unnamed: 0,b.bodyId,(b.pre+b.post),b.roiInfo
0,2070321500,18,"{""AL(R)"": {""post"": 18}, ""AL-VP3(R)"": {""post"": ..."
1,2627581366,10,"{""GNG"": {""pre"": 1, ""post"": 8, ""downstream"": 11..."
2,1226503253,10,"{""VLNP(R)"": {""post"": 10}, ""PLP(R)"": {""post"": 10}}"
3,2288580553,9,"{""AL(R)"": {""post"": 9}, ""AL-VP5(R)"": {""post"": 9}}"
4,853057088,9,"{""INP"": {""post"": 9}, ""SCL(R)"": {""post"": 9}}"
...,...,...,...
6059,697524064,1,"{""SNP(R)"": {""post"": 1}, ""SLP(R)"": {""post"": 1}}"
6060,1069947590,1,"{""INP"": {""post"": 1}, ""SCL(R)"": {""post"": 1}}"
6061,1069606392,1,"{""INP"": {""post"": 1}, ""SCL(R)"": {""post"": 1}}"
6062,728904247,1,"{""SNP(R)"": {""post"": 1}, ""SLP(R)"": {""post"": 1}}"


## Main Function: returns orphans, prioritized by size and ROI
- will first report all orphans with more than 4 total synapses or from regions with high continuation rates
- will then finish off with the rest of the orphans ordered by number of synapses and the voxel size of the orphan

In [10]:
def orphans(body_id_list):
    
    final_orphans = pd.DataFrame()
    
    for body in body_id_list:
        
        # create a running count for orphan weight. We will continue adding bodies until 
        # (orphan_weight + total_completed_weight) / total_weight >= completion_goal
        orphan_weight = 0
        
        # create a list of orphans for this body to include and to exclude
        # we will add the excluded orphans to the OL list in the end
        # only if we there are not enough prioritized orphans to reach the completion goal
        included_orphans = []
        excluded_orphans = pd.DataFrame(columns = ['b.bodyId', '(b.pre+b.post)'])
        
        # pull the full orphan list for the body
        full_orphan_list = orphan_query(body)
        
        # retreive completion numbers 
        comp_dict = completion_percent(body)
        completion_rate = comp_dict['completion_rate']
        total_weight = comp_dict['total_weight']
        total_completed_weight = comp_dict['total_completed_weight']
        
        
        # we will calculate the number of new synapses that need to be traced out
        # to reach out goal. we will use the assumed merge rate parameter.  
        syn_goal = ((completion_goal*total_weight)-total_completed_weight)/merge_rate
        
        # go orphan by orphan and add
        for orphan, row in full_orphan_list.iterrows():
            
            roi_dict = full_orphan_list.loc[orphan,"b.roiInfo"]
            
            # go row by row and add to the OL list if it has 4+ synapses or is in a high merge ROI
            if orphan_weight < syn_goal:
                if full_orphan_list.iloc[orphan,1] > 3:
                    included_orphans.append(row["b.bodyId"])
                    orphan_weight += full_orphan_list.iloc[orphan,1]
                    continue
                elif (not(set(roi_dict)&set(prioritized_rois))==set()): # if an roi is a prioritized one
                    included_orphans.append(row["b.bodyId"])
                    orphan_weight += full_orphan_list.iloc[orphan,1]
                    continue
                else:
                    new_row = {'b.bodyId':full_orphan_list.iloc[orphan,0], '(b.pre+b.post)': full_orphan_list.iloc[orphan,1]}
                    excluded_orphans = excluded_orphans.append(new_row, ignore_index=True)
                    
        # go row by row and add deprioritized ROIs/small orphans by size 
        
            
        for orphan, row in excluded_orphans.iterrows():
            current_completion_rate = (orphan_weight + total_completed_weight)/total_weight
            if orphan_weight < syn_goal:
                included_orphans.append(excluded_orphans.iloc[orphan,0])
                orphan_weight += excluded_orphans.iloc[orphan,1]
                
            else:
                break
        orphans_df = pd.DataFrame({body:included_orphans})
        
        final_orphans = pd.concat([final_orphans, orphans_df], axis=1).drop_duplicates().fillna('')
        
    return (final_orphans)
orphans=orphans(body_id_list)



In [11]:
# view the results by running this cell

orphans

(     5813063239    603785283     850717220 
 0    2070321500  2.102034e+09  2.038950e+09
 1    2627581366  7.575907e+08  2.102034e+09
 2    1226503253  6.951590e+08  1.066524e+09
 3    2288580553  2.100342e+09  1.004788e+09
 4     853057088  2.225824e+09  8.185994e+08
 ..          ...           ...           ...
 463   788923749           NaN           NaN
 464   853057179           NaN           NaN
 465   788923448           NaN           NaN
 466   729950158           NaN           NaN
 467   822716987           NaN           NaN
 
 [468 rows x 3 columns],)

In [None]:
# download the results by running this cell. 
# The file will be saved in whatever file this jupyter notebook file is stored in. (If run in MyBinder, will be in Jupyter Notebook file navigator)  
# The red text below will be the name that this file is saved as, so you can change it to whatever name you need. 

orphans.to_csv("orphan_list.csv")