## Orphan Query
Original: Marissa Dreher <br>
Modifications: Chelsea Alvarado

#### Import libraries and packages
Use https://neuprint-pre.janelia.org/ as the server for production work. If you need to access older datasets, make sure to use the appropriate 'dataset' value, e.g. 'hemibrain'.

In [None]:
import pandas as pd
import numpy as np
import neuprint as neu
import json

from neuprint import Client
# Client used by every query cell below. Fill in the server URL, dataset name
# and personal token before running.
# NOTE(review): verify=False presumably disables SSL certificate verification
# for the connection — confirm against the neuprint-python Client docs.
c = Client(
    '',
    dataset='',
    token='',
    verify=False,
)


#### Setup

In [2]:
# Goal for total output completeness, as a fraction of a body's total output
# weight (0.45 = 45 percent).
total_goal_percent = 0.45

# Body IDs to process, in the format [###, ###, ###].
# To import the list from a JSON file instead, replace the assignment below with:
#     body_list = json.load(open("Core_CX_neurons.json"))
body_list = []

# Empty DataFrame; the orphan loop later appends one column per body ID to it.
d = pd.DataFrame()

### Completeness Query (All ROIs)

In [None]:
# Output completeness of every body in body_list, across all ROIs.
#   totalW    = summed weight of all outgoing ConnectsTo edges of the body
#   completeW = summed weight of the edges whose target status matches
#               ".*raced" or equals "Leaves"
#   percent   = completeW / totalW * 100
# Both sums come from a single aggregation (CASE inside sum) rather than a
# second, filtered MATCH: a filtered MATCH yields no row for a body that has
# no complete targets, so such bodies would be missing from the table instead
# of being reported at 0 percent.
# body_list is interpolated as a Python list literal, which Cypher reads as a
# list as long as the IDs are plain integers.
total_percent_query = (
    f'WITH {body_list} AS IDS '
    'UNWIND IDS AS ID '
    'MATCH (n:Neuron)-[w:ConnectsTo]->(m) '
    'WHERE n.bodyId = ID '
    'WITH n, sum(w.weight) AS totalW, '
    'sum(CASE WHEN m.status =~ ".*raced" OR m.status = "Leaves" '
    'THEN w.weight ELSE 0 END) AS completeW '
    'RETURN n.bodyId, n.instance, completeW, totalW, '
    '(completeW*1.0)/(totalW)*100 AS percent'
)

percentages = c.fetch_custom(total_percent_query)
percentages

#### Export out results

In [None]:
# Save the completeness table as CSV (without the index column).
# Replace the placeholder file name before running.
percentages.to_csv(
    "INSERT FILE NAME.csv",
    index=False,
)

### Orphan Queries 
sum_query = the total number of outputs (TOTAL_WEIGHT) and the number of outputs to complete bodies (COMPLETE_WEIGHT)

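orphan_query = fragments downstream of the body ID. It is a UNION of two sub-queries: the first returns downstream bodies that have a status other than the excluded ones (anything matching ".*raced", "Leaves", "Orphan", "Orphan hotknife", "Orphan-artifact", "Unimportant"); the second returns downstream bodies that have no status at all and a connection weight of exactly 1.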

In [3]:
# Both templates take the body ID through str.format(ID=...), so they must not
# contain any other curly braces.

# Output totals for one body:
#   TOTAL_WEIGHT    = summed weight of all outgoing ConnectsTo edges
#   COMPLETE_WEIGHT = summed weight of the edges whose target status matches
#                     ".*raced" or equals "Leaves"
# A single MATCH with a CASE sum is used so that a body with no complete
# targets still yields one row (COMPLETE_WEIGHT = 0); a second, filtered MATCH
# would return no row at all for such a body.
sum_query = (
    'MATCH (input:Neuron)-[w:ConnectsTo]->(output) \n'
    'WHERE input.bodyId = {ID} \n'
    'RETURN input.bodyId, sum(w.weight) AS TOTAL_WEIGHT, \n'
    'sum(CASE WHEN output.status =~ ".*raced" OR output.status = "Leaves" '
    'THEN w.weight ELSE 0 END) AS COMPLETE_WEIGHT')

# Downstream fragments of one body, as the UNION of two sub-queries:
#   1. targets whose status is set and is not one of the excluded statuses
#      (add or remove "AND NOT output.status = ..." terms to change results);
#   2. targets with no status whose connection weight is exactly 1.
# Each sub-query orders its rows by weight, then by the target's post count,
# both descending.
orphan_query = (
    'MATCH (input:`Neuron`)-[w:ConnectsTo]->(output) \n'
    'WHERE input.bodyId = {ID} '
    'AND NOT output.status =~ ".*raced" '
    'AND NOT output.status = "Leaves" '
    'AND NOT output.status = "Orphan hotknife" '
    'AND NOT output.status = "Unimportant" '
    'AND NOT output.status = "Orphan" '
    # The trailing separator keeps the last status literal from running
    # straight into the RETURN keyword.
    'AND NOT output.status = "Orphan-artifact" \n'
    'RETURN input.bodyId, output.bodyId AS ID, output.status, w.weight AS W, output.post AS POST \n'
    'ORDER BY W DESC, POST DESC \n'
    'UNION \n'
    'MATCH (input:`Neuron`)-[w:ConnectsTo]->(output) \n'
    'WHERE input.bodyId = {ID} AND output.status IS NULL AND w.weight = 1 \n'
    'RETURN input.bodyId, output.bodyId AS ID, output.status, w.weight AS W, output.post AS POST \n'
    'ORDER BY W DESC, POST DESC')

In [4]:
# For every body ID, pick the downstream fragments that would have to be
# completed for the body to reach total_goal_percent output completeness, and
# collect them as one column per body ID in `d`.
per_body_columns = []

for body_id in body_list:
    counts = c.fetch_custom(sum_query.format(ID=body_id))

    # An empty result has no row 0, so counts.at[0, ...] would raise KeyError.
    # Report the body and move on instead of aborting the whole batch.
    if counts.empty:
        print(f"Skipping body {body_id}: sum_query returned no rows")
        continue

    total = counts.at[0, 'TOTAL_WEIGHT']
    complete = counts.at[0, 'COMPLETE_WEIGHT']
    # Output weight still missing to reach the goal; zero or negative means
    # the body already meets it and no fragments are selected.
    goal = (total * total_goal_percent) - complete

    orphans = c.fetch_custom(orphan_query.format(ID=body_id))

    # Take fragments in the order the query returned them until their summed
    # weight reaches the goal. The fragment that crosses the goal is included.
    OL_list = []
    num = 0
    for _, row in orphans.iterrows():
        if num >= goal:
            break  # goal reached; no need to scan the remaining rows
        OL_list.append(row["ID"])
        num += row["W"]

    per_body_columns.append(pd.DataFrame({body_id: OL_list}))

# Join all columns side by side in one step; shorter columns are padded with
# NaN, which is blanked out below.
d = pd.concat([d, *per_body_columns], axis=1)

d = d.drop_duplicates()
d = d.fillna('')
d

#### Export Results
Make sure to change the file name! Otherwise, any previous results saved under the same name will be overwritten. The output file is stored in the same location as this notebook.

In [None]:
# Save the downstream-orphan table for all body IDs (one column per body ID)
# as CSV, without the index column. Replace the placeholder file name first.
d.to_csv(
    "INSERT FILE NAME.csv",
    index=False,
)