# Proofread_nuclei

In [1]:
# libraries 1
import numpy as np
import pyperclip
import pandas as pd
import pyperclip
from cloudvolume import CloudVolume, view, Bbox
from nglui import statebuilder,annotation,easyviewer,parser
from nglui.statebuilder import *
from nglui.nglite import *
import json

import sys
import os
from datetime import datetime
from caveclient import CAVEclient

sys.path.append(os.path.abspath("../segmentation"))
import authentication_utils as auth
import rootID_lookup as IDlook
sys.path.append(os.path.abspath("../synapses"))
import connectivity_utils

In [2]:
# Name of the CAVE datastack that the client below is bound to.
datastack_name = 'fanc_production_mar2021'
# Client used further down to upload and fetch Neuroglancer JSON states (client.state.*).
client = CAVEclient(datastack_name)

In [3]:
# Soma/nucleus table. The cells below read its 'nuc_xyz' and 'soma_xyz' columns
# and parse them as '(x, y, z)' strings.
df = pd.read_csv('../Output/soma_info_Aug2021ver5.csv', header=0)

## Create links for proofreading nuclei/somas

Good example: https://github.com/seung-lab/NeuroglancerAnnotationUI/blob/master/examples/statebuilder_examples.ipynb

In [None]:
# Only the nucleus and soma coordinates are needed to draw the line annotations.
# Split every '(x, y, z)' string into three string columns, one frame per point type.
nuc_xyz_df = df['nuc_xyz'].str.strip('()').str.split(',', expand=True)
soma_xyz_df = df['soma_xyz'].str.strip('()').str.split(',', expand=True)

# Keep just the two coordinate columns and store each point as an [x, y, z] list of ints.
xyz_df = df.reindex(columns=['nuc_xyz', 'soma_xyz'])
for point_column, split_parts in (('nuc_xyz', nuc_xyz_df), ('soma_xyz', soma_xyz_df)):
    xyz_df[point_column] = split_parts.astype(int).values.tolist()

In [None]:
# Name the three split coordinate columns so they can be used as sort keys.
nuc_xyz_df = nuc_xyz_df.set_axis(['Col_x', 'Col_y', 'Col_z'], axis=1)

# Order nuclei by y, breaking ties by x and then by z.
# A single multi-key sort replaces three chained single-key sorts: sort_values
# defaults to a non-stable quicksort, so the ordering produced by the earlier
# z and x passes was not guaranteed to survive the final y pass.
sorted_indices = (
    nuc_xyz_df.astype(int)
    .sort_values(by=['Col_y', 'Col_x', 'Col_z'], ascending=True)
    .index
)

# Apply that order to the annotation table and renumber the rows 0..n-1.
# NOTE: this cell is not idempotent -- running it again on the already
# re-ordered xyz_df permutes the rows a second time.
xyz_df = xyz_df.reindex(sorted_indices).reset_index(drop=True)

In [None]:
# ..., and make them into lines
# Mapping rule: point A is read from the 'nuc_xyz' column, point B from 'soma_xyz'.
lines = LineMapper(point_column_a='nuc_xyz', point_column_b='soma_xyz')

In [None]:
# Layer definitions for the Neuroglancer link.
# Every source URL is looked up by key through auth.get_cv_path(...)['url'].
imgTokyo = ImageLayerConfig(
    name='FANCv4-jp',
    source=auth.get_cv_path('Image_Tokyo')['url'],
)
img = ImageLayerConfig(
    name='FANCv4',
    source=auth.get_cv_path('Image')['url'],
)
seg = SegmentationLayerConfig(
    name='seg_Mar2021_proofreading',
    source=auth.get_cv_path('FANC_production_segmentation')['url'],
)
nuc_Aug = ImageLayerConfig(
    name='nuc',
    source=auth.get_cv_path('nuclei_map_Aug2021')['url'],
)
# NOTE(review): 'nuc_seg' is configured as an image layer although its source
# key is 'nuclei_seg_Aug2021' -- confirm that this is intended.
nuc_seg_Aug = ImageLayerConfig(
    name='nuc_seg',
    source=auth.get_cv_path('nuclei_seg_Aug2021')['url'],
)

# Annotation layer: drawn using the `lines` mapping rule, with the four tags
# proofreaders can assign to each annotation.
ann = AnnotationLayerConfig(
    name='nuc_soma_Aug2021',
    mapping_rules=lines,
    tags=['neuron', 'glia', 'false_positive', 'soma_need_check'],
    active=True,
)

In [None]:
# Viewer keyword arguments handed to the StateBuilder.
view_options = dict(layout="xy")

# Extra top-level state entries; merged into every rendered state before upload.
memory_options = dict(
    gpuMemoryLimit=4_000_000_000,
    systemMemoryLimit=8_000_000_000,
    concurrentDownloads=64,
    jsonStateServer="https://global.daf-apis.com/nglstate/api/v1/post",
)

# Builder that turns a DataFrame of points into a Neuroglancer state
# containing the layers configured above, in this order.
sb = StateBuilder(
    layers=[imgTokyo, seg, img, nuc_Aug, nuc_seg_Aug, ann],
    resolution=[4.3, 4.3, 45],
    view_kws=view_options,
)

In [None]:
# Build one Neuroglancer link per chunk of at most k annotations.
LINK = []
k = 500

# Chunk by position (iloc) so the split does not depend on xyz_df carrying a
# default 0..n-1 index; label-based .loc slicing silently breaks otherwise.
minidfs = [xyz_df.iloc[i:i + k] for i in range(0, len(xyz_df), k)]

# The viewer base URL is the same for every chunk, so look it up once.
ngl_base_url = auth.get_cv_path('neuroglancer_base')['url']

for dftmp in minidfs:
    # Render the chunk to a state dict and add the memory/server settings.
    state = json.loads(sb.render_state(dftmp, return_as='json'))
    state.update(memory_options)
    # Upload the state and keep the short link that points at it.
    jsn_id = client.state.upload_state_json(state)
    output = client.state.build_neuroglancer_url(jsn_id, ngl_base_url)
    LINK.append(output)

In [None]:
# save into csv
# One link per row, written without an index column or a header line.
LINK2 = pd.DataFrame(LINK)
LINK2.to_csv('../Output/links_20210903.csv', index=False, header=False)

## Extract tags from proofread csv

The parser functions used below come from https://github.com/seung-lab/NeuroglancerAnnotationUI/blob/36f03cab5ccff8c52b0faba8beff7ab77398ef48/src/nglui/parser/base.py

In [4]:
# df['is_neuron']=""
# df['is_glia']=""
# df['is_false_positive']=""
# df['is_duplicated']=""
# df.to_csv('../Output/proofread_nuc_temp.csv', index=False)

In [5]:
# Progress table that accumulates the proofreading decisions.
df_progress = pd.read_csv("../Output/proofread_nuc_temp.csv", header=0)
# Proofreading sheet; only its 'new link' column is read here.
prfrd1 = pd.read_table("../Output/2nd_proofreading.tsv", usecols = ['new link'])

In [7]:
# The state ID is whatever follows the last '/' of each non-empty link.
# `n` has to be passed by keyword: since pandas 2.0 every argument of
# str.rsplit other than `pat` is keyword-only, so rsplit('/', 1) raises TypeError.
rsplitted = prfrd1['new link'].dropna().str.rsplit('/', n=1)
# Take the last piece of every split directly instead of looping over the index.
new_id = rsplitted.str[-1].tolist()

print(len(new_id))

11


In [8]:
# One DataFrame of line annotations (point A, point B, tag) per proofread link.
mylist = []

for state_id_str in new_id:
    state_id = int(state_id_str)
    state = client.state.get_state_json(state_id)

    # extract info from json state; only the first annotation layer is read
    anno_layer = parser.annotation_layers(state)[0]
    nuc_tags = parser.tag_dictionary(state, anno_layer)
    anno_lists = parser.line_annotations(state, anno_layer, tags=True)

    # Collapse each annotation's tag list to a single tag ID up front, instead
    # of writing lists into individual cells with iloc and flattening them
    # afterwards. Exactly one tag -> that ID; no tag or two-plus tags -> 0
    # (multiply-tagged rows are treated as undecided for now).
    tag_ids = [int(tags[0]) if len(tags) == 1 else 0 for tags in anno_lists[2]]

    temp = pd.DataFrame({'anno_points_A': anno_lists[0],
                         'anno_points_B': anno_lists[1],
                         'anno_tags': tag_ids})

    # Translate tag IDs through nuc_tags; IDs without an entry (the 0
    # placeholder, judging by the displayed result) are left unchanged.
    temp['anno_tags'] = temp['anno_tags'].replace(nuc_tags)

    mylist.append(temp)


In [13]:
# Stack the per-link tables. reset_index() keeps each row's position within its
# own link as an 'index' column, which is dropped again before saving.
df_new = pd.concat(mylist).reset_index()

In [14]:
df_new

Unnamed: 0,index,anno_points_A,anno_points_B,anno_tags
0,0,"[33440, 74896, 1797]","[33968, 74928, 1892]",0
1,1,"[33680, 75136, 1796]","[33744, 75200, 1787]",0
2,2,"[33168, 77808, 1953]","[32992, 78384, 1909]",neuron
3,3,"[32528, 78240, 2043]","[33360, 78384, 2028]",neuron
4,4,"[32608, 78896, 1959]","[33312, 79008, 1911]",neuron
...,...,...,...,...
1750,157,"[10576, 124464, 2012]","[10480, 124784, 1970]",neuron
1751,158,"[9952, 124480, 1973]","[10048, 124496, 2017]",neuron
1752,159,"[23072, 124496, 3967]","[22368, 124304, 4011]",neuron
1753,160,"[12208, 124512, 3593]","[13200, 124304, 3660]",0


In [15]:
# Tally annotations per tag; rows whose tag is still the integer 0 are the
# ones to come back to.
tag_column = df_new['anno_tags']
tag_report = (
    ('neuron are {}', 'neuron'),
    ('glia are {}', 'glia'),
    ('false_positive are {}', 'false_positive'),
    ('come_back_to_me_later are {}', 0),
)
for message, tag_value in tag_report:
    print(message.format(sum(tag_column == tag_value)))
print('in total {}'.format(len(df_new)))

neuron are 1396
glia are 158
false_positive are 10
come_back_to_me_later are 191
in total 1755


In [16]:
# Persist the extracted annotations without the helper 'index' column.
df_new.drop("index", axis=1).to_csv('../Output/2nd_proofread_extracted.csv', index=False)

In [17]:
# Reload from disk. After this round-trip the cells below treat the point
# columns as '[x, y, z]' strings and the untagged placeholder as the string '0'.
df_new = pd.read_csv("../Output/2nd_proofread_extracted.csv", header=0)

In [18]:
df_new

Unnamed: 0,anno_points_A,anno_points_B,anno_tags
0,"[33440, 74896, 1797]","[33968, 74928, 1892]",0
1,"[33680, 75136, 1796]","[33744, 75200, 1787]",0
2,"[33168, 77808, 1953]","[32992, 78384, 1909]",neuron
3,"[32528, 78240, 2043]","[33360, 78384, 2028]",neuron
4,"[32608, 78896, 1959]","[33312, 79008, 1911]",neuron
...,...,...,...
1750,"[10576, 124464, 2012]","[10480, 124784, 1970]",neuron
1751,"[9952, 124480, 1973]","[10048, 124496, 2017]",neuron
1752,"[23072, 124496, 3967]","[22368, 124304, 4011]",neuron
1753,"[12208, 124512, 3593]","[13200, 124304, 3660]",0


In [19]:
# Rows that still carry the '0' placeholder (a string after the CSV reload),
# i.e. annotations that were left untagged or had more than one tag.
df_new2 = df_new[df_new['anno_tags']=='0']
print(len(df_new2))

191


In [20]:
# Split each '[x, y, z]' string of the undecided rows into three string columns.
nuc_xyz_df = df_new2['anno_points_A'].str.strip('[]').str.split(',', expand=True)
soma_xyz_df = df_new2['anno_points_B'].str.strip('[]').str.split(',', expand=True)

# Table handed to the StateBuilder: the two point columns, renumbered from 0
# (the old row labels are kept in an 'index' column), each point stored as an
# [x, y, z] list of ints.
xyz_df = df_new2.reindex(columns=['anno_points_A', 'anno_points_B']).reset_index()
for point_column, split_parts in (('anno_points_A', nuc_xyz_df),
                                  ('anno_points_B', soma_xyz_df)):
    xyz_df[point_column] = split_parts.astype(int).values.tolist()

In [52]:
# Mapping rule for the re-proofreading links: point A is read from
# 'anno_points_A', point B from 'anno_points_B'.
lines = LineMapper(point_column_a='anno_points_A', point_column_b='anno_points_B')

In [53]:
# make Neuroglancer link
# Layer definitions for the Neuroglancer link.
# Every source URL is looked up by key through auth.get_cv_path(...)['url'].
imgTokyo = ImageLayerConfig(
    name='FANCv4-jp',
    source=auth.get_cv_path('Image_Tokyo')['url'],
)
img = ImageLayerConfig(
    name='FANCv4',
    source=auth.get_cv_path('Image')['url'],
)
seg = SegmentationLayerConfig(
    name='seg_Mar2021_proofreading',
    source=auth.get_cv_path('FANC_production_segmentation')['url'],
)
nuc_Aug = ImageLayerConfig(
    name='nuc',
    source=auth.get_cv_path('nuclei_map_Aug2021')['url'],
)
# NOTE(review): 'nuc_seg' is configured as an image layer although its source
# key is 'nuclei_seg_Aug2021' -- confirm that this is intended.
nuc_seg_Aug = ImageLayerConfig(
    name='nuc_seg',
    source=auth.get_cv_path('nuclei_seg_Aug2021')['url'],
)

# Annotation layer: drawn using the current `lines` mapping rule, with the
# four tags proofreaders can assign to each annotation.
ann = AnnotationLayerConfig(
    name='nuc_soma_Aug2021',
    mapping_rules=lines,
    tags=['neuron', 'glia', 'false_positive', 'soma_need_check'],
    active=True,
)

In [54]:
# Viewer keyword arguments handed to the StateBuilder.
view_options = dict(layout="xy")

# Extra top-level state entries; merged into every rendered state before upload.
memory_options = dict(
    gpuMemoryLimit=4_000_000_000,
    systemMemoryLimit=8_000_000_000,
    concurrentDownloads=64,
    jsonStateServer="https://global.daf-apis.com/nglstate/api/v1/post",
)

# Builder that turns a DataFrame of points into a Neuroglancer state
# containing the layers configured above, in this order.
sb = StateBuilder(
    layers=[imgTokyo, seg, img, nuc_Aug, nuc_seg_Aug, ann],
    resolution=[4.3, 4.3, 45],
    view_kws=view_options,
)

In [55]:
# Build one Neuroglancer link per chunk of at most k annotations.
LINK = []
k = 500

# Chunk by position (iloc) so the split does not depend on xyz_df carrying a
# default 0..n-1 index; label-based .loc slicing silently breaks otherwise.
minidfs = [xyz_df.iloc[i:i + k] for i in range(0, len(xyz_df), k)]

# The viewer base URL is the same for every chunk, so look it up once.
ngl_base_url = auth.get_cv_path('neuroglancer_base')['url']

for dftmp in minidfs:
    # Render the chunk to a state dict and add the memory/server settings.
    state = json.loads(sb.render_state(dftmp, return_as='json'))
    state.update(memory_options)
    # Upload the state and keep the short link that points at it.
    jsn_id = client.state.upload_state_json(state)
    output = client.state.build_neuroglancer_url(jsn_id, ngl_base_url)
    LINK.append(output)

In [56]:
# save into csv
# One link per row, written without an index column or a header line.
LINK2 = pd.DataFrame(LINK)
LINK2.to_csv('../Output/links_20210927.csv', index=False, header=False)

## store progress

In [27]:
# Reload both tables from disk so this section starts from the saved files:
# the extracted annotations of this proofreading round ...
df_new = pd.read_csv("../Output/2nd_proofread_extracted.csv", header=0)
# ... and the progress table they are merged into.
df_progress = pd.read_csv("../Output/proofread_nuc_temp.csv", header=0)

In [28]:
# Keep only annotations that received a tag ('0' is the untagged placeholder).
df_new2 = df_new[df_new.anno_tags != '0']

In [29]:
# Copy this round's decisions into the progress table.
# Rows are matched on the nucleus coordinate: '[x, y, z]' in df_new2 is
# rewritten as '(x, y, z)', the format used by df_progress['nuc_xyz'].
nuc_keys = '(' + df_new2['anno_points_A'].str.strip('[]') + ')'

# Tag name -> flag column of df_progress that is set to 'y' for that tag.
tag_to_column = {
    'neuron': 'is_neuron',
    'glia': 'is_glia',
    'false_positive': 'is_false_positive',
}

for tag_name, flag_column in tag_to_column.items():
    tagged_locs = nuc_keys[df_new2['anno_tags'] == tag_name]
    # A flag column that is still completely empty is read from CSV as float;
    # make it object-typed so that writing the string 'y' is a valid assignment.
    df_progress[flag_column] = df_progress[flag_column].astype(object)
    # Boolean-mask assignment with .loc replaces `.at[idx, col]`, which was
    # handed an Index object although .at only accepts a single scalar label.
    df_progress.loc[df_progress['nuc_xyz'].isin(tagged_locs), flag_column] = 'y'

In [32]:
# A nucleus counts as classified once any of the three flags is 'y';
# everything else still has to be looked at again.
is_classified = (
    (df_progress['is_neuron'] == 'y')
    | (df_progress['is_glia'] == 'y')
    | (df_progress['is_false_positive'] == 'y')
)
df_come_back_to_me_later = df_progress[~is_classified]

In [34]:
# Overall progress: number of nuclei per flag, then the remainder and the total.
flag_report = (
    ('neuron are {}', 'is_neuron'),
    ('glia are {}', 'is_glia'),
    ('false_positive are {}', 'is_false_positive'),
)
for message, flag_column in flag_report:
    print(message.format(sum(df_progress[flag_column] == 'y')))
print('come_back_to_me_later are {}'.format(len(df_come_back_to_me_later)))
print('in total {}'.format(len(df_progress)))

neuron are 11895
glia are 1168
false_positive are 399
come_back_to_me_later are 3614
in total 17076


In [35]:
# Write the updated flags back to the same file the progress table was read from.
df_progress.to_csv('../Output/proofread_nuc_temp.csv', index=False)

In [36]:
df_come_back_to_me_later

Unnamed: 0,nucID,center_xyz,nuc_xyz,nuc_svID,nuc_rootID,soma_xyz,soma_svID,soma_rootID,vol,voxel_size,bbx_min,bbx_max,is_neuron,is_glia,is_false_positive,is_duplicated
2,72694142819371754,"(40448, 196000, 1217)","(40352, 195888, 1211)",73401129131933600,0,"(40496, 195984, 1194)",73401129131901249,0,34440,15558,"(40224, 195760, 1197)","(40672, 196240, 1238)",,,,
3,72904562259788369,"(52608, 154120, 2647)","(52752, 154288, 2630)",73821967945732744,0,"(52704, 154224, 2623)",73821967945714519,0,35250,19660,"(52368, 153920, 2624)","(52848, 154320, 2671)",,,,
7,72341886915837985,"(19632, 169736, 1532)","(19712, 169744, 1516)",72696548471021476,0,"(19664, 169344, 1543)",72696548471058197,0,41366,23104,"(19424, 169440, 1511)","(19840, 170032, 1554)",,,,
11,72623705288605712,"(35088, 188960, 791)","(35312, 188784, 773)",73260185216824716,0,"(35312, 188832, 764)",73260185216827443,0,49856,15503,"(34832, 188656, 771)","(35344, 189264, 812)",,,,
12,72764443112506989,"(43800, 192072, 2160)","(43696, 192080, 2140)",73541729651164189,0,"(44208, 191936, 2188)",73541729717946344,0,50225,26091,"(43408, 191872, 2140)","(44192, 192272, 2181)",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17051,73115805797056584,"(61304, 159760, 2205)","(61072, 158832, 2171)",74103580092607665,0,"(62064, 160320, 2282)",74174017623471694,0,3779304,1042161,"(60048, 158672, 2117)","(62560, 160848, 2294)",,,,
17052,72975068442919006,"(55696, 161224, 2607)","(56352, 161152, 2547)",73962911525102336,0,"(56304, 160896, 2517)",73962911525064010,0,3841340,1275164,"(54512, 160064, 2518)","(56880, 162384, 2697)",,,,
17054,72270831312438020,"(12536, 127992, 2921)","(12240, 128512, 2816)",72413699776129279,0,"(12272, 128880, 2814)",72413699776139959,0,3923360,820464,"(11632, 126752, 2809)","(13440, 129232, 3033)",,,,
17068,72270831379546749,"(14104, 129624, 3146)","(13664, 130112, 3041)",72484137373838034,0,"(13184, 129856, 3040)",72484137373847327,0,4577001,1729075,"(12960, 128496, 3033)","(15248, 130752, 3260)",,,,


In [50]:
# Point table for the still-unclassified nuclei: keep the two coordinate
# columns, renumber the rows from 0 (old labels stay in an 'index' column) and
# rename the columns to the anno_points_A / anno_points_B names.
xyz_df = (
    df_come_back_to_me_later
    .reindex(columns=['nuc_xyz', 'soma_xyz'])
    .reset_index()
    .rename(columns={'nuc_xyz': 'anno_points_A', 'soma_xyz': 'anno_points_B'})
)

# Split each '(x, y, z)' string into three string columns ...
nuc_xyz_df = xyz_df['anno_points_A'].str.strip('()').str.split(',', expand=True)
soma_xyz_df = xyz_df['anno_points_B'].str.strip('()').str.split(',', expand=True)

# ... and store every point as an [x, y, z] list of ints.
for point_column, split_parts in (('anno_points_A', nuc_xyz_df),
                                  ('anno_points_B', soma_xyz_df)):
    xyz_df[point_column] = split_parts.astype(int).values.tolist()

In [51]:
xyz_df

Unnamed: 0,index,anno_points_A,anno_points_B
0,2,"[40352, 195888, 1211]","[40496, 195984, 1194]"
1,3,"[52752, 154288, 2630]","[52704, 154224, 2623]"
2,7,"[19712, 169744, 1516]","[19664, 169344, 1543]"
3,11,"[35312, 188784, 773]","[35312, 188832, 764]"
4,12,"[43696, 192080, 2140]","[44208, 191936, 2188]"
...,...,...,...
3609,17051,"[61072, 158832, 2171]","[62064, 160320, 2282]"
3610,17052,"[56352, 161152, 2547]","[56304, 160896, 2517]"
3611,17054,"[12240, 128512, 2816]","[12272, 128880, 2814]"
3612,17068,"[13664, 130112, 3041]","[13184, 129856, 3040]"
