# Proofread_nuclei

In [1]:
# libraries 1
import numpy as np
import pyperclip
import pandas as pd
import pyperclip
from cloudvolume import CloudVolume, view, Bbox
from nglui import statebuilder,annotation,easyviewer,parser
from nglui.statebuilder import *
from nglui.nglite import *
import json

import sys
import os
from datetime import datetime
from caveclient import CAVEclient

sys.path.append(os.path.abspath("../segmentation"))
import authentication_utils as auth
import rootID_lookup as IDlook
sys.path.append(os.path.abspath("../synapses"))
import connectivity_utils

In [2]:
# Datastack to work against; `client` is the CAVE handle later cells use to
# upload and fetch Neuroglancer JSON states.
datastack_name = 'fanc_production_mar2021'
client = CAVEclient(datastack_name)

In [3]:
# Soma/nucleus table. The 'nuc_xyz' and 'soma_xyz' columns used below hold
# coordinates stored as "(x, y, z)" text.
df = pd.read_csv('../Output/soma_info_Aug2021ver5.csv', header=0)

## Create links for proofreading nuclei/somas

Good example: https://github.com/seung-lab/NeuroglancerAnnotationUI/blob/master/examples/statebuilder_examples.ipynb

In [None]:
# Only the nucleus and soma coordinates are needed: each row becomes one line
# annotation running from the nucleus point to the soma point.
xyz_df = df.reindex(columns=['nuc_xyz', 'soma_xyz'])

# Each point is stored as "(x, y, z)" text: drop the parentheses, split on
# commas into three columns, then store the integer triples as Python lists.
nuc_xyz_df = df['nuc_xyz'].str.strip('()').str.split(pat=',', expand=True)
xyz_df['nuc_xyz'] = nuc_xyz_df.astype(int).to_numpy().tolist()

soma_xyz_df = df['soma_xyz'].str.strip('()').str.split(pat=',', expand=True)
xyz_df['soma_xyz'] = soma_xyz_df.astype(int).to_numpy().tolist()

In [None]:
# Order the annotations by nucleus position: y first, ties broken by x, then z.
# A single multi-key sort is used because chaining three single-key
# sort_values() calls relies on a stable sort, which the default algorithm does
# not guarantee, so the earlier z and x passes could be discarded.
# NOTE: this cell overwrites xyz_df with a re-indexed copy, so run it exactly
# once after the parsing cell above.
nuc_xyz_df = nuc_xyz_df.set_axis(['Col_x', 'Col_y', 'Col_z'], axis=1)
sorted_indices = (
    nuc_xyz_df.astype(int)
    .sort_values(by=['Col_y', 'Col_x', 'Col_z'], ascending=True)
    .index
)
xyz_df = xyz_df.reindex(sorted_indices).reset_index(drop=True)

In [None]:
# Mapping rule: draw one line annotation per row, from the point in 'nuc_xyz'
# to the point in 'soma_xyz'.
lines = LineMapper(point_column_a='nuc_xyz', point_column_b='soma_xyz')

In [None]:
# Layers for the Neuroglancer proofreading view. Every source URL is looked up
# by key through the project's authentication_utils module (`auth`).
imgTokyo = ImageLayerConfig(
    name='FANCv4-jp',
    source=auth.get_cv_path('Image_Tokyo')['url'],
)
img = ImageLayerConfig(
    name='FANCv4',
    source=auth.get_cv_path('Image')['url'],
)
seg = SegmentationLayerConfig(
    name='seg_Mar2021_proofreading',
    source=auth.get_cv_path('FANC_production_segmentation')['url'],
)
nuc_Aug = ImageLayerConfig(
    name='nuc',
    source=auth.get_cv_path('nuclei_map_Aug2021')['url'],
)
# NOTE(review): 'nuc_seg' is configured as an image layer rather than a
# segmentation layer -- confirm this is intentional.
nuc_seg_Aug = ImageLayerConfig(
    name='nuc_seg',
    source=auth.get_cv_path('nuclei_seg_Aug2021')['url'],
)

# Annotation layer holding the nucleus-to-soma lines; proofreaders label each
# line with one of these tags.
ann = AnnotationLayerConfig(
    name='nuc_soma_Aug2021',
    mapping_rules=lines,
    tags=['neuron', 'glia', 'false_positive', 'soma_need_check'],
    active=True,
)

In [None]:
# Viewer layout passed to the StateBuilder.
view_options = dict(layout="xy")

# Extra top-level entries that the link-building cell merges into every
# rendered state before uploading it.
memory_options = {
    "gpuMemoryLimit": 4_000_000_000,
    "systemMemoryLimit": 8_000_000_000,
    "concurrentDownloads": 64,
    "jsonStateServer": "https://global.daf-apis.com/nglstate/api/v1/post",
}

# NOTE(review): resolution is presumably the voxel size in nm -- confirm.
sb = StateBuilder(
    layers=[imgTokyo, seg, img, nuc_Aug, nuc_seg_Aug, ann],
    resolution=[4.3, 4.3, 45],
    view_kws=view_options,
)

In [None]:
# Build one Neuroglancer link per chunk of at most k annotations.
LINK = []
k = 500

# The base URL does not change between chunks, so look it up once.
ngl_base_url = auth.get_cv_path('neuroglancer_base')['url']

# Chunk by position (iloc) so the split does not depend on xyz_df carrying a
# clean 0..n-1 index.
minidfs = [xyz_df.iloc[i:i + k] for i in range(0, len(xyz_df), k)]
for dftmp in minidfs:
    # Render the chunk to a JSON state, add the memory/server options, upload
    # it and keep the short link that points at the uploaded state.
    state = json.loads(sb.render_state(dftmp, return_as='json'))
    state.update(memory_options)
    jsn_id = client.state.upload_state_json(state)
    output = client.state.build_neuroglancer_url(jsn_id, ngl_base_url)
    LINK.append(output)

In [None]:
# save into csv: one link per row, without header or index column
LINK2 = pd.DataFrame(LINK)
LINK2.to_csv('../Output/links_20210903.csv', index=False, header=False)

## Extract tags from proofread csv

functions from https://github.com/seung-lab/NeuroglancerAnnotationUI/blob/36f03cab5ccff8c52b0faba8beff7ab77398ef48/src/nglui/parser/base.py

In [None]:
# One-time initialisation, kept for reference only: adds the four empty flag
# columns to df and writes the progress file that later cells read back.
# df['is_neuron']=""
# df['is_glia']=""
# df['is_false_positive']=""
# df['is_duplicated']=""
# df.to_csv('../Output/proofread_nuc_temp.csv', index=False)

In [4]:
# df_progress: running proofreading table with the is_* flag columns.
# prfrd1: only the 'new link' column of the proofreading sheet (tab-separated).
df_progress = pd.read_csv("../Output/proofread_nuc_temp.csv", header=0)
prfrd1 = pd.read_table("../Output/4th_proofreading.tsv", usecols = ['new link'])

In [5]:
# The state ID is whatever follows the last '/' of each proofread link.
# `n` is passed by keyword because pandas 2.x no longer accepts it positionally.
rsplitted = prfrd1['new link'].dropna().str.rsplit('/', n=1)
new_id = rsplitted.str[-1].tolist()

print(len(new_id))

2


In [6]:
# Collect one table of line annotations (end points + tag) per proofread state.
mylist = []

for raw_id in new_id:
    state_id = int(raw_id)
    state = client.state.get_state_json(state_id)

    # extract info from json state (only the first annotation layer is read)
    anno_layer = parser.annotation_layers(state)[0]
    nuc_tags = parser.tag_dictionary(state, anno_layer)
    anno_lists = parser.line_annotations(state, anno_layer, tags=True)

    # Collapse each annotation's tag list to a single tag ID. Lines with no tag
    # and lines with two or more tags both become 0 for now, so they are
    # revisited later. This also copes with a state that has no annotations.
    tag_ids = [tags[0] if len(tags) == 1 else 0 for tags in anno_lists[2]]

    temp = pd.DataFrame({'anno_points_A': anno_lists[0],
                         'anno_points_B': anno_lists[1],
                         'anno_tags': np.asarray(tag_ids, dtype=int)})

    # Translate tag IDs into tag names; IDs that are not keys of nuc_tags
    # (e.g. the 0 placeholder seen in the output below) are left unchanged.
    temp['anno_tags'] = temp['anno_tags'].replace(nuc_tags)

    mylist.append(temp)


In [7]:
# Stack the per-state tables. reset_index() keeps each table's own row number
# in an 'index' column, which is dropped again before the CSV export.
df_new = pd.concat(mylist).reset_index()

In [8]:
# Inspect the combined annotation table.
df_new

Unnamed: 0,index,anno_points_A,anno_points_B,anno_tags
0,0,"[52752, 154288, 2630]","[52704, 154224, 2623]",glia
1,1,"[55632, 123584, 3593]","[54928, 123264, 3626]",glia
2,2,"[54128, 182192, 116]","[54368, 182224, 170]",0
3,3,"[32688, 195248, 887]","[33152, 195056, 862]",0
4,4,"[70960, 196496, 1480]","[70800, 197168, 1516]",neuron
...,...,...,...,...
553,53,"[36672, 195680, 847]","[37488, 194224, 833]",neuron
554,54,"[33040, 181632, 521]","[33120, 182176, 644]",neuron
555,55,"[35168, 190640, 741]","[34400, 190576, 856]",neuron
556,56,"[26448, 146496, 679]","[27008, 145600, 618]",0


In [9]:
# Tally the extracted tags. Tags are compared as text so that the untagged
# placeholder is counted whether it is the integer 0 (freshly extracted) or the
# string '0' (after df_new has been reloaded from CSV).
tag_text = df_new['anno_tags'].astype(str)
print('neuron are {}'.format((tag_text == 'neuron').sum()))
print('glia are {}'.format((tag_text == 'glia').sum()))
print('false_positive are {}'.format((tag_text == 'false_positive').sum()))
print('come_back_to_me_later are {}'.format((tag_text == '0').sum()))
print('in total {}'.format(len(df_new)))

neuron are 374
glia are 158
false_positive are 1
come_back_to_me_later are 25
in total 558


In [10]:
# Persist the extracted annotations without the helper 'index' column.
df_new.drop("index", axis=1).to_csv('../Output/4th_proofread_extracted.csv', index=False)

In [11]:
# Reload the saved table. After this round trip the points are "[x, y, z]"
# strings and the untagged placeholder is the string '0', which is what the
# cells below rely on.
df_new = pd.read_csv("../Output/4th_proofread_extracted.csv", header=0)

In [12]:
# Inspect the reloaded table.
df_new

Unnamed: 0,anno_points_A,anno_points_B,anno_tags
0,"[52752, 154288, 2630]","[52704, 154224, 2623]",glia
1,"[55632, 123584, 3593]","[54928, 123264, 3626]",glia
2,"[54128, 182192, 116]","[54368, 182224, 170]",0
3,"[32688, 195248, 887]","[33152, 195056, 862]",0
4,"[70960, 196496, 1480]","[70800, 197168, 1516]",neuron
...,...,...,...
553,"[36672, 195680, 847]","[37488, 194224, 833]",neuron
554,"[33040, 181632, 521]","[33120, 182176, 644]",neuron
555,"[35168, 190640, 741]","[34400, 190576, 856]",neuron
556,"[26448, 146496, 679]","[27008, 145600, 618]",0


In [None]:
# Rows that are still untagged (placeholder '0' after the CSV reload).
# NOTE: the name df_new2 is reused in the "store progress" section for the
# opposite selection (tagged rows).
df_new2 = df_new[df_new['anno_tags']=='0']
print(len(df_new2))

In [None]:
# Rebuild the point table for the untagged rows; reset_index() restores a
# 0..n-1 index and keeps the old row labels in an 'index' column.
xyz_df = df_new2.reindex(columns=['anno_points_A', 'anno_points_B']).reset_index()

# Points come back from the CSV as "[x, y, z]" text: drop the brackets, split
# on commas, then store the integer triples as Python lists.
nuc_xyz_df = df_new2['anno_points_A'].str.strip('[]').str.split(pat=',', expand=True)
xyz_df['anno_points_A'] = nuc_xyz_df.astype(int).to_numpy().tolist()

soma_xyz_df = df_new2['anno_points_B'].str.strip('[]').str.split(pat=',', expand=True)
xyz_df['anno_points_B'] = soma_xyz_df.astype(int).to_numpy().tolist()

In [22]:
# Mapping rule: one line annotation per row, from 'anno_points_A' to 'anno_points_B'.
lines = LineMapper(point_column_a='anno_points_A', point_column_b='anno_points_B')

In [23]:
# Layers for the Neuroglancer re-check view. Every source URL is looked up by
# key through the project's authentication_utils module (`auth`).
imgTokyo = ImageLayerConfig(
    name='FANCv4-jp',
    source=auth.get_cv_path('Image_Tokyo')['url'],
)
img = ImageLayerConfig(
    name='FANCv4',
    source=auth.get_cv_path('Image')['url'],
)
seg = SegmentationLayerConfig(
    name='seg_Mar2021_proofreading',
    source=auth.get_cv_path('FANC_production_segmentation')['url'],
)
nuc_Aug = ImageLayerConfig(
    name='nuc',
    source=auth.get_cv_path('nuclei_map_Aug2021')['url'],
)
# NOTE(review): 'nuc_seg' is configured as an image layer rather than a
# segmentation layer -- confirm this is intentional.
nuc_seg_Aug = ImageLayerConfig(
    name='nuc_seg',
    source=auth.get_cv_path('nuclei_seg_Aug2021')['url'],
)

# Annotation layer holding the lines to re-check, with the tags proofreaders
# can assign.
ann = AnnotationLayerConfig(
    name='nuc_soma_Aug2021',
    mapping_rules=lines,
    tags=['neuron', 'glia', 'false_positive', 'soma_need_check'],
    active=True,
)

In [24]:
# Viewer layout passed to the StateBuilder.
view_options = dict(layout="xy")

# Extra top-level entries that the link-building cell merges into every
# rendered state before uploading it.
memory_options = {
    "gpuMemoryLimit": 4_000_000_000,
    "systemMemoryLimit": 8_000_000_000,
    "concurrentDownloads": 64,
    "jsonStateServer": "https://global.daf-apis.com/nglstate/api/v1/post",
}

# NOTE(review): resolution is presumably the voxel size in nm -- confirm.
sb = StateBuilder(
    layers=[imgTokyo, seg, img, nuc_Aug, nuc_seg_Aug, ann],
    resolution=[4.3, 4.3, 45],
    view_kws=view_options,
)

In [25]:
# Build one Neuroglancer link per chunk of at most k annotations.
LINK = []
k = 500

# The base URL does not change between chunks, so look it up once.
ngl_base_url = auth.get_cv_path('neuroglancer_base')['url']

# Chunk by position (iloc) so the split does not depend on xyz_df carrying a
# clean 0..n-1 index.
minidfs = [xyz_df.iloc[i:i + k] for i in range(0, len(xyz_df), k)]
for dftmp in minidfs:
    # Render the chunk to a JSON state, add the memory/server options, upload
    # it and keep the short link that points at the uploaded state.
    state = json.loads(sb.render_state(dftmp, return_as='json'))
    state.update(memory_options)
    jsn_id = client.state.upload_state_json(state)
    output = client.state.build_neuroglancer_url(jsn_id, ngl_base_url)
    LINK.append(output)

In [26]:
# save into csv: one link per row, without header or index column
LINK2 = pd.DataFrame(LINK)
LINK2.to_csv('../Output/links_20210928-2.csv', index=False, header=False)

## store progress

In [13]:
# Start this section from the files on disk: the extracted annotations and the
# running progress table.
df_new = pd.read_csv("../Output/4th_proofread_extracted.csv", header=0)
df_progress = pd.read_csv("../Output/proofread_nuc_temp.csv", header=0)

In [14]:
# Keep only rows that received a tag ('0' is the untagged placeholder as read
# back from the CSV).
df_new2 = df_new[df_new.anno_tags != '0']

In [15]:
# Copy each proofread tag onto the matching row(s) of the progress table.
# df_new2 stores points as "[x, y, z]" text while df_progress['nuc_xyz'] uses
# "(x, y, z)", so swap the brackets before matching.
tag_to_column = {'neuron': 'is_neuron',
                 'glia': 'is_glia',
                 'false_positive': 'is_false_positive'}
nuc_locs = '(' + df_new2['anno_points_A'].str.strip('[]') + ')'

for nuc_tag, flag_column in tag_to_column.items():
    tagged_locs = nuc_locs[df_new2['anno_tags'] == nuc_tag]
    matches = df_progress['nuc_xyz'].isin(tagged_locs)
    if not matches.any():
        continue
    # A flag column that is still completely empty is read from CSV as a
    # numeric (NaN) column; make it object so it can hold the 'y' marker.
    if pd.api.types.is_numeric_dtype(df_progress[flag_column]):
        df_progress[flag_column] = df_progress[flag_column].astype(object)
    # .loc with a boolean mask updates every matching row at once; .at only
    # accepts a single scalar row label.
    df_progress.loc[matches, flag_column] = 'y'

In [16]:
# Rows not yet marked as neuron, glia or false positive still need a decision.
is_classified = (
    (df_progress['is_neuron'] == 'y')
    | (df_progress['is_glia'] == 'y')
    | (df_progress['is_false_positive'] == 'y')
)
df_come_back_to_me_later = df_progress[~is_classified]

In [17]:
# Progress summary over the whole table.
print('neuron are {}'.format(df_progress['is_neuron'].eq('y').sum()))
print('glia are {}'.format(df_progress['is_glia'].eq('y').sum()))
print('false_positive are {}'.format(df_progress['is_false_positive'].eq('y').sum()))
print('come_back_to_me_later are {}'.format(len(df_come_back_to_me_later)))
print('in total {}'.format(len(df_progress)))

neuron are 14668
glia are 1973
false_positive are 410
come_back_to_me_later are 25
in total 17076


In [18]:
# Write the updated flags back to the same file df_progress was loaded from.
df_progress.to_csv('../Output/proofread_nuc_temp.csv', index=False)

In [19]:
# Inspect the rows that still need a decision.
df_come_back_to_me_later

Unnamed: 0,nucID,center_xyz,nuc_xyz,nuc_svID,nuc_rootID,soma_xyz,soma_svID,soma_rootID,vol,voxel_size,bbx_min,bbx_max,is_neuron,is_glia,is_false_positive,is_duplicated
90,72975411369214760,"(54120, 182064, 142)","(54128, 182192, 116)",73893228700791395,0,"(54368, 182224, 170)",73893228767892060,0,102600,47263,"(53760, 181744, 114)","(54480, 182384, 171)",,,,
137,72623774008084111,"(33016, 195376, 890)","(32688, 195248, 887)",73119653954017647,0,"(33152, 195056, 862)",73190022698136274,0,118932,73674,"(32608, 195024, 864)","(33424, 195728, 917)",,,,
144,72623292837527563,"(36328, 166912, 259)","(36320, 167008, 247)",73259429033866071,0,"(36784, 166784, 244)",73259429033905439,0,121440,64264,"(35888, 166528, 236)","(36768, 167296, 282)",,,,
201,72623980434948725,"(33000, 208648, 1973)","(32624, 208784, 1955)",73120066874688851,0,"(33568, 208448, 1991)",73190435618846756,0,138159,65393,"(32496, 208240, 1952)","(33504, 209056, 1995)",,,,
212,72905317771379254,"(52520, 196584, 1267)","(52720, 196368, 1242)",73823341597072057,0,"(52176, 196480, 1235)",73823341597072086,0,141423,91551,"(52112, 196208, 1238)","(52928, 196960, 1297)",,,,
232,72763756320391890,"(42296, 149920, 3705)","(42208, 149728, 3717)",73469987322872635,0,"(42192, 149696, 3668)",73469987322805086,0,145962,97781,"(41888, 149488, 3679)","(42704, 150352, 3732)",,,,
237,72975480222908464,"(55680, 185224, 507)","(55808, 185104, 485)",73963735085318806,0,"(55840, 185152, 473)",73963735085284958,0,146880,68181,"(55296, 184864, 473)","(56064, 185584, 541)",,,,
280,72553473983381894,"(29800, 198616, 815)","(29840, 198672, 805)",73049422648627904,0,"(29488, 198768, 843)",73049422648713131,0,157685,86562,"(29360, 198240, 785)","(30240, 198992, 846)",,,,
281,72623567648326621,"(35104, 183096, 217)","(34944, 182896, 196)",73259978789974551,0,"(34880, 183232, 181)",73259978789904052,0,157920,66186,"(34720, 182720, 182)","(35488, 183472, 252)",,,,
326,72763756320392242,"(42848, 149608, 3765)","(42848, 149616, 3735)",73469987389718096,0,"(43248, 149776, 3793)",73540356134060955,0,167994,88936,"(42416, 149200, 3735)","(43280, 150016, 3796)",,,,


In [20]:
# Point table for the undecided rows, using the column names the
# 'anno_points_A' / 'anno_points_B' LineMapper expects. reset_index() keeps the
# progress-table row labels in an 'index' column.
xyz_df = (
    df_come_back_to_me_later
    .reindex(columns=['nuc_xyz', 'soma_xyz'])
    .reset_index()
    .rename(columns={'nuc_xyz': 'anno_points_A', 'soma_xyz': 'anno_points_B'})
)

# Points are "(x, y, z)" text here: drop the parentheses, split on commas,
# then store the integer triples as Python lists.
nuc_xyz_df = xyz_df['anno_points_A'].str.strip('()').str.split(pat=',', expand=True)
soma_xyz_df = xyz_df['anno_points_B'].str.strip('()').str.split(pat=',', expand=True)
xyz_df['anno_points_A'] = nuc_xyz_df.astype(int).to_numpy().tolist()
xyz_df['anno_points_B'] = soma_xyz_df.astype(int).to_numpy().tolist()

In [21]:
# Inspect the point table that will feed the next round of links.
xyz_df

Unnamed: 0,index,anno_points_A,anno_points_B
0,90,"[54128, 182192, 116]","[54368, 182224, 170]"
1,137,"[32688, 195248, 887]","[33152, 195056, 862]"
2,144,"[36320, 167008, 247]","[36784, 166784, 244]"
3,201,"[32624, 208784, 1955]","[33568, 208448, 1991]"
4,212,"[52720, 196368, 1242]","[52176, 196480, 1235]"
5,232,"[42208, 149728, 3717]","[42192, 149696, 3668]"
6,237,"[55808, 185104, 485]","[55840, 185152, 473]"
7,280,"[29840, 198672, 805]","[29488, 198768, 843]"
8,281,"[34944, 182896, 196]","[34880, 183232, 181]"
9,326,"[42848, 149616, 3735]","[43248, 149776, 3793]"
