# Proofread_nuclei

In [1]:
# libraries 1
import numpy as np
import pyperclip
import pandas as pd
import pyperclip
from cloudvolume import CloudVolume, view, Bbox
from nglui import statebuilder,annotation,easyviewer,parser
from nglui.statebuilder import *
from nglui.nglite import *
import json

import sys
import os
from datetime import datetime
from caveclient import CAVEclient

sys.path.append(os.path.abspath("../segmentation"))
import authentication_utils as auth
import rootID_lookup as IDlook
sys.path.append(os.path.abspath("../synapses"))
import connectivity_utils

In [2]:
# Name of the CAVE datastack that every client call below is issued against.
datastack_name = 'fanc_production_mar2021'
# Client handle; client.state is used later to upload and fetch Neuroglancer JSON states.
client = CAVEclient(datastack_name)

In [3]:
# Soma/nucleus table; the cells below read its 'nuc_xyz' and 'soma_xyz' columns.
df = pd.read_csv('../Output/soma_info_Aug2021ver5.csv', header=0)

## Create links for proofreading nuclei/somas

Good example: https://github.com/seung-lab/NeuroglancerAnnotationUI/blob/master/examples/statebuilder_examples.ipynb

In [None]:
# Keep only the nucleus and soma coordinates; each pair is later drawn as a line.
point_columns = ['nuc_xyz', 'soma_xyz']
xyz_df = df.reindex(columns=point_columns)

# Both columns hold text; drop the surrounding parentheses and split on commas
# to get one string column per axis.
nuc_xyz_df, soma_xyz_df = (
    df[column].str.strip('()').str.split(',', expand=True)
    for column in point_columns
)

# Replace the text in xyz_df with [x, y, z] lists of integers.
for column, axis_strings in zip(point_columns, (nuc_xyz_df, soma_xyz_df)):
    xyz_df[column] = axis_strings.astype(int).values.tolist()

In [None]:
# Order the rows of xyz_df by nucleus position: primarily by y, with ties
# broken by x and then by z.
#
# This used to be three chained single-column sorts (z, then x, then y).
# sort_values defaults to quicksort, which is not stable, so the z and x passes
# were not guaranteed to survive the final sort on y. A single multi-key sort
# produces the ordering that chain was aiming for, deterministically.
#
# NOTE: xyz_df is overwritten from itself, so re-running this cell without
# re-running the parsing cell first would reorder already-sorted rows.
nuc_xyz_df = nuc_xyz_df.set_axis(['Col_x', 'Col_y', 'Col_z'], axis=1)
sorted_indices = (
    nuc_xyz_df.astype(int)
    .sort_values(by=['Col_y', 'Col_x', 'Col_z'], ascending=True)
    .index
)
xyz_df = xyz_df.reindex(sorted_indices).reset_index(drop=True)

In [None]:
# Map every row to one line annotation running from its 'nuc_xyz' point to its 'soma_xyz' point.
lines = LineMapper(point_column_a='nuc_xyz', point_column_b='soma_xyz')

In [None]:
# Layers of the Neuroglancer state. Every source URL is looked up by key
# through auth.get_cv_path.
layer_urls = {
    key: auth.get_cv_path(key)['url']
    for key in ('Image_Tokyo',
                'Image',
                'FANC_production_segmentation',
                'nuclei_map_Aug2021',
                'nuclei_seg_Aug2021')
}

imgTokyo = ImageLayerConfig(name='FANCv4-jp', source=layer_urls['Image_Tokyo'])
img = ImageLayerConfig(name='FANCv4', source=layer_urls['Image'])
seg = SegmentationLayerConfig(name='seg_Mar2021_proofreading',
                              source=layer_urls['FANC_production_segmentation'])
nuc_Aug = ImageLayerConfig(name='nuc', source=layer_urls['nuclei_map_Aug2021'])
# NOTE(review): the nuclei segmentation is added as an image layer - confirm this is intended.
nuc_seg_Aug = ImageLayerConfig(name='nuc_seg', source=layer_urls['nuclei_seg_Aug2021'])

# Annotation layer that draws the nucleus-to-soma lines and offers the proofreading tags.
proofreading_tags = ['neuron', 'glia', 'false_positive', 'soma_need_check']
ann = AnnotationLayerConfig(name='nuc_soma_Aug2021',
                            mapping_rules=lines,
                            tags=proofreading_tags,
                            active=True)

In [None]:
# Viewer layout passed to the StateBuilder.
view_options = dict(layout="xy")

# Extra top-level settings that are merged into every rendered state before upload.
memory_options = dict(
    gpuMemoryLimit=4000000000,
    systemMemoryLimit=8000000000,
    concurrentDownloads=64,
    jsonStateServer="https://global.daf-apis.com/nglstate/api/v1/post",
)

# Builder that turns a table of point pairs into a Neuroglancer state.
layer_stack = [imgTokyo, seg, img, nuc_Aug, nuc_seg_Aug, ann]
sb = StateBuilder(layers=layer_stack,
                  resolution=[4.3, 4.3, 45],
                  view_kws=view_options)

In [None]:
# Split the table into chunks of k rows and publish one Neuroglancer state per
# chunk, collecting the resulting URLs in LINK.
LINK = []
k = 500  # rows (line annotations) per link

# The viewer base URL is the same for every chunk, so look it up once.
ngl_base_url = auth.get_cv_path('neuroglancer_base')['url']

# Chunk by position (.iloc) rather than by label (.loc): label slicing is only
# correct while xyz_df has a clean 0..n-1 index and silently skips rows otherwise.
minidfs = [xyz_df.iloc[i:i + k] for i in range(0, len(xyz_df), k)]
for dftmp in minidfs:
    state = json.loads(sb.render_state(dftmp, return_as='json'))
    state.update(memory_options)
    jsn_id = client.state.upload_state_json(state)
    output = client.state.build_neuroglancer_url(jsn_id, ngl_base_url)
    LINK.append(output)

In [None]:
# Save the links to CSV, one URL per row, without index or header.
LINK2 = pd.DataFrame(LINK)
LINK2.to_csv('../Output/links_20210903.csv', index=False, header=False)

## Extract tags from proofread csv

The parser functions used below come from https://github.com/seung-lab/NeuroglancerAnnotationUI/blob/36f03cab5ccff8c52b0faba8beff7ab77398ef48/src/nglui/parser/base.py

In [None]:
# Kept for reference, not executed: adds empty 'is_neuron', 'is_glia' and
# 'is_false_positive' columns to df and writes '../Output/proofread_nuc_temp.csv'.
# df['is_neuron']=""
# df['is_glia']=""
# df['is_false_positive']=""
# df.to_csv('../Output/proofread_nuc_temp.csv', index=False)

In [96]:
# Progress table, plus the first-round proofreading sheet (tab-separated);
# only the sheet's 'new link' column is needed.
df_progress = pd.read_csv("../Output/proofread_nuc_temp.csv", header=0)
prfrd1 = pd.read_csv("../Output/1st_proofreading.tsv", sep='\t', usecols=['new link'])

In [97]:
# Every non-empty 'new link' value ends in '/<state id>'; keep the text after
# the last '/'.
# `n` is passed by keyword: pandas 2.0 made every rsplit argument except `pat`
# keyword-only, so the positional form rsplit('/', 1) raises TypeError there.
rsplitted = prfrd1['new link'].dropna(how='all').str.rsplit('/', n=1)
# Vectorised pick of the last piece instead of an index loop.
new_id = rsplitted.str[-1].tolist()

In [98]:
# Download every proofread Neuroglancer state and collect its line annotations:
# both end points plus the tag assigned during proofreading.
mylist = []

for raw_id in new_id:
    state_id = int(raw_id)
    state = client.state.get_state_json(state_id)

    # Only the first annotation layer of the state is read.
    anno_layer = parser.annotation_layers(state)[0]
    nuc_tags = parser.tag_dictionary(state, anno_layer)
    anno_lists = parser.line_annotations(state, anno_layer, tags=True)

    # One tag id per annotation; 0 marks a line that was left untagged.
    # Building the column with a comprehension avoids writing lists into single
    # cells through .iloc and, unlike np.concatenate, always yields exactly one
    # value per row: it copes with states that have no annotations and keeps the
    # first tag when an annotation carries several.
    tag_ids = [int(tags[0]) if len(tags) > 0 else 0 for tags in anno_lists[2]]

    temp = pd.DataFrame({'anno_points_A': anno_lists[0],
                         'anno_points_B': anno_lists[1],
                         'anno_tags': tag_ids})

    # Translate tag ids into tag names; ids missing from nuc_tags are left unchanged.
    temp['anno_tags'] = temp['anno_tags'].replace(nuc_tags)

    mylist.append(temp)


In [99]:
# Stack the per-state tables; each table's own row number is kept in an 'index' column.
df_new = pd.concat(mylist, axis=0).reset_index(drop=False)

In [100]:
# Display the combined annotation table.
df_new

Unnamed: 0,index,anno_points_A,anno_points_B,anno_tags
0,0,"[38272, 74224, 1954]","[39072, 73456, 1921]",false_positive
1,1,"[32752, 74304, 1739]","[33056, 73984, 1788]",false_positive
2,2,"[31296, 74672, 1492]","[31200, 74928, 1530]",false_positive
3,3,"[33440, 74896, 1797]","[33968, 74928, 1892]",0
4,4,"[33680, 75136, 1796]","[33744, 75200, 1787]",0
...,...,...,...,...
17071,71,"[42000, 218880, 279]","[42336, 219392, 312]",0
17072,72,"[58736, 219120, 1143]","[58288, 218736, 1101]",false_positive
17073,73,"[46160, 219424, 236]","[46784, 219520, 189]",glia
17074,74,"[53904, 220144, 1319]","[54576, 220320, 1361]",0


In [107]:
# Report how many annotations carry each tag; the integer 0 marks lines that
# were left untagged.
tag_reports = [
    ('neuron are {}', 'neuron'),
    ('glia are {}', 'glia'),
    ('false_positive are {}', 'false_positive'),
    ('come_back_to_me_later are {}', 0),
]
for template, tag_value in tag_reports:
    print(template.format(sum(df_new['anno_tags'] == tag_value)))

neuron are 10499
glia are 1010
false_positive are 389
come_back_to_me_later are 5178


In [111]:
# Write the extracted annotations to disk without the per-state 'index' column.
df_new.drop(columns="index").to_csv('../Output/1st_proofread_extracted.csv', index=False)

In [112]:
# Reload the extracted annotations from disk. The cells below treat the reloaded
# point columns and tags as text (they strip '[]' and compare tags with '0').
df_new = pd.read_csv("../Output/1st_proofread_extracted.csv", header=0)

In [113]:
# Display the reloaded annotation table.
df_new

Unnamed: 0,anno_points_A,anno_points_B,anno_tags
0,"[38272, 74224, 1954]","[39072, 73456, 1921]",false_positive
1,"[32752, 74304, 1739]","[33056, 73984, 1788]",false_positive
2,"[31296, 74672, 1492]","[31200, 74928, 1530]",false_positive
3,"[33440, 74896, 1797]","[33968, 74928, 1892]",0
4,"[33680, 75136, 1796]","[33744, 75200, 1787]",0
...,...,...,...
17071,"[42000, 218880, 279]","[42336, 219392, 312]",0
17072,"[58736, 219120, 1143]","[58288, 218736, 1101]",false_positive
17073,"[46160, 219424, 236]","[46784, 219520, 189]",glia
17074,"[53904, 220144, 1319]","[54576, 220320, 1361]",0


In [121]:
# Keep only the annotations that are still untagged (tag stored as the text '0').
untagged_mask = df_new['anno_tags'].eq('0')
df_new2 = df_new.loc[untagged_mask]

In [123]:
# Keep only the two end points of every untagged line.
point_columns = ['anno_points_A', 'anno_points_B']
xyz_df = df_new2.reindex(columns=point_columns)

# The points are text here; drop the surrounding square brackets and split on
# commas to get one string column per axis.
nuc_xyz_df, soma_xyz_df = (
    df_new2[column].str.strip('[]').str.split(',', expand=True)
    for column in point_columns
)

# Replace the text in xyz_df with [x, y, z] lists of integers.
for column, axis_strings in zip(point_columns, (nuc_xyz_df, soma_xyz_df)):
    xyz_df[column] = axis_strings.astype(int).values.tolist()

In [126]:
# Map every row to one line annotation running from 'anno_points_A' to 'anno_points_B'.
lines = LineMapper(point_column_a='anno_points_A', point_column_b='anno_points_B')

In [128]:
# make Neuroglancer link
imgTokyo = ImageLayerConfig(name = 'FANCv4-jp',
                                    source = auth.get_cv_path('Image_Tokyo')['url'])
img = ImageLayerConfig(name = 'FANCv4',
                                    source = auth.get_cv_path('Image')['url'])
seg = SegmentationLayerConfig(name = 'seg_Mar2021_proofreading',
                                    source = auth.get_cv_path('FANC_production_segmentation')['url'])     
nuc_Aug = ImageLayerConfig(name = 'nuc',
                                    source = auth.get_cv_path('nuclei_map_Aug2021')['url'])      
nuc_seg_Aug = ImageLayerConfig(name = 'nuc_seg',
                                    source = auth.get_cv_path('nuclei_seg_Aug2021')['url'])        

ann = AnnotationLayerConfig(name='nuc_soma_Aug2021',
                            mapping_rules=lines,
                            tags=['neuron', 'glia','false_positive','soma_need_check'],
                            active = True)

In [129]:
# Viewer layout passed to the StateBuilder.
view_options = dict(layout="xy")

# Extra top-level settings that are merged into every rendered state before upload.
memory_options = dict(
    gpuMemoryLimit=4000000000,
    systemMemoryLimit=8000000000,
    concurrentDownloads=64,
    jsonStateServer="https://global.daf-apis.com/nglstate/api/v1/post",
)

# Builder that turns a table of point pairs into a Neuroglancer state.
layer_stack = [imgTokyo, seg, img, nuc_Aug, nuc_seg_Aug, ann]
sb = StateBuilder(layers=layer_stack,
                  resolution=[4.3, 4.3, 45],
                  view_kws=view_options)

In [130]:
# Split the untagged annotations into chunks of k rows and publish one
# Neuroglancer state per chunk, collecting the resulting URLs in LINK.
LINK = []
k = 500  # rows (line annotations) per link

# The viewer base URL is the same for every chunk, so look it up once.
ngl_base_url = auth.get_cv_path('neuroglancer_base')['url']

# Chunk by position (.iloc), not by label (.loc). xyz_df still carries the
# index labels of the filtered df_new2, which are not 0..n-1, so
# .loc[i:i+k-1] over range(0, len(xyz_df), k) never reached the rows whose
# label lies beyond the last chunk's range and silently left them out of the links.
minidfs = [xyz_df.iloc[i:i + k] for i in range(0, len(xyz_df), k)]
for dftmp in minidfs:
    state = json.loads(sb.render_state(dftmp, return_as='json'))
    state.update(memory_options)
    jsn_id = client.state.upload_state_json(state)
    output = client.state.build_neuroglancer_url(jsn_id, ngl_base_url)
    LINK.append(output)

In [131]:
# Save the second-round links to CSV, one URL per row, without index or header.
LINK2 = pd.DataFrame(LINK)
LINK2.to_csv('../Output/links_20210924.csv', index=False, header=False)

In [None]:
# 

In [4]:
# Reload the extracted annotations and the progress table from disk.
df_new = pd.read_csv("../Output/1st_proofread_extracted.csv", header=0)
df_progress = pd.read_csv("../Output/proofread_nuc_temp.csv", header=0)

In [13]:
# Keep only the annotations that received a tag (anything other than the text '0').
tagged_mask = df_new['anno_tags'].ne('0')
df_new2 = df_new.loc[tagged_mask]

In [14]:
# Display the progress table.
df_progress

Unnamed: 0,nucID,center_xyz,nuc_xyz,nuc_svID,nuc_rootID,soma_xyz,soma_svID,soma_rootID,vol,voxel_size,bbx_min,bbx_max,is_neuron,is_glia,is_false_positive
0,73115668492321345,"(64344, 155528, 2733)","(64384, 155376, 2719)",74244180477771882,0,"(64544, 155488, 2710)",74244180477775191,0,32625,13640,"(64144, 155296, 2711)","(64544, 155760, 2756)",,,
1,72341681025843555,"(17024, 157440, 2665)","(16960, 157296, 2652)",72625768014154957,0,"(17024, 157440, 2716)",72625768081127404,0,33072,33028,"(16832, 157232, 2639)","(17216, 157648, 2692)",,,
2,72694142819371754,"(40448, 196000, 1217)","(40352, 195888, 1211)",73401129131933600,0,"(40496, 195984, 1194)",73401129131901249,0,34440,15558,"(40224, 195760, 1197)","(40672, 196240, 1238)",,,
3,72904562259788369,"(52608, 154120, 2647)","(52752, 154288, 2630)",73821967945732744,0,"(52704, 154224, 2623)",73821967945714519,0,35250,19660,"(52368, 153920, 2624)","(52848, 154320, 2671)",,,
4,72481250081768943,"(27208, 86824, 2002)","(27376, 86896, 2005)",72975274937283119,0,"(26976, 86432, 1999)",72975274937264610,0,35875,13179,"(27008, 86544, 1982)","(27408, 87104, 2023)",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17071,73114912242532398,"(61208, 110496, 1436)","(60624, 109584, 1417)",74101930422595776,0,"(59904, 111184, 1355)",74101999142062002,0,5548608,1285649,"(59856, 108960, 1351)","(62560, 112032, 1522)",,,
17072,72903943314735186,"(49280, 117624, 915)","(48496, 116896, 969)",73679992634339249,0,"(50816, 118176, 958)",73750361378481409,0,5691350,1745882,"(47664, 116336, 828)","(50896, 118912, 1003)",,,
17073,72270762727178308,"(14352, 124520, 3228)","(14112, 123456, 3312)",72483931349688631,0,"(14320, 125184, 3091)",72484000001882134,0,5712660,1641011,"(13216, 123328, 3093)","(15488, 125712, 3363)",,,
17074,72200462366935570,"(9072, 128072, 2373)","(8176, 127328, 2290)",72272962019227514,0,"(9664, 128624, 2502)",72343330897623729,0,5809152,1831149,"(8080, 126608, 2245)","(10064, 129536, 2501)",,,


In [None]:
# For every tagged annotation, look up the df_progress rows whose 'nuc_xyz'
# equals the annotation's first point.
#
# NOTE(review): this cell has no execution count and looks unfinished.
# `column_name` and `out` are not defined in any cell visible here, and
# `matched` is an Index (not DataFrame rows), so `matched.dropna(subset=...)`
# and `matched2[column_name[7]]` cannot work as written - confirm the intent
# before running.
for i in range(len(df_new2)):
    # Rewrite the point text from "[x, y, z]" to "(x, y, z)" so it can be
    # compared with the 'nuc_xyz' strings of df_progress.
    nuc_loc_temp = df_new2['anno_points_A'].values[i].strip('[]')
    nuc_loc = '(' + nuc_loc_temp + ')'
    nuc_tag = df_new2['anno_tags'].values[i]  # NOTE(review): not used in the lines visible here
    if nuc_loc in df_progress['nuc_xyz'].values:
        # Index labels of the df_progress rows with the same nucleus position.
        matched = df_progress[df_progress['nuc_xyz'] == nuc_loc].index

        matched2 = matched.dropna(subset=column_name[3:8],how='all') # remove rows I haven't proofread yet
        matched3 = pd.DataFrame(matched2)
        if len(matched3) > 0:
            # Take the last matching row and copy its values into `out`.
            last_matched = matched3.iloc[-1,:]
            notes = matched2[column_name[7]].str.cat(sep=', ')
            out.loc[i,column_name[3:8]] = last_matched.loc[column_name[3:8]]
            out.at[out.index[i],column_name[2]] = str(last_matched.saving_pt).strip("''")
            out.at[out.index[i],column_name[7]] = notes