In [1]:
!pip install caveclient



In [2]:
from caveclient import CAVEclient
import numpy as np
import pandas as pd

In [3]:
annoDf = pd.read_csv('../data/dbcells-dump.csv')

In [4]:
annoDf.head()

Unnamed: 0,dbcellid,x,y,z,goog14r0seg1_baseid,google_aggloid,dbcelltype,dbsubcelltype,dbsubcelltype2,dbsubcelltype3,nrvoxels,circulardepth
0,49363,0,0,0,0,0,UNKNOWN0,0,0,0,0,780
1,49364,0,0,0,0,0,UNKNOWN0,0,0,0,0,780
2,49365,0,0,0,0,0,UNKNOWN0,0,0,0,0,780
3,49366,0,0,0,0,0,UNKNOWN0,0,0,0,0,780
4,49367,0,0,0,0,0,UNKNOWN0,0,0,0,0,780


In [5]:
annoDf.shape

(49377, 12)

In [6]:
columns = ["type", "dbcellid", "x", "y", "z", "xmin", "ymin", "zmin", "xmax", "ymax", "zmax", "volume", "dbcelltype_code"]
# 1: 1 for neuron/glia cell body, 2 for blood vessel nucleus
# 2: ID in the VAST segmentation file (for neurons/glia this should be
# consistent with the 'dbid' value in other tables)
# 3-5: X,Y,Z anchor point within cell body or nucleus, in pixels at 8x8x33 nm
# 6-11: Xmin,Ymin,Zmin,Xmax,Ymax,Zmax of bounding box of cell body or
# nucleus, in pixels at 8x8x33 nm
# 12: volume estimate of cell body or nucleus in cubic micrometers, at
# _very low resolution_ (mip6 every 128th section for neurons/glia, mip7
# every 16th section for bv nuclei, manually painted)
# 13: cell type code (manually classified):
cbDf = pd.read_csv('../data/h01_cell_body_nucleus_matrix_may16_2023.csv', names=columns)

In [7]:
cbDf.head()

Unnamed: 0,type,dbcellid,x,y,z,xmin,ymin,zmin,xmax,ymax,zmax,volume,dbcelltype_code
0,1,1,259891,165404,1408,258240,163392,896,262335,167167,1551,8825.15116,1
1,1,2,261308,171181,1024,259968,169856,512,262463,172543,1295,5967.219524,1
2,1,3,256788,171573,1408,255552,170688,1152,258367,173119,1679,3984.051929,1
3,1,4,254675,164220,1280,253376,163072,1152,255679,165375,1679,3402.721395,1
4,1,5,258308,160604,1536,257472,159744,1408,259327,161791,1807,2142.618255,1


In [8]:
cbDf.tail()

Unnamed: 0,type,dbcellid,x,y,z,xmin,ymin,zmin,xmax,ymax,zmax,volume,dbcelltype_code
57307,2,8209,154579,154504,4224,154240,154048,4128,154879,154943,4335,80.832627,11
57308,2,8210,277041,149728,992,276800,148352,864,277759,149823,1007,90.798293,15
57309,2,8211,276946,148654,992,276864,147840,704,277631,148799,1007,81.939923,15
57310,2,8212,273233,181627,5280,273152,181248,5248,273727,182463,5295,40.969961,12
57311,2,8213,448208,165140,1600,448000,164928,1600,448639,165631,1839,84.154515,11


In [9]:
cbDf.shape

(57312, 13)

In [10]:
cbDf[cbDf.type==1].shape

(49108, 13)

In [11]:
cbDf[cbDf.type==2].shape

(8204, 13)

## Check data join

In [15]:
df = pd.merge(annoDf, cbDf, on=['dbcellid', 'x', 'y', 'z'], how="inner")
df

Unnamed: 0,dbcellid,x,y,z,goog14r0seg1_baseid,google_aggloid,dbcelltype,dbsubcelltype,dbsubcelltype2,dbsubcelltype3,...,circulardepth,type,xmin,ymin,zmin,xmax,ymax,zmax,volume,dbcelltype_code
0,36167,334046,102775,0,0,0,PYRAMIDAL,0,0,0,...,918,1,332800,101568,0,335167,104127,15,1118.369219,1
1,4702,173580,201477,0,5579334176,5579334176,UNCLASSIFIEDNEURON,0,0,0,...,2350,1,172352,200192,0,175103,203199,271,3388.326543,10
2,7469,248108,170760,0,4577384303,4577384303,UNKNOWN2,0,0,0,...,1789,1,246656,169856,0,249407,171967,15,1125.012996,10
3,36165,332066,99901,0,1872623538,2221907190,UNCLASSIFIEDNEURON,16,0,0,...,914,1,331328,99136,0,332863,101311,143,1151.588106,3
4,43352,320388,56535,0,329473037,518394751,PYRAMIDAL,0,0,0,...,721,1,319744,54976,0,321407,57535,143,1141.622440,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49066,17501,235985,191385,1921,22977375653,40591466610,ASTROCYTE,0,0,0,...,1975,1,235264,190592,1792,236671,192511,2063,1081.828442,4
49067,48840,205304,143482,5249,65497765963,91991908616,ASTROCYTE,0,0,0,...,1821,1,204800,142656,5120,205951,144319,5263,524.858425,4
49068,38326,319163,249034,131,6035046399,7054269922,UNCLASSIFIEDNEURON,16,0,0,...,1644,1,317632,248128,128,320959,250111,399,2183.588217,10
49069,21002,267462,136502,2691,38772897807,47623659810,MG_OPC,0,0,0,...,1483,1,267136,135872,2688,267711,137215,2831,195.991437,6


In [56]:
df = pd.merge(annoDf, cbDf[cbDf.type==2], on=['dbcellid', 'x', 'y', 'z'], how="inner")
df

Unnamed: 0,dbcellid,x,y,z,goog14r0seg1_baseid,google_aggloid,dbcelltype,dbsubcelltype,dbsubcelltype2,dbsubcelltype3,...,circulardepth,type,xmin,ymin,zmin,xmax,ymax,zmax,volume,dbcelltype_code


In [16]:
excluded_df = annoDf[-annoDf['dbcellid'].isin(df['dbcellid'])]

In [19]:
excluded_df.shape

(306, 12)

In [17]:
excluded_df.iloc[100:120]

Unnamed: 0,dbcellid,x,y,z,goog14r0seg1_baseid,google_aggloid,dbcelltype,dbsubcelltype,dbsubcelltype2,dbsubcelltype3,nrvoxels,circulardepth
16849,48982,77163,250952,3840,77954476538,77954491334,BLOODVESSELCELL,0,0,0,32,2880
16850,49038,92786,252781,3840,78013231072,78013231072,BLOODVESSELCELL,0,0,0,32,2919
17020,47347,161402,214437,3840,76734504308,76734504308,BLOODVESSELCELL,0,0,0,40,2488
17483,49052,355447,74492,3840,72053556287,72053556287,BLOODVESSELCELL,0,0,0,73,638
18022,49167,374161,165047,4096,75096667522,75096667522,BLOODVESSELCELL,0,0,0,19,961
18033,46777,95492,254992,4096,51504638581,69264446136,BLOODVESSELCELL,0,0,0,22,2939
18139,49040,250657,260533,4096,78193204226,78251447758,BLOODVESSELCELL,0,0,0,34,2189
18377,47373,96756,255000,4096,78086134272,78086149343,BLOODVESSELCELL,0,0,0,45,2939
18379,49115,104369,194954,4096,76048377238,76048377238,BLOODVESSELCELL,0,0,0,45,2459
18401,47822,94231,253829,4096,78013309131,78013309131,BLOODVESSELCELL,0,0,0,46,2929


In [20]:
excluded_df.dbcelltype.unique()

array(['UNKNOWN0', 'BLOODVESSELCELL', 'PYRAMIDAL', 'MG_OPC', 'OLIGO',
       'ASTROCYTE'], dtype=object)

In [18]:
df = pd.merge(annoDf, cbDf, on=['dbcellid', 'x', 'y', 'z'], how="left")
df

Unnamed: 0,dbcellid,x,y,z,goog14r0seg1_baseid,google_aggloid,dbcelltype,dbsubcelltype,dbsubcelltype2,dbsubcelltype3,...,circulardepth,type,xmin,ymin,zmin,xmax,ymax,zmax,volume,dbcelltype_code
0,49363,0,0,0,0,0,UNKNOWN0,0,0,0,...,780,,,,,,,,,
1,49364,0,0,0,0,0,UNKNOWN0,0,0,0,...,780,,,,,,,,,
2,49365,0,0,0,0,0,UNKNOWN0,0,0,0,...,780,,,,,,,,,
3,49366,0,0,0,0,0,UNKNOWN0,0,0,0,...,780,,,,,,,,,
4,49367,0,0,0,0,0,UNKNOWN0,0,0,0,...,780,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49372,17501,235985,191385,1921,22977375653,40591466610,ASTROCYTE,0,0,0,...,1975,1.0,235264.0,190592.0,1792.0,236671.0,192511.0,2063.0,1081.828442,4.0
49373,48840,205304,143482,5249,65497765963,91991908616,ASTROCYTE,0,0,0,...,1821,1.0,204800.0,142656.0,5120.0,205951.0,144319.0,5263.0,524.858425,4.0
49374,38326,319163,249034,131,6035046399,7054269922,UNCLASSIFIEDNEURON,16,0,0,...,1644,1.0,317632.0,248128.0,128.0,320959.0,250111.0,399.0,2183.588217,10.0
49375,21002,267462,136502,2691,38772897807,47623659810,MG_OPC,0,0,0,...,1483,1.0,267136.0,135872.0,2688.0,267711.0,137215.0,2831.0,195.991437,6.0


In [None]:
# df.to_csv('merged_df.csv', index=False)

# Create data for nucleus_table

In [68]:
df = cbDf[cbDf.type==1].copy()

In [69]:
# upload random annotations
nrow = df.shape[0]
pt_position = df.apply(lambda row : np.array([row['x'], row['y'], row['z']]), axis=1)
bb_start_position = df.apply(lambda row : np.array([row['xmin'], row['ymin'], row['zmin']]), axis=1)
bb_end_position = df.apply(lambda row : np.array([row['xmax'], row['ymax'], row['zmax']]), axis=1)

data = pd.DataFrame.from_dict({
    'id': df.dbcellid.tolist(),
    'valid': np.repeat(True, nrow),
    'volume': df.volume.tolist(),
    'pt_position': pt_position,
    'bb_start_position': bb_start_position,
    'bb_end_position': bb_end_position
})

In [89]:
len(data.id.unique())

49108

In [70]:
data.shape

(49108, 6)

In [88]:
duplicate_ids = data[data.duplicated('id')]
duplicate_ids

Unnamed: 0,id,valid,volume,pt_position,bb_start_position,bb_end_position


In [87]:
duplicate_ids = cbDf[cbDf.duplicated('dbcellid')]
duplicate_ids

Unnamed: 0,type,dbcellid,x,y,z,xmin,ymin,zmin,xmax,ymax,zmax,volume,dbcelltype_code
49108,2,1,224320,234816,544,224064,234304,416,224703,235263,655,120.695292,11
49109,2,2,220352,235008,32,220096,234624,32,220927,235391,143,63.115887,11
49110,2,3,271296,132352,4640,270912,131968,4448,271743,132735,4783,95.227478,17
49111,2,4,272704,158848,160,272128,158528,0,273535,159167,271,107.407737,17
49112,2,5,271616,152000,352,271360,151168,192,271871,152639,399,114.051514,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57307,2,8209,154579,154504,4224,154240,154048,4128,154879,154943,4335,80.832627,11
57308,2,8210,277041,149728,992,276800,148352,864,277759,149823,1007,90.798293,15
57309,2,8211,276946,148654,992,276864,147840,704,277631,148799,1007,81.939923,15
57310,2,8212,273233,181627,5280,273152,181248,5248,273727,182463,5295,40.969961,12


In [50]:
data[data.id==1]

Unnamed: 0,id,valid,volume,pt_position,bb_start_position,bb_end_position
0,1,True,8825.15116,"[259891.0, 165404.0, 1408.0]","[258240.0, 163392.0, 896.0]","[262335.0, 167167.0, 1551.0]"
49108,1,True,120.695292,"[224320.0, 234816.0, 544.0]","[224064.0, 234304.0, 416.0]","[224703.0, 235263.0, 655.0]"


In [51]:
cbDf[cbDf.dbcellid==1]

Unnamed: 0,type,dbcellid,x,y,z,xmin,ymin,zmin,xmax,ymax,zmax,volume,dbcelltype_code
0,1,1,259891,165404,1408,258240,163392,896,262335,167167,1551,8825.15116,1
49108,2,1,224320,234816,544,224064,234304,416,224703,235263,655,120.695292,11


In [75]:
data

Unnamed: 0,id,valid,volume,pt_position,bb_start_position,bb_end_position
0,1,True,8825.151160,"[259891.0, 165404.0, 1408.0]","[258240.0, 163392.0, 896.0]","[262335.0, 167167.0, 1551.0]"
1,2,True,5967.219524,"[261308.0, 171181.0, 1024.0]","[259968.0, 169856.0, 512.0]","[262463.0, 172543.0, 1295.0]"
2,3,True,3984.051929,"[256788.0, 171573.0, 1408.0]","[255552.0, 170688.0, 1152.0]","[258367.0, 173119.0, 1679.0]"
3,4,True,3402.721395,"[254675.0, 164220.0, 1280.0]","[253376.0, 163072.0, 1152.0]","[255679.0, 165375.0, 1679.0]"
4,5,True,2142.618255,"[258308.0, 160604.0, 1536.0]","[257472.0, 159744.0, 1408.0]","[259327.0, 161791.0, 1807.0]"
...,...,...,...,...,...,...
49103,49388,True,194.884141,"[271886.0, 57646.0, 5120.0]","[271552.0, 56960.0, 4992.0]","[272191.0, 58175.0, 5135.0]"
49104,49389,True,200.420622,"[262818.0, 57594.0, 3328.0]","[262528.0, 57088.0, 3200.0]","[263295.0, 58047.0, 3343.0]"
49105,49390,True,215.922770,"[323024.0, 273066.0, 4480.0]","[322560.0, 272512.0, 4480.0]","[323839.0, 273599.0, 4623.0]"
49106,49391,True,77.510738,"[94468.0, 203091.0, 0.0]","[94208.0, 202752.0, 0.0]","[94783.0, 203455.0, 15.0]"


In [76]:
cbDf[cbDf.type==2]

Unnamed: 0,type,dbcellid,x,y,z,xmin,ymin,zmin,xmax,ymax,zmax,volume,dbcelltype_code
49108,2,1,224320,234816,544,224064,234304,416,224703,235263,655,120.695292,11
49109,2,2,220352,235008,32,220096,234624,32,220927,235391,143,63.115887,11
49110,2,3,271296,132352,4640,270912,131968,4448,271743,132735,4783,95.227478,17
49111,2,4,272704,158848,160,272128,158528,0,273535,159167,271,107.407737,17
49112,2,5,271616,152000,352,271360,151168,192,271871,152639,399,114.051514,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57307,2,8209,154579,154504,4224,154240,154048,4128,154879,154943,4335,80.832627,11
57308,2,8210,277041,149728,992,276800,148352,864,277759,149823,1007,90.798293,15
57309,2,8211,276946,148654,992,276864,147840,704,277631,148799,1007,81.939923,15
57310,2,8212,273233,181627,5280,273152,181248,5248,273727,182463,5295,40.969961,12


In [90]:
data.to_hdf('../data/h01_nucleus_table_data.h5', key='data', mode='w')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block3_values] [items->Index(['pt_position', 'bb_start_position', 'bb_end_position'], dtype='object')]

  data.to_hdf('../data/h01_nucleus_table_data.h5', key='data', mode='w')


In [91]:
test = pd.read_hdf('../data/h01_nucleus_table_data.h5', 'data')

In [92]:
test.shape

(49108, 6)

In [27]:
test.head()

Unnamed: 0,id,valid,volume,pt_position,bb_start_position,bb_end_position
0,1,True,8825.15116,"[259891.0, 165404.0, 1408.0]","[258240.0, 163392.0, 896.0]","[262335.0, 167167.0, 1551.0]"
1,2,True,5967.219524,"[261308.0, 171181.0, 1024.0]","[259968.0, 169856.0, 512.0]","[262463.0, 172543.0, 1295.0]"
2,3,True,3984.051929,"[256788.0, 171573.0, 1408.0]","[255552.0, 170688.0, 1152.0]","[258367.0, 173119.0, 1679.0]"
3,4,True,3402.721395,"[254675.0, 164220.0, 1280.0]","[253376.0, 163072.0, 1152.0]","[255679.0, 165375.0, 1679.0]"
4,5,True,2142.618255,"[258308.0, 160604.0, 1536.0]","[257472.0, 159744.0, 1408.0]","[259327.0, 161791.0, 1807.0]"


In [28]:
test.iloc[0].pt_position

array([259891., 165404.,   1408.])

# Create cell_type_local table

In [29]:
df = pd.merge(annoDf, cbDf, on=['dbcellid', 'x', 'y', 'z'], how="left")

In [31]:
# upload random annotations
nrow = df.shape[0]
pt_position = df.apply(lambda row : np.array([row['x'], row['y'], row['z']]), axis=1)
classification_system = df.apply(lambda row: ', '.join([str(row['dbsubcelltype']), str(row['dbsubcelltype2']), str(row['dbsubcelltype3'])]), axis=1)

cellData = pd.DataFrame.from_dict({
    'valid': np.repeat(True, nrow),
    'classification_system': classification_system,
    'cell_type': df.dbcelltype.tolist(),
    'pt_position': pt_position,
})

In [32]:
cellData

Unnamed: 0,valid,classification_system,cell_type,pt_position
0,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
1,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
2,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
3,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
4,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
...,...,...,...,...
49372,True,"0, 0, 0",ASTROCYTE,"[235985, 191385, 1921]"
49373,True,"0, 0, 0",ASTROCYTE,"[205304, 143482, 5249]"
49374,True,"16, 0, 0",UNCLASSIFIEDNEURON,"[319163, 249034, 131]"
49375,True,"0, 0, 0",MG_OPC,"[267462, 136502, 2691]"


In [102]:
cellData.to_hdf('../data/dbcells_dump_table_data.h5', key='data', mode='w')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['classification_system', 'cell_type', 'pt_position'], dtype='object')]

  cellData.to_hdf('../data/dbcells_dump_table_data.h5', key='data', mode='w')


# CAVEclient

In [33]:
url = "https://global.brain-wire-test.org/"
datastack = "h01_c3_flat"
token = "3ac8d9f0de1b192dba9c085114f0f811"

In [34]:
client = CAVEclient(datastack_name=datastack, server_address=url, auth_token=token)

In [35]:
client.info.get_datastacks()

['h01_c3_flat']

In [36]:
# get existing tables
all_tables = client.annotation.get_tables()
all_tables

['nucleus_test_table2',
 'nucleus_test_table',
 'nucleus_table',
 'dbcells_dump',
 'test_table',
 'test_table2']

## Upload nucleus data

In [85]:
import time
chunk_size = 10000  # Number of rows per chunk
i = 40000 # 0 

# for i in range(start, len(data), chunk_size):
chunk = data.iloc[i:i+chunk_size]
client.annotation.post_annotation_df(table_name='nucleus_table',
                                     df = chunk,
                                     position_columns=["pt_position", "bb_start_position", "bb_end_position"])
#     time.sleep(10)

HTTPError: 500 Server Error: INTERNAL SERVER ERROR for url: https://local.brain-wire-test.org/annotation/api/v2/aligned_volume/h01/table/nucleus_table/annotations content:b'{"message": "Internal Server Error"}\n'

## Upload annotation data

In [93]:
cellData.head()

Unnamed: 0,valid,classification_system,cell_type,pt_position
0,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
1,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
2,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
3,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"
4,True,"0, 0, 0",UNKNOWN0,"[0, 0, 0]"


In [95]:
cellData.tail()

Unnamed: 0,valid,classification_system,cell_type,pt_position
49372,True,"0, 0, 0",ASTROCYTE,"[235985, 191385, 1921]"
49373,True,"0, 0, 0",ASTROCYTE,"[205304, 143482, 5249]"
49374,True,"16, 0, 0",UNCLASSIFIEDNEURON,"[319163, 249034, 131]"
49375,True,"0, 0, 0",MG_OPC,"[267462, 136502, 2691]"
49376,True,"0, 0, 0",INTERNEURON,"[391015, 90796, 3303]"


In [94]:
cellData.shape

(49377, 4)

In [100]:
import time
chunk_size = 10000  # Number of rows per chunk
i = 40000 # 0
# start = 0

# for i in range(start, len(data), chunk_size):
chunk = cellData.iloc[i:i+chunk_size]
client.annotation.post_annotation_df(table_name='dbcells_dump',
                                         df = chunk,
                                         position_columns=["pt_position"])
#     time.sleep(5)

HTTPError: 500 Server Error: INTERNAL SERVER ERROR for url: https://local.brain-wire-test.org/annotation/api/v2/aligned_volume/h01/table/dbcells_dump/annotations content:b'{"message": "Internal Server Error"}\n'

## Get Data

In [78]:
client.annotation.get_annotation(table_name="nucleus_table",
                                 annotation_ids=[10003])

[{'bb_start_position': [264768, 159232, 0],
  'valid': True,
  'pt_position': [265650, 160169, 0],
  'volume': 1247.922880512,
  'bb_end_position': [266559, 161535, 271],
  'id': 10003,
  'created': '2023-05-25 18:32:25.244568',
  'deleted': 'None',
  'superceded_id': None}]

In [101]:
client.annotation.get_annotation(table_name="dbcells_dump",
                                 annotation_ids=[1])

[{'valid': True,
  'pt_position': [0, 0, 0],
  'classification_system': '0, 0, 0',
  'cell_type': 'UNKNOWN0',
  'deleted': 'None',
  'superceded_id': None,
  'created': '2023-05-25 20:37:02.935764',
  'id': 1}]