# Collect Cave Changes

This notebook goes through CAVE-versioned datasets and collects the IDs and number of changes for proofread neurons. This way, we can identify neurons that are likely to differ between proofreading states.

## Imports

In [4]:
from datetime import datetime

import numpy as np
from caveclient import CAVEclient
from fafbseg import flywire
from tqdm import tqdm


## Flywire neurons

In [3]:
# There are two ways to access FlyWire data: via fafbseg's `flywire` module, or via the CAVE client.
# We use caveclient to collect the proofread neurons and fafbseg for most other things.
# fafbseg is only a wrapper around caveclient.

# List the available materialization (annotation) versions and their timestamps
flywire.get_materialization_versions()

Unnamed: 0,is_merged,status,id,datastack,expires_on,valid,version,time_stamp
0,True,AVAILABLE,847,flywire_fafb_public,2121-11-10 07:10:00,True,783,2023-09-30 05:10:00
1,True,AVAILABLE,718,flywire_fafb_public,2121-11-10 07:10:00,True,630,2023-03-21 08:10:00


In [5]:
# Datastack and annotation table used for the FlyWire part of this notebook
FLYWIRE_DATASTACK = 'flywire_fafb_public'
FLYWIRE_PROOFREAD_TABLE = 'proofread_neurons'

# Connect to the public FlyWire datastack through CAVE
client = CAVEclient(FLYWIRE_DATASTACK)

# Fetch the table listing all proofread neurons
proofreads = client.materialize.query_table(FLYWIRE_PROOFREAD_TABLE)

In [8]:
# Inspect the proofread-neuron table returned by the query
proofreads

Unnamed: 0,id,created,superceded_id,valid,pt_supervoxel_id,pt_root_id,pt_position
0,32414,2023-06-19 06:44:57.863498+00:00,,t,79871411195357919,720575940620919646,"[538210, 198330, 105601]"
1,1125,2023-06-19 06:43:33.633089+00:00,,t,77195887289571552,720575940611775973,"[381459, 106763, 171723]"
2,32416,2023-06-19 06:44:57.865882+00:00,,t,78955586729523534,720575940618135198,"[486480, 133840, 123110]"
3,32418,2023-06-19 06:44:57.867936+00:00,,t,79589661206805006,720575940624783287,"[523906, 181893, 72760]"
4,32419,2023-06-19 06:44:57.869497+00:00,,t,79589661207074646,720575940630755276,"[523523, 181919, 80829]"
...,...,...,...,...,...,...,...
139250,104828,2023-06-19 06:48:15.001237+00:00,,t,83952592400822837,720575940626100238,"[776911, 183792, 174548]"
139251,104809,2023-06-19 06:48:14.983796+00:00,,t,84798048190170122,720575940626915216,"[824625, 246096, 194229]"
139252,83362,2023-06-19 06:47:20.709997+00:00,,t,75298954786869572,720575940645731620,"[272447, 288010, 156593]"
139253,104819,2023-06-19 06:48:14.992064+00:00,,t,84658822530652061,720575940629075755,"[815896, 335911, 204215]"


In [13]:
# For a random subset of proofread neurons, count the edits made since materialization 630

# Timestamp of materialization version 630 (taken from the materialization version table)
MAT_630_TIMESTAMP = "2023-03-21 08:10:00"

# Fixed seed so every run draws the same neurons; capped so a short table cannot raise
random_sample = proofreads.sample(min(1000, len(proofreads)), random_state=42)

# Stays None for neurons whose edit history could not be collected
random_sample["n_changes"] = None

# Root ids whose lookup failed, mapped to the exception that was raised
failed_lookups = {}

# pt_root_id is the id in the most recent materialization
# (`root_id` instead of `id`, so the builtin id() is not clobbered in the kernel)
for root_id in tqdm(random_sample["pt_root_id"]):
    try:
        changes = flywire.get_edit_history(root_id)
        # only keep those changes after materialization version 630
        changes = changes[changes["timestamp"] > MAT_630_TIMESTAMP]
        # collect number of edits made during proofreading
        random_sample.loc[random_sample["pt_root_id"] == root_id, "n_changes"] = changes.shape[0]
    except Exception as err:
        # Best effort: one failing neuron must not abort this long-running loop,
        # but record it instead of dropping it silently. KeyboardInterrupt is not
        # an Exception subclass, so the loop can still be interrupted.
        failed_lookups[root_id] = err

if failed_lookups:
    print(f"Could not collect changes for {len(failed_lookups)} of {len(random_sample)} neurons")

100%|██████████| 1000/1000 [24:14<00:00,  1.45s/it] 


In [14]:
# Preview: the 20 sampled neurons with the highest number of changes
random_sample.sort_values(by='n_changes', ascending=False).iloc[:20]

Unnamed: 0,id,created,superceded_id,valid,pt_supervoxel_id,pt_root_id,pt_position,n_changes
18322,132202,2024-01-08 19:46:44.490305+00:00,,t,85292690715072290,720575940622438516,"[852802, 370203, 105531]",139
12945,129717,2024-01-08 19:46:32.728972+00:00,,t,74452949577486536,720575940615359697,"[223600, 193580, 246355]",100
15668,131386,2024-01-08 19:46:33.633990+00:00,,t,73961536599253324,720575940620806736,"[193720, 262284, 244979]",56
76260,88087,2023-06-19 06:47:31.654616+00:00,,t,74876673803551106,720575940619381312,"[248253, 284021, 215554]",33
9776,17524,2023-06-19 06:44:21.121744+00:00,,t,79448855334809885,720575940627978782,"[515237, 177432, 184459]",30
22669,136731,2024-01-08 19:46:55.609024+00:00,,t,83744784703003374,720575940632508687,"[762293, 380349, 169846]",29
14739,129403,2024-01-08 19:46:32.557750+00:00,,t,84730221798024829,720575940613903587,"[820656, 395819, 107228]",29
22022,139417,2024-01-08 19:46:57.071772+00:00,,t,85431091875755951,720575940653344758,"[862174, 231852, 157165]",27
18861,137894,2024-01-08 19:46:56.236613+00:00,,t,84377759534713029,720575940636707951,"[799805, 361537, 85789]",21
106646,3629,2023-06-19 06:43:36.560689+00:00,,t,74805892742647914,720575940614377955,"[244592, 256992, 219667]",20


In [15]:
# Keep id, position and change count for the 20 neurons with the most changes.
# pt_root_id is the root id in the most recent materialization (783), so the
# column is renamed to make the version explicit.
root_ids = (
    random_sample
    .sort_values(by='n_changes', ascending=False)
    .head(20)
    .loc[:, ["pt_root_id", "pt_position", "n_changes"]]
    .rename(columns={"pt_root_id": "root_id_783"})
)

In [16]:
# Inspect the selected neurons (root id at materialization 783, position, edit count)
root_ids

Unnamed: 0,root_id_783,pt_position,n_changes
18322,720575940622438516,"[852802, 370203, 105531]",139
12945,720575940615359697,"[223600, 193580, 246355]",100
15668,720575940620806736,"[193720, 262284, 244979]",56
76260,720575940619381312,"[248253, 284021, 215554]",33
9776,720575940627978782,"[515237, 177432, 184459]",30
22669,720575940632508687,"[762293, 380349, 169846]",29
14739,720575940613903587,"[820656, 395819, 107228]",29
22022,720575940653344758,"[862174, 231852, 157165]",27
18861,720575940636707951,"[799805, 361537, 85789]",21
106646,720575940614377955,"[244592, 256992, 219667]",20


In [17]:
# pt_position represents the coordinates of soma center in nm values
# We collect positions of soma to look up corresponding root id in older materialization.
# Each entry is one [x, y, z] triple; reshape(-1, 3) lets numpy infer the number of rows,
# so this also works when fewer than 20 neurons were selected.
positions = np.concatenate(root_ids["pt_position"].values).reshape(-1, 3)

# Look up the segment found at each position at materialization 630
old_root_ids = flywire.locs_to_segments(positions, coordinates='nm', timestamp='mat_630')

# set root ids of old materialization
root_ids["root_id_630"] = old_root_ids

In [20]:
# Save the selected FlyWire neurons (root ids for both materializations, position, edit count)
root_ids.to_csv('root_ids_proofread_neurons_02.csv')

## Microns Neurons

We access MICrONS neurons only via caveclient.

In [5]:
# Connect to the public MICrONS (minnie65) datastack.
# Note: this rebinds `client`, which previously pointed at the FlyWire datastack.
client = CAVEclient('minnie65_public')

In [7]:
# Collect proofread neurons
proofreads = client.materialize.query_table('proofreading_status_and_strategy', split_positions=True)

# To improve runtime, we look at 100 random neurons.
# Fixed seed so every run draws the same neurons; capped so a short table cannot raise.
proofreads = proofreads.sample(min(100, len(proofreads)), random_state=42)

# Filled in by the change-count loop further down
proofreads["n_changes"] = None

# pt_root_id represents the id in the most recent materialization
proofread_ids = list(proofreads["pt_root_id"].values)

Table Owner Notice on proofreading_status_and_strategy: NOTE: this table supercedes 'proofreading_status_public_release'. For more details, see: www.microns-explorer.org/manifests/mm3-proofreading.


In [110]:
# For each root id, count the changes logged after the cutoff below and store the
# count in the proofreads dataframe.

# Unix timestamp for 2021-08-01 00:00:01 UTC. v117 materialization was released July 2021.
# NOTE(review): confirm the unit of the change log's "timestamp" column (seconds vs.
# milliseconds); this cutoff is expressed in seconds.
V117_CUTOFF_UNIX = 1627776001

# `root_id` instead of `id`, so the builtin id() is not clobbered in the kernel
for root_id in tqdm(proofread_ids):
    # The change log is returned keyed by root id
    changes = client.chunkedgraph.get_tabular_change_log(root_id)[root_id]
    changes = changes[changes["timestamp"] > V117_CUTOFF_UNIX]
    proofreads.loc[proofreads["pt_root_id"] == root_id, "n_changes"] = changes.shape[0]

100%|██████████| 100/100 [07:33<00:00,  4.54s/it]


In [111]:
# Examine results: sampled neurons ordered from most to fewest changes
proofreads.sort_values('n_changes', ascending=False)

Unnamed: 0,id,created,superceded_id,valid,pt_position_x,pt_position_y,pt_position_z,valid_id,status_dendrite,status_axon,strategy_dendrite,strategy_axon,pt_supervoxel_id,pt_root_id,n_changes
51,2,2024-06-03 19:45:52.502332+00:00,,t,190027,121508,20685,864691135335733481,t,t,dendrite_extended,axon_interareal,90924527114284243,864691135335733481,2377
54,5,2024-06-03 19:45:52.504877+00:00,,t,344768,115350,19786,864691135952122147,t,t,dendrite_extended,axon_interareal,112175063088115757,864691135952122147,1714
2,13,2024-06-03 19:45:52.511548+00:00,,t,303659,166262,17349,864691135082864887,t,t,dendrite_extended,axon_interareal,106552435166206532,864691135082864887,1183
6,17,2024-06-03 19:45:52.515434+00:00,,t,174921,137614,21123,864691136144674612,t,t,dendrite_extended,axon_interareal,88815663879226337,864691136144674612,1114
14,25,2024-06-03 19:45:52.521544+00:00,,t,187568,113216,21732,864691135488888378,t,t,dendrite_extended,axon_column_truncated,90571584016011523,864691135488888378,979
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1182,2024-06-03 19:45:53.309841+00:00,,t,173872,207776,22584,864691135915757926,t,f,dendrite_extended,none,88684341160399894,864691135915757926,44
889,1209,2024-06-03 19:45:53.328668+00:00,,t,177616,212304,22301,864691136119007512,t,f,dendrite_extended,none,89177540778107458,864691136119007512,35
891,1211,2024-06-03 19:45:53.329991+00:00,,t,188240,205552,20655,864691135919026096,t,f,dendrite_extended,none,90654390851161771,864691135919026096,34
205,1242,2024-06-03 19:45:53.350562+00:00,,t,169264,265936,21424,864691135135409433,t,f,dendrite_extended,none,88058856282265578,864691135135409433,20


In [112]:
# Keep ids, soma position and change count for the 20 neurons with the most changes.
# pt_root_id is renamed so the column states which materialization it belongs to.
root_ids = (
    proofreads
    .sort_values(by='n_changes', ascending=False)
    .head(20)
    .loc[:, ["pt_root_id", "pt_supervoxel_id", "pt_position_x", "pt_position_y", "pt_position_z", "n_changes"]]
    .rename(columns={"pt_root_id": "root_id_1078"})
)

In [135]:
# During proofreading, pt_root_id changes, but supervoxel ids remain fixed. We therefore want to look up the root id corresponding
# to the soma center supervoxel in the oldest materialization. For more info, see https://www.biorxiv.org/content/10.1101/2023.07.26.550598v1

# Point in time at which the root id of each supervoxel is looked up.
# NOTE(review): the change-count cell uses a 2021-08-01 cutoff for v117 while this
# lookup uses 2021-06-01. Confirm which date matches materialization 117.
V117_LOOKUP_TIME = datetime(year=2021, month=6, day=1)

root_ids_117 = [
    client.chunkedgraph.get_root_id(supervoxel_id, timestamp=V117_LOOKUP_TIME)
    for supervoxel_id in tqdm(root_ids['pt_supervoxel_id'])
]

# Assign the whole column at once as int64. Filling a new column row by row via .loc
# starts it out NaN-filled (float64), and float64 cannot represent every 64-bit id exactly.
root_ids["root_id_117"] = np.asarray(root_ids_117, dtype=np.int64)

100%|██████████| 20/20 [00:05<00:00,  3.70it/s]


In [138]:
# Inspect the selected neurons with their root ids for materializations 1078 and 117
root_ids

Unnamed: 0,root_id_1078,pt_supervoxel_id,pt_position_x,pt_position_y,pt_position_z,n_changes,root_id_117
51,864691135335733481,90924527114284243,190027,121508,20685,2377,864691135660642800
54,864691135952122147,112175063088115757,344768,115350,19786,1714,864691135952122147
2,864691135082864887,106552435166206532,303659,166262,17349,1183,864691135609594119
6,864691136144674612,88815663879226337,174921,137614,21123,1114,864691135293126156
14,864691135488888378,90571584016011523,187568,113216,21732,979,864691135925753358
19,864691136023889209,87904100087286054,168181,161960,21511,935,864691136023889209
32,864691136990522517,93177907207497424,206577,133261,18640,838,864691136008689326
36,864691136137834877,106053051533554699,300576,115616,23080,788,864691135415666362
1316,864691135123617831,95641569704922650,224336,138864,22805,752,864691135502241717
48,864691136023980601,105004461172496411,292612,167390,24276,732,864691136023980601


In [141]:
# Save the selected MICrONS neurons (root ids for both materializations, supervoxel id, position, edit count)
root_ids.to_csv("root_ids_proofread_microns.csv")