# MICrONS NWB co-registration and visualization

This notebook uses the `dandi`-hosted [MICrONS functional data](https://dandiarchive.org/dandiset/000402/draft) and `bossdb`-hosted [MICrONS structural data](https://bossdb.org/microns/minnie) to examine and visualize the co-registered cells.


## Data Access

Here we use the DANDI client, CAVE client, and CloudVolume client to establish access to the functional data, annotation data, and structural data respectively. 

In [163]:
from dandi.dandiapi import DandiAPIClient
from caveclient import CAVEclient
from cloudvolume import CloudVolume

from fsspec.implementations.cached import CachingFileSystem
from fsspec import filesystem
from h5py import File
from pynwb import NWBHDF5IO
from pynwb.file import NWBFile


from tqdm import tqdm
import pandas as pd

In [4]:
cave = CAVEclient("minnie65_phase3_v1")

### Sub-task

Find the best NWB file to use

In [5]:
cave.materialize.get_tables()

['synapses_pni_2',
 'nucleus_detection_v0',
 'allen_minnie_extra_types',
 'aibs_soma_nuc_metamodel_preds_v117',
 'bodor_pt_target_proofread',
 'layer5_it_proofreading',
 'allen_v1_column_thalamic',
 'baylor_gnn_cell_type_fine_model_v2',
 'apl_functional_coreg',
 'nucleus_alternative_points',
 'allen_column_mtypes_v2',
 'column_atype',
 'connectivity_groups_v507',
 'new_table_test',
 'proofreading_functional_coreg_v2',
 'proofreading_functional_coreg',
 'bodor_pt_cells',
 'functional_coreg',
 'proofreading_apl_multisoma',
 'aibs_metamodel_mtypes_v661_v2',
 'proofreading_status_public_release',
 'allen_column_l5it_types',
 'allen_v1_column_types_slanted',
 'baylor_e_i_model_v1',
 'allen_column_mtypes_v1',
 'allen_v1_column_types_slanted_ref',
 'aibs_column_nonneuronal_ref',
 'nucleus_ref_neuron_svm',
 'apl_functional_coreg_v2',
 'coregistration_manual',
 'aibs_soma_nuc_exc_mtype_preds_v117',
 'baylor_log_reg_cell_type_coarse_v1',
 'apl_functional_coreg_forward_v3',
 'coregistration_manua

In [6]:
coreg = cave.materialize.query_table("apl_functional_coreg_forward_v5")

In [37]:
groups = coreg.groupby(['session', 'scan_idx'])
print("(session, scan) n_matches  n_seg_ids")
for key, item in groups:
    print(key, item.shape[0], item['pt_root_id'].unique().shape[0])

(session, scan) n_matches  n_seg_ids
(4, 7) 3499 3441
(4, 9) 3537 3466
(4, 10) 4291 4206
(5, 3) 3564 3507
(5, 6) 3886 3831
(5, 7) 3707 3654
(6, 2) 3634 3605
(6, 4) 3692 3661
(6, 6) 3515 3472
(6, 7) 3614 3579
(7, 3) 3585 3547
(7, 4) 5441 5385
(7, 5) 3068 3038
(8, 5) 4301 3872
(8, 7) 3020 2673
(8, 9) 3204 2809
(9, 3) 3514 2848
(9, 4) 3494 2804
(9, 6) 2307 2305


Session 7 scan 4 has the highest amount of matches

In [59]:
coreg.head()


Unnamed: 0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,pt_root_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position
0,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,67007,2023-04-06 15:05:50.491194+00:00,t,255686,9,6,1913,2,8.941486,4.206369,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
1,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,65810,2023-04-06 15:05:49.502144+00:00,t,255686,9,4,7548,6,9.380808,-5.165012,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
2,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,54982,2023-04-06 15:05:15.938520+00:00,t,255686,8,7,4778,4,3.763804,5.341737,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
3,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,46412,2023-04-06 15:04:43.197444+00:00,t,255686,7,5,1946,2,4.071072,3.306988,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
4,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,11823,2023-04-06 15:03:01.038836+00:00,t,255686,5,3,1994,2,5.419972,6.644126,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"


### Subtask 3: Explore NWB File

In [39]:
dandiset_id = "000402"
file_path = "sub-17797/sub-17797_ses-7-scan-4_behavior+image+ophys.nwb" # file size ~67GB

# Get the location of the file on DANDI
with DandiAPIClient() as client:
    asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(file_path)
    s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)

In [279]:
# Create a virtual filesystem based on the http protocol and use caching to save accessed data to RAM.
fs = filesystem("http")
file_system = fs.open(s3_url, "rb")
file = File(file_system, mode="r")
# Open the file with NWBHDF5IO
io = NWBHDF5IO(file=file, load_namespaces=True)

microns_data = io.read()

KeyboardInterrupt: 

In [None]:
microns_data

### Update co-registration to automated table

In [61]:
image_segmentation = microns_data.processing["ophys"].data_interfaces["ImageSegmentation"]
image_segmentation

In [72]:
ps4 = image_segmentation["PlaneSegmentation4"][:]

In [73]:
ps4

Unnamed: 0_level_0,image_mask,mask_type,cave_ids,pt_supervoxel_id,pt_root_id,pt_x_position,pt_y_position,pt_z_position
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",artifact,[nan],,,,,
2,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",artifact,[nan],,,,,
3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,
4,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,
5,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,
...,...,...,...,...,...,...,...,...
1619,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[8595.0],1.064055e+17,8.646911e+17,303041.0,120166.0,16756.0
1620,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[5129.0],9.015143e+16,8.646911e+17,184608.0,128560.0,25525.0
1621,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,
1622,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",artifact,[nan],,,,,


In [75]:
scan_unit = pd.read_pickle("ScanUnit.pkl")

In [125]:
session_units = scan_unit[(scan_unit['session']==7) & (scan_unit['scan_idx']==4) & (scan_unit['field'] == 4)]
#session_units = scan_unit[(scan_unit['session']==7) & (scan_unit['scan_idx']==4)]

In [120]:
ps4['mask_id']=ps4.index

In [126]:
print(session_units.shape)
print(ps4.shape)

(1623, 11)
(1623, 9)


In [130]:
ps4_units = ps4.merge(session_units, on='mask_id', how='left')

In [131]:
ps4_units

Unnamed: 0,image_mask,mask_type,cave_ids,pt_supervoxel_id,pt_root_id,pt_x_position,pt_y_position,pt_z_position,mask_id,session,scan_idx,unit_id,field,um_x,um_y,um_z,px_x,px_y,ms_delay
0,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",artifact,[nan],,,,,,1,7,4,4680,4,-268,-788,200,3,3,58
1,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",artifact,[nan],,,,,,2,7,4,4681,4,-189,-789,200,34,2,58
2,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,,3,7,4,4682,4,-159,-765,200,46,12,59
3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,,4,7,4,4683,4,-109,-779,200,66,6,58
4,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,,5,7,4,4684,4,-126,-772,200,59,9,58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1618,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[8595.0],1.064055e+17,8.646911e+17,303041.0,120166.0,16756.0,1619,7,4,6298,4,-118,-529,200,63,106,62
1619,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[5129.0],9.015143e+16,8.646911e+17,184608.0,128560.0,25525.0,1620,7,4,6299,4,263,85,200,215,352,73
1620,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,,1621,7,4,6300,4,223,61,200,199,342,72
1621,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",artifact,[nan],,,,,,1622,7,4,6301,4,-161,-778,200,46,7,58


In [95]:
ps4.head()

Unnamed: 0_level_0,image_mask,mask_type,cave_ids,pt_supervoxel_id,pt_root_id,pt_x_position,pt_y_position,pt_z_position,mask_id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",artifact,[nan],,,,,,1
2,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",artifact,[nan],,,,,,2
3,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,,3
4,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,,4
5,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",soma,[nan],,,,,,5


In [88]:
coreg.head()

Unnamed: 0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,pt_root_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position
0,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,67007,2023-04-06 15:05:50.491194+00:00,t,255686,9,6,1913,2,8.941486,4.206369,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
1,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,65810,2023-04-06 15:05:49.502144+00:00,t,255686,9,4,7548,6,9.380808,-5.165012,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
2,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,54982,2023-04-06 15:05:15.938520+00:00,t,255686,8,7,4778,4,3.763804,5.341737,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
3,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,46412,2023-04-06 15:04:43.197444+00:00,t,255686,7,5,1946,2,4.071072,3.306988,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
4,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,11823,2023-04-06 15:03:01.038836+00:00,t,255686,5,3,1994,2,5.419972,6.644126,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"


In [132]:
coreg_units = coreg[(coreg['session']==7) & (coreg['scan_idx']==4) & (coreg['field'] == 4)]


In [133]:
coreg_units.head()

Unnamed: 0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,pt_root_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position
111,294576,2020-09-28 22:44:55.293087+00:00,t,296.565309,90644426661199216,864691135272867601,42488,2023-04-06 15:04:39.942627+00:00,t,294576,7,4,5439,4,5.631002,5.42291,"[188320, 131408, 21605]","[nan, nan, nan]","[nan, nan, nan]"
239,259052,2020-09-28 22:45:19.613777+00:00,t,375.51489,87970071188074044,864691135737785220,42575,2023-04-06 15:04:40.013451+00:00,t,259052,7,4,5546,4,8.242632,5.887168,"[168752, 128832, 24985]","[nan, nan, nan]","[nan, nan, nan]"
504,155142,2020-09-28 22:44:32.830760+00:00,t,261.975081,82060333090743602,864691136518542052,43231,2023-04-06 15:04:40.543685+00:00,t,155142,7,4,6302,4,5.051677,6.651311,"[125744, 137936, 20829]","[nan, nan, nan]","[nan, nan, nan]"
521,155050,2020-09-28 22:42:21.332498+00:00,t,241.485742,82552914165610309,864691135257139887,42882,2023-04-06 15:04:40.260941+00:00,t,155050,7,4,5903,4,13.207301,0.91806,"[129488, 138224, 19615]","[nan, nan, nan]","[nan, nan, nan]"
528,155307,2020-09-28 22:41:36.928466+00:00,t,294.244188,82552502117044761,864691136108732088,42631,2023-04-06 15:04:40.059032+00:00,t,155307,7,4,5615,4,3.807639,5.660515,"[129168, 135152, 21555]","[nan, nan, nan]","[nan, nan, nan]"


In [137]:
coreg_units.columns


Index(['id_ref', 'created_ref', 'valid_ref', 'volume', 'pt_supervoxel_id',
       'pt_root_id', 'id', 'created', 'valid', 'target_id', 'session',
       'scan_idx', 'unit_id', 'field', 'residual', 'score', 'pt_position',
       'bb_start_position', 'bb_end_position'],
      dtype='object')

In [134]:
coreg_match = ps4_units.merge(coreg_units, on='unit_id')

In [136]:
coreg_match.columns

Index(['image_mask', 'mask_type', 'cave_ids', 'pt_supervoxel_id_x',
       'pt_root_id_x', 'pt_x_position', 'pt_y_position', 'pt_z_position',
       'mask_id', 'session_x', 'scan_idx_x', 'unit_id', 'field_x', 'um_x',
       'um_y', 'um_z', 'px_x', 'px_y', 'ms_delay', 'id_ref', 'created_ref',
       'valid_ref', 'volume', 'pt_supervoxel_id_y', 'pt_root_id_y', 'id',
       'created', 'valid', 'target_id', 'session_y', 'scan_idx_y', 'field_y',
       'residual', 'score', 'pt_position', 'bb_start_position',
       'bb_end_position'],
      dtype='object')

In [93]:
coreg_match.iloc[50]

image_mask            [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...
mask_type                                                          soma
cave_ids                                                       [6752.0]
pt_supervoxel_id_x                                 114145319742642592.0
pt_root_id_x                                       864691136378803712.0
pt_x_position                                                  359278.0
pt_y_position                                                  114596.0
pt_z_position                                                   24024.0
mask_id                                                              57
session_x                                                             7
scan_idx_x                                                            4
unit_id                                                            4736
field_x                                                               4
um_x                                                            

In [100]:
microns_data

In [182]:
dfs = update_microns_nwb_file(microns_data, used_cache_coregistration_table=True)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [01:04<00:00,  8.08s/it]


In [190]:
len(dfs)

8

In [192]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations

{'PlaneSegmentation1': PlaneSegmentation1 pynwb.ophys.PlaneSegmentation at 0x5141868880
 Fields:
   colnames: ['image_mask' 'mask_type']
   columns: (
     image_mask <class 'hdmf.common.table.VectorData'>,
     mask_type <class 'hdmf.common.table.VectorData'>
   )
   description: The output from segmenting field 1 contains the image masks (weights and mask classification) and the structural ids extracted from the CAVE database on 2023-02-13. To access the latest revision from the live resource see the notebook that is linked to the dandiset. The structual ids might not exist for all plane segmentations.
   id: id <class 'hdmf.common.table.ElementIdentifiers'>
   imaging_plane: ImagingPlane1 pynwb.ophys.ImagingPlane at 0x5142068752
 Fields:
   conversion: 1.0
   description: The imaging plane for field 1 at 0.00013 meters depth.
   device: Microscope pynwb.device.Device at 0x5142088592
 Fields:
   description: two-photon random access mesoscope
 
   excitation_lambda: 920.0
   imaging_

In [219]:
#microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations.pop("PlaneSegmentation1")

In [256]:
from pynwb.ophys import PlaneSegmentation

In [257]:
def create_new_plane_segmentation(old, df, descriptions):
    ps = PlaneSegmentation(
        name=old.name, 
        description=old.description, 
        imaging_plane=old.imaging_plane,
        id=df.index.tolist()
    )
    
    for col in df.columns:
        if col in old.colnames:
            old_col = find_column_by_name(old, col)
            ps.add_column(name=old_col.name, description=old_col.description, data=df[col].tolist())
        else:
            ps.add_column(name=col, description=descriptions[col], data=df[col].to_numpy())
    return ps
        

def find_column_by_name(table,col_name):
    for c in table.columns:
        if c.name == col_name:
            return c

In [225]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations["PlaneSegmentation2"].colnames

('image_mask',
 'mask_type',
 'cave_ids',
 'pt_supervoxel_id',
 'pt_root_id',
 'pt_x_position',
 'pt_y_position',
 'pt_z_position')

In [204]:
old_ps1 = microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations["PlaneSegmentation1"]

In [205]:
ps1 = PlaneSegmentation(name=old_ps1.name, description=old_ps1.description, imaging_plane=old_ps1.imaging_plane)

In [213]:
old_col = find_column_by_name(old_ps1, "image_mask")
old_col.description

'The image masks for each ROI.'

In [None]:
# for each col in your new df;
#     if new df column existed in old dataframe,  useold description
#     otherwise, make a new description
#     ps1.add_column(name=..., description=..., data=...)
    # numpy array or list

In [None]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].add(ps1)

In [249]:
description = {x: "Placeholder" for x in dfs[1].columns}

In [250]:
old_ps2 = microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations["PlaneSegmentation2"]
ps = create_new_plane_segmentation(old_ps2, dfs[1], description)

In [251]:
ps

In [241]:
len(dfs[1])

1311

In [None]:
dfs[1]

In [274]:
def update_microns_nwb_file(
    nwb: NWBFile,
    coregistration_table="apl_functional_coreg_forward_v5",
    scan_unit_path="./ScanUnit.pkl",
    add_scan_units_to_nwb=True,
    used_cache_coregistration_table=False,
    cache_coregistration_table_path= "./apl_functional_coreg_forward_v5.pkl",
    
):
    if used_cache_coregistration_table:
        coreg = pd.read_pickle(cache_coregistration_table_path)
    else:
        cave = CAVEclient("minnie65_phase3_v1")
        coreg = cave.materialize.query_table(coregistration_table)
        
    session, scan_idx = int(nwb.session_id.split('-')[0]), int(nwb.session_id.split('-')[2])
    scan_units = pd.read_pickle(scan_unit_path)
    scan_units = scan_unit[(scan_unit['session']==session) & (scan_unit['scan_idx']==scan_idx)]
    
    image_segmentation = nwb.processing["ophys"].data_interfaces["ImageSegmentation"]
    
    all_ps = list(image_segmentation.plane_segmentations)
    for ps_name in tqdm(all_ps):
        
        ps = image_segmentation.plane_segmentations.pop(ps_name)
        field = int(ps_name[-1])
        field_scan_units = scan_units[scan_units['field'] == field]
        ps_df = ps[:]
        ps_df['mask_id'] = ps_df.index
        ps_df_with_units = ps_df.merge(field_scan_units, on='mask_id', how='left').drop(columns=[
            'mask_id', 'session', 'scan_idx', 'field'
        ])
        
        coreg_units = coreg[
            (coreg['session']==session) & 
            (coreg['scan_idx']==scan_idx) & 
            (coreg['field'] == field)
        ][['target_id', 'unit_id']]
        
        if len(coreg_units):
            ps_df_with_units = ps_df_with_units.merge(coreg_units, on='unit_id').rename(
                columns={
                    'target_id': 'auto_match_cave_nuclei_id', 
                    'cave_ids': 'manual_match_cave_nuclei_id'
                }
            )
        
        description = {x: "Placeholder" for x in ps_df_with_units.columns}
        new_ps = create_new_plane_segmentation(ps, ps_df_with_units, description)
        image_segmentation.plane_segmentations.add(new_ps)
        
    return nwb

    

In [None]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"]

In [275]:
dfs = update_microns_nwb_file(microns_data, used_cache_coregistration_table=True)

 75%|████████████████████████████████████████████████████████████████████████████████████████████▎                              | 6/8 [00:32<00:10,  5.33s/it]


KeyError: 'unit_id'