# MICrONS NWB co-registration and visualization

This notebook uses the `dandi`-hosted [MICrONS functional data](https://dandiarchive.org/dandiset/000402/draft) and `bossdb`-hosted [MICrONS structural data](https://bossdb.org/microns/minnie) to examine and visualize the co-registered cells.


## Data Access

Here we use the DANDI client, CAVE client, and CloudVolume client to establish access to the functional data, annotation data, and structural data respectively. 

In [1]:
from dandi.dandiapi import DandiAPIClient
from caveclient import CAVEclient
from cloudvolume import CloudVolume

from fsspec.implementations.cached import CachingFileSystem
from fsspec import filesystem
from h5py import File
from pynwb import NWBHDF5IO
from pynwb.file import NWBFile


from tqdm import tqdm
import pandas as pd

import pynapple as nap


In [None]:
cave = CAVEclient("minnie65_phase3_v1")

### Sub-task

Find the best NWB file to use

In [3]:
cave.materialize.get_tables()

['allen_column_bodor_surround_inhib',
 'cg_cell_type_calls',
 'allen_v1_column_pyc_proof',
 'func_unit_em_match_release',
 'allen_class_type_svm_v0',
 'allen_subclass_type_svm_v0',
 'allen_v1_column_proofreading',
 'l5_mc_cg',
 'allen_soma_coarse_cell_class_model_v1',
 'synapses_pni_2',
 'nucleus_detection_v0',
 'allen_minnie_extra_types',
 'aibs_soma_nuc_metamodel_preds_v117',
 'bodor_pt_target_proofread',
 'layer5_it_proofreading',
 'aibs_metamodel_mtypes_v661',
 'allen_v1_column_thalamic',
 'baylor_gnn_cell_type_fine_model_v2',
 'apl_functional_coreg',
 'nucleus_alternative_points',
 'allen_column_mtypes_v2',
 'column_atype',
 'connectivity_groups_v507',
 'new_table_test',
 'proofreading_functional_coreg_v2',
 'proofreading_functional_coreg',
 'bodor_pt_cells',
 'functional_coreg',
 'proofreading_apl_multisoma',
 'aibs_metamodel_mtypes_v661_v2',
 'proofreading_status_public_release',
 'allen_column_l5it_types',
 'allen_v1_column_types_slanted',
 'baylor_e_i_model_v1',
 'allen_column

In [4]:
coreg = cave.materialize.query_table("apl_functional_coreg_forward_v5")

In [5]:
# Summarize the coregistration table per (session, scan): the number of match
# rows and the number of distinct segment (pt_root_id) values they cover.
groups = coreg.groupby(['session', 'scan_idx'])
print("(session, scan) n_matches  n_seg_ids")
for key, item in groups:
    # A segment ID can appear in several rows, so count unique values separately.
    print(key, len(item), len(item['pt_root_id'].unique()))

(session, scan) n_matches  n_seg_ids
(4, 7) 3499 3441
(4, 9) 3537 3466
(4, 10) 4291 4206
(5, 3) 3564 3507
(5, 6) 3886 3831
(5, 7) 3707 3654
(6, 2) 3634 3605
(6, 4) 3692 3661
(6, 6) 3515 3472
(6, 7) 3614 3579
(7, 3) 3585 3547
(7, 4) 5441 5385
(7, 5) 3068 3038
(8, 5) 4301 3872
(8, 7) 3020 2673
(8, 9) 3204 2809
(9, 3) 3514 2848
(9, 4) 3494 2804
(9, 6) 2307 2305


Session 7, scan 4 has the highest number of matches (5441 matches covering 5385 unique segment IDs), so we use that scan's NWB file below.

In [6]:
coreg.head()


Unnamed: 0,id_ref,created_ref,valid_ref,volume,pt_supervoxel_id,pt_root_id,id,created,valid,target_id,session,scan_idx,unit_id,field,residual,score,pt_position,bb_start_position,bb_end_position
0,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,67007,2023-04-06 15:05:50.491194+00:00,t,255686,9,6,1913,2,8.941486,4.206369,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
1,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,65810,2023-04-06 15:05:49.502144+00:00,t,255686,9,4,7548,6,9.380808,-5.165012,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
2,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,54982,2023-04-06 15:05:15.938520+00:00,t,255686,8,7,4778,4,3.763804,5.341737,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
3,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,46412,2023-04-06 15:04:43.197444+00:00,t,255686,7,5,1946,2,4.071072,3.306988,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"
4,255686,2020-09-28 22:40:42.632533+00:00,t,297.846047,88954888800920543,864691135568539372,11823,2023-04-06 15:03:01.038836+00:00,t,255686,5,3,1994,2,5.419972,6.644126,"[175760, 126480, 15504]","[nan, nan, nan]","[nan, nan, nan]"


### Subtask 3: Explore NWB File

In [7]:
# Dandiset and asset to stream: session 7, scan 4 (the file is ~67GB).
dandiset_id = "000402"
file_path = "sub-17797/sub-17797_ses-7-scan-4_behavior+image+ophys.nwb" # file size ~67GB

# Resolve the asset's content URL from the draft version of the dandiset.
with DandiAPIClient() as client:
    dandiset = client.get_dandiset(dandiset_id, 'draft')
    asset = dandiset.get_asset_by_path(file_path)
    # Follow the redirect and drop the query string to get a plain URL.
    s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)

In [None]:
# Stream the NWB file over HTTP rather than downloading it in full.
# Wrapping the HTTP filesystem in a CachingFileSystem stores the byte ranges
# that get read in a local "nwb-cache" folder (on disk, not in RAM), so
# re-reading the same data does not go back to the network.
fs = CachingFileSystem(
    fs=filesystem("http"),
    cache_storage="nwb-cache",  # local folder holding the cached blocks
)
file_system = fs.open(s3_url, "rb")
file = File(file_system, mode="r")
# Open the file with NWBHDF5IO
io = NWBHDF5IO(file=file, load_namespaces=True)

# The handles above are intentionally left open: later cells keep reading
# from `microns_data`, which is backed by this open file.
microns_data = io.read()

In [None]:
microns_data

### Update co-registration to automated table

In [None]:
image_segmentation = microns_data.processing["ophys"].data_interfaces["ImageSegmentation"]
image_segmentation

In [None]:
ps4 = image_segmentation["PlaneSegmentation4"][:]

In [None]:
ps4

In [None]:
scan_unit = pd.read_pickle("ScanUnit.pkl")

In [None]:
session_units = scan_unit[(scan_unit['session']==7) & (scan_unit['scan_idx']==4) & (scan_unit['field'] == 4)]
#session_units = scan_unit[(scan_unit['session']==7) & (scan_unit['scan_idx']==4)]

In [None]:
ps4['mask_id']=ps4.index

In [None]:
print(session_units.shape)
print(ps4.shape)

In [None]:
ps4_units = ps4.merge(session_units, on='mask_id', how='left')

In [None]:
ps4_units

In [None]:
ps4.head()

In [None]:
coreg.head()

In [None]:
coreg_units = coreg[(coreg['session']==7) & (coreg['scan_idx']==4) & (coreg['field'] == 4)]


In [None]:
coreg_units.head()

In [None]:
coreg_units.columns


In [None]:
coreg_match = ps4_units.merge(coreg_units, on='unit_id')

In [None]:
coreg_match.columns

In [None]:
coreg_match.iloc[50]

In [None]:
microns_data

In [None]:
dfs = update_microns_nwb_file(microns_data, used_cache_coregistration_table=True)

In [None]:
len(dfs)

In [None]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations

In [None]:
#microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations.pop("PlaneSegmentation1")

In [None]:
from pynwb.ophys import PlaneSegmentation

In [None]:
def create_new_plane_segmentation(old, df, descriptions):
    """Build a PlaneSegmentation that mirrors ``old`` but takes its data from ``df``.

    The new object reuses ``old``'s name, description and imaging plane, and
    uses ``df``'s index as its row ids. Every column of ``df`` is added:
    columns that already exist on ``old`` keep their original name and
    description, all others take their description from ``descriptions``.

    Parameters
    ----------
    old : PlaneSegmentation
        Existing plane segmentation used as the template.
    df : pandas.DataFrame
        Table whose index supplies the ids and whose columns supply the data.
    descriptions : dict
        Maps column name to description for columns not present on ``old``.
        A missing key raises ``KeyError``.

    Returns
    -------
    PlaneSegmentation
        The newly created plane segmentation (``old`` is not modified).
    """
    new_segmentation = PlaneSegmentation(
        name=old.name,
        description=old.description,
        imaging_plane=old.imaging_plane,
        id=df.index.tolist(),
    )

    for column_name in df.columns:
        if column_name not in old.colnames:
            # Brand-new column: the caller must supply its description.
            new_segmentation.add_column(
                name=column_name,
                description=descriptions[column_name],
                data=df[column_name].tolist(),
            )
            continue

        # Pre-existing column: carry over the original metadata.
        source_column = find_column_by_name(old, column_name)
        new_segmentation.add_column(
            name=source_column.name,
            description=source_column.description,
            data=df[column_name].tolist(),
        )
    return new_segmentation
        

def find_column_by_name(table,col_name):
    """Return the first column of ``table`` whose ``name`` equals ``col_name``.

    ``table.columns`` is scanned in order; ``None`` is returned when no
    column matches.
    """
    matches = (column for column in table.columns if column.name == col_name)
    return next(matches, None)

In [None]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations["PlaneSegmentation2"].colnames

In [None]:
old_ps1 = microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations["PlaneSegmentation1"]

In [None]:
ps1 = PlaneSegmentation(name=old_ps1.name, description=old_ps1.description, imaging_plane=old_ps1.imaging_plane)

In [None]:
old_col = find_column_by_name(old_ps1, "image_mask")
old_col.description

In [None]:
# For each column in the new DataFrame:
#     if the column existed in the old table, reuse the old description;
#     otherwise, write a new description.
#     ps1.add_column(name=..., description=..., data=...)
#     (data must be a numpy array or a list)

In [None]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].add(ps1)

In [None]:
description = {x: "Placeholder" for x in dfs[1].columns}

In [None]:
old_ps2 = microns_data.processing["ophys"].data_interfaces["ImageSegmentation"].plane_segmentations["PlaneSegmentation2"]
ps = create_new_plane_segmentation(old_ps2, dfs[1], description)

In [None]:
ps

In [None]:
len(dfs[1])

In [None]:
dfs[1]

In [None]:
def update_microns_nwb_file(
    nwb: NWBFile,
    coregistration_table="apl_functional_coreg_forward_v5",
    scan_unit_path="./ScanUnit.pkl",
    add_scan_units_to_nwb=True,
    used_cache_coregistration_table=False,
    cache_coregistration_table_path= "./apl_functional_coreg_forward_v5.pkl",
):
    """Rebuild every plane segmentation in ``nwb`` with unit and coregistration columns.

    For each plane segmentation under ``processing["ophys"]["ImageSegmentation"]``
    the table is popped, joined with the scan-unit table (on ``mask_id``) and,
    when the field has coregistration rows, with the coregistration table (on
    ``unit_id``). A replacement plane segmentation built from the joined table
    is then added back under the same name.

    Parameters
    ----------
    nwb : NWBFile
        File to update in place. ``nwb.session_id`` is split on ``'-'``; the
        first piece is read as the session number and the third as the scan
        index.
    coregistration_table : str
        CAVE table queried when the cached copy is not used.
    scan_unit_path : str
        Pickle file holding the scan-unit table (needs at least the columns
        ``session``, ``scan_idx``, ``field`` and ``mask_id``).
    add_scan_units_to_nwb : bool
        Currently unused; kept so existing callers keep working.
    used_cache_coregistration_table : bool
        When true, load the coregistration table from
        ``cache_coregistration_table_path`` instead of querying CAVE.
    cache_coregistration_table_path : str
        Pickle file holding a cached coregistration table.

    Returns
    -------
    NWBFile
        The same ``nwb`` object, modified in place.
    """
    # SECURITY: pd.read_pickle can execute arbitrary code while loading. Only
    # point the two pickle paths at files you created or otherwise trust.
    if used_cache_coregistration_table:
        coreg = pd.read_pickle(cache_coregistration_table_path)
    else:
        cave = CAVEclient("minnie65_phase3_v1")
        coreg = cave.materialize.query_table(coregistration_table)

    session_parts = nwb.session_id.split('-')
    session, scan_idx = int(session_parts[0]), int(session_parts[2])

    # Filter the table that was just loaded. The previous code filtered a
    # notebook-level `scan_unit` variable, so it ignored `scan_unit_path` and
    # failed on a fresh kernel.
    all_scan_units = pd.read_pickle(scan_unit_path)
    scan_units = all_scan_units[
        (all_scan_units['session'] == session) & (all_scan_units['scan_idx'] == scan_idx)
    ]

    image_segmentation = nwb.processing["ophys"].data_interfaces["ImageSegmentation"]

    # Snapshot the names first: the container is modified inside the loop.
    all_ps = list(image_segmentation.plane_segmentations)
    for ps_name in tqdm(all_ps):

        ps = image_segmentation.plane_segmentations.pop(ps_name)

        # Names look like "PlaneSegmentation<field>"; read every trailing digit
        # so multi-digit field numbers are not truncated to their last digit.
        field_digits = ps_name[len(ps_name.rstrip("0123456789")):]
        field = int(field_digits)

        field_scan_units = scan_units[scan_units['field'] == field]
        ps_df = ps[:]
        ps_df['mask_id'] = ps_df.index
        # Left join keeps every ROI even if it has no scan-unit row. Merging on
        # a column discards the original index, so the result is renumbered.
        ps_df_with_units = ps_df.merge(field_scan_units, on='mask_id', how='left').drop(columns=[
            'mask_id', 'session', 'scan_idx', 'field'
        ])

        coreg_units = coreg[
            (coreg['session'] == session) &
            (coreg['scan_idx'] == scan_idx) &
            (coreg['field'] == field)
        ][['target_id', 'unit_id']]

        if len(coreg_units):
            # NOTE(review): this is an inner join, so ROIs without a
            # coregistration match are dropped from the rebuilt table, and the
            # renames below are skipped entirely for fields with no matches.
            # Confirm that both are intended.
            ps_df_with_units = ps_df_with_units.merge(coreg_units, on='unit_id').rename(
                columns={
                    'target_id': 'auto_match_cave_nuclei_id',
                    'cave_ids': 'manual_match_cave_nuclei_id'
                }
            )

        # TODO: replace the placeholder text with real column descriptions.
        description = {x: "Placeholder" for x in ps_df_with_units.columns}
        new_ps = create_new_plane_segmentation(ps, ps_df_with_units, description)
        image_segmentation.plane_segmentations.add(new_ps)

    return nwb

    

In [None]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"]

In [None]:
dfs = update_microns_nwb_file(microns_data, used_cache_coregistration_table=True)

In [None]:
microns_data.processing["ophys"].data_interfaces["ImageSegmentation"]["PlaneSegmentation5"][:]

In [None]:
import pynapple as nap