# Cell table and cell matching example

In [15]:
import pandas as pd
# Let pandas display up to 500 rows and 500 columns before truncating output.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 500)

from visual_behavior.data_access import loading

## Here is the function to get the cell table.
If you don't pass any arguments, it will get the cell table for all released experiments.  
Optionally, you can pass a list of `ophys_experiment_id`s to get the cell table for a subset of experiments.

In [3]:
# Called with no arguments, so this loads the cell table for all released experiments.
cell_table = loading.get_cell_table()

ophys_session_table.csv: 100%|██████████| 165k/165k [00:00<00:00, 1.32MMB/s]
behavior_session_table.csv: 100%|██████████| 885k/885k [00:00<00:00, 3.47MMB/s] 
ophys_experiment_table.csv: 100%|██████████| 336k/336k [00:00<00:00, 2.27MMB/s]


In [4]:
# Total number of rows in the cell table.
len(cell_table)

92209

In [7]:
# Preview five randomly drawn rows. No `random_state` is passed, so a re-run
# will generally show different rows than the output saved in this notebook.
cell_table.sample(5)

Unnamed: 0,cell_roi_id,cell_specimen_id,ophys_experiment_id,x,y,width,height,valid_roi,mask_matrix,max_correction_up,max_correction_down,max_correction_right,max_correction_left,mask_image_plane,ophys_cell_segmentation_run_id
29671,1080880535,1086671328,885067826,20,378,19,23,True,"[[False, False, False, True, True, False, Fals...",10.0,8.0,6.0,16.0,1,1080726826
55911,1080746547,1086612815,944115804,302,314,15,15,True,"[[False, False, False, False, False, False, Fa...",14.0,7.0,6.0,8.0,0,1080677241
67268,1080750977,1086621772,973927944,419,70,13,17,True,"[[False, False, False, True, True, True, True,...",9.0,18.0,13.0,11.0,0,1080679835
16977,1080875717,1086537660,856096766,205,107,21,24,True,"[[False, False, False, False, False, False, Fa...",15.0,17.0,16.0,30.0,1,1080785783
20440,1080884452,1086666681,875045489,43,461,17,17,True,"[[False, False, False, False, True, False, Fal...",5.0,8.0,8.0,7.0,0,1080772389


## Quick cell matching example
Here's one method for getting matched cells across experiments. We'll simply take the intersection of the sets of `cell_specimen_id`s found in two experiments.  

### First we will get the experiment table

In [8]:
from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache as bpc

# Directory passed to the project cache as its `cache_dir`.
# NOTE(review): this is an absolute path on an internal file system; the cell will
# presumably only run as-is on a machine that has it mounted — confirm before sharing.
data_storage_directory = '/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/production_cache'

# Build the S3-backed project cache rooted at that directory, then read the table
# of ophys experiments from it.
cache = bpc.from_s3_cache(cache_dir=data_storage_directory)
experiment_table = cache.get_ophys_experiment_table()

### Then randomly select one mouse

In [11]:
# Draw one experiment row with a fixed seed (so the pick is repeatable) and
# read the mouse it belongs to.
random_experiment = experiment_table.sample(random_state=0).iloc[0]
mouse_id = random_experiment['mouse_id']

### Then get all experiments for this mouse

In [17]:
# Keep only the selected mouse's experiments, ordered by acquisition date.
is_selected_mouse = experiment_table['mouse_id'] == mouse_id
sessions = experiment_table[is_selected_mouse].sort_values(by='date_of_acquisition')
sessions

Unnamed: 0_level_0,equipment_name,full_genotype,mouse_id,reporter_line,driver_line,sex,age_in_days,cre_line,indicator,session_number,prior_exposures_to_session_type,prior_exposures_to_image_set,prior_exposures_to_omissions,ophys_session_id,behavior_session_id,ophys_container_id,project_code,imaging_depth,targeted_structure,date_of_acquisition,session_type,file_id
ophys_experiment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
792813858,CAM2P.4,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,412366,Ai93(TITL-GCaMP6f),"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,147.0,Slc17a7-IRES2-Cre,GCaMP6f,1.0,0,11.0,1,792327341,792477679,814796612,VisualBehavior,375,VISp,2018-12-10 16:34:08.000000,OPHYS_1_images_A,859689264
794381992,CAM2P.4,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,412366,Ai93(TITL-GCaMP6f),"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,149.0,Slc17a7-IRES2-Cre,GCaMP6f,3.0,0,13.0,3,793857113,794071128,814796612,VisualBehavior,375,VISp,2018-12-12 16:00:43.000000,OPHYS_3_images_A,859689227
795076128,CAM2P.4,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,412366,Ai93(TITL-GCaMP6f),"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,150.0,Slc17a7-IRES2-Cre,GCaMP6f,4.0,0,0.0,4,794474159,794673280,814796612,VisualBehavior,375,VISp,2018-12-13 15:42:47.000000,OPHYS_4_images_B,859689336
795952471,CAM2P.4,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,412366,Ai93(TITL-GCaMP6f),"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,151.0,Slc17a7-IRES2-Cre,GCaMP6f,5.0,0,1.0,5,795217244,795431009,814796612,VisualBehavior,375,VISp,2018-12-14 16:14:12.000000,OPHYS_5_images_B_passive,859685777
796105304,CAM2P.4,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,412366,Ai93(TITL-GCaMP6f),"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,152.0,Slc17a7-IRES2-Cre,GCaMP6f,6.0,0,2.0,6,796019065,796031509,814796612,VisualBehavior,375,VISp,2018-12-15 16:59:41.000000,OPHYS_6_images_B,859681089
797255551,CAM2P.4,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,412366,Ai93(TITL-GCaMP6f),"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,154.0,Slc17a7-IRES2-Cre,GCaMP6f,2.0,1,14.0,7,797078933,797170547,814796612,VisualBehavior,375,VISp,2018-12-17 23:37:12.000000,OPHYS_2_images_A_passive,859689353


### Then select the first two experiments
We will find all matching cells across these two experiments

In [19]:
# `sessions` is sorted by acquisition date, so its first two rows are the two
# earliest experiments; their index labels are the experiment ids to match.
experiment_ids_to_match = sessions.head(2).index
experiment_ids_to_match

Int64Index([792813858, 794381992], dtype='int64', name='ophys_experiment_id')

### Get the `cell_specimen_id`s for both of our experiments of interest, print the counts

In [24]:
# Collect the `cell_specimen_id` column for each of the two experiments being compared.
exp_id_0 = experiment_ids_to_match[0]
exp_id_1 = experiment_ids_to_match[1]
cells_in_exp_0 = cell_table.loc[cell_table['ophys_experiment_id'] == exp_id_0, 'cell_specimen_id']
cells_in_exp_1 = cell_table.loc[cell_table['ophys_experiment_id'] == exp_id_1, 'cell_specimen_id']

# Report how many cells each experiment contributed.
count_message = 'there are {} cells in exp {} and {} cells in exp {}'.format(
    len(cells_in_exp_0), exp_id_0, len(cells_in_exp_1), exp_id_1
)
print(count_message)

there are 208 cells in exp 792813858 and 199 cells in exp 794381992


### Get the number of matched cells
Matched cells are the intersection of the two sets

In [28]:
# A cell counts as matched when its `cell_specimen_id` appears in both experiments.
matched_cells = set(cells_in_exp_0) & set(cells_in_exp_1)
match_message = 'there are {} matched cells across these two experiments'.format(len(matched_cells))
print(match_message)

there are 148 matched cells across these two experiments


### Print the full set of matched cells

In [29]:
# Dump every matched cell_specimen_id. `matched_cells` is a set, so the printed
# order carries no meaning.
print(matched_cells)

{1086536704, 1086535168, 1086537218, 1086508544, 1086539275, 1086512140, 1086505486, 1086495246, 1086495762, 1086506005, 1086502426, 1086521373, 1086524448, 1086512680, 1086497322, 1086518828, 1086528046, 1086523438, 1086521907, 1086540341, 1086496822, 1086507576, 1086499901, 1086539840, 1086506561, 1086497861, 1086534214, 1086501958, 1086500430, 1086495828, 1086516823, 1086538330, 1086503013, 1086528616, 1086499433, 1086496361, 1086525033, 1086514795, 1086509168, 1086504560, 1086510196, 1086522998, 1086501497, 1086511235, 1086494852, 1086505098, 1086533776, 1086514329, 1086498971, 1086503069, 1086500001, 1086532769, 1086519977, 1086507180, 1086539950, 1086505653, 1086496440, 1086530232, 1086496955, 1086503614, 1086495935, 1086526146, 1086524622, 1086539471, 1086529744, 1086515918, 1086506194, 1086527192, 1086511833, 1086496761, 1086500576, 1086495461, 1086528229, 1086502634, 1086529260, 1086536430, 1086499074, 1086528777, 1086514958, 1086497041, 1086509330, 1086503191, 1086496536, 108

In [31]:
import os

# Save the full cell table as `cell_table.csv` in the cache directory, leaving
# out the DataFrame index.
savedir = '/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/production_cache'
cell_table_path = os.path.join(savedir, 'cell_table.csv')
cell_table.to_csv(cell_table_path, index=False)