In [1]:
import os, pickle
import numpy as np
import pandas as pd
from tqdm import tqdm as tqdm

In [2]:
from allensdk.core.brain_observatory_cache import BrainObservatoryCache

In [3]:
# Root directory of the Allen Brain Observatory visual-coding-2p data.
# Set the ALLEN_BOC_DATA_DIR environment variable to point elsewhere instead of
# editing this cell. Previously used alternatives:
#   '/data/allen-brain-observatory/visual-coding-2p/'   (AWS)
#   'BrainObservatoryData'                              (relative path)
DATA_DIR = os.environ.get('ALLEN_BOC_DATA_DIR',
                          '/storage/abarbu/allen-brain-observatory/visual-coding-2p/')
MANIFEST_FILE = os.path.join(DATA_DIR, 'manifest.json')
# Cache object through which all experiment metadata and data are requested below.
boc = BrainObservatoryCache(manifest_file=MANIFEST_FILE)

In [4]:
# Experiment container IDs flagged as bad by the notebook author.
# NOTE(review): presumably meant to be excluded from analysis; the list is not
# referenced in the cells visible here — confirm where it is applied.
BAD_CONTAINER_IDS = [
    511510998, 511510681, 517328083, 527676429, 527550471,
    530243910, 570278595, 571039045, 585905043, 587695553,
    596780703, 598134911, 599587151, 605113106,
]

In [5]:
# Metadata records for every experiment whose session type is 'three_session_B'.
experiments = boc.get_ophys_experiments(session_types=['three_session_B'])

In [6]:
# Peek at the first experiment record to see which metadata fields are available.
next(iter(experiments))

{'id': 645474010,
 'imaging_depth': 375,
 'targeted_structure': 'VISl',
 'cre_line': 'Rbp4-Cre_KL100',
 'reporter_line': 'Ai93(TITL-GCaMP6f)',
 'acquisition_age_days': 112,
 'experiment_container_id': 643590699,
 'session_type': 'three_session_B',
 'donor_name': '344122',
 'specimen_name': 'Rbp4-Cre_KL100;Camk2a-tTA;Ai93-344122',
 'fail_eye_tracking': False}

#### Notes on Dataset Structure
* 119 Images (118 Natural Scenes; 1 Gray Screen)
* 5950 Trials (50 Trials Per Image)
* 6 Visual Areas
* 12 Cre Lines
* ~ 4/5 Cortical Layers, approximated by Imaging Depth

#### Notes on Dictionary Structure
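* `response_dict_bydepth`, `response_dict_bytrial_bydepth`: [Visual Area] > [Cre Line] > [Imaging Depth] > [Experiment Index]
* `response_dict`, `response_dict_bytrial`: [Visual Area] > [Cre Line] > [Layer Label, e.g. `'layer5'`]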

In [7]:
# Directory (relative to the working directory) holding the pickled response
# dictionaries. Keep the trailing slash: file paths are built by string concatenation.
brain_data_dir = 'neural_data/'

In [8]:
def _load_pickle(filename):
    """Unpickle ``brain_data_dir + filename`` and close the file afterwards.

    NOTE: ``pickle.load`` can execute arbitrary code while loading; only use
    this on trusted, locally generated files.
    """
    with open(brain_data_dir + filename, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# Judging by the file names: per-trial vs. trial-averaged responses, each stored
# both split by imaging depth ("bydepth") and in a pooled form.
response_dict_bytrial_bydepth = _load_pickle('response_dict_bytrial_bydepth.pkl')
response_dict_bydepth = _load_pickle('response_dict_average_bydepth.pkl')
response_dict_bytrial = _load_pickle('response_dict_bytrial.pkl')
response_dict = _load_pickle('response_dict_average.pkl')

#### Cell Specimen Metadata

In [9]:
# Compare the number of 'three_session_B' experiments with the number of
# experiment containers known to the cache.
len(experiments), len(boc.get_experiment_containers())

(456, 456)

In [10]:
# Per-cell metadata and precomputed metrics from the cache, as a DataFrame.
cell_specimens = pd.DataFrame(boc.get_cell_specimens())

In [11]:
# List the available per-cell fields.
cell_specimens.columns

Index(['p_run_mod_sg', 'tlr1_id', 'image_sel_ns', 'tlr1_name', 'g_dsi_dg',
       'p_ns', 'specimen_id', 'p_dg', 'time_to_peak_sg', 'pref_dir_dg',
       'dsi_dg', 'rf_overlap_index_lsn', 'rf_distance_lsn', 'tld1_name',
       'osi_dg', 'pref_image_ns', 'experiment_container_id', 'peak_dff_sg',
       'osi_sg', 'rf_center_on_y_lsn', 'time_to_peak_ns', 'rf_chi2_lsn',
       'failed_experiment_container', 'reliability_ns', 'tld1_id',
       'reliability_sg', 'imaging_depth', 'run_mod_ns', 'rf_center_on_x_lsn',
       'area', 'peak_dff_dg', 'p_sg', 'rf_area_on_lsn', 'tld2_name',
       'pref_ori_sg', 'rf_area_off_lsn', 'p_run_mod_ns', 'pref_phase_sg',
       'rf_center_off_y_lsn', 'pref_tf_dg', 'tfdi_dg', 'cell_specimen_id',
       'reliability_dg', 'tld2_id', 'run_mod_dg', 'p_run_mod_dg',
       'reliability_nm1_a', 'reliability_nm1_c', 'reliability_nm1_b',
       'rf_center_off_x_lsn', 'peak_dff_ns', 'donor_full_genotype', 'all_stim',
       'run_mod_sg', 'pref_sf_sg', 'sfdi_sg', 'g_osi

In [12]:
# Show every field of the row labelled 0 as an example cell record.
first_cell_row = cell_specimens.loc[0]
print(first_cell_row)

p_run_mod_sg                                                                 NaN
tlr1_id                                                                265943423
image_sel_ns                                                                 NaN
tlr1_name                                                     Ai93(TITL-GCaMP6f)
g_dsi_dg                                                                     NaN
p_ns                                                                         NaN
specimen_id                                                            502185555
p_dg                                                                         NaN
time_to_peak_sg                                                              NaN
pref_dir_dg                                                                  NaN
dsi_dg                                                                       NaN
rf_overlap_index_lsn                                                         NaN
rf_distance_lsn             

In [13]:
# Number of distinct donor_name values among the 'three_session_B' experiments.
len({experiment['donor_name'] for experiment in experiments})

256

In [14]:
# Number of distinct specimen_id values (NaN, if present, counts as one value).
cell_specimens['specimen_id'].nunique(dropna=False)

256

In [15]:
# Number of distinct cell_specimen_id values (NaN, if present, counts as one value).
cell_specimens['cell_specimen_id'].nunique(dropna=False)

63251

In [16]:
# Number of distinct donor_full_genotype values (NaN, if present, counts as one value).
cell_specimens['donor_full_genotype'].nunique(dropna=False)

22

In [17]:
# Distinct values of the 'area' column.
cell_specimens['area'].unique()

array(['VISp', 'VISal', 'VISl', 'VISpm', 'VISam', 'VISrl'], dtype=object)

In [18]:
# Find the columns that have exactly 256 distinct values, i.e. as many as there
# are distinct donors / specimen IDs in the counts above.
# Each column's unique count is computed once and then filtered.
unique_counts = [(col_name, len(cell_specimens[col_name].unique()))
                 for col_name in cell_specimens.columns]
[(col_name, n_unique) for col_name, n_unique in unique_counts if n_unique == 256]

[('specimen_id', 256)]

In [19]:
# Distinct values of 'tld1_name'; per the output these are Cre driver line names.
# NOTE(review): 13 names are listed here, but the response dictionaries have 12
# Cre-line keys (no 'Scnn1a' key) — confirm this is intended.
cell_specimens['tld1_name'].unique()

array(['Scnn1a-Tg3-Cre', 'Rbp4-Cre_KL100', 'Rorb-IRES2-Cre',
       'Cux2-CreERT2', 'Emx1-IRES-Cre', 'Nr5a1-Cre', 'Sst-IRES-Cre',
       'Vip-IRES-Cre', 'Ntsr1-Cre_GN220', 'Fezf2-CreER',
       'Slc17a7-IRES2-Cre', 'Tlx3-Cre_PL56', 'Pvalb-IRES-Cre'],
      dtype=object)

In [20]:
# Distinct imaging depths present in the cell table.
cell_specimens['imaging_depth'].unique()

array([350, 375, 275, 175, 335, 300, 325, 320, 265, 250, 365, 285, 550,
       400, 276, 625, 570, 390, 195, 225, 200, 205, 185])

In [21]:
# Shape of the subset of cells that have a non-missing 'p_ns' value.
has_p_ns = cell_specimens['p_ns'].notna()
cell_specimens[has_p_ns].shape

(40064, 60)

In [22]:
# Shape of the full cell table, for comparison with the non-missing 'p_ns' subset.
cell_specimens.shape

(63251, 60)

#### Experiment Information + Structure

In [23]:
# First experiment record again; its 'id' is the experiment loaded in the next cell.
next(iter(experiments))

{'id': 645474010,
 'imaging_depth': 375,
 'targeted_structure': 'VISl',
 'cre_line': 'Rbp4-Cre_KL100',
 'reporter_line': 'Ai93(TITL-GCaMP6f)',
 'acquisition_age_days': 112,
 'experiment_container_id': 643590699,
 'session_type': 'three_session_B',
 'donor_name': '344122',
 'specimen_name': 'Rbp4-Cre_KL100;Camk2a-tTA;Ai93-344122',
 'fail_eye_tracking': False}

In [24]:
# Load one example experiment: the 'id' of the first 'three_session_B' record.
sample_experiment_id = 645474010
events = boc.get_ophys_experiment_events(sample_experiment_id)
data = boc.get_ophys_experiment_data(sample_experiment_id)
# Stimulus table for the natural-scenes stimulus of this session.
stim_table = data.get_stimulus_table('natural_scenes')

In [25]:
# Shape of the events array; per the output, the first dimension (25) matches the
# number of cells in this experiment (presumably cells x time samples — TODO confirm).
events.shape

(25, 117484)

In [26]:
# Metadata reported by the data set object for this experiment.
data.get_metadata()

{'sex': 'female',
 'targeted_structure': 'VISl',
 'ophys_experiment_id': 645474010,
 'experiment_container_id': 643590699,
 'excitation_lambda': '910 nanometers',
 'indicator': 'GCaMP6f',
 'fov': '400x400 microns (512 x 512 pixels)',
 'genotype': 'Rbp4-Cre_KL100/wt;Camk2a-tTA/wt;Ai93(TITL-GCaMP6f)/wt',
 'session_start_time': datetime.datetime(2017, 10, 31, 10, 17, 36),
 'session_type': 'three_session_B',
 'specimen_name': 'Rbp4-Cre_KL100;Camk2a-tTA;Ai93-344122',
 'cre_line': 'Rbp4-Cre_KL100/wt',
 'imaging_depth_um': 375,
 'age_days': 113,
 'device': 'Nikon A1R-MP multiphoton microscope',
 'device_name': 'CAM2P.4',
 'pipeline_version': '3.0'}

In [27]:
# Cell specimen IDs of the cells in this experiment.
data.get_cell_specimen_ids()

array([662220084, 662220119, 662220063, 662220185, 662220021, 662220048,
       662220126, 662220112, 662220105, 662220098, 662220091, 662220001,
       662220053, 662220008, 662220209, 662219987, 662220191, 662220037,
       662220173, 662219952, 662219966, 662220221, 662219980, 662219994,
       662219973])

In [28]:
# Number of cells in this experiment.
data.number_of_cells

25

In [29]:
# Cross-check: the ID list should have one entry per cell.
len(data.get_cell_specimen_ids())

25

In [30]:
# Per-trial (unaveraged) responses for the first experiment in area VISl,
# Cre line Rbp4, imaging depth 375. Rows are trials (119 images x 50 trials = 5950);
# columns are the neurons in that experiment.
response_dict_bytrial_bydepth['VISl']['Rbp4'][375][0][0].shape

(5950, 25)

In [31]:
# The second element ([1]) of each per-experiment entry is an array giving the image
# number of every trial (119 images x 50 trials). Taking every 50th entry shows one
# label per image, which suggests the trials are grouped by image.
# NOTE(review): -1 is presumably the gray-screen (blank) stimulus — confirm.
response_dict_bytrial_bydepth['VISl']['Rbp4'][375][0][1][0:-1:50]

array([  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,
        11.,  12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,
        22.,  23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,
        33.,  34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,
        44.,  45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,
        55.,  56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,
        66.,  67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,
        77.,  78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,
        88.,  89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,
        99., 100., 101., 102., 103., 104., 105., 106., 107., 108., 109.,
       110., 111., 112., 113., 114., 115., 116., 117.,  -1.])

In [32]:
# Trial-averaged responses for the first experiment in area VISl, Cre line Rbp4,
# imaging depth 375. Rows are the 119 images; columns are the neurons.
response_dict_bydepth['VISl']['Rbp4'][375][0].shape

(119, 25)

In [33]:
# Experiment containers with cells in VISl, an Rbp4 Cre line and imaging depth 375.
# The Cre line is matched by substring; an exact match would compare
# df['tld1_name'] == 'Rbp4-Cre_KL100' instead.
df = cell_specimens
in_visl = df['area'] == 'VISl'
is_rbp4 = df['tld1_name'].str.contains('Rbp4')
at_depth_375 = df['imaging_depth'] == 375
df.loc[in_visl & is_rbp4 & at_depth_375, 'experiment_container_id'].unique()

array([511510763, 511511089, 572606380, 573378109, 575771818, 601790879,
       643590699])

In [34]:
# Per-experiment response arrays for VISl / Rbp4 at imaging depth 375.
visl_rbp4_375 = response_dict_bydepth['VISl']['Rbp4'][375]
# Number of neurons (columns) in each experiment's array, computed once and reused.
cells_per_experiment = [visl_rbp4_375[i].shape[1] for i in range(len(visl_rbp4_375))]
print(len(visl_rbp4_375))
print(cells_per_experiment)
# Total neurons across these experiments.
np.array(cells_per_experiment).sum()

7
[25, 27, 54, 36, 45, 68, 53]


308

In [35]:
# Pooled, trial-averaged array for VISl / Rbp4 under the 'layer5' key
# (images x neurons); compare the neuron count with the per-experiment total above.
response_dict['VISl']['Rbp4']['layer5'].shape

(119, 308)

In [36]:
# Pooled per-trial array for VISl / Rbp4 under the 'layer5' key (trials x neurons).
response_dict_bytrial['VISl']['Rbp4']['layer5'].shape

(5950, 308)

In [37]:
# Walk down the nested dictionary, taking the first key at each level.
sample_area = next(iter(response_dict_bydepth))
creline_level = response_dict_bydepth[sample_area]
sample_creline = next(iter(creline_level))
depth_level = creline_level[sample_creline]
sample_depth = next(iter(depth_level))
# First per-experiment entry stored at the sampled area / Cre line / depth.
sample_response_dict = depth_level[sample_depth][0]

# Keys available at each nesting level: areas, Cre lines, imaging depths.
print(response_dict_bydepth.keys())
print(creline_level.keys())
print(depth_level.keys())

dict_keys(['VISl', 'VISpm', 'VISam', 'VISp', 'VISrl', 'VISal'])
dict_keys(['Rbp4', 'Slc17a7', 'Emx1', 'Nr5a1', 'Fezf2', 'Rorb', 'Ntsr1', 'Cux2', 'Vip', 'Sst', 'Pvalb', 'Tlx3'])
dict_keys([375])


In [38]:
# Top-level keys of the pooled dictionary, then the keys nested under its first entry.
print(response_dict.keys())
first_area = next(iter(response_dict))
print(response_dict[first_area].keys())

dict_keys(['VISl', 'VISpm', 'VISam', 'VISp', 'VISrl', 'VISal'])
dict_keys(['Rbp4', 'Slc17a7', 'Emx1', 'Nr5a1', 'Fezf2', 'Rorb', 'Ntsr1', 'Cux2', 'Vip', 'Sst', 'Pvalb', 'Tlx3'])


In [39]:
# Repeats the shape check on the pooled, trial-averaged VISl / Rbp4 'layer5' array.
response_dict['VISl']['Rbp4']['layer5'].shape

(119, 308)