In [None]:
import os, pickle
import numpy as np
import pandas as pd
from copy import deepcopy
from tqdm import tqdm as tqdm

In [None]:
from allensdk.core.brain_observatory_cache import BrainObservatoryCache

In [None]:
# Directory that holds the AllenSDK manifest used to build the cache below.
# NOTE(review): this is an absolute, machine-specific path — adjust it for your environment.
DATA_DIR = '/storage/abarbu/allen-brain-observatory/visual-coding-2p/'
MANIFEST_FILE = os.path.join(DATA_DIR, 'manifest.json')
# Cache object that the cells below use to query experiments, events and cell tables.
boc = BrainObservatoryCache(manifest_file=MANIFEST_FILE)

In [None]:
# Experiment containers that the dataset-parsing functions below skip.
# NOTE(review): the reason these containers are excluded is not recorded in this notebook.
BAD_CONTAINER_IDS = [511510998, 511510681, 517328083, 527676429, 527550471, 530243910, 570278595, 571039045, 
                     585905043, 587695553, 596780703, 598134911, 599587151, 605113106]

In [None]:
# Experiment records for session type 'three_session_B'; the cells below read the
# 'natural_scenes' stimulus table from these experiments.
experiments = boc.get_ophys_experiments(session_types=['three_session_B'])

In [None]:
# Peek at the first experiment record to see which fields it carries.
next(iter(experiments))

#### Notes on Dataset Structure
* 119 Images (118 Natural Scenes; 1 Gray Screen)
* 5950 Trials (50 Trials Per Image)
* 6 Visual Areas
* 12 Cre Lines
* ~ 4/5 Cortical Layers, approximated by Imaging Depth

### Brain Observatory Responses by Cell

#### Parsing Procedural Example

In [None]:
# Walk through the parsing steps for a single experiment.
# A deep copy is used so that renaming the 'id' field does not touch the record in `experiments`.
experiment = deepcopy(next(iter(experiments)))
experiment['experiment_id'] = experiment_id = experiment.pop('id')

# Event traces and the data-set handle for this experiment.
events = boc.get_ophys_experiment_events(experiment_id)
data = boc.get_ophys_experiment_data(experiment_id)

# Presentation table for the natural-scenes stimulus and the distinct frame values shown.
stim_table = data.get_stimulus_table('natural_scenes')
frames = np.unique(stim_table.frame)

experiment_metadata = data.get_metadata()
cell_specimens = data.get_cell_specimen_ids()

# Every cell of the experiment gets its own merged copy of the experiment-level fields;
# on key collisions the values from `experiment_metadata` win.
metadata_dict = {
    cell_specimen_id: {**experiment, **experiment_metadata}
    for cell_specimen_id in cell_specimens
}

In [None]:
# Show the merged metadata record of the first cell.
metadata_dict[next(iter(metadata_dict))]

In [None]:
# Example: trial-averaged response per frame for the single experiment loaded above.
all_trials = False  # this walkthrough only exercises the averaged case
response_dict = {}

if not all_trials:
    # One row per distinct frame value, one column per row of `events`.
    response = np.zeros((len(frames), events.shape[0]))

for frame in frames:
    # All presentations of this frame.
    frame_table = stim_table[stim_table.frame == frame]
    # Sum the events inside each presentation window -> one row per trial.
    trials = np.array([
        np.sum(events[:, row.start:row.end], axis=1)
        for i, row in frame_table.iterrows()
    ])

    if not all_trials:
        # Rows are addressed by frame VALUE; a frame value of -1 (the grey screen,
        # according to the comment in the parsing function below) therefore lands
        # in the last row through negative indexing.
        response[frame] = trials.mean(axis=0)

if not all_trials:
    # Column k of `response` is paired with the k-th id in `cell_specimens`.
    response_dict.update(
        (cell_specimen_id, response[:, cell_specimen_index])
        for cell_specimen_index, cell_specimen_id in enumerate(cell_specimens)
    )

In [None]:
# Sanity check: shape of the full response matrix and of one cell's response vector.
response.shape, response_dict[next(iter(response_dict))].shape

#### Dataset Parsing

In [None]:
# Base directory for the files this notebook writes. It has to be bound here because the
# caching cells that follow build their paths from `output_dir`.
output_dir = './'
# Sub-directory (joined onto output_dir) used for the per-cell response pickles.
data_saver_dir = 'response_arrays/'

In [None]:
def get_response_data_from_experiments(all_trials=False, return_metadata=False):
    """Collect natural-scene responses for each cell of the 'three_session_B' experiments.

    Experiments whose container id is listed in BAD_CONTAINER_IDS are skipped. For every
    other experiment the event traces are summed over each presentation window
    (``start:end`` from the 'natural_scenes' stimulus table), giving one value per trial
    for each row of ``events``; column k of the result is assigned to the k-th id returned
    by ``get_cell_specimen_ids()``.

    NOTE: a later cell of this notebook defines another function with this same name
    (grouping by area / cre line / depth); once that cell runs, it replaces this one.

    Parameters
    ----------
    all_trials : bool, default False
        False -> trials are averaged per frame and every cell maps to a 1-D array with one
        entry per distinct frame. Rows are addressed by frame value, so a frame value of
        -1 (the grey screen) ends up in the last position.
        True -> every cell maps to a 1-D array with one entry per trial; trial blocks are
        ordered by ascending frame value and the first block (grey screen) is moved last.
    return_metadata : bool, default False
        When True, additionally return the per-cell metadata dictionary.

    Returns
    -------
    dict
        ``{cell_specimen_id: response_array}`` when ``return_metadata`` is False.
    tuple(dict, dict)
        ``(response_dict, metadata_dict)`` when ``return_metadata`` is True, where
        ``metadata_dict`` maps each cell id to the experiment record (with 'id' renamed to
        'experiment_id') merged with ``data.get_metadata()``.
    """
    experiments = boc.get_ophys_experiments(session_types=['three_session_B'])

    response_dict = {}
    metadata_dict = {}
    with tqdm(experiments) as pbar:
        for experiment in experiments:
            if experiment['experiment_container_id'] in BAD_CONTAINER_IDS:
                # Count skipped experiments as well so the bar can reach its total.
                pbar.update(1)
                continue

            experiment_id = experiment['id']
            # Build a renamed copy rather than pop()-ing in place, so the records handed
            # out by the cache are never modified.
            experiment_info = {key: value for key, value in experiment.items() if key != 'id'}
            experiment_info['experiment_id'] = experiment_id
            pbar.set_description("Processing Experiment {}; {} Trials"
                                 .format(experiment_id, 'Averaging' if not all_trials else 'Appending All'))

            events = boc.get_ophys_experiment_events(experiment_id)
            data = boc.get_ophys_experiment_data(experiment_id)
            stim_table = data.get_stimulus_table('natural_scenes')
            frames = np.unique(stim_table.frame)  # sorted ascending

            experiment_metadata = data.get_metadata()
            cell_specimens = data.get_cell_specimen_ids()

            if all_trials:
                response_temp_list = []
            else:
                response = np.zeros((len(frames), events.shape[0]))

            for frame in frames:
                frame_table = stim_table[stim_table.frame == frame]
                # One row per presentation: events summed over the presentation window.
                trials = np.array([np.sum(events[:, row.start:row.end], axis=1)
                                   for _, row in frame_table.iterrows()])

                if all_trials:
                    response_temp_list.append(trials)
                else:
                    response[frame] = np.mean(trials, axis=0)

            if all_trials:
                # `frames` is already sorted, so the blocks are in ascending frame order.
                response = list(response_temp_list)
                # put grey screen (frame == -1) at the end of the array; it is sorted to first
                response.append(response.pop(0))
                response = np.vstack(response)  # [stim_table.shape[0], events.shape[0]]

            for cell_specimen_index, cell_specimen_id in enumerate(cell_specimens):
                response_dict[cell_specimen_id] = response[:, cell_specimen_index]
                metadata_dict[cell_specimen_id] = {**experiment_info, **experiment_metadata}

            pbar.update(1)

    if return_metadata:
        return response_dict, metadata_dict
    return response_dict

In [None]:
# Per-cell, per-trial responses: load the cached pickle if present, otherwise compute and cache.
output_file = os.path.join(output_dir, data_saver_dir, 'response_bytrial_bycell.pkl')
if os.path.exists(output_file):
    # The pickle is a cache written by this notebook; do not point this at untrusted files.
    with open(output_file, 'rb') as file:
        response_dict_bytrial = pickle.load(file)
else:
    response_dict_bytrial = get_response_data_from_experiments(all_trials=True)

    # Make sure the target sub-directory exists before writing.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'wb') as file:
        # Persist the by-trial result that was just computed.
        pickle.dump(response_dict_bytrial, file)

In [None]:
# Per-cell, trial-averaged responses: load the cached pickle if present, otherwise compute and cache.
output_file = os.path.join(output_dir, data_saver_dir, 'response_average_bycell.pkl')
if os.path.exists(output_file):
    # Use a context manager so the file handle is closed after loading.
    with open(output_file, 'rb') as file:
        response_dict_avg = pickle.load(file)
else:
    response_dict_avg = get_response_data_from_experiments(all_trials=False)

    # Make sure the target sub-directory exists before writing.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'wb') as file:
        pickle.dump(response_dict_avg, file)

In [None]:
# Write a second copy of the trial-averaged per-cell responses directly under output_dir.
# An existing file is left untouched.
output_file = os.path.join(output_dir, 'cell_response_average.pkl')
if not os.path.exists(output_file):
    with open(output_file, 'wb') as file:
        pickle.dump(response_dict_avg, file)

In [None]:
def get_cell_metadata_from_experiments():
    """Build a per-cell metadata dictionary for the 'three_session_B' experiments.

    Experiments whose container id is listed in BAD_CONTAINER_IDS are skipped. For every
    other experiment, each id returned by ``get_cell_specimen_ids()`` is mapped to the
    experiment record (with its 'id' field renamed to 'experiment_id') merged with
    ``data.get_metadata()``; on key collisions the ``get_metadata()`` values win.

    Returns
    -------
    dict
        ``{cell_specimen_id: metadata_record}``, one independent dict per cell.
    """
    experiments = boc.get_ophys_experiments(session_types=['three_session_B'])

    metadata_dict = {}
    with tqdm(experiments) as pbar:
        for experiment in experiments:
            if experiment['experiment_container_id'] in BAD_CONTAINER_IDS:
                # Count skipped experiments as well so the bar can reach its total.
                pbar.update(1)
                continue

            experiment_id = experiment['id']
            # Build a renamed copy rather than pop()-ing in place, so the records handed
            # out by the cache are never modified.
            experiment_info = {key: value for key, value in experiment.items() if key != 'id'}
            experiment_info['experiment_id'] = experiment_id

            data = boc.get_ophys_experiment_data(experiment_id)
            experiment_metadata = data.get_metadata()
            cell_specimens = data.get_cell_specimen_ids()

            for cell_specimen_id in cell_specimens:
                metadata_dict[cell_specimen_id] = {**experiment_info, **experiment_metadata}

            pbar.update(1)

    return metadata_dict

In [None]:
# Per-cell metadata: load the cached pickle if present, otherwise compute and cache.
output_file = os.path.join(output_dir, 'experiment_data.pkl')
if os.path.exists(output_file):
    # Use a context manager so the file handle is closed after loading.
    with open(output_file, 'rb') as file:
        metadata_dict = pickle.load(file)
else:
    metadata_dict = get_cell_metadata_from_experiments()

    with open(output_file, 'wb') as file:
        pickle.dump(metadata_dict, file)

The cell-specimens table (loaded below with `boc.get_cell_specimens()`) contains many useful pre-computed per-cell metrics, aggregated by the Allen Institute.

In [None]:
# Load the cell-specimens records from the cache into a DataFrame.
cell_specimen_data = pd.DataFrame(boc.get_cell_specimens())

In [None]:
# Flatten the per-cell metadata mapping into one record per cell.
# Each record is tagged with its own id (the dicts inside metadata_dict are updated in place).
metadata_dictlist = []
for cell_id, cell_record in metadata_dict.items():
    cell_record['cell_specimen_id'] = cell_id
    metadata_dictlist.append(cell_record)

metadata_df = pd.DataFrame(metadata_dictlist)

# Expose the targeted structure under the shorter column name 'area'.
metadata_df['area'] = metadata_df['targeted_structure']

def which_layer(depth):
    """Map an imaging depth to a coarse layer label.

    depth < 200 -> 'layer23'; 200 <= depth < 300 -> 'layer4';
    300 <= depth < 500 -> 'layer5'; depth >= 500 -> 'layer6'.
    A value that satisfies none of the comparisons (e.g. NaN) yields None.
    """
    # Exclusive upper bounds, checked in ascending order.
    for upper_bound, label in ((200, 'layer23'), (300, 'layer4'), (500, 'layer5')):
        if depth < upper_bound:
            return label
    return 'layer6' if depth >= 500 else None
    
# Coarse layer label per cell, derived from the imaging depth.
metadata_df['layer'] = metadata_df['imaging_depth'].apply(which_layer)

# Join the cell-specimens table on every column the two tables have in common
# (pandas' default join type applies).
shared_columns = list(set.intersection(set(cell_specimen_data.columns), set(metadata_df.columns)))
metadata_df = metadata_df.merge(cell_specimen_data, on=shared_columns)

# A "neural site" is the combination of area and layer, e.g. '<area>_<layer>'.
metadata_df['neural_site'] = metadata_df['area'] + '_' + metadata_df['layer']

# Write the table once; an existing file is left untouched.
output_file = os.path.join(output_dir, 'cell_metadata.csv')
if not os.path.exists(output_file):
    metadata_df.to_csv(output_file, index=None)

In [None]:
# Long-format table of the averaged responses: one row per (cell, stimulus).
# Stimuli are numbered from 1 in the order they appear in each cell's response array.
response_dflist_avg = [
    pd.DataFrame({'cell_specimen_id': [cell_id] * len(cell_response),
                  'stimulus': range(1, len(cell_response) + 1),
                  'response': cell_response})
    for cell_id, cell_response in tqdm(response_dict_avg.items())
]
# The per-cell frames keep their own 0-based index after concatenation.
response_df_avg = pd.concat(response_dflist_avg)

In [None]:
# Attach the per-cell metadata to every (cell, stimulus) response row.
cell_data_combo_df = response_df_avg.merge(metadata_df, on='cell_specimen_id')

### Brain Observatory Responses by Site

#### Notes on Dictionary Structure
* [Visual Area] > [Cre Line] > [Imaging Depth]

In [None]:
# Output location for the by-depth / by-site pickles written below.
output_dir = './'
# Rebinds data_saver_dir: the per-cell cells above used 'response_arrays/'.
data_saver_dir = 'response_data/'

In [None]:
def get_response_data_from_experiments(all_trials=False):
    """Collect natural-scene responses grouped as [area][cre line][imaging depth].

    NOTE: this reuses the name of the per-cell parser defined earlier in the notebook and
    replaces it once this cell has run. It iterates over the notebook-level
    ``experiments`` list and skips experiments whose container id is in BAD_CONTAINER_IDS.

    For every remaining experiment the event traces are summed over each presentation
    window (``start:end`` from the 'natural_scenes' stimulus table). The cre-line key is
    the part of ``experiment['cre_line']`` before the first '-'.

    Parameters
    ----------
    all_trials : bool, default False
        False -> one averaged row per distinct frame; rows are addressed by frame value,
        so a frame value of -1 (the grey screen) ends up in the last row.
        True -> one row per trial, blocks ordered by ascending frame value with the first
        block (grey screen) moved to the end, plus a matching vector of frame labels.

    Returns
    -------
    dict
        ``response_dict[area][cre][depth]`` is a list with one entry per experiment:
        a ``(len(frames), events.shape[0])`` array when averaging, or a
        ``(response, trial_labels)`` tuple when ``all_trials`` is True.
    """
    response_dict = {}
    with tqdm(experiments) as pbar:
        for experiment in experiments:
            if experiment['experiment_container_id'] in BAD_CONTAINER_IDS:
                # Count skipped experiments as well so the bar can reach its total.
                pbar.update(1)
                continue
            experiment_id = experiment['id']
            pbar.set_description("Processing Experiment {}; {} Trials"
                                 .format(experiment_id, 'Averaging' if not all_trials else 'Appending All'))

            cre = experiment['cre_line'].split('-')[0]
            depth = experiment['imaging_depth']
            area = experiment['targeted_structure']

            # Create the nested containers on first use and keep a handle on the leaf list.
            site_responses = (response_dict.setdefault(area, {})
                                           .setdefault(cre, {})
                                           .setdefault(depth, []))

            events = boc.get_ophys_experiment_events(experiment_id)
            data = boc.get_ophys_experiment_data(experiment_id)
            stim_table = data.get_stimulus_table('natural_scenes')
            frames = np.unique(stim_table.frame)  # sorted ascending

            if all_trials:
                response_temp_list = []
            else:
                response = np.zeros((len(frames), events.shape[0]))

            for frame in frames:
                frame_table = stim_table[stim_table.frame == frame]
                # One row per presentation: events summed over the presentation window.
                trials = np.array([np.sum(events[:, row.start:row.end], axis=1)
                                   for _, row in frame_table.iterrows()])

                if all_trials:
                    response_temp_list.append(trials)
                else:
                    response[frame] = np.mean(trials, axis=0)

            if all_trials:
                frames_sort_index = np.argsort(frames)

                response = [response_temp_list[i] for i in frames_sort_index]
                # Block i of response_temp_list belongs to frames[i], so the number of
                # labels must come from block i (position), not from the frame value.
                trial_labels = [frames[i] * np.ones(response_temp_list[i].shape[0])
                                for i in frames_sort_index]

                # put grey screen (frame == -1) at the end of the array; it is sorted to first
                response.append(response.pop(0))
                trial_labels.append(trial_labels.pop(0))

                trial_labels = np.hstack(trial_labels)
                response = np.vstack(response)  # [stim_table.shape[0], events.shape[0]]

                site_responses.append((response, trial_labels))
            else:
                site_responses.append(response)

            pbar.update(1)

    return response_dict

In [None]:
# By-depth, per-trial responses: load the cached pickle if present, otherwise compute and cache.
output_file = os.path.join(output_dir, data_saver_dir, 'response_bytrial_bydepth.pkl')
if os.path.exists(output_file):
    with open(output_file, 'rb') as file:
        response_dict_bytrial_bydepth = pickle.load(file)
else:
    response_dict_bytrial_bydepth = get_response_data_from_experiments(all_trials=True)

    # Make sure the target sub-directory exists before writing.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    # Open for binary WRITE and pass the object to dump.
    with open(output_file, 'wb') as file:
        pickle.dump(response_dict_bytrial_bydepth, file)

In [None]:
# By-depth, trial-averaged responses: load the cached pickle if present, otherwise compute and cache.
output_file = os.path.join(output_dir, data_saver_dir, 'response_average_bydepth.pkl')
if os.path.exists(output_file):
    # Use a context manager so the file handle is closed after loading.
    with open(output_file, 'rb') as file:
        response_dict = pickle.load(file)
else:
    response_dict = get_response_data_from_experiments(all_trials=False)

    # Make sure the target sub-directory exists before writing.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'wb') as file:
        pickle.dump(response_dict, file)

In [None]:
def response_dict_by_layer_from_depth(response_dict):
    """Regroup a [area][cre][depth] response dictionary into [area][cre][layer].

    Each leaf of ``response_dict`` is a list of per-experiment entries: either 2-D arrays
    (averaged condition) or ``(response, trial_labels)`` tuples (per-trial condition). The
    condition is detected from the type of the first entry found and printed.

    Depths are mapped to 'layer23' / 'layer4' / 'layer5' / 'layer6' with thresholds at
    200, 300 and 500, except that the cre lines 'Nr5a1' and 'Scnn1a' are always assigned
    to 'layer4'. Within a layer, the response arrays of all experiments are concatenated
    column-wise with ``np.hstack``; layers without any entry are omitted.

    Returns
    -------
    dict
        ``new_response_dict[area][cre][layer]`` -> 2-D array. Trial labels are stacked
        (which requires equal lengths) but are not part of the return value.
    """
    layer_names = ('layer23', 'layer4', 'layer5', 'layer6')

    def which_layer(depth):
        # Exclusive upper bounds in ascending order; None if no comparison holds.
        for upper_bound, label in ((200, 'layer23'), (300, 'layer4'), (500, 'layer5')):
            if depth < upper_bound:
                return label
        return 'layer6' if depth >= 500 else None

    # Inspect the first available entry to find out which condition we were given.
    first_area = next(iter(response_dict))
    first_cre = next(iter(response_dict[first_area]))
    first_depth = next(iter(response_dict[first_area][first_cre]))
    first_entry = response_dict[first_area][first_cre][first_depth][0]

    all_trials = type(first_entry) == tuple
    print("Condition: Trials Unconcatenated" if all_trials else "Condition: Trials Averaged")

    trials_list = []
    new_response_dict = {}

    for area, cre_dict in response_dict.items():
        new_response_dict[area] = {}

        for cre, by_depth in cre_dict.items():
            # Pool the per-experiment entries of every depth into their layer.
            pooled = {name: [] for name in layer_names}
            for depth, entries in by_depth.items():
                layer = 'layer4' if cre in ('Nr5a1', 'Scnn1a') else which_layer(depth)
                pooled[layer] += entries

            site_dict = {}
            new_response_dict[area][cre] = site_dict

            for layer in layer_names:
                entries = pooled[layer]
                if len(entries) == 0:
                    continue

                if all_trials:
                    # Stack the label vectors of all experiments; keep the first one.
                    stacked_labels = np.vstack([entry[1] for entry in entries])
                    trials_list.append(stacked_labels[0])
                    site_dict[layer] = np.hstack([entry[0] for entry in entries])
                else:
                    site_dict[layer] = np.hstack(entries)

    if all_trials:
        # The stacked labels are not returned; stacking still fails if the label vectors
        # collected across sites differ in length.
        np.vstack(trials_list)

    return new_response_dict

In [None]:
# Pool the per-trial, by-depth responses into layers ("sites").
# The input must be the [area][cre][depth] dictionary, not the per-cell dictionary.
response_dict_bytrial_bysite = response_dict_by_layer_from_depth(response_dict_bytrial_bydepth)

output_file = os.path.join(output_dir, data_saver_dir, 'response_bytrial_bysite.pkl')
if not os.path.exists(output_file):
    with open(output_file, 'wb') as file:
        # Save the by-site result (not the input dictionary).
        pickle.dump(response_dict_bytrial_bysite, file)

In [None]:
# Pool the trial-averaged, by-depth responses into layers ("sites").
# Stored under its own name so the per-trial by-site result is not overwritten.
response_dict_average_bysite = response_dict_by_layer_from_depth(response_dict)

output_file = os.path.join(output_dir, data_saver_dir, 'response_average_bysite.pkl')
if not os.path.exists(output_file):
    with open(output_file, 'wb') as file:
        # Save the by-site result (not the by-depth input dictionary).
        pickle.dump(response_dict_average_bysite, file)