In [12]:
import contextlib
import csv
import os
import shutil
import sys

import numpy as np
import pandas as pd

In [2]:
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache
from allensdk.brain_observatory.ecephys.visualization import raster_plot

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Cache directory: this is where downloaded data ends up. The manifest path
# inside it is what gets handed to the AllenSDK project cache.
output_dir = '/ecephys_cache_dir/'
manifest_path = os.path.join(output_dir, 'manifest.json')
cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)

# List the session types the cache knows about.
print(cache.get_all_session_types())

['brain_observatory_1.1', 'functional_connectivity']


In [8]:
# Switch for the download cell further down: its loop only runs when this is True.
download_from_warehouse = False

In [10]:
# Folder used as the prefix for the exported CSV and the data-loading log paths.
dataset_folder = 'Q:\\Personal\\Irina\\projects\\isttc\\results\\allen_mice\\dataset\\'

### Download data to local drive 

Already done: the download cell below only runs when `download_from_warehouse` is set to `True`.

In [9]:
# Load all functional-connectivity sessions to the local drive.
# The whole cell is a no-op unless `download_from_warehouse` is True.
if download_from_warehouse:
    # Upper bound on attempts per session, so a persistent OSError cannot
    # trigger an endless delete-and-download cycle.
    max_download_attempts = 5

    # Build the list of sessions here instead of relying on a table that is
    # only defined by a later cell, so this cell also works on a fresh kernel.
    all_sessions = cache.get_session_table()
    session_ids_to_download = all_sessions[
        all_sessions["session_type"] == "functional_connectivity"
    ].index.values

    for session_id in session_ids_to_download:
        # Session folder inside the cache directory; it is removed before a retry.
        directory = os.path.join(output_dir, 'session_' + str(session_id))

        for attempt in range(1, max_download_attempts + 1):
            session = cache.get_session_data(session_id)
            try:
                print(session_id)
                # Reading an attribute of the session is what surfaces the
                # OSError that this cell treats as a truncated file.
                print(session.specimen_name)
                break
            except OSError:
                if attempt == max_download_attempts:
                    # Give up and let the caller see the original error.
                    raise
                # ignore_errors: a folder that is already gone must not raise
                # a second error from inside this handler.
                shutil.rmtree(directory, ignore_errors=True)
                print(" Truncated spikes file, re-downloading")

### Get sessions data

Sessions are already loaded on local drive.

In [7]:
# The functional connectivity dataset contains a 30 min spontaneous activity block.
# (`cache` is created in the cache-setup cell near the top of the notebook.)
sessions = cache.get_session_table()
print('len sessions: {}'.format(len(sessions)))

# NOTE: despite its name, this table holds the "functional_connectivity" sessions.
is_functional_connectivity = sessions["session_type"] == "functional_connectivity"
brain_observatory_type_sessions = sessions[is_functional_connectivity]
print('len brain_observatory_type_sessions = functional_connectivity: {}'.format(
    len(brain_observatory_type_sessions)))
print(brain_observatory_type_sessions.keys())

# Last expression of the cell: displays the final three sessions.
brain_observatory_type_sessions.tail(3)

len sessions: 58
len brain_observatory_type_sessions = functional_connectivity: 26
Index(['published_at', 'specimen_id', 'session_type', 'age_in_days', 'sex',
       'full_genotype', 'unit_count', 'channel_count', 'probe_count',
       'ecephys_structure_acronyms'],
      dtype='object')


Unnamed: 0_level_0,published_at,specimen_id,session_type,age_in_days,sex,full_genotype,unit_count,channel_count,probe_count,ecephys_structure_acronyms
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
839557629,2019-10-03T00:00:00Z,821469666,functional_connectivity,115.0,M,Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,450,1853,5,"[APN, NOT, MB, DG, CA1, VISam, nan, VISpm, LGd..."
840012044,2019-10-03T00:00:00Z,820866121,functional_connectivity,116.0,M,Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,758,2298,6,"[APN, DG, CA1, VISam, nan, LP, VISpm, VISp, LG..."
847657808,2019-10-03T00:00:00Z,827809884,functional_connectivity,126.0,F,wt/wt,874,2298,6,"[APN, NOT, DG, HPF, ProS, CA1, VISam, nan, MB,..."


### Get single units 

Using 30 min of spontaneous activity (the animals were shown a grey screen).

**TODO:** figure out the quality cutoffs.
For an explanation of why a low amplitude cutoff is good for ITs,
see also https://allensdk.readthedocs.io/en/latest/_static/examples/nb/ecephys_quality_metrics.html#Amplitude-cutoff
The idea is to keep units that are more "complete": a cutoff of 0.01 means that 1% of the spikes are missing from the unit.

In [14]:
# CSV file the export loop below appends its unit rows to.
output_filename = dataset_folder + 'allen_func_conn_30min_spont.csv'
# Text file that receives everything the export loop prints.
output_log = dataset_folder + 'dataload_log.txt'
# When True, the export loop also logs every unit it writes.
verbose = True

In [15]:
# Export the spikes of the long spontaneous block of every session to
# `output_filename`, one CSV row per unit:
#   specimen_id, session_id, unit_id, ecephys_structure_acronym, spike times...
# Spike times are taken from `time_since_stimulus_presentation_onset`.
# Everything printed inside the loop is written to `output_log`.
#
# NOTE: the CSV is opened in append mode, so re-running this cell appends the
# same rows again; delete the file first if a clean export is needed.

# A spontaneous presentation must last longer than this to be used
# (presumably seconds, i.e. 20 min -- confirm against the stimulus table).
min_spont_duration = 1200

# redirect_stdout restores sys.stdout and the `with` closes the log file even
# when a session fails halfway through.
with open(output_log, 'w') as log_file, contextlib.redirect_stdout(log_file):
    for session_id in brain_observatory_type_sessions.index.values:

        print('############################')
        print('processing session {}'.format(session_id))

        # load session
        session = cache.get_session_data(session_id)

        # load units
        units_df = session.units
        print('len units df {}'.format(len(units_df)))

        # load stimulus presentations and pick the first long spontaneous one
        presentations = session.get_stimulus_table("spontaneous")
        long_spont_ids = presentations[
            presentations['duration'] > min_spont_duration
        ].index.values
        if len(long_spont_ids) == 0:
            # Without this guard the [0] below raises IndexError and aborts
            # the whole export.
            print('WARNING: no spontaneous period with duration > {}, skipping session'.format(
                min_spont_duration))
            continue
        spont_period_id = long_spont_ids[0]
        print('Spontaneous period {}'.format(presentations.loc[spont_period_id, :]))

        # load spikes from stimulus period
        spikes_df = session.presentationwise_spike_times(
            stimulus_presentation_ids=spont_period_id,
            unit_ids=units_df.index.values
        )

        # one row per unit, spike times collected directly into a list
        # (no detour through a comma-joined string)
        spikes_wide_df = spikes_df.groupby(by='unit_id', as_index=False).agg(
            spike_times=('time_since_stimulus_presentation_onset', list))

        # unit metadata; reset_index turns the units index into a column,
        # which the merge below expects to be named 'unit_id'
        units_df_subset = units_df[['ecephys_structure_acronym']].reset_index()
        units_df_subset['specimen_id'] = brain_observatory_type_sessions.loc[session_id, 'specimen_id']
        units_df_subset['session_id'] = session_id

        # inner merge: units without any spike in the period are dropped
        units_merged_df = pd.merge(units_df_subset, spikes_wide_df, on='unit_id', how='inner')

        # write to file
        print('Writing to csv...')
        with open(output_filename, 'a', newline='') as f:
            writer = csv.writer(f)
            for unit in units_merged_df.itertuples(index=False):
                if verbose:
                    print('Writing unit {}'.format(unit.unit_id))
                row = [unit.specimen_id, unit.session_id, unit.unit_id,
                       unit.ecephys_structure_acronym]
                # plain Python floats, as in the original float(...) conversion
                row.extend(float(t) for t in unit.spike_times)
                writer.writerow(row)

  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)


### Some useful lines

In [5]:
# By default the AllenSDK applies filters, so a plain `cache.get_units()`
# only returns units that pass these thresholds:
#   isi_violations < 0.5
#   amplitude_cutoff < 0.1
#   presence_ratio > 0.9
# Here every threshold is set to +/- infinity instead.
no_quality_filter = dict(
    amplitude_cutoff_maximum=np.inf,
    presence_ratio_minimum=-np.inf,
    isi_violations_maximum=np.inf,
)
units = cache.get_units(**no_quality_filter)
print(units.keys())

# Last expression of the cell: number of units returned.
len(units)

Index(['waveform_PT_ratio', 'waveform_amplitude', 'amplitude_cutoff',
       'cumulative_drift', 'd_prime', 'waveform_duration',
       'ecephys_channel_id', 'firing_rate', 'waveform_halfwidth',
       'isi_violations', 'isolation_distance', 'L_ratio', 'max_drift',
       'nn_hit_rate', 'nn_miss_rate', 'presence_ratio',
       'waveform_recovery_slope', 'waveform_repolarization_slope',
       'silhouette_score', 'snr', 'waveform_spread', 'waveform_velocity_above',
       'waveform_velocity_below', 'ecephys_probe_id', 'local_index',
       'probe_horizontal_position', 'probe_vertical_position',
       'anterior_posterior_ccf_coordinate', 'dorsal_ventral_ccf_coordinate',
       'left_right_ccf_coordinate', 'ecephys_structure_id',
       'ecephys_structure_acronym', 'ecephys_session_id', 'lfp_sampling_rate',
       'name', 'phase', 'sampling_rate', 'has_lfp_data', 'date_of_acquisition',
       'published_at', 'specimen_id', 'session_type', 'age_in_days', 'sex',
       'genotype'],
      d

99180

In [6]:
# Units whose session_type is "functional_connectivity".
units[units['session_type'] == 'functional_connectivity']

Unnamed: 0_level_0,waveform_PT_ratio,waveform_amplitude,amplitude_cutoff,cumulative_drift,d_prime,waveform_duration,ecephys_channel_id,firing_rate,waveform_halfwidth,isi_violations,...,phase,sampling_rate,has_lfp_data,date_of_acquisition,published_at,specimen_id,session_type,age_in_days,sex,genotype
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
950910045,0.620779,83.773950,0.007797,79.88,6.679510,0.233501,849990704,21.291408,0.233501,0.030471,...,3a,29999.961516,True,2018-11-05T21:14:59Z,2019-10-03T00:00:00Z,754488979,functional_connectivity,142.0,M,wt/wt
950910066,0.538164,78.260000,0.086261,0.00,6.567355,0.233501,849990710,0.000304,0.727973,0.000000,...,3a,29999.961516,True,2018-11-05T21:14:59Z,2019-10-03T00:00:00Z,754488979,functional_connectivity,142.0,M,wt/wt
950910088,0.577194,97.634745,0.008885,71.62,4.431527,0.315913,849990720,59.572913,0.206030,0.003882,...,3a,29999.961516,True,2018-11-05T21:14:59Z,2019-10-03T00:00:00Z,754488979,functional_connectivity,142.0,M,wt/wt
950910119,0.387275,71.113770,0.024558,514.29,5.020218,0.206030,849990736,0.579948,0.151089,0.338488,...,3a,29999.961516,True,2018-11-05T21:14:59Z,2019-10-03T00:00:00Z,754488979,functional_connectivity,142.0,M,wt/wt
950910126,0.541963,72.180615,0.500000,117.01,5.082387,0.260972,849990738,14.659472,0.151089,0.051741,...,3a,29999.961516,True,2018-11-05T21:14:59Z,2019-10-03T00:00:00Z,754488979,functional_connectivity,142.0,M,wt/wt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
951190693,0.425345,95.730000,0.109200,0.00,3.829374,0.879062,867384640,0.003963,0.219765,0.000000,...,PXI,30000.264062,True,2019-04-08T19:30:50Z,2019-10-03T00:00:00Z,827809884,functional_connectivity,126.0,F,wt/wt
951190812,0.361276,340.833559,0.240907,0.00,5.193001,0.755444,867384640,0.011992,0.164824,0.000000,...,PXI,30000.264062,True,2019-04-08T19:30:50Z,2019-10-03T00:00:00Z,827809884,functional_connectivity,126.0,F,wt/wt
951190814,0.380243,107.452982,0.450888,8.26,4.159769,0.673032,867384640,0.052133,0.206030,0.000000,...,PXI,30000.264062,True,2019-04-08T19:30:50Z,2019-10-03T00:00:00Z,827809884,functional_connectivity,126.0,F,wt/wt
951190860,0.384349,136.924461,0.029022,9.15,5.393753,0.714238,867384640,0.070730,0.219765,0.000000,...,PXI,30000.264062,True,2019-04-08T19:30:50Z,2019-10-03T00:00:00Z,827809884,functional_connectivity,126.0,F,wt/wt
