In [1]:
import os

import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt

In [2]:
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache
from allensdk.brain_observatory.ecephys.visualization import raster_plot

  from .autonotebook import tqdm as notebook_tqdm


### Get sessions data

Sessions are already loaded on local drive.

In [3]:
# Directory of the local cache; it determines where downloaded data is stored
# and where the manifest file lives.
output_dir = '/ecephys_cache_dir/'
manifest_path = os.path.join(output_dir, "manifest.json")
cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)
print(cache.get_all_session_types())

# Table of every session known to the cache.
sessions = cache.get_session_table()
print('len sessions: {}'.format(len(sessions)))

# Keep only the functional connectivity sessions: that dataset contains a
# 30 min spontaneous activity block.
# NOTE: despite its name, this variable holds the "functional_connectivity"
# sessions, not the "brain_observatory_1.1" ones.
brain_observatory_type_sessions = sessions.loc[sessions["session_type"] == "functional_connectivity"]
print('len brain_observatory_type_sessions = functional_connectivity: {}'.format(len(brain_observatory_type_sessions)))
print(brain_observatory_type_sessions.columns)

# Preview the last three selected sessions.
brain_observatory_type_sessions.tail(3)

['brain_observatory_1.1', 'functional_connectivity']
len sessions: 58
len brain_observatory_type_sessions = functional_connectivity: 26
Index(['published_at', 'specimen_id', 'session_type', 'age_in_days', 'sex',
       'full_genotype', 'unit_count', 'channel_count', 'probe_count',
       'ecephys_structure_acronyms'],
      dtype='object')


Unnamed: 0_level_0,published_at,specimen_id,session_type,age_in_days,sex,full_genotype,unit_count,channel_count,probe_count,ecephys_structure_acronyms
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
839557629,2019-10-03T00:00:00Z,821469666,functional_connectivity,115.0,M,Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,450,1853,5,"[APN, NOT, MB, DG, CA1, VISam, nan, VISpm, LGd..."
840012044,2019-10-03T00:00:00Z,820866121,functional_connectivity,116.0,M,Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,758,2298,6,"[APN, DG, CA1, VISam, nan, LP, VISpm, VISp, LG..."
847657808,2019-10-03T00:00:00Z,827809884,functional_connectivity,126.0,F,wt/wt,874,2298,6,"[APN, NOT, DG, HPF, ProS, CA1, VISam, nan, MB,..."


In [None]:
# By default, the AllenSDK applies filters so only units above a set of thresholds are returned.
# The default filter values are as follows:

# isi_violations < 0.5
# amplitude_cutoff < 0.1
# presence_ratio > 0.9
# units = cache.get_units()

# units = cache.get_units(amplitude_cutoff_maximum = np.inf,
#                         presence_ratio_minimum = -np.inf,
#                         isi_violations_maximum = np.inf)
# print(units.keys())
# len(units)

# units.query('session_type == "functional_connectivity"')
# units['genotype'].unique()

In [None]:
# # load all sessions to local drive 
# for session_id, row in brain_observatory_type_sessions.iterrows():

#     truncated_file = True
#     directory = os.path.join(output_dir + '/session_' + str(session_id))

#     while truncated_file:
#         session = cache.get_session_data(session_id)
#         try:
#             print(session.specimen_name)
#             truncated_file = False
#         except OSError:
#             shutil.rmtree(directory)
#             print(" Truncated spikes file, re-downloading")

### Get single units 

using 30 min of spontaneous activity (animals were shown grey screen)

**todo:** figure out the quality cutoffs.
For an explanation of why a low amplitude cutoff is good for ITs,
see also https://allensdk.readthedocs.io/en/latest/_static/examples/nb/ecephys_quality_metrics.html#Amplitude-cutoff
The idea is to get units that are more "complete": an amplitude cutoff of 0.01 means that 1% of the spikes are missing from the unit.

In [None]:
# Show the ids (the table index) of all selected functional connectivity sessions.
brain_observatory_type_sessions.index.values

In [4]:
# Work on the first session of the filtered table.
session_id_ = brain_observatory_type_sessions.index[0]
print('processing session {}'.format(session_id_))

# Display that session's metadata row.
brain_observatory_type_sessions.loc[session_id_]

processing session 766640955


published_at                                               2019-10-03T00:00:00Z
specimen_id                                                           744912849
session_type                                            functional_connectivity
age_in_days                                                               133.0
sex                                                                           M
full_genotype                                                             wt/wt
unit_count                                                                  842
channel_count                                                              2233
probe_count                                                                   6
ecephys_structure_acronyms    [MB, APN, NOT, DG, CA1, VISam, nan, PF, TH, LP...
Name: 766640955, dtype: object

In [5]:
# Fetch the session object for the selected session id from the cache.
session_ = cache.get_session_data(session_id_)
# Table of this session's units; its index is passed as `unit_ids` when spikes are extracted.
units = session_.units
# Number of units in the table.
print(len(units))
# Column names of the units table.
print(units.keys())

  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)
  return func(args[0], **pargs)


842
Index(['waveform_PT_ratio', 'waveform_amplitude', 'amplitude_cutoff',
       'cluster_id', 'cumulative_drift', 'd_prime', 'firing_rate',
       'isi_violations', 'isolation_distance', 'L_ratio', 'local_index',
       'max_drift', 'nn_hit_rate', 'nn_miss_rate', 'peak_channel_id',
       'presence_ratio', 'waveform_recovery_slope',
       'waveform_repolarization_slope', 'silhouette_score', 'snr',
       'waveform_spread', 'waveform_velocity_above', 'waveform_velocity_below',
       'waveform_duration', 'filtering', 'probe_channel_number',
       'probe_horizontal_position', 'probe_id', 'probe_vertical_position',
       'structure_acronym', 'ecephys_structure_id',
       'ecephys_structure_acronym', 'anterior_posterior_ccf_coordinate',
       'dorsal_ventral_ccf_coordinate', 'left_right_ccf_coordinate',
       'probe_description', 'location', 'probe_sampling_rate',
       'probe_lfp_sampling_rate', 'probe_has_lfp_data'],
      dtype='object')


In [6]:
# Names of the stimuli available in this session.
print(session_.stimulus_names)
# Stimulus table restricted to "spontaneous" presentations; it is indexed by
# stimulus_presentation_id and carries start/stop times and a duration per block.
presentations_ = session_.get_stimulus_table("spontaneous")
presentations_

  return func(args[0], **pargs)
  return func(args[0], **pargs)


['spontaneous', 'gabors', 'flashes', 'drifting_gratings_contrast', 'natural_movie_one_more_repeats', 'natural_movie_one_shuffled', 'drifting_gratings_75_repeats', 'dot_motion']


Unnamed: 0_level_0,start_time,stop_time,stimulus_name,duration,stimulus_condition_id
stimulus_presentation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,15.289644,75.356384,spontaneous,60.06674,0
3646,987.351691,1276.359894,spontaneous,289.008203,0
3797,1574.85929,1636.660934,spontaneous,61.801644,0
4338,2176.612116,2297.212884,spontaneous,120.600769,0
40639,4397.968317,6200.474424,spontaneous,1802.506107,0
40940,7100.226217,7101.227054,spontaneous,1.000837,0


In [7]:
# todo find stimulus_presentation_id, use duration? 
spikes_df = session_.presentationwise_spike_times(
    stimulus_presentation_ids=40639,  
    unit_ids=units.index.values
)

In [8]:
# Preview the first spikes: one row per spike, indexed by spike time.
spikes_df.head(3)

Unnamed: 0_level_0,stimulus_presentation_id,unit_id,time_since_stimulus_presentation_onset
spike_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4397.968838,40639,950919460,0.000521
4397.969123,40639,950934657,0.000806
4397.969223,40639,950935725,0.000906


In [9]:
# Add a string version of the absolute spike time (the frame's index) so that
# spike times can later be concatenated with ','.join.
# NOTE: this adds a column to spikes_df in place; in the visible cells the
# column is only referenced by the commented-out aggregation variant.
spikes_df['spike_time_str'] = spikes_df.index.astype(str)
spikes_df.head(3)

Unnamed: 0_level_0,stimulus_presentation_id,unit_id,time_since_stimulus_presentation_onset,spike_time_str
spike_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4397.968838,40639,950919460,0.000521,4397.96883773954
4397.969123,40639,950934657,0.000806,4397.969122841402
4397.969223,40639,950935725,0.000906,4397.9692228416725


In [11]:
# Add a string version of the spike time relative to the presentation onset;
# this is the column that gets joined per unit in the aggregation step.
# NOTE: this adds a column to spikes_df in place.
spikes_df['time_since_stimulus_presentation_onset_str'] = spikes_df.time_since_stimulus_presentation_onset.astype(str)
spikes_df.head(3)

Unnamed: 0_level_0,stimulus_presentation_id,unit_id,time_since_stimulus_presentation_onset,spike_time_str,time_since_stimulus_presentation_onset_str
spike_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4397.968838,40639,950919460,0.000521,4397.96883773954,0.0005208333659538
4397.969123,40639,950934657,0.000806,4397.969122841402,0.0008059352285272
4397.969223,40639,950935725,0.000906,4397.9692228416725,0.0009059354988494


In [12]:
# Collapse the spike table to one row per unit: `spike_times` is the
# comma-separated string of that unit's onset-relative spike times.
# Alternative using absolute spike times instead:
#spikes_out = spikes_df.groupby(by='unit_id', as_index=False).agg(spike_times=('spike_time_str', ','.join))
spikes_out = (
    spikes_df
    .groupby(by='unit_id', as_index=False)
    .agg(
        spike_times=pd.NamedAgg(
            column='time_since_stimulus_presentation_onset_str',
            aggfunc=','.join,
        )
    )
)

In [13]:
# Preview the per-unit table: unit id plus its comma-joined spike times.
spikes_out.head(3)

Unnamed: 0,unit_id,spike_times
0,950911624,"0.1639950337848859,0.4707620225708524,1.278496..."
1,950911641,"0.025394888249138603,0.14652834877779242,0.291..."
2,950911657,"0.02152821752224554,0.1527283552877634,0.28076..."


In [None]:
# Show the `ecephys_structure_acronym` column of the units table as a one-column frame.
units[['ecephys_structure_acronym']]

In [14]:
# Convert the per-unit table to {row label: {'unit_id': ..., 'spike_times': ...}}.
spikes_out_dict = spikes_out.to_dict(orient='index')

# Preview the first record. The joined spike-time string of a unit can be very
# long, so long strings are truncated for display only; spikes_out_dict itself
# is left untouched.
{
    key: (value[:200] + '...' if isinstance(value, str) and len(value) > 200 else value)
    for key, value in spikes_out_dict[0].items()
}

{'unit_id': 950911624,
 'spike_times': '0.1639950337848859,0.4707620225708524,1.2784962040595929,1.6501299276242207,1.885163507753532,1.995896957361765,2.2903972665990295,2.361464007889481,2.669197664356034,3.5075652113446267,3.743032125262289,4.476166228418151,4.9629000728427854,5.042800156740668,5.336267131560817,5.412300544732716,5.544534016916259,5.573467380630973,5.789734274387229,5.941934434204086,6.199434704590203,6.321701499642586,6.44703496458169,6.701235231503233,7.028868908865661,7.088502304816757,7.1377690232156965,7.199969088528633,7.232469122654948,7.339302568168023,7.430869330983114,7.477102712863598,7.53236943756292,7.644502888641,7.680002925917506,7.836103089829521,7.991836586689715,8.07247000469215,8.17037010749118,8.22433683082545,8.286603562874916,8.470870423029737,8.670637299459486,8.744670710531864,8.809704112152758,8.96300427312417,9.097771081302199,9.18350450465914,9.25583791394547,9.296537956682187,9.32283798429853,9.37373803774608,9.420171419836151,9.476004811

In [15]:
# Export the spike trains of the selected session to CSV: one row per unit,
# holding the unit id followed by its onset-relative spike times.
verbose = False
output_filename_ = 'Q:\\Personal\\Irina\\projects\\isttc\\' + 'allen_test_one_session_' + str(session_id_) + '_v2.csv'
# Open in write mode, not append: the file is specific to this session, so
# re-running the cell must replace its contents rather than duplicate every row.
with open(output_filename_, 'w', newline='') as f:
    writer = csv.writer(f)
    for unit_record in spikes_out_dict.values():
        if verbose:
            print('Writing unit {}'.format(unit_record['unit_id']))
        # Split the comma-joined string back into individual float spike times.
        row = [unit_record['unit_id']] + list(map(float, unit_record['spike_times'].split(',')))
        writer.writerow(row)

In [None]:
# Use the presentation id found in the first row of the spike table and keep
# only the spikes belonging to that presentation.
spont_30min_presentation_id = spikes_df['stimulus_presentation_id'].to_numpy()[0]
plot_times = spikes_df.loc[spikes_df['stimulus_presentation_id'] == spont_30min_presentation_id]

# Raster plot of the selected spikes.
fig = raster_plot(
    plot_times,
    title=f'spike raster for stimulus presentation {spont_30min_presentation_id}',
)
plt.show()

# Also display the stimulus table entry of this presentation.
session_.stimulus_presentations.loc[spont_30min_presentation_id]

In [None]:
# #presentations = session.get_stimulus_table("spontaneous")
# units = session.units[session.units["ecephys_structure_acronym"] == 'VISl']

# time_step = 10.0
# time_bins = np.arange(0, 1800 + time_step, time_step)

# histograms = session.presentationwise_spike_counts(
#     stimulus_presentation_ids=40639,  
#     bin_edges=time_bins,
#     unit_ids=units.index.values
# )

# histograms.coords

In [None]:
# mean_histograms = histograms.mean(dim="stimulus_presentation_id")

# fig, ax = plt.subplots(figsize=(8, 8))
# ax.pcolormesh(
#     mean_histograms["time_relative_to_stimulus_onset"], 
#     np.arange(mean_histograms["unit_id"].size),
#     mean_histograms.T
#     # vmin=0,
#     # vmax=1
# )

# ax.set_ylabel("unit")
# ax.set_xlabel("time (s)")
