In [1]:
import os
import shutil

import numpy as np
import pandas as pd

from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache

In [2]:
# Location of the ecephys cache. The path can be overridden through the
# ECEPHYS_CACHE_DIR environment variable so the notebook is not tied to one
# machine; the original location is kept as the default.
data_directory = os.environ.get(
    'ECEPHYS_CACHE_DIR',
    '/home/brian/data/ecephys_project_cache/',
)
# The manifest file sits directly inside the cache directory
manifest_path = os.path.join(data_directory, "manifest.json")

In [3]:
# Create the project cache object from the manifest file located above
cache = EcephysProjectCache.from_warehouse(
    manifest=manifest_path,
)

In [4]:
# Fetch the table of all sessions, then keep only the rows whose
# session_type is 'functional_connectivity'
sessions = cache.get_session_table()
filtered_sessions = sessions.loc[
    sessions['session_type'].eq('functional_connectivity')
]

In [5]:
# Show the last five functional connectivity sessions (the ones processed below)
filtered_sessions.tail(5)

Unnamed: 0_level_0,published_at,specimen_id,session_type,age_in_days,sex,full_genotype,unit_count,channel_count,probe_count,ecephys_structure_acronyms
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
835479236,2019-10-03T00:00:00Z,813701562,functional_connectivity,121.0,M,Vip-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,582,1857,5,"[LP, DG, CA1, VISmmp, nan, SCiw, SCig, SCop, S..."
839068429,2019-10-03T00:00:00Z,817060751,functional_connectivity,129.0,F,Sst-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,742,2298,6,"[APN, LP, MB, DG, CA1, VISam, nan, VISpm, ProS..."
839557629,2019-10-03T00:00:00Z,821469666,functional_connectivity,115.0,M,Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,450,1853,5,"[APN, NOT, MB, DG, CA1, VISam, nan, VISpm, LGd..."
840012044,2019-10-03T00:00:00Z,820866121,functional_connectivity,116.0,M,Pvalb-IRES-Cre/wt;Ai32(RCL-ChR2(H134R)_EYFP)/wt,758,2298,6,"[APN, DG, CA1, VISam, nan, LP, VISpm, VISp, LG..."
847657808,2019-10-03T00:00:00Z,827809884,functional_connectivity,126.0,F,wt/wt,874,2298,6,"[APN, NOT, DG, HPF, ProS, CA1, VISam, nan, MB,..."


In [6]:
save_dir = './processed'
# np.save and DataFrame.to_csv do not create missing directories, so make sure
# the output directory exists before any session is downloaded and processed.
os.makedirs(save_dir, exist_ok=True)

# Layout used to reshape the natural movie one responses below:
# 60 repeats of the movie, 900 frames per repeat.
n_repeats = 60
n_frames = 900

session_ids = filtered_sessions.tail().index.values

# Loop through sessions and extract needed information.
# For every session this writes two files into save_dir:
#   <session id>_repeat_frame_cell.npy  - spike counts, (repeat x frame x cell)
#   <session id>_units.csv              - the units table for that session
for sid in session_ids:

    # Download NWB for session
    session = cache.get_session_data(sid)

    # Get stimulus table for natural movie one (60 repeats)
    nm1 = session.get_stimulus_table(['natural_movie_one_more_repeats'])

    # Get all units
    units = session.units

    # Spike times relative to stimulus onset, for every unit and every
    # natural movie presentation
    spikes = session.presentationwise_spike_times(
        stimulus_presentation_ids=nm1.index.values,
        unit_ids=units.index.values
    )

    # Count spikes per (presentation id, unit id) pair
    new_df = spikes.pivot_table(
        index='stimulus_presentation_id',
        columns='unit_id',
        values='time_since_stimulus_presentation_onset',
        fill_value=0,
        aggfunc='count'
    )
    # Align rows with the stimulus table and columns with the units table.
    # fill_value=0 must apply to BOTH axes: a presentation with no spikes and
    # a unit that never fired during the movie are each absent from the pivot
    # table, and both mean "zero spikes", not NaN.
    new_df = new_df.reindex(
        index=nm1.index.values,
        columns=units.index.values,
        fill_value=0
    )

    # Convert to numpy and reshape (repeat x frame x cell)
    repeat_frame_cell = new_df.values.reshape(n_repeats, n_frames, -1)
    np.save(os.path.join(save_dir, str(sid)+'_repeat_frame_cell.npy'), repeat_frame_cell)

    # Save units dataframe
    units.to_csv(os.path.join(save_dir, str(sid)+'_units.csv'))

In [7]:
# Report, for each session that has invalid time intervals, which natural
# movie one presentations fall inside those intervals.
session_ids = filtered_sessions.tail().index.values

for sid in session_ids:

    # Download NWB for session
    session = cache.get_session_data(sid)
    invalid_times = session.invalid_times

    # Sessions without any invalid interval produce no output
    if len(invalid_times) == 0:
        continue

    print(sid)
    print('start', 'stop')

    # Stimulus table for natural movie one (60 repeats)
    nm1 = session.get_stimulus_table(['natural_movie_one_more_repeats'])

    for start, stop in invalid_times[['start_time', 'stop_time']].values:
        # NOTE(review): this keeps only presentations lying entirely inside
        # the interval; a presentation that merely overlaps an interval edge
        # is not reported. Confirm that this is the intended criterion.
        inside_interval = (nm1.start_time >= start) & (nm1.stop_time <= stop)
        nm_subset = nm1[inside_interval]
        if len(nm_subset) == 0:
            continue

        # Extent of the affected presentations: time span, ids and movie frames
        print('times: {}, {}'.format(nm_subset.start_time.min(), nm_subset.stop_time.max()))
        print('presentation ids: {}, {}'.format(nm_subset.index.min(), nm_subset.index.max()))
        print('frames: {}, {}\n'.format(nm_subset.frame.values.min(), nm_subset.frame.values.max()))

840012044
start stop
847657808
start stop
times: 7806.506751995855, 7808.4750663512095
presentation ids: 62284, 62342
frames: 643.0, 701.0

times: 7898.016112527399, 7899.484013508356
presentation ids: 65027, 65070
frames: 686.0, 729.0

times: 7920.001045505373, 7921.468922152998
presentation ids: 65686, 65729
frames: 445.0, 488.0

