In [1]:
%load_ext autoreload
%autoreload 2

import json
import os
import shutil
from os import path

import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from IPython.utils.capture import capture_output
with capture_output():
    tqdm.pandas()

from tbd_eeg.data_analysis.eegutils import EEGexp
from tbd_eeg.data_analysis.Utilities.utilities import get_stim_events, find_nearest_ind

In [2]:
# Root folder that receives the exported data; the export loop creates one
# sub-folder per mouse and, inside it, one per experiment.
destination_folder = '/local2/zap_zip/'

In [3]:
# Load the Google sheet holding the experiment metadata into a DataFrame.
# One-time setup of the permissions:
# 1. install gspread (pip install gspread / conda install gspread)
# 2. copy the service_account.json file to '~/.config/gspread/service_account.json'
# 3. run the following:
import gspread
_gc = gspread.service_account() # authenticates with the key file from step 2
_sh = _gc.open('Zap_Zip-log_exp') # open the spreadsheet by its title
_df = pd.DataFrame(_sh.sheet1.get()) # first worksheet; row 0 still holds the column titles
gmetadata = _df.T.set_index(0).T # promote row 0 to the column labels

In [4]:
# Keep only the experiments that are spike-sorted, have brain areas assigned
# and list more than two Neuropixels probes; index them by (mouse, experiment).
_units_sorted = gmetadata['Units Sorted (X)'].isin(['X'])
_areas_assigned = gmetadata['Brain areas assignment'].isin(['X'])
_multi_probe = gmetadata['Npx'].apply(
    # empty sheet cells arrive as None and count as "no probes"
    lambda x: False if x is None else len(x.split(',')) > 2
)
good_expt = gmetadata[_units_sorted & _areas_assigned & _multi_probe].set_index(
    ['mouse_name', 'exp_name']
)
good_expt

Unnamed: 0_level_0,Unnamed: 1_level_0,brain states,stimulation,visual_stim,audio_stim,ISI (sec),stimulus duration (msec),Current (uA),Cortical Area stimulation,N trials per stimulus,EEG bad_channels,Npx,Units Sorted (X),Brain slices (X),Pupil tracking pre-processing,Brain areas assignment,"CCF coordinates stim electrode (surface,tip)","CCF area stim electrode (surface,tip)",Notes
mouse_name,exp_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
mouse551400,estim_vis_2021-01-22_11-07-12,awake/ISO/recovery/recovery,electrical/sensory,white,,[3.5 4.5],0.2/250,30/50/70,M2,120/100,"0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18...","F,B,D",X,X,,X,"[ 390,121,439], [ 419,267,442]","MOs1, MOs6a",NO visual stim during the 2nd Recovery. The EE...
mouse569062,estim_vis_2021-02-18_11-17-51,awake/ISO/recovery,electrical/sensory,white,,[3.5 4.5],0.2/250,15/35/60,M2,120,613,"F,B,D",X,X,,X,"[370,146,387], [ 428,300,434]","MOs1, ccg",ISO kept ~1%. White circles for the vis stim. ...
mouse569068,estim_vis_2021-03-04_10-51-38,awake/ISO/recovery/recovery,electrical/sensory,white,,[3.5 4.5],0.2/250,10/20/40,M2,120,,"F,B,D",X,X,,X,"[383,136,400], [446,289,442]","MOs1, ccg",ISO kept ~1%. White circles for the vis stim. ...
mouse569069,estim_vis1_2021-03-11_11-02-08,awake/awake/awake/awake,electrical/sensory,white,,[3.5 4.5],0.2/250,50/60/80,M2,120,7891011121314,"F,B,C",X,X,,X,"[293,156,456], [331,266,505]","MOs1,PL5",control exp in awake. Something happen to the ...
mouse569069,estim_vis2_2021-03-12_10-52-44,awake/ISO/recovery/recovery,electrical/sensory,white,,[3.5 4.5],0.2/250,20/40/70,M2,120,,"F,B,C",X,X,,X,"[293,156,456], [331,266,505]","MOs1,PL5",ISO kept ~1%.
mouse571619,estim1_2021-03-18_11-41-26,awake/ISO,electrical,,,[3.5 4.5],0.2,20/30/40,SS-cortex,120,234,"F,B,C",X,X,,X,"[642,45,404], [649,238,431]","SSp-tr1, CA3",all EEG channels very noisy. ISO kept ~1%.
mouse571619,estim2_2021-03-19_10-09-01,awake500/awake1200/ISO1200/ISO500,electrical,,,[3.5 4.5],0.2,20/40/60,M2-frontal (2 depths),120,234513,"F,B,C",X,X,,X,"[273,172,431],[286,314,462]","MOs1, ORBvl6a",2 different depths for the stim electrode in b...
mouse569064,estim_vis_2021-04-08_10-28-24,awake/ISO/recovery/recovery,electrical/sensory,white,,[3.5 4.5],0.2/250,15/40/60,M2,120,,"F,B,C",X,X,,X,"[369,134,419], [415,272,461]","MOs1, cing",ISO kept ~1%. White circles for the vis stim. ...
mouse569073,estim_vis_2021-04-15_10-27-22,awake/ISO/recovery/recovery,electrical/sensory,white,,[3.5 4.5],0.2/250,20/40/70,M2,120,3413,"F,B,C",X,X,,X,"[430,113,421],[437,258,440]","MOp1,MOs6a",ISO kept ~1%. White circles for the vis stim. ...
mouse569073,estim_2021-04-16_10-42-44,awake/ISO,electrical,,,[3.5 4.5],0.2,60/80/100,SS-cortex,120,131110987654310,"F,B,C",X,X,,X,not visible,not visible,Control Exp. ISO kept ~1%. White circles for t...


In [5]:
def bin_spikes(spikes, bin_size_ms=1, t_start=0, t_end=1e5):
    """Turn spike times into a binary (spike / no spike) time series.

    Parameters
    ----------
    spikes : array-like of float
        Spike times, in the same unit as ``t_start``/``t_end``. The unit is
        treated as seconds, since the window length is multiplied by 1000
        before being divided by ``bin_size_ms``.
    bin_size_ms : float, default 1
        Bin width in milliseconds.
    t_start, t_end : float
        Limits of the half-open window ``[t_start, t_end)``; spikes outside it
        are ignored.

    Returns
    -------
    pandas.Series
        Boolean series with one entry per bin, indexed by the bin start time.
        An entry is True when at least one spike fell into that bin.
    """
    spikes = np.asarray(spikes)
    n_bins = int((t_end - t_start) * 1000 / bin_size_ms)
    times = np.linspace(t_start, t_end, n_bins, endpoint=False)
    binned = np.zeros(n_bins, dtype=bool)

    # The window is half-open: a spike exactly at t_start belongs to bin 0,
    # while a spike exactly at t_end would land one past the last bin.
    in_window = spikes[(spikes >= t_start) & (spikes < t_end)]
    if n_bins > 0 and in_window.size > 0:
        bin_idx = (n_bins * (in_window - t_start) / (t_end - t_start)).astype(int)
        # Guard against float round-off pushing a spike just below t_end into
        # the (non-existent) bin n_bins.
        bin_idx = np.minimum(bin_idx, n_bins - 1)
        # Only presence matters for the boolean output, so repeated indices
        # (several spikes in one bin) need no counting.
        binned[bin_idx] = True
    return pd.Series(binned, index=times)

def get_spikes(rec_folder, probe):
    """Load the curated units of one probe as a binary 1 ms spike raster.

    Parameters
    ----------
    rec_folder : str
        Recording folder handed to ``EEGexp``.
    probe : str
        Key of the probe in ``exp.ephys_params`` (e.g. ``'probeF'``).

    Returns
    -------
    pandas.DataFrame
        One boolean column per retained cluster id (column axis named
        ``'units'``), indexed by bin start time (index named ``'time'``).

    Raises
    ------
    ValueError
        If no cluster passes the curation criteria.

    Notes
    -----
    The raster spans the earliest to the latest retained spike, both rounded
    to 3 decimals. ``bin_spikes`` only keeps spikes inside that window, so a
    spike at the very edge of the recording can fall outside it after rounding.
    """
    exp = EEGexp(rec_folder, preprocess=False, make_stim_csv=False)
    probe_params = exp.ephys_params[probe]

    # read raw spike time data (memory-mapped, the arrays can be large)
    _spike_times = np.load(probe_params['spike_times'], mmap_mode='r')
    _spike_clusters = np.load(probe_params['spike_clusters'], mmap_mode='r')
    cluster_metrics = pd.read_csv(probe_params['cluster_metrics'], index_col=1).drop('Unnamed: 0', axis=1, errors='ignore')
    cluster_groups = pd.read_csv(probe_params['cluster_group'], sep='\t', index_col=0)

    # rearrange into one array of spike times per cluster
    spike_df = pd.DataFrame(index=_spike_clusters, data=_spike_times, columns=['time'])
    spike_times = spike_df.groupby(level=0).apply(lambda g: g.values[:, 0])

    # keep only clusters labelled 'good'; align the labels on the metrics
    # index explicitly so clusters without a label are simply dropped instead
    # of relying on implicit boolean-indexer reindexing
    is_good = cluster_groups['group'].eq('good').reindex(cluster_metrics.index, fill_value=False)
    cluster_metrics = cluster_metrics[is_good.to_numpy()]
    # quality thresholds on the sorting metrics
    cluster_metrics = cluster_metrics[(cluster_metrics.isi_viol<0.5)&(cluster_metrics.amplitude_cutoff<0.1)]
    spike_times = spike_times.loc[cluster_metrics.index]
    if spike_times.empty:
        raise ValueError(f'No cluster of {probe} in {rec_folder} passes the curation criteria')

    # bin spikes into 1ms bins on a time grid shared by all units
    spikes = {}
    t_start = spike_times.apply(lambda x: x.min()).min().round(3)
    t_end = spike_times.apply(lambda x: x.max()).max().round(3)
    for u, t in tqdm(spike_times.items(), total=len(spike_times)):
        spikes[u] = bin_spikes(t, t_start=t_start, t_end=t_end)
    return pd.concat(spikes, axis=1, names='units').rename_axis('time')

In [6]:
# mouse, expt = 'mouse551400', 'estim_vis_2021-01-22_11-07-12'

# rec_folder = f'../tiny-blue-dot/zap-n-zip/EEG_exp/{mouse}/{expt}/experiment1/recording1/'

# if not path.exists(path.join(destination_folder, mouse)):
#     print('created mouse directory')
#     os.mkdir(path.join(destination_folder, mouse))
# if not path.exists(path.join(destination_folder, mouse, expt)):
#     print('created experiment directory')
#     os.mkdir(path.join(destination_folder, mouse, expt))

# exp = EEGexp(rec_folder, preprocess=False, make_stim_csv=False)
# ephys_sources = exp.ephys_params.keys()
# probes = [x for x in ephys_sources if 'probe' in x]

# data = {}
# unit_meta = {}
# channel_meta = {}
# for probe in probes:
    
#     unit_meta[probe] = pd.read_csv(
#         exp.ephys_params[probe]['cluster_metrics'], index_col=0
#     ).set_index('cluster_id').join(pd.read_csv(
#         exp.ephys_params['probeF']['cluster_group'], sep='\t', index_col='cluster_id'
#     ))
    
#     with open(exp.ephys_params[probe]['probe_info'], 'r') as f:
#         _data = json.load(f)
#     channel_meta[probe] = pd.DataFrame(_data).set_index('channel')
    
#     # save waveforms
#     waveforms = np.load(exp.ephys_params[probe]['waveforms'])
#     np.savez(path.join(destination_folder, mouse, expt, f'{probe}_waveforms.npz'), waveforms)
    
#     # collect LFP
#     corrected_fname = f"{exp.ephys_params[probe]['lfp_continuous'][:-4]}_corrected.npy"
#     lfp = np.memmap(corrected_fname, mode='r+', dtype='int16')
#     lfp = np.reshape(lfp, (int(lfp.size/exp.ephys_params[probe]['num_chs']), exp.ephys_params[probe]['num_chs']))
#     timestamps = np.load(exp.ephys_params[probe]['lfp_timestamps'])
#     data[(probe, 'lfp')] = pd.DataFrame(lfp, index=timestamps)
    
#     # collect spikes
#     data[(probe, 'spikes')] = get_spikes(rec_folder, probe)

# # collect metadata
# data[('meta', 'units')] = pd.concat(unit_meta, names=['probe', 'cluster_id'])
# data[('meta', 'channels')] = pd.concat(channel_meta, names=['probe', 'channel'])
# data[('meta', 'stim')] = pd.read_csv(exp.stimulus_log_file)
# data[('meta', 'running')] = exp.load_running(return_type='pd')
# try:
#     data[('meta', 'iso')] = exp.load_analog_iso(return_type='pd')
# except:
#     pass

# # collect data
# data[('eeg', 'lfp')] = exp.load_eegdata(return_type='pd')

# # save data
# for k, v in data.items():
# #     print(k)
#     fname = path.join(destination_folder, mouse, expt, f'{k[0]}_{k[1]}.bz2')
#     v.to_pickle(fname)

In [7]:
# Export every selected experiment into `destination_folder`, last sheet row first.
# An experiment whose output folder already exists is considered done and skipped.
for mouse, expt in good_expt.index[::-1]:
    rec_folder = f'../tiny-blue-dot/zap-n-zip/EEG_exp/{mouse}/{expt}/experiment1/recording1/'
    mouse_dir = path.join(destination_folder, mouse)
    expt_dir = path.join(mouse_dir, expt)

    if not path.exists(mouse_dir):
        print('created mouse directory')
        os.mkdir(mouse_dir)
    if not path.exists(expt_dir):
        print('created experiment directory')
        os.mkdir(expt_dir)
    else:
        print(f'Found pre-existing folder for {mouse} ({expt})')
        continue

    try:
        exp = EEGexp(rec_folder, preprocess=False, make_stim_csv=False)
        probes = [x for x in exp.ephys_params.keys() if 'probe' in x]

        data = {}
        unit_meta = {}
        channel_meta = {}
        for probe in probes:
            probe_params = exp.ephys_params[probe]

            # per-unit metadata: sorting metrics joined with the manual labels
            # of the SAME probe
            unit_meta[probe] = pd.read_csv(
                probe_params['cluster_metrics'], index_col=0
            ).set_index('cluster_id').join(pd.read_csv(
                probe_params['cluster_group'], sep='\t', index_col='cluster_id'
            ))

            with open(probe_params['probe_info'], 'r') as f:
                _data = json.load(f)
            channel_meta[probe] = pd.DataFrame(_data).set_index('channel')

            # save waveforms
            waveforms = np.load(probe_params['waveforms'])
            np.savez(path.join(expt_dir, f'{probe}_waveforms.npz'), waveforms)

            # collect LFP; the source file is only read, so map it read-only
            corrected_fname = f"{probe_params['lfp_continuous'][:-4]}_corrected.npy"
            lfp = np.memmap(corrected_fname, mode='r', dtype='int16')
            lfp = lfp.reshape(-1, probe_params['num_chs'])
            timestamps = np.load(probe_params['lfp_timestamps'])
            data[(probe, 'lfp')] = pd.DataFrame(lfp, index=timestamps)

            # collect spikes
            data[(probe, 'spikes')] = get_spikes(rec_folder, probe)

        # collect metadata
        data[('meta', 'units')] = pd.concat(unit_meta, names=['probe', 'cluster_id'])
        data[('meta', 'channels')] = pd.concat(channel_meta, names=['probe', 'channel'])
        data[('meta', 'stim')] = pd.read_csv(exp.stimulus_log_file)
        data[('meta', 'running')] = exp.load_running(return_type='pd')
        try:
            data[('meta', 'iso')] = exp.load_analog_iso(return_type='pd')
        except Exception as err:
            # the isoflurane trace is optional: export the rest, but say why it is missing
            print(f'No isoflurane signal exported for {mouse} ({expt}): {err!r}')

        # collect data
        data[('eeg', 'lfp')] = exp.load_eegdata(return_type='pd')

        # save data (compression is inferred from the .bz2 extension)
        for k, v in data.items():
            fname = path.join(expt_dir, f'{k[0]}_{k[1]}.bz2')
            v.to_pickle(fname)
    except BaseException:
        # This folder was created by this iteration. Remove the partial export
        # so that the next run retries the experiment instead of skipping it
        # as "pre-existing", then let the error surface.
        shutil.rmtree(expt_dir, ignore_errors=True)
        raise

Found pre-existing folder for mouse571620 (estim_vis_2021-05-13_11-33-47)
created mouse directory
created experiment directory
Experiment type: electrical stimulation
Experiment type: electrical stimulation


HBox(children=(FloatProgress(value=0.0, max=275.0), HTML(value='')))


Experiment type: electrical stimulation


HBox(children=(FloatProgress(value=0.0, max=196.0), HTML(value='')))


Experiment type: electrical stimulation


HBox(children=(FloatProgress(value=0.0, max=200.0), HTML(value='')))




  self._check_line_labels()


created mouse directory
created experiment directory
Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.
Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=380.0), HTML(value='')))


Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=292.0), HTML(value='')))


Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=323.0), HTML(value='')))




  self._check_line_labels()


created mouse directory
created experiment directory
Experiment type: electrical stimulation
Experiment type: electrical stimulation


HBox(children=(FloatProgress(value=0.0, max=222.0), HTML(value='')))


Experiment type: electrical stimulation


HBox(children=(FloatProgress(value=0.0, max=307.0), HTML(value='')))


Experiment type: electrical stimulation


HBox(children=(FloatProgress(value=0.0, max=176.0), HTML(value='')))




  self._check_line_labels()


created experiment directory
Experiment type: electrical and sensory stimulation
Experiment type: electrical and sensory stimulation


HBox(children=(FloatProgress(value=0.0, max=239.0), HTML(value='')))


Experiment type: electrical and sensory stimulation


HBox(children=(FloatProgress(value=0.0, max=290.0), HTML(value='')))


Experiment type: electrical and sensory stimulation


HBox(children=(FloatProgress(value=0.0, max=102.0), HTML(value='')))




  self._check_line_labels()


created mouse directory
created experiment directory
Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.
Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=84.0), HTML(value='')))


Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=309.0), HTML(value='')))


Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=88.0), HTML(value='')))




  self._check_line_labels()


created mouse directory
created experiment directory
Experiment type: electrical stimulation
SomnoSuite log file not found.
Experiment type: electrical stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=218.0), HTML(value='')))


Experiment type: electrical stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=343.0), HTML(value='')))


Experiment type: electrical stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=298.0), HTML(value='')))




  self._check_line_labels()


created experiment directory
Experiment type: electrical stimulation
SomnoSuite log file not found.
Experiment type: electrical stimulation
SomnoSuite log file not found.


HBox(children=(FloatProgress(value=0.0, max=231.0), HTML(value='')))




FileNotFoundError: [Errno 2] No such file or directory: '../tiny-blue-dot/zap-n-zip/EEG_exp/mouse571619/estim1_2021-03-18_11-41-26/experiment1/probeC_sorted/continuous/Neuropix-PXI-100.1/continuous_corrected.npy'