In [None]:
# Standard libraries
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

# Append base directory
import os,sys,inspect
# Locate the repository root by searching for its folder name in the current
# working directory, then make the repository importable.
# The working directory is used instead of inspect.getfile(currentframe()):
# inside a notebook the frame's "file" is a synthetic cell name, and newer
# Jupyter kernels may report a temp-file path there, which does not contain
# the repository name.
rootname = "pub-2020-exploratory-analysis"
thispath = os.path.abspath(os.getcwd())
if rootname not in thispath:
    # Same exception type as the original str.index() failure, but explicit about the cause
    raise ValueError("Notebook must be run from inside '" + rootname + "', current directory is " + thispath)
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# Avoid piling up duplicate entries when the cell is re-run in the same kernel
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath

import lib.sych.preprocess_raw as prep

# Enable IPython's autoreload extension in mode 2, so edited modules
# (e.g. the project library imported above) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

# Pooling Data From LVM

In [None]:
# Root directory of the raw data tree.
# The location is machine-specific, so it can be overridden without editing the
# notebook by setting the SYCH_DATA_RAW environment variable before starting
# the kernel. When the variable is unset the previous hard-coded path is used.
# Alternatives used previously:
# fpathData = gui_fpath("Root directory for raw data", "./")
# fpathData = '/mnt/neurophys-storage2/Sych/Yaro/data_raw/'
fpathData = os.environ.get(
    'SYCH_DATA_RAW',
    '/run/user/1000/gvfs/smb-share:server=130.60.51.15,share=neurophysiology-storage2/Sych/Yaro/data_raw'
)

In [None]:
# Collect the raw files found under fpathData via prep.raw_get_files_df
# (presumably a pandas DataFrame with one row per file - TODO confirm)
# and display the result for visual inspection.
dfFiles = prep.raw_get_files_df(fpathData)
dfFiles

In [None]:
# Pool the raw data listed in dfFiles. The result is not captured here;
# presumably the pooled data is written to disk by the helper - TODO confirm where.
prep.raw_pool_data(dfFiles)

# Moving all datasets to data group

In [None]:
# Directory holding the pooled HDF5 data files.
# The location is machine-specific, so it can be overridden without editing the
# notebook by setting the SYCH_DATA_H5 environment variable before starting
# the kernel. When the variable is unset the previous hard-coded path is used.
# Alternatives used previously:
# fpathDataH5 = gui_fpath("Directory for data files", "./")
# fpathDataH5 = '/media/aleksejs/DataHDD/work/data/yaro/neuronal-raw-pooled'
fpathDataH5 = os.environ.get(
    'SYCH_DATA_H5',
    '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed/'
)

In [None]:
# Index the pooled data files under fpathDataH5. dfRawH5 is the handle passed
# to every later preprocessing step in this notebook; below it is indexed by
# the 'mousename' and 'path' columns.
dfRawH5 = prep.pooled_get_files_df(fpathDataH5)

In [None]:
# Display the pooled-file index for visual inspection
dfRawH5

In [None]:
# Move the datasets of every file in dfRawH5 into a common group, as per the section title.
# NOTE(review): presumably modifies the HDF5 files in place; confirm whether it is safe to re-run.
prep.pooled_move_data_subfolder(dfRawH5)

In [None]:
# Sanity check following the move step above (what exactly is verified is defined in prep)
prep.pooled_move_sanity_check(dfRawH5)

# Mark Starts and ends of Trials

In [None]:
# Mark trial starts and ends for the files in dfRawH5.
# NOTE(review): presumably stores the markers in the HDF5 files - confirm.
prep.pooled_mark_trial_starts_ends(dfRawH5)

# Appending Channel Labels

In [None]:
# Directory of the original data tree (source of channel labels and behaviour).
# The location is machine-specific, so it can be overridden without editing the
# notebook by setting the SYCH_DATA_ORIG environment variable before starting
# the kernel. When the variable is unset the previous hard-coded path is used.
# Alternatives used previously:
# fpathDataOrig = gui_fpath("Directory for original data tree", "./")
# fpathDataOrig = '/media/aleksejs/DataHDD/work/data/yaro/neuronal/mvg48'
fpathDataOrig = os.environ.get(
    'SYCH_DATA_ORIG',
    '/media/alyosha/Data/TE_data/yarodata/neuro/mvg48'
)

In [None]:
# Collect the channel-label files found in the original data tree and display them
dfLabels = prep.channel_labels_get_files_df(fpathDataOrig)
dfLabels

In [None]:
# Attach the channel labels listed in dfLabels to the pooled data in dfRawH5.
# NOTE(review): presumably written into the HDF5 files - confirm.
prep.pooled_mark_channel_labels(dfRawH5, dfLabels)

In [None]:
# Post-process the stored channel labels; presumably makes them unique per session - TODO confirm
prep.update_channel_labels_unique(dfRawH5)

# Adding behaviour

**Problems/Bugs**:
1. [early_go_trials, iGO_inhibition] overlap - within this framework could be solved by multiplexing enum
2. mvg_8_2018_11_22_a has 406 trials in behaviour but only 142 in neuro - crop to neuro

**TODO**:
1. Crop trial start/end to match behaviour
2. Drop some trials if Yaro says so

In [None]:
# Index the files of the original data tree (used below as the behaviour source
# and as the reference list of sessions) and display the result.
dfNeuro = prep.orig_neuro_get_files_df(fpathDataOrig)
dfNeuro

In [None]:
# Add trial types and performance from the files in dfNeuro to the pooled data.
# The known problems of this step are listed in the notes above this cell.
prep.pooled_mark_trial_types_performance(dfRawH5, dfNeuro)

# Drop bad sessions

1. Drop sessions that were not used by Yaro in the first analysis

In [None]:
# Drop the pooled sessions that have no counterpart in dfNeuro.
# Note the argument order: dfNeuro first, dfRawH5 second (unlike most other prep calls here).
prep.drop_sessions_not_in_neuro(dfNeuro, dfRawH5)

2. Deleting extra sessions that behave badly

In [None]:
# Manually drop two sessions by key.
# mvg_8_2018_11_22_a is the session with the behaviour/neuro trial-count mismatch
# noted in the "Adding behaviour" section; the reason for mvg_9_2019_03_21_a is
# not recorded in this notebook - TODO document.
prep.drop_session(dfRawH5, 'mvg_8_2018_11_22_a')
prep.drop_session(dfRawH5, 'mvg_9_2019_03_21_a')

# Drop bad trials

1. Filtering short trials

In [None]:
# Find short trials; the result is consumed below as a mapping session -> trial indices.
# The length threshold is defined inside prep.find_short_trials.
shortTrialDict = prep.find_short_trials(dfRawH5)

In [None]:
# Inspect the detected short trials before dropping them
shortTrialDict

In [None]:
# Drop the short trials found above, one session at a time
for session, idxsTrial in shortTrialDict.items():
    prep.drop_trials(dfRawH5, session, idxsTrial)

2. Filtering outlier trials - orders of magnitude larger than others

In [None]:
# Find outlier ("large") trials; the result is consumed below as a mapping session -> trial indices.
# The outlier criterion is defined inside prep.find_large_trials.
largeTrialDict = prep.find_large_trials(dfRawH5)

In [None]:
# Inspect the detected outlier trials before dropping them
largeTrialDict

In [None]:
# Drop the outlier trials found above, one session at a time
for session, idxsTrial in largeTrialDict.items():
    prep.drop_trials(dfRawH5, session, idxsTrial)

# Fixing channels 1: Cropping broken trials

1. Find all trials not fully contained in the interval
2. Mark these trials as bad and update storage

In [None]:
# Manual cropping table, applied strictly in the listed order.
# Each entry is (session, interval start, interval end, extra keyword arguments)
# for prep.get_trial_idxs_by_interval; the selected trials are then dropped.
# Entries with empty kwargs rely on the helper's default selection mode
# (per the section notes: trials not fully contained in the interval), while
# entries with inside=True presumably select the trials inside the interval
# instead - TODO confirm against prep.get_trial_idxs_by_interval.
# mvg_9_2019_02_26_a is listed twice on purpose (two separate passes); keep both.
cropSpecs = [
    ('mvg_9_2019_02_26_a',      0, 161315, {}),
    ('mvg_4_2017_11_14_a',  14000, 400000, {}),
    ('mvg_4_2017_11_20_a',   5900, 400000, {}),
    ('mvg_7_2018_11_20_a', 190000, 400000, {'inside': True}),
    ('mvg_7_2018_11_24_a',      0,   1800, {'inside': True}),
    ('mvg_9_2019_02_19_a',  48500, 400000, {}),
    ('mvg_9_2019_02_21_a',      0, 102000, {}),
    ('mvg_9_2019_02_26_a', 145000, 400000, {'inside': True}),
    ('mvg_9_2019_03_04_a',      0,   7000, {'inside': True}),
    ('mvg_9_2019_03_05_a',      0,   2500, {'inside': True}),
    ('mvg_9_2019_03_07_a',      0,   2500, {'inside': True}),
    ('mvg_9_2019_03_08_a',      0,  15000, {'inside': True}),
    ('mvg_9_2019_03_09_a',      0,   7000, {'inside': True}),
]

for session, cropStart, cropEnd, cropKwargs in cropSpecs:
    # -2 and 8 are passed unchanged for every session; presumably the trial
    # window relative to the trial start - TODO confirm meaning and units
    badTrialIdxs = prep.get_trial_idxs_by_interval(dfRawH5, session, cropStart, cropEnd, -2, 8, **cropKwargs)
    prep.drop_trials(dfRawH5, session, badTrialIdxs)

# Cropping starts and ends of trials to match behaviour

In [None]:
# Truncate the stored trial starts/ends so that they match behaviour, as per the section title.
# NOTE(review): presumably crops to the behaviour trial count - confirm.
prep.pooled_trunc_trial_starts_ntrials(dfRawH5)

# Testing trial length sensibility

In [None]:
# Summarise trial lengths for a plausibility check.
# NOTE(review): the name suggests an Excel file is written; the output location is not visible here.
prep.pooled_trial_length_summary_excel(dfRawH5)

# Pulling Trial Durations From Behaviour Files


In [None]:
# Collect the behaviour timing files. Note that these are searched under the
# raw data root (fpathData), not under the original data tree; the result is
# displayed for inspection.
dfBehavTiming = prep.behav_timing_get_files_df(fpathData)
dfBehavTiming

In [None]:
# Read the trial lengths from the behaviour timing files for the sessions in dfRawH5.
# NOTE(review): presumably stored alongside the pooled data - confirm.
prep.behav_timing_read_get_trial_lengths(dfRawH5, dfBehavTiming)

In [None]:
# Compare the behaviour-derived trial timing against the neuronal recordings
prep.behav_timing_compare_neuro(dfRawH5)

# Plotting raw selected data for sensibility checks

In [None]:
# List the sessions available for mouse 'mvg_7' (displayed as the cell output)
prep.get_sessions(dfRawH5, 'mvg_7')

In [None]:
# Static inline figures: one figure is produced per channel below
%matplotlib inline
# Plot the raw data of one session for channel indices 0..47.
# 48 is presumably the channel count (cf. the 'mvg48' data folder) - TODO confirm.
for iCh in range(48):
    prep.plot_raw(dfRawH5, 'mvg_9_2019_02_08_a', iCh, onlyTrials=True, onlySelected=True, figsize=(8,3))

In [None]:
# Switch to the interactive notebook backend to allow zooming into a single channel
%matplotlib notebook
# Channel index 1 of the same session, this time including data outside trials (onlyTrials=False)
prep.plot_raw(dfRawH5, 'mvg_9_2019_02_08_a', 1, onlyTrials=False, onlySelected=True, figsize=(8,3))

# Background Subtraction

Test if there is significant gain from using higher order polynomials

In [None]:
# Plot background polynomial-fit residuals with ordMax=8, to judge whether
# higher polynomial orders bring a gain (see the note above this cell)
prep.pooled_plot_background_polyfit_residuals(dfRawH5, ordMax=8)

In [None]:
# Interactive backend for inspecting the fit; swap the two magics for static output
%matplotlib notebook
# %matplotlib inline
# View the polynomial fit for one session.
# NOTE(review): the positional values 17 and 15 are presumably channel index and
# polynomial order (bgOrd=15 is used for extraction below) - confirm against prep.poly_view_fit.
# xPart is identical to this session's entry in xPartMap below.
prep.poly_view_fit(dfRawH5, 'mvg_9_2019_02_08_a', 17, 15, onlyTrials=True, onlySelected=True, xPart=[110556, 8, 5])

# Fixing channels 2: Linear adjustment

In [None]:
# Linear adjustment
intervLst = [0, 7427, 28259, 29648]
valLst = [1070, 2460, 430]
prep.fix_adjust_drop_channel(dfRawH5, 'mvg_8_2018_11_23_a', 20, intervLst, valLst, update=False)

In [None]:
# Minor fix, lost FPS
pwdTmp = list(dfRawH5[dfRawH5['mousename'] == 'mvg_8']['path'])[0]

with h5py.File(pwdTmp, 'a') as f:
    f['data']['mvg_8_2018_11_23_a'].attrs['FPS'] = 20

# Test if pre-trial activity is actually small

In [None]:
# Check that pre-trial activity is small, as per the section title.
# Relevant for the pre-trial-mean baseline option listed in the next section.
prep.check_pre_trial_activity_small(dfRawH5)

# Baseline Normalization

* ZScore (Trial/Session-wise)
* Pre-trial mean estimate (Trial/Session-wise)

In [None]:
# Per-session 3-element parameter lists, passed to prep.extract_store_trial_data below.
# Same form as the xPart argument given to prep.poly_view_fit above (the entry for
# mvg_9_2019_02_08_a is identical to it). Sessions not listed here presumably use the
# helper's default handling - TODO confirm, and document the meaning of the three numbers.
xPartMap = {
    'mvg_7_2018_11_18_a' : [50285, 3, 5],
    'mvg_7_2018_11_20_a' : [18839, 3, 10],
    'mvg_7_2018_11_21_a' : [29034, 3, 10],
    'mvg_7_2018_11_22_a' : [152251, 8, 5],
    'mvg_7_2018_11_23_a' : [99177, 5, 7],
    'mvg_7_2018_11_26_a' : [6544, 1, 8],
    'mvg_7_2018_11_29_a' : [30529, 1, 8],
    'mvg_7_2018_12_05_a' : [95557, 10, 1],
    'mvg_8_2018_11_19_a' : [16712, 3, 8],
    'mvg_9_2019_02_07_a' : [59219, 3, 8],
    'mvg_9_2019_02_08_a' : [110556, 8, 5]
}

In [None]:
# Extract per-trial data and store it, using the per-session overrides in xPartMap.
# targetFPS=20 equals the FPS attribute value written manually for mvg_8_2018_11_23_a above.
# NOTE(review): keepExisting=False presumably overwrites previously stored results - confirm before re-running.
prep.extract_store_trial_data(dfRawH5, xPartMap,
                              targetFPS=20, bgOrd=15, fitOnlySelectedTrials=True, keepExisting=False)

# Extract and Store Performance

In [None]:
# Read-only inspection of one pooled file: print its top-level keys and the shapes
# of two datasets stored for session mvg_4_2017_11_14_a.
# NOTE(review): ['path'][0] is a label lookup, so it assumes dfRawH5 has an index
# label 0, and that this row is the file containing the session named below - confirm.
pwd = dfRawH5['path'][0]
with h5py.File(pwd, 'r') as h5f:
    print(h5f.keys())
    print(h5f['data_bn_trial']['mvg_4_2017_11_14_a'].shape)
    print(np.array(h5f['trialTypesSelected']['mvg_4_2017_11_14_a']).shape)