In [3]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display
from scipy.stats import mannwhitneyu, wilcoxon

# Append base directory
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"
# Locate the repository root starting from the notebook's working directory.
# inspect.getfile(inspect.currentframe()) is not reliable inside a notebook cell:
# depending on the IPython/ipykernel version it yields either a pseudo-name
# (which abspath resolves against the working directory anyway) or a temporary
# file outside the repository, in which case the root lookup below fails.
thispath = os.path.abspath(os.getcwd())
if rootname not in thispath:
    raise ValueError("Expected the working directory to be inside '%s', got %s" % (rootname, thispath))
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# Re-running this cell must not pile up duplicate entries on sys.path
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.sych.data_fc_db_raw import DataFCDatabase
import lib.analysis.bulk_metrics as bulk_metrics

%load_ext autoreload
%autoreload 2

Appended root directory /home/alyosha/work/git/pub-2020-exploratory-analysis


In [4]:
# Settings handed to the database loader; only the data root is configured here.
# Alternatives that were tried and are currently disabled:
#   tmp_path = root_path_data if 'root_path_data' in locals() else "./"
#   params['root_path_data'] = './'
#   params['root_path_data'] = gui_fpath('h5path', './')
params = {
    'root_path_data': '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed',
}

In [5]:
# Build the raw-data database object from the settings in `params`
# (uses params['root_path_data'] as the data location -- presumably; confirm in DataFCDatabase).
dataDB = DataFCDatabase(params)

Searching for data files
Extracting trial type names
Extracting data types


In [6]:
# Result store that the metric and plotting calls read from and write to.
# The file name is relative -- presumably resolved against the working directory; confirm in DataStorage.
ds = DataStorage('sych_result_bulk_metrics.h5')

In [7]:
# Single MetricCalculator instance passed to every metric computation in this notebook.
# NOTE(review): serial=True presumably disables parallel execution -- confirm in mesostat.
mc = MetricCalculator(serial=True, verbose=False)

# 1. Bulk Metrics vs Time

* Avg over mice
* All-region-all-types
* Expert vs Naive
* GO/NOGO/MISS/FA

# Mean

In [None]:
# Compute the 'mean' metric and write the results into ds.
# NOTE(review): 's' looks like a dimension specifier and 'time' like the name the
# results are stored under (the notebook later selects datasets by name) -- confirm
# against bulk_metrics.metric_mouse_bulk.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, 'mean', 's', 'time', verbose=False,
                               trialTypeNames='auto', perfNames='auto')

In [None]:
# Plot the stored 'mean' results for the 'time' datasets held in ds.
bulk_metrics.plot_metric_bulk(ds, 'mean', 'time', verbose=False)

# Variance

**TODO**
* Plot trial variance relative to temporal variance
* Plot mean with variance together

### Pros/Cons of Baseline Normalization
* DFF-Trial
    - Pos: Removes a dynamic baseline that changes on the timescale of trials.
    - Pos: Under the assumption of a signal-free pre-trial interval, baseline removal enhances the relative change in significant activity during the trial.
    - Neg: If the pre-trial and trial signals are correlated, this procedure destroys information contained in the trial signal.

* DFF-Session vs ZScore-Session
    - Both linear transforms
    - Mean is more meaningful for DFF if pre-trial interval is at least somewhat stable
    - Variance: **TODO** complete this point (the original note is cut off here)

In [None]:
# Compute the 'varmean' metric and write the results into ds.
# NOTE(review): the exact definition of 'varmean' lives in mesostat -- confirm there.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, 'varmean', 's', 'time', verbose=False,
                               trialTypeNames='auto', perfNames='auto')

In [None]:
# Plot the stored 'varmean' results for the 'time' datasets.
# ylim=[0,None] pins the lower y-limit at 0 and leaves the upper limit unset.
bulk_metrics.plot_metric_bulk(ds, 'varmean', 'time', ylim=[0,None], verbose=False)

### Test variance across channels for an interval of interest

* Average signal over texture presentation interval
* Compute variance over trials for each channel
* Compare channels

In [None]:
# Compute 'varmean' restricted to the crop window labelled 'TEX' with bounds (3, 3.5);
# results go into ds as 'channel' datasets.
# NOTE(review): units of the crop bounds and the meaning of 'p' are not visible here -- confirm.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, 'varmean', 'p', 'channel', cropTime=('TEX', (3, 3.5)), verbose=False)

In [None]:
# Plot the stored 'varmean' results for the 'channel' datasets on a logarithmic y-axis.
# NOTE(review): dropCols=['cropTime'] presumably excludes that column when grouping the plots -- confirm.
bulk_metrics.plot_metric_bulk(ds, 'varmean', 'channel', yscale='log', verbose=False, # ylim=[0.005,2],
                              dropCols=['cropTime'])

# Effective Rank

### ByTime

In [None]:
# Compute the "rank_effective" metric and write the results into ds as 'time' datasets.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "rank_effective", 's', 'time', verbose=False,
                               trialTypeNames='auto', perfNames='auto')

In [None]:
# Plot the stored "rank_effective" results for the 'time' datasets; lower y-limit fixed at 1.
bulk_metrics.plot_metric_bulk(ds, 'rank_effective', 'time', ylim=[1, None], verbose=False) # ylim=[1,48]

### BySession

In [None]:
# Named crop windows, label -> (start, end).
# NOTE(review): presumably seconds within the trial -- confirm.
intervDict = dict(TEX=(3.0, 3.5), REW=(6.0, 6.5))

# One per-session effective-rank computation for each window; results are written into ds.
for intervName in intervDict:
    interv = intervDict[intervName]
    bulk_metrics.metric_mouse_bulk_vs_session(
        dataDB, mc, ds, "rank_effective", 'session',
        cropTime=(intervName, interv),
        perfNames='auto',
        verbose=False,
    )

In [None]:
# Plot the stored "rank_effective" results for the 'session' datasets; lower y-limit fixed at 1.
bulk_metrics.plot_metric_bulk(ds, 'rank_effective', 'session', ylim=[1,None], verbose=False)

## Hierarchical

**TODO**:
* Add composite selectors for metric helper
   - trialType = iGO & iNOGO
   - cropTime = TEX & REW

In [8]:
# Named crop windows, label -> (start, end).
# NOTE(review): presumably seconds within the trial -- confirm.
intervDict = dict(
    PRE=(-2.0, 0.0),
    TEX=(3.0, 3.5),
    REW=(6.0, 6.5),
)

# Effective rank for each window, stored in ds under the dataset name 'flat'.
for intervName in intervDict:
    interv = intervDict[intervName]
    bulk_metrics.metric_mouse_bulk(
        dataDB, mc, ds, "rank_effective", '', 'flat',
        cropTime=(intervName, interv),
        trialTypeNames='auto',
        perfNames='auto',
        verbose=False,
    )

IntProgress(value=0, description='flat', max=180)

IntProgress(value=0, description='flat', max=180)

IntProgress(value=0, description='flat', max=180)

In [55]:
# List every dataset stored in ds and keep only those named 'flat'.
# .copy() makes the selection an independent frame, so adding columns to it later
# does not operate on a slice of the full listing (avoids SettingWithCopyWarning).
df = ds.list_dsets_pd()
df = df[df['name'] == 'flat'].copy()

In [56]:
# Load the stored result of every selected dataset, rounded to 2 decimals.
data = [np.round(ds.get_data(dset), 2) for dset in df['dset']]
# assign() returns a new frame with the extra 'rez' column instead of writing into
# df, which may still be a slice of the full dataset listing.
df = df.assign(rez=data)

In [57]:
# Remove bookkeeping columns that are not needed for the per-mouse comparison.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this cell
# re-runnable: a second execution no longer fails because the columns are already gone.
df = df.drop(columns=['datetime', 'zscoreDim', 'dset', 'metric', 'name', 'shape', 'target_dim'],
             errors='ignore')

In [58]:
from mesostat.utils.pandas_helper import pd_category_to_column

In [59]:
# Every column except the mouse label and the result value identifies a condition.
# Preserve the frame's own column order: building this list through a set gives an
# arbitrary order that can differ between kernel sessions, which changes the column
# layout and row sorting of the pivoted table.
index = [col for col in df.columns if col not in ('mousename', 'rez')]
# One row per condition, one column per mouse.
dfMouse = df.pivot(index=index, columns='mousename', values='rez').reset_index()

In [63]:
# Show only the rows whose datatype is 'bn_trial'.
dfMouse[dfMouse['datatype'] == 'bn_trial']

mousename,datatype,performance,trialType,cropTime,mvg_4,mvg_7,mvg_8,mvg_9
45,bn_trial,,,PRE,19.69,12.05,12.32,6.77
46,bn_trial,,,REW,10.93,7.97,8.41,8.05
47,bn_trial,,,TEX,8.69,6.88,5.94,7.81
48,bn_trial,,iFA,PRE,20.79,11.87,12.41,14.31
49,bn_trial,,iFA,REW,13.68,8.42,8.94,8.5
50,bn_trial,,iFA,TEX,9.37,6.74,7.03,9.3
51,bn_trial,,iGO,PRE,20.01,12.12,12.24,15.83
52,bn_trial,,iGO,REW,13.17,9.49,10.37,10.56
53,bn_trial,,iGO,TEX,8.39,6.67,5.78,9.54
54,bn_trial,,iMISS,PRE,20.24,11.97,11.11,17.43


# Total Correlation

In [None]:
# Disabled "avg_entropy" variants, kept for reference:
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 'sp', 'time-channel', verbose=False)
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 's', 'time', verbose=False)

# Compute the "avg_TC" metric and write the results into ds as 'time' datasets.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_TC", 's', 'time', verbose=False)

In [None]:
# Plot the stored "avg_TC" results for the 'time' datasets.
# The data store is the first argument, as in every other plot_metric_bulk call in
# this notebook; the previous call passed dataDB first, which shifted all positional
# arguments by one.
bulk_metrics.plot_metric_bulk(ds, 'avg_TC', 'time', verbose=True) # ylim=[1,48]

In [None]:
# Dedicated total-correlation plot with default y-limits and y-scale.
# NOTE(review): unlike plot_metric_bulk, this call passes dataDB as well as ds -- confirm
# that plot_TC really takes both.
bulk_metrics.plot_TC(dataDB, ds, ylim=None, yscale=None, verbose=True)

### Cleaning up

In [None]:
# Display the complete dataset listing, ordered by its 'datetime' column, with
# pandas' row and column truncation lifted for the duration of this cell only.
with pd.option_context('display.max_rows', None), \
     pd.option_context('display.max_columns', None):
    display(ds.list_dsets_pd().sort_values(by='datetime'))

In [None]:
# WARNING: destructive -- removes stored datasets whose metric is "rank_effective".
# NOTE(review): whether timestr selects entries before or after the given timestamp is
# not visible here; confirm in DataStorage.delete_by_query before running, and keep
# this cell out of Run All.
ds.delete_by_query(queryDict={"metric" : "rank_effective"}, timestr="2020-11-20 18:00:00")