In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display
from scipy.stats import mannwhitneyu, wilcoxon

# Append base directory
import os,sys,inspect
# Locate the repository root and make its packages (lib.*) importable.
# The working directory is used as the starting point: inside a notebook cell
# inspect.getfile(inspect.currentframe()) returns either a pseudo file name or a
# file in a temporary directory (depending on the ipykernel version), neither of
# which is guaranteed to lie inside the repository.
rootname = "pub-2020-exploratory-analysis"
thispath = os.getcwd()
if rootname not in thispath:
    # Same exception type that str.index raised before, but with an actionable message
    raise ValueError("Cannot find '{}' in the working directory '{}'; "
                     "start the notebook from inside the repository".format(rootname, thispath))
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# Do not stack duplicate entries when the cell is re-run
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.sych.data_fc_db_raw import DataFCDatabase
import lib.analysis.bulk_metrics as bulk_metrics

%load_ext autoreload
%autoreload 2

In [None]:
# Parameters handed to DataFCDatabase.
# The data location is machine-specific, so it can be overridden through the
# SYCH_ROOT_PATH_DATA environment variable; the previously hard-coded location
# remains the default, so existing setups keep working unchanged.
# Alternatives that were used before:
# params['root_path_data'] = './'
# params['root_path_data'] = gui_fpath('h5path', './')
params = {}
params['root_path_data'] = os.environ.get(
    'SYCH_ROOT_PATH_DATA',
    '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed')

In [None]:
# Data accessor built from params; the cells below use it for the time axis
# (get_times) and per-mouse performance (get_performance_mouse)
dataDB = DataFCDatabase(params)

In [None]:
# Storage handle for metric results; every compute/plot cell below goes through it.
# NOTE(review): the file name is relative — presumably resolved against the working directory, confirm
ds = DataStorage('sych_result_bulk_metrics.h5')

In [None]:
# Metric calculator shared by all bulk_metrics computations below (serial, non-verbose)
mc = MetricCalculator(serial=True, verbose=False)

# Bulk Metrics

In [None]:
# Named intervals (name -> (start, end)) passed as cropTime to the per-session computations below.
# NOTE(review): values are presumably seconds within a trial — confirm.
# Only the single 'AVG' interval is active; the per-phase variant is kept for reference:
# intervDict = {
#     'PRE' : (-2.0, 0.0),
#     'TEX' : (3.0, 3.5),
#     'REW' : (6.0, 6.5)
# }
intervDict = {'AVG' : (0.0, 8.0)}

## 1. Mean
### 1.1. Mean vs time

In [None]:
# Evaluate the 'mean' metric for the 'time' result set.
# Results are presumably written into ds, since the plotting cell reads them from there — confirm in bulk_metrics.
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, 'mean', 's', 'time',
    verbose=False,
    minTrials=10,
    trialTypeNames='auto',
    perfNames='auto',
)

In [None]:
# Plot the 'mean' / 'time' results held in ds.
# xFunc ignores both of its arguments and always returns dataDB.get_times() as the x-values.
bulk_metrics.plot_metric_bulk(
    ds, 'mean', 'time',
    verbose=False,
    xFunc=lambda m, l: dataDB.get_times(),
)

### 1.2. Mean vs session

In [None]:
# Evaluate the "mean" metric per session, once for every named interval in intervDict.
# Each dict item already is the (name, interval) pair expected by cropTime.
for namedInterv in intervDict.items():
    bulk_metrics.metric_mouse_bulk_vs_session(
        dataDB, mc, ds, "mean", 'session',
        trialTypeNames='auto',
        minTrials=10,
        verbose=False,
        cropTime=namedInterv,
    )

In [None]:
# Scatter the 'mean' / 'session' results against per-mouse performance (x-range fixed to [0, 1]),
# with a regression requested via haveRegression.
bulk_metrics.scatter_metric_bulk(
    ds, 'mean', 'session',
    verbose=False,
    xlim=[0, 1],
    xFunc=lambda m, l: dataDB.get_performance_mouse(m),
    haveRegression=True,
)

### 1.3. Conditions

In [None]:
# Bar plot of the 'mean' / 'session' results from ds (presumably grouped by condition — confirm in bulk_metrics)
bulk_metrics.barplot_conditions(ds, 'mean', 'session', verbose=True)

## 2. Variance

**TODO**
* Plot trial variance relative to temporal variance
* Plot mean with variance together

### Pros/Cons of Baseline Normalization
* DFF-Trial
    - Pos: Removes a dynamic baseline that changes on the timescale of trials.
    - Pos: Under the assumption of a signal-free pre-trial interval, baseline removal enhances the relative change of significant activity during the trial.
    - Neg: If the pre-trial interval and the trial signal are correlated, this procedure destroys information about the activity during the trial.

* DFF-Session vs ZScore-Session
    - Both linear transforms
    - Mean is more meaningful for DFF if pre-trial interval is at least somewhat stable
    - Variance: **TODO** (this point was left unfinished)

In [None]:
# Evaluate the 'varmean' metric for the 'time' result set.
# Note the stricter minTrials=50 compared with the 'mean' computation (10).
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, 'varmean', 's', 'time',
    verbose=False,
    minTrials=50,
    trialTypeNames='auto',
    perfNames='auto',
)

In [None]:
# Plot the 'varmean' / 'time' results held in ds; the lower y-limit is pinned to 0, the upper one is left open.
# xFunc ignores its arguments and always returns dataDB.get_times() as the x-values.
bulk_metrics.plot_metric_bulk(
    ds, 'varmean', 'time',
    ylim=[0,None],
    verbose=False,
    xFunc=lambda m, l: dataDB.get_times(),
)

### Session

In [None]:
# Evaluate the "varmean" metric per session, once for every named interval in intervDict.
# Each dict item already is the (name, interval) pair expected by cropTime.
for namedInterv in intervDict.items():
    bulk_metrics.metric_mouse_bulk_vs_session(
        dataDB, mc, ds, "varmean", 'session',
        trialTypeNames='auto',
        minTrials=50,
        verbose=False,
        cropTime=namedInterv,
    )

In [None]:
# Scatter the 'varmean' / 'session' results against per-mouse performance, with a regression requested.
# NOTE(review): unlike the 'mean' and 'rank_effective' scatter plots, no xlim is set here — confirm this is intended.
bulk_metrics.scatter_metric_bulk(
    ds, 'varmean', 'session',
    verbose=False,
    xFunc=lambda m, l: dataDB.get_performance_mouse(m),
    haveRegression=True,
)

### Conditions

In [None]:
# Bar plot of the 'varmean' / 'session' results from ds (presumably grouped by condition — confirm in bulk_metrics)
bulk_metrics.barplot_conditions(ds, 'varmean', 'session', verbose=True)

### Test variance across channels for interesting interval

* Average signal over texture presentation interval
* Compute variance over trials for each channel
* Compare channels

In [None]:
# Evaluate the 'varmean' metric for the 'channel' result set, restricted to the
# interval named 'TEX' with bounds (3, 3.5).
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, 'varmean', 'p', 'channel',
    cropTime=('TEX', (3, 3.5)),
    verbose=False,
)

In [None]:
# Plot the 'varmean' / 'channel' results on a logarithmic y-axis.
# 'cropTime' is passed via dropCols (presumably excluded from the grouping/labels — confirm).
bulk_metrics.plot_metric_bulk(
    ds, 'varmean', 'channel',
    yscale='log',
    verbose=False,
    # ylim=[0.005,2],
    dropCols=['cropTime'],
)

# Effective Rank

### ByTime

In [None]:
# Evaluate the "rank_effective" metric for the 'time' result set (at least 50 trials required via minTrials).
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, "rank_effective", 's', 'time',
    verbose=False,
    minTrials=50,
    trialTypeNames='auto',
    perfNames='auto',
)

In [None]:
# Plot the 'rank_effective' / 'time' results; the lower y-limit is pinned to 1, the upper one is left open.
# xFunc ignores its arguments and always returns dataDB.get_times() as the x-values.
bulk_metrics.plot_metric_bulk(
    ds, 'rank_effective', 'time',
    ylim=[1, None],  # ylim=[1,48]
    verbose=False,
    xFunc=lambda m, l: dataDB.get_times(),
)

### BySession

In [None]:
# Evaluate the "rank_effective" metric per session, once for every named interval in intervDict.
# Each dict item already is the (name, interval) pair expected by cropTime.
for namedInterv in intervDict.items():
    bulk_metrics.metric_mouse_bulk_vs_session(
        dataDB, mc, ds, "rank_effective", 'session',
        minTrials=50,
        trialTypeNames='auto',
        verbose=False,
        cropTime=namedInterv,
    )

In [None]:
# Scatter the 'rank_effective' / 'session' results against per-mouse performance
# (x-range [0, 1], y-axis starting at 1), with a regression requested.
bulk_metrics.scatter_metric_bulk(
    ds, 'rank_effective', 'session',
    xlim=[0, 1],
    ylim=[1, None],
    verbose=False,
    xFunc=lambda m, l: dataDB.get_performance_mouse(m),
    haveRegression=True,
)

### Conditions

In [None]:
# Bar plot of the 'rank_effective' / 'session' results from ds (presumably grouped by condition — confirm in bulk_metrics)
bulk_metrics.barplot_conditions(ds, 'rank_effective', 'session', verbose=True)

## Hierarchical

**TODO**:
* Add composite selectors for metric helper
   - trialType = iGO & iNOGO
   - cropTime = TEX & REW

In [None]:
# Per-phase intervals for the 'flat' effective-rank computation.
# A dedicated name is used on purpose: rebinding the global intervDict here would silently
# replace the 'AVG' interval that the per-session cells iterate over, so re-running any of
# those cells after this one would compute different results.
intervDictFlat = {
    'PRE' : (-2.0, 0.0),
    'TEX' : (3.0, 3.5),
    'REW' : (6.0, 6.5)
}
# One "rank_effective" evaluation per named interval, stored under the name 'flat'
for intervName, interv in intervDictFlat.items():
    bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "rank_effective", '', 'flat', verbose=False,
                                   trialTypeNames='auto',
                                   perfNames='auto',
                                   cropTime=(intervName, interv))

In [None]:
# List all stored datasets and keep only those named 'flat'.
# .copy() detaches the selection from the full listing: df gets a new column and loses
# columns in the following cells, and doing that on a filtered slice triggers pandas'
# SettingWithCopyWarning.
df = ds.list_dsets_pd()
df = df[df['name'] == 'flat'].copy()

In [None]:
# Fetch every listed dataset from storage, round it to 2 decimals,
# and attach the results to the listing as column 'rez'
data = [
    np.round(ds.get_data(dsetKey), 2)
    for dsetKey in df['dset']
]
df['rez'] = data

In [None]:
# Remove bookkeeping columns that are not needed in the per-mouse table.
# df is rebound instead of mutated in place, and errors='ignore' keeps the cell
# re-runnable: the in-place version raised KeyError on a second run because the
# columns were already gone.
df = df.drop(columns=['datetime', 'zscoreDim', 'dset', 'metric', 'name', 'shape', 'target_dim'],
             errors='ignore')

In [None]:
from mesostat.utils.pandas_helper import pd_category_to_column

In [None]:
# Every column except the mouse name and the result identifies one row of the pivot table.
# The list is built in column order: iterating a set of strings yields an order that can
# change between kernel sessions, which made the leading columns of dfMouse vary from run to run.
index = [col for col in df.columns if col not in ('mousename', 'rez')]
# One row per remaining-column combination, one column per mouse, cells hold the 'rez' values
dfMouse = df.pivot(index=index, columns='mousename', values='rez').reset_index()

In [None]:
# Display only the rows whose 'datatype' is 'bn_trial'
dfMouse[dfMouse['datatype'] == 'bn_trial']

# Total Correlation

In [None]:
# Disabled: "avg_entropy" evaluations ('time-channel' and 'time' result sets), kept for reference
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 'sp', 'time-channel', verbose=False)
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 's', 'time', verbose=False)

# Evaluate the "avg_TC" metric for the 'time' result set (no minTrials / trial-type / performance selectors here)
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_TC", 's', 'time', verbose=False)

In [None]:
# Plot the 'avg_TC' / 'time' results held in ds.
# Every other plot_metric_bulk call in this notebook passes the storage handle first;
# the extra leading dataDB argument shifted all positional arguments by one.
# The time axis is supplied through xFunc, as in the other '... vs time' plots.
bulk_metrics.plot_metric_bulk(ds, 'avg_TC', 'time', verbose=True,
                              xFunc=lambda m, l: dataDB.get_times()) # ylim=[1,48]

In [None]:
# Total-correlation plot with default y-limits and y-scale.
# NOTE(review): plot_TC is given dataDB before ds, whereas the plot_metric_bulk calls in the
# earlier sections take ds first — confirm this matches the current plot_TC signature.
bulk_metrics.plot_TC(dataDB, ds, ylim=None, yscale=None, verbose=True)

### Cleaning up

In [None]:
# Show the complete dataset listing ordered by 'datetime'; pandas' row and column
# display limits are lifted only for the duration of this block.
with pd.option_context(
        'display.max_rows', None,
        'display.max_columns', None):
    display(ds.list_dsets_pd().sort_values('datetime'))

In [None]:
# WARNING: destructive — removes stored "rank_effective" results from the storage file,
# and therefore also runs on "Run All".
# NOTE(review): timestr presumably restricts the deletion relative to this timestamp — confirm the
# direction (older vs newer) in DataStorage.delete_by_query before running.
ds.delete_by_query(
    queryDict={"metric" : "rank_effective"},
    timestr="2020-11-20 18:00:00",
)