In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display
from scipy.stats import mannwhitneyu, wilcoxon

# Append base directory
import os,sys,inspect
# Locate the repository root from the current working directory and put it on
# sys.path so that the project-local imports that follow can be resolved.
rootname = "pub-2020-exploratory-analysis"
thispath = os.getcwd()
# When the file location is preferred over the working directory, use instead:
# thispath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
if rootname not in thispath:
    # Fail with an actionable message instead of the bare "substring not found"
    raise ValueError(
        "Working directory '{}' is not inside '{}'; "
        "start the notebook from within the repository".format(thispath, rootname)
    )
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
if rootpath not in sys.path:  # keep sys.path free of duplicates when the cell is re-run
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.gallerosalas.data_fc_db_raw import DataFCDatabase
from lib.common.metric_helper import calc_metric_mouse, calc_metric_session, calc_metric_mouse_delay
import lib.analysis.bulk_metrics as bulk_metrics


# Reload modified modules automatically before each cell execution, so edits to
# the project libraries are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Root directory of the raw data collection handed to DataFCDatabase.
# Override per machine by exporting GALLEROSALAS_DATA_ROOT before starting Jupyter
# instead of editing this cell; the fallback is the previously hard-coded location.
# Other locations used so far:
#   '/home/alyosha/data/yasirdata_raw/'
#   '/media/aleksejs/DataHDD/work/data/yasir/yasirdata_raw'
# Interactive alternative: gui_fpath("Path to data collection", './')
params = {
    'root_path_data': os.environ.get(
        'GALLEROSALAS_DATA_ROOT',
        '/media/alyosha/Data/TE_data/yasirdata_raw/',
    ),
}

In [None]:
# Build the database object from `params` (which carries 'root_path_data').
# `dataDB` is used by every calc/plot cell below.
dataDB = DataFCDatabase(params)

In [None]:
# Storage handle passed to all calc_* and plot cells below.
# NOTE(review): the file name is relative, so it presumably resolves against the
# kernel's working directory — confirm against mesostat.utils.hdf5_io.
ds = DataStorage('gallerosalas_bulk_metrics.h5')

In [None]:
# Metric calculator handed to every calc_* call in this notebook.
mc = MetricCalculator(
    verbose=False,
    serial=True,
)

In [None]:
# Quick overview of what the database exposes: mice, number of sessions,
# available data types, and the channel count queried for one mouse.
# NOTE(review): the mouse name 'mou_5' is hard-coded — confirm it exists in every dataset.
print('mice', dataDB.mice)
print('nSessions', len(dataDB.sessions))
print('datatypes', dataDB.get_data_types())
print('nChannel', dataDB.get_nchannels('mou_5'))

In [None]:
# NOTE(review): the return value is not captured here, so this presumably stores
# its result on `dataDB` — confirm in lib.gallerosalas.data_fc_db_raw.
dataDB.calc_shortest_distances()

# Bulk Metrics

In [None]:
# Sweep settings unpacked into the "vs time" metric calls.
argSweepDictTime = dict(
    trialType='auto',
    datatype=['bn_trial', 'bn_session'],
)

# Sweep settings unpacked into the "vs session" metric calls;
# same as above plus the list of interval names.
argSweepDictSession = dict(
    trialType='auto',
    intervName=["PRE", "TEX", "DEL", "REW", "AVG"],
    datatype=['bn_trial', 'bn_session'],
)

In [None]:
# Parameter combinations passed as `exclQueryLst` to the calc_* calls.
exclQueryLst = [
    # Baseline normalized
    dict(datatype='bn_trial', intervName='PRE'),
    # No reward for this mouse
    dict(mousename='mou_6', intervName='REW'),
    # Makes no sense to compare: no reward and longer delay
    dict(mousename='mou_6', intervName='AVG'),
]

## 1. Mean
### 1.1. Mean vs time

In [None]:
# 'mean' metric for the 'time' sweep, at least 10 trials required.
# skipExisting=False: presumably recomputes even if results are already stored — confirm.
calc_metric_mouse_delay(
    dataDB, mc, ds, 'mean', 's', 'time',
    verbose=False,
    minTrials=10,
    haveDelay=True,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictTime
)

In [None]:
# Plot the stored 'mean' / 'time' results; xFunc supplies dataDB.get_times()
# (presumably the x-axis values — confirm in lib.analysis.bulk_metrics).
# Earlier variant: bulk_metrics.plot_metric_bulk(ds, 'mean', 'time', verbose=False, xFunc=...)
bulk_metrics.plot_metric_bulk_1D(
    dataDB, ds, 'mean', 'time',
    verbose=False,
    haveTimeLabels=False,
    xFunc=lambda m, l: dataDB.get_times(),
)

### 1.2. Mean vs session

In [None]:
# 'mean' metric for the 'session' sweep, at least 10 trials required.
calc_metric_session(
    dataDB, mc, ds, 'mean', '', 'session',
    verbose=False,
    minTrials=10,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictSession
)

In [None]:
# Scatter plot of the stored 'mean' / 'session' results, without a regression line.
bulk_metrics.scatter_metric_bulk(
    ds, 'mean', 'session',
    verbose=False,
    xFunc=None,
    haveRegression=False,
)

### 1.3. Conditions

In [None]:
# Bar plot of 'mean' / 'session' results for Hit and CR trials over all
# interval names reported by the database.
bulk_metrics.barplot_conditions(
    ds, 'mean', 'session',
    verbose=True,
    trialTypes=['Hit', 'CR'],
    intervNames=dataDB.get_interval_names(),
)

## 2. Variance

**TODO**
* Plot trial variance relative to temporal variance
* Plot mean with variance together

### Pros/Cons of Baseline Normalization
* DFF-Trial
    - Pos: Removes a dynamic baseline that changes on the order of trials.
    - Pos: Under the assumption of a signal-free pre-trial interval, baseline removal enhances the relative change of significant activity during the trial.
    - Neg: In the presence of correlation between the pre-trial and trial signals, this procedure destroys information contained in the trial signal.

* DFF-Session vs ZScore-Session
    - Both linear transforms
    - Mean is more meaningful for DFF if pre-trial interval is at least somewhat stable
    - Variance: **TODO** (this note was left unfinished)

In [None]:
# 'varmean' metric for the 'time' sweep, at least 30 trials required.
calc_metric_mouse_delay(
    dataDB, mc, ds, 'varmean', 's', 'time',
    verbose=False,
    minTrials=30,
    haveDelay=True,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictTime
)

In [None]:
# Plot the stored 'varmean' / 'time' results with time labels; lower y limit fixed at 0.
bulk_metrics.plot_metric_bulk_1D(
    dataDB, ds, 'varmean', 'time',
    verbose=False,
    haveTimeLabels=True,
    ylim=[0, None],
    xFunc=lambda m, l: dataDB.get_times(),
)

### Session

In [None]:
# 'varmean' metric for the 'session' sweep, at least 30 trials required.
calc_metric_session(
    dataDB, mc, ds, 'varmean', '', 'session',
    verbose=False,
    minTrials=30,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictSession
)

In [None]:
# Scatter plot of the stored 'varmean' / 'session' results, without a regression line.
bulk_metrics.scatter_metric_bulk(
    ds, 'varmean', 'session',
    verbose=False,
    xFunc=None,
    haveRegression=False,
)

### Conditions

In [None]:
# Bar plot of 'varmean' / 'session' results for Hit and CR trials.
# NOTE(review): the interval list is spelled out here, whereas the other
# barplot_conditions cells use dataDB.get_interval_names() — confirm this is intended.
bulk_metrics.barplot_conditions(
    ds, 'varmean', 'session',
    verbose=True,
    trialTypes=['Hit', 'CR'],
    intervNames=['PRE', 'DEL', 'TEX', 'REW'],
)

### Test variance across channels for interesting interval

* Average signal over texture presentation interval
* Compute variance over trials for each channel
* Compare channels

In [None]:
# 'varmean' metric per channel, at least 30 trials required.
# The sweep settings come from argSweepDictTime (trial type + datatype), a dict
# that is defined earlier in this notebook; the name `argSweepDict` does not exist
# anywhere in the notebook and raised NameError on a fresh kernel.
# NOTE(review): the notes above describe averaging over the texture interval only —
# confirm whether an interval restriction has to be added to the sweep here.
calc_metric_mouse(
    dataDB, mc, ds, 'varmean', 'p', 'channel',
    verbose=False,
    minTrials=30,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictTime
)

In [None]:
# Plot the stored 'varmean' / 'channel' results on a logarithmic y axis.
# Optional tweaks tried earlier: ylim=[0.005,2], dropCols=['cropTime']
bulk_metrics.plot_metric_bulk(
    ds, 'varmean', 'channel',
    yscale='log',
    verbose=False,
)

## 3. Effective Rank

### ByTime

In [None]:
# 'rank_effective' metric for the 'time' sweep, at least 50 trials required;
# the metric is configured with allowBadData=True.
calc_metric_mouse_delay(
    dataDB, mc, ds, 'rank_effective', 's', 'time',
    verbose=False,
    minTrials=50,
    metricSettings={'allowBadData': True},
    haveDelay=True,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictTime
)

In [None]:
# Plot the stored 'rank_effective' / 'time' results with time labels; lower y limit fixed at 1.
bulk_metrics.plot_metric_bulk_1D(
    dataDB, ds, 'rank_effective', 'time',
    verbose=False,
    haveTimeLabels=True,
    ylim=[1, None],
    xFunc=lambda m, l: dataDB.get_times(),
)

### BySession

In [None]:
# 'rank_effective' metric for the 'session' sweep, at least 30 trials required.
calc_metric_session(
    dataDB, mc, ds, 'rank_effective', '', 'session',
    verbose=False, minTrials=30, skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictSession
)

In [None]:
# Scatter plot of the stored 'rank_effective' / 'session' results; lower y limit fixed at 1.
bulk_metrics.scatter_metric_bulk(
    ds, 'rank_effective', 'session',
    ylim=[1, None],
    verbose=False,
    xFunc=None,
    haveRegression=False,
)

### Conditions

In [None]:
# Bar plot of 'rank_effective' / 'session' results for Hit and CR trials over
# all interval names reported by the database.
bulk_metrics.barplot_conditions(
    ds, 'rank_effective', 'session',
    verbose=True,
    trialTypes=['Hit', 'CR'],
    intervNames=dataDB.get_interval_names(),
)

## 4. Total Correlation

In [None]:
# Currently disabled entropy variants:
#   bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 'sp', 'time-channel', verbose=False)
#   bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 's', 'time', verbose=False)
# NOTE(review): this is the only section that computes through bulk_metrics.metric_mouse_bulk;
# the other sections call the calc_metric_* helpers — confirm this function still exists.
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, "avg_TC", 's', 'time',
    verbose=False,
)

In [None]:
# Plot the stored 'avg_TC' / 'time' results.
# The storage object goes first, matching the other plot_metric_bulk call in this
# notebook (ds, metric, suffix); passing dataDB in front shifted every positional argument.
# NOTE(review): if plot_metric_bulk_1D(dataDB, ds, ...) was intended instead, switch
# the function name — confirm against lib.analysis.bulk_metrics.
bulk_metrics.plot_metric_bulk(ds, 'avg_TC', 'time', verbose=True)  # ylim=[1,48]

In [None]:
# Total-correlation plot with default y limits and y scale.
bulk_metrics.plot_TC(
    dataDB, ds,
    ylim=None,
    yscale=None,
    verbose=True,
)

### Cleaning up

In [None]:
# Show the complete listing of stored datasets, ordered by their 'datetime' column;
# the row/column display limits are lifted only inside this block.
with pd.option_context(
    'display.max_rows', None,
    'display.max_columns', None,
):
    display(ds.list_dsets_pd().sort_values(by='datetime'))

In [None]:
# Maintenance: delete stored entries whose metric is 'rank_effective'.
# NOTE(review): this is destructive and also runs under "Run All"; presumably
# `timestr` acts as a time cut-off for the deletion — confirm in mesostat.utils.hdf5_io
# before executing.
ds.delete_by_query(
    queryDict={"metric": "rank_effective"},
    timestr="2020-11-20 18:00:00",
)