In [9]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display
from scipy.stats import mannwhitneyu, wilcoxon

# Append base directory
import os,sys,inspect
# Locate the repository root from this notebook's location and make it importable.
rootname = "pub-2020-exploratory-analysis"
thispath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# Keep the path up to and including the first occurrence of `rootname`;
# str.index raises ValueError when the notebook lives outside that directory tree.
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# This cell gets re-executed, so extend sys.path only once instead of
# accumulating duplicate entries.
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.sych.data_fc_db_raw import DataFCDatabase
from lib.common.metric_helper import calc_metric_mouse, calc_metric_session, calc_metric_mouse_delay
import lib.analysis.bulk_metrics as bulk_metrics

%load_ext autoreload
%autoreload 2

Appended root directory /home/alyosha/work/git/pub-2020-exploratory-analysis
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Settings dictionary; only the data root directory is configured here.
# Alternatives kept for reference:
#   tmp_path = root_path_data if 'root_path_data' in locals() else "./"
#   params['root_path_data'] = './'
#   params['root_path_data'] = gui_fpath('h5path', './')
params = {
    'root_path_data': '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed',
}

In [3]:
# Build the database object from the settings in `params`; it is passed to the
# compute and plotting calls in the cells below.
dataDB = DataFCDatabase(params)

Searching for data files
Extracting trial type names
Extracting data types
Reading area color map


In [4]:
# Storage handle for 'sych_result_bulk_metrics.h5'; later cells pass `ds` to the
# metric/plot helpers and query it directly (list_dsets_pd, get_data, delete_by_query).
ds = DataStorage('sych_result_bulk_metrics.h5')

In [5]:
# Metric calculator instance handed to every compute call below.
mc = MetricCalculator(serial=True, verbose=False)

# Bulk Metrics

In [10]:
# Sweep arguments unpacked into the "vs time" compute call.
argSweepDictTime = dict(
    trialType='auto',
    performance='auto',
    datatype=['bn_trial', 'bn_session'],
)

# Sweep arguments for session-level computations
# (presumably the counterpart of the dict above -- not used in the cells visible here).
argSweepDictSession = dict(
    trialType='auto',
    intervName=["PRE", "TEX", "REW", "AVG"],
    datatype=['bn_trial', 'bn_session'],
)

# Passed as `exclQueryLst=` to the compute call; presumably lists the parameter
# combinations to leave out of a sweep -- TODO confirm.
exclQueryLst = [
    dict(datatype='bn_trial', intervName='PRE'),  # Baseline normalized
]

## 1. Mean
### 1.1. Mean vs time

In [7]:
# Tabulate the datasets currently held in the storage; the bare `df` on the last
# line makes the notebook render the table.
df = ds.list_dsets_pd()
df

Unnamed: 0,datatype,datetime,dset,metric,mousename,name,performance,shape,target_dim,trialType,zscoreDim,cropTime
0,bn_session,2021-04-29 10:21:19,dset1416,mean,mvg_8,time,,"(200,)","(timesteps,)",,,
1,bn_session,2021-04-29 10:21:20,dset1417,mean,mvg_8,time,,"(200,)","(timesteps,)",iFA,,
2,bn_session,2021-04-29 10:21:20,dset1418,mean,mvg_8,time,,"(200,)","(timesteps,)",iMISS,,
3,bn_session,2021-04-29 10:21:20,dset1419,mean,mvg_8,time,,"(200,)","(timesteps,)",iGO,,
4,bn_session,2021-04-29 10:21:21,dset1420,mean,mvg_8,time,,"(200,)","(timesteps,)",iNOGO,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1876,raw,2021-04-29 12:25:32,dset3292,rank_effective,mvg_9,session,expert,"(5,)","(sessions,)",,rs,AVG
1877,raw,2021-04-29 12:25:33,dset3293,rank_effective,mvg_9,session,expert,"(5,)","(sessions,)",iFA,rs,AVG
1878,raw,2021-04-29 12:25:34,dset3294,rank_effective,mvg_9,session,expert,"(5,)","(sessions,)",iMISS,rs,AVG
1879,raw,2021-04-29 12:25:34,dset3295,rank_effective,mvg_9,session,expert,"(5,)","(sessions,)",iGO,rs,AVG


* **FIX (,trialtype)**
* **Add None too**

In [8]:
# Legacy call, kept for reference:
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, 'mean', 's', 'time', verbose=False, minTrials=10,
#                                trialTypeNames='auto', perfNames='auto', skipExisting=False, dropChannels=[21])

# TODO(review): DROP_CHANNELS??? -- the legacy call above passed dropChannels=[21], the call
# below does not. Confirm whether channel 21 still has to be excluded.

# Mean vs time, swept over the argument values in argSweepDictTime.
calc_metric_mouse_delay(dataDB, mc, ds, 'mean', 's', 'time', verbose=False, minTrials=10, haveDelay=True,
                        skipExisting=False, exclQueryLst=exclQueryLst, **argSweepDictTime)

AttributeError: module 'lib.analysis.bulk_metrics' has no attribute 'metric_mouse_bulk'

In [None]:
# bulk_metrics.plot_metric_bulk(ds, 'mean', 'time', verbose=False, xFunc=lambda m, l: dataDB.get_times())
# Plot the stored 'mean' / 'time' results. xFunc ignores both of its arguments and
# always returns dataDB.get_times().
bulk_metrics.plot_metric_bulk_1D(
    dataDB, ds, 'mean', 'time',
    verbose=False,
    haveTimeLabels=False,
    xFunc=lambda m, l: dataDB.get_times(),
)

In [None]:
# NOTE(review): the preceding cell keeps this same plot_metric_bulk(ds, ...) call only as a
# commented-out line next to plot_metric_bulk_1D -- this cell may be a leftover. Confirm.
bulk_metrics.plot_metric_bulk(
    ds, 'mean', 'time',
    verbose=False,
    xFunc=lambda m, l: dataDB.get_times(),
)

### 1.2. Mean vs session

In [None]:
# `intervNames` is not defined anywhere in the visible notebook (NameError on a fresh kernel).
# Take the interval list from the database instead, as the hierarchical rank cell does.
# TODO(review): confirm get_interval_names() yields the intended set of intervals.
for intervName in dataDB.get_interval_names():
    bulk_metrics.metric_mouse_bulk_vs_session(dataDB, mc, ds, "mean", 'session', skipExisting=False,
                                              minTrials=10, dropChannels=[21], verbose=False,
                                              trialTypeNames='auto', perfNames=[None, 'naive', 'expert'],
                                              intervName=intervName)

In [None]:
# Scatter of the stored 'mean' / 'session' results; xFunc maps the first argument
# (passed on to get_performance_mouse) to the x values and ignores the second.
bulk_metrics.scatter_metric_bulk(
    ds, 'mean', 'session',
    verbose=False,
    xlim=[0, 1],
    xFunc=lambda m, l: dataDB.get_performance_mouse(m),
    haveRegression=True,
)

### 1.3. Conditions

In [None]:
# Bar plot of the 'mean' / 'session' results for the iGO and iNOGO trial types.
bulk_metrics.barplot_conditions(
    ds, 'mean', 'session',
    verbose=True,
    trialTypes=['iGO', 'iNOGO'],
)

## 2. Variance

**TODO**
* Plot trial variance relative to temporal variance
* Plot mean with variance together

### Pros/Cons of Baseline Normalization
* DFF-Trial
    - Pos: Removes dynamic baseline changing on the order of trials.
    - Pos: Under the assumption of a signal-free pre-trial interval, baseline removal enhances the relative change in significant activity during the trial.
    - Neg: In the presence of correlation between the pre-trial interval and the trial signals, this procedure destroys information during the trial.

* DFF-Session vs ZScore-Session
    - Both linear transforms
    - Mean is more meaningful for DFF if pre-trial interval is at least somewhat stable
    - Va… (**TODO:** this bullet was left unfinished — presumably a remark about variance; complete it)

In [None]:
# NOTE(review): a traceback saved earlier in this notebook reports that `bulk_metrics` has no
# attribute `metric_mouse_bulk`; this call probably needs the same migration as the
# mean-vs-time cell (calc_metric_mouse_delay). Confirm before running.
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, 'varmean', 's', 'time',
    verbose=False,
    minTrials=50,
    trialTypeNames='auto',
    perfNames='auto',
    skipExisting=False,
)

In [None]:
# Plot the stored 'varmean' / 'time' results with the lower y limit set to 0;
# xFunc ignores its arguments and always returns dataDB.get_times().
bulk_metrics.plot_metric_bulk(
    ds, 'varmean', 'time',
    ylim=[0, None],
    verbose=False,
    xFunc=lambda m, l: dataDB.get_times(),
)

### Session

In [None]:
# `intervNames` is not defined anywhere in the visible notebook (NameError on a fresh kernel).
# Take the interval list from the database instead, as the hierarchical rank cell does.
# TODO(review): confirm get_interval_names() yields the intended set of intervals.
for intervName in dataDB.get_interval_names():
    bulk_metrics.metric_mouse_bulk_vs_session(dataDB, mc, ds, "varmean", 'session', trialTypeNames='auto',
                                              minTrials=50, verbose=False, intervName=intervName,
                                              skipExisting=False)

In [None]:
# Scatter of the stored 'varmean' / 'session' results; x values come from
# dataDB.get_performance_mouse applied to xFunc's first argument.
bulk_metrics.scatter_metric_bulk(
    ds, 'varmean', 'session',
    verbose=False,
    xFunc=lambda m, l: dataDB.get_performance_mouse(m),
    haveRegression=True,
)

### Conditions

In [None]:
# Bar plot of the 'varmean' / 'session' results for the iGO and iNOGO trial types.
bulk_metrics.barplot_conditions(
    ds, 'varmean', 'session',
    verbose=True,
    trialTypes=['iGO', 'iNOGO'],
)

### Test variance across channels for interesting interval

* Average signal over texture presentation interval
* Compute variance over trials for each channel
* Compare channels

In [None]:
# NOTE(review): a traceback saved earlier in this notebook reports that `bulk_metrics` has no
# attribute `metric_mouse_bulk`; confirm this call still works before relying on it.
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, 'varmean', 'p', 'channel',
    cropTime=('TEX', (3, 3.5)),
    verbose=False,
)

In [None]:
# Plot the stored 'varmean' / 'channel' results on a logarithmic y axis.
bulk_metrics.plot_metric_bulk(
    ds, 'varmean', 'channel',
    yscale='log',
    verbose=False,
    # ylim=[0.005,2],
    dropCols=['cropTime'],
)

# Effective Rank

### ByTime

In [None]:
# NOTE(review): a traceback saved earlier in this notebook reports that `bulk_metrics` has no
# attribute `metric_mouse_bulk`; confirm this call still works before relying on it.
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, "rank_effective", 's', 'time',
    verbose=False,
    skipExisting=False,
    minTrials=50,
    trialTypeNames='auto',
    perfNames='auto',
    dropChannels=[21],
)

In [None]:
# Plot the stored 'rank_effective' / 'time' results with the lower y limit set to 1.
bulk_metrics.plot_metric_bulk(
    ds, 'rank_effective', 'time',
    ylim=[1, None],  # ylim=[1,48]
    verbose=False,
    xFunc=lambda m, l: dataDB.get_times(),
)

### BySession

In [None]:
# `intervNames` is not defined anywhere in the visible notebook (NameError on a fresh kernel).
# Take the interval list from the database instead, as the hierarchical rank cell does.
# TODO(review): confirm get_interval_names() yields the intended set of intervals.
for intervName in dataDB.get_interval_names():
    bulk_metrics.metric_mouse_bulk_vs_session(dataDB, mc, ds, "rank_effective", 'session', skipExisting=True,
                                              minTrials=50, perfNames=[None, 'naive', 'expert'],
                                              dropChannels=[21], trialTypeNames='auto', verbose=False,
                                              intervName=intervName)

In [None]:
# Scatter of the stored 'rank_effective' / 'session' results; x values come from
# dataDB.get_performance_mouse applied to xFunc's first argument.
bulk_metrics.scatter_metric_bulk(
    ds, 'rank_effective', 'session',
    xlim=[0, 1],
    ylim=[1, None],
    verbose=False,
    xFunc=lambda m, l: dataDB.get_performance_mouse(m),
    haveRegression=True,
)

### Conditions

In [None]:
# Bar plot of the 'rank_effective' / 'session' results for the iGO and iNOGO trial types.
bulk_metrics.barplot_conditions(
    ds, 'rank_effective', 'session',
    verbose=True,
    trialTypes=['iGO', 'iNOGO'],
)

## Hierarchical

**TODO**:
* Add composite selectors for metric helper
   - trialType = iGO & iNOGO
   - cropTime = TEX & REW

In [None]:
# One 'flat' rank_effective computation per interval name reported by the database.
# NOTE(review): a traceback saved earlier in this notebook reports that `bulk_metrics` has no
# attribute `metric_mouse_bulk`; confirm this call still works before relying on it.
for intervName in dataDB.get_interval_names():
    bulk_metrics.metric_mouse_bulk(
        dataDB, mc, ds, "rank_effective", '', 'flat',
        verbose=False,
        trialTypeNames='auto',
        perfNames='auto',
        intervName=intervName,
    )

In [None]:
# List all stored datasets and keep only the rows whose 'name' is 'flat'.
df = ds.list_dsets_pd().loc[lambda frame: frame['name'] == 'flat']

In [None]:
# Fetch every listed dataset and round its values to 2 decimals.
data = [np.round(ds.get_data(dset), 2) for dset in df['dset']]
# Attach the values as a new 'rez' column. .assign returns a new frame, so the
# filtered selection held in `df` is not written to in place (plain column
# assignment on such a selection can raise pandas' SettingWithCopyWarning).
df = df.assign(rez=data)

In [None]:
# Remove the bookkeeping columns. Rebinding `df` to the result instead of using
# inplace=True avoids mutating a filtered selection (which can raise
# SettingWithCopyWarning) and keeps the frame shown by earlier cells untouched.
df = df.drop(columns=['datetime', 'zscoreDim', 'dset', 'metric', 'name', 'shape', 'target_dim'])

In [None]:
from mesostat.utils.pandas_helper import pd_category_to_column

In [None]:
# Every column except the pivoted ones becomes part of the row index. A list
# comprehension keeps the frame's own column order; the previous set difference
# produced an arbitrary order that could change between runs.
index = [col for col in df.columns if col not in ('mousename', 'rez')]
# One column per mouse, holding that mouse's 'rez' value.
dfMouse = df.pivot(index=index, columns='mousename', values='rez').reset_index()

In [None]:
# Show only the rows computed on the 'bn_trial' datatype.
dfMouse.loc[dfMouse['datatype'] == 'bn_trial']

# Total Correlation

In [None]:
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 'sp', 'time-channel', verbose=False)
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 's', 'time', verbose=False)

# NOTE(review): a traceback saved earlier in this notebook reports that `bulk_metrics` has no
# attribute `metric_mouse_bulk`; confirm this call still works before relying on it.
bulk_metrics.metric_mouse_bulk(
    dataDB, mc, ds, "avg_TC", 's', 'time',
    verbose=False,
)

In [None]:
# NOTE(review): the other plot_metric_bulk calls in this notebook pass `ds` as the first
# argument; here `dataDB` comes first. Confirm which signature is current.
bulk_metrics.plot_metric_bulk(
    dataDB, ds, 'avg_TC', 'time',
    verbose=True,
)  # ylim=[1,48]

In [None]:
# Total-correlation plot with no explicit y limits or y scale requested.
bulk_metrics.plot_TC(
    dataDB, ds,
    ylim=None,
    yscale=None,
    verbose=True,
)

### Cleaning up

In [None]:
# Temporarily lift pandas' row/column display limits so the complete dataset
# listing, ordered by its 'datetime' column, is rendered in full.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(ds.list_dsets_pd().sort_values(by='datetime'))

In [None]:
# CAUTION: removes stored results matching metric == "rank_effective".
# NOTE(review): presumably `timestr` limits the deletion to datasets relative to that
# timestamp (older or newer?) -- confirm against DataStorage.delete_by_query before running.
ds.delete_by_query(queryDict={"metric" : "rank_effective"}, timestr="2020-11-20 18:00:00")