In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

import statsmodels.api as sm
from statsmodels.formula.api import ols

# Append base directory
import os,sys,inspect
# Locate the repository root directory and put it on sys.path, presumably so
# that the project-local imports below (`mesostat`, `lib`) resolve -- TODO confirm.
rootname = "pub-2020-exploratory-analysis"

# Directory of the code object that is currently executing. Inside a notebook
# the kernel invents this file name; depending on the kernel version it may
# point to a temporary directory that lies outside the repository.
_framepath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))

# Prefer the frame's directory (original behaviour); fall back to the current
# working directory when the frame path does not contain the repository name.
thispath = next((p for p in (_framepath, os.getcwd()) if rootname in p), None)
if thispath is None:
    # Same exception type as the original `str.index` failure, but explicit.
    raise ValueError(
        "Cannot locate root directory '" + rootname + "' in '"
        + _framepath + "' or '" + os.getcwd() + "'"
    )

# Keep everything up to and including the first occurrence of `rootname`.
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)

# Avoid piling up duplicate entries when the cell is re-run.
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage
from mesostat.stat.anova import as_pandas, as_pandas_lst, anova_homebrew

from lib.sych.data_fc_db_raw import DataFCDatabase
from lib.sych.plot_helper import imshow_dataset_by_mouse, imshow_dataset_by_session
from lib.sych.metric_helper import metric_by_session

%load_ext autoreload
%autoreload 2

In [None]:
# Configuration dictionary handed to the database constructor.
# The data root is obtained from `gui_fpath` (presumably an interactive path
# chooser -- TODO confirm); assign a path string directly to skip the prompt.
params = {
    'root_path_data': gui_fpath('h5path', './'),
}

In [None]:
# Build the database object from `params` (which carries 'root_path_data').
dataDB = DataFCDatabase(params)

In [None]:
# Storage object bound to the given HDF5 file name (relative path).
# NOTE(review): presumably holds the computed metric results -- TODO confirm.
ds = DataStorage('sych_result_individual_region.h5')

In [None]:
# Metric calculator shared by all cells below, configured with
# serial=True and verbose=False.
mc = MetricCalculator(serial=True, verbose=False)

In [None]:
# Show what the database exposes: mice, data types and trial type names,
# one per line.
print(
    dataDB.mice,
    dataDB.dataTypes,
    dataDB.trialTypeNames,
    sep='\n',
)

In [None]:
# Last expression of the cell: the returned channel labels are displayed as the cell output.
dataDB.get_channel_labels()

# 1. Exploratory Study

Explore activity across sessions, phases and channels

**TODO:**
* Append pre-trial phase to dataDB, adjust all methods
* Add loop over phases. Trial-average is not significant here

## 1.1 Mean and Variance

    mvg_4_2017_11_10_a - 14 - High start                   - Natural bad start -> Ord up to 15 helps
    mvg_4_2017_11_10_a - 33 - Low start then jump          - Natural bad start -> Ord up to 15 helps
    mvg_4_2017_11_20_a - 41 - Very Negative.               - Has impulses -> Selected-trial-specific helps
    mvg_4_2017_11_21_a - Multiple channels very negative   - Has impulses -> Selected-trial-specific helps
    mvg_4_2017_11_22_a - Multiple channels very negative   - Has impulses -> Selected-trial-specific helps
    mvg_4_2017_11_23_a - Multiple channels very negative   - Has impulses -> Selected-trial-specific helps
    mvg_4_2017_11_24_a - Multiple channels very negative   - Has impulses -> Selected-trial-specific helps
    
    mvg_7_2018_11_14_a - 41 - High start                   - Quadratic not good enough -> Ord up to 15 helps
    mvg_7 - All        - 21 - Weird clacking                           - Drop channel
    
    mvg_8_2018_11_17_a - 21 - Weird trial-level periodicity            - Drop channel
        - Yaro will look
    mvg_8_2018_11_23_a - 20 - Huge experimental dip in the beginning   - Drop channel. Later crop trials after dip
    
    mvg_9_2019_02_26_a - Weird jump all channels @ trial ~470          - Drop session. Later crop trials before jump
    
**Solutions**:
1. Drop channel 21 completely for mouse 7 and 8
2. [Done] Crop `mvg_9_2019_02_26_a` to [:470]
3. [Done] Fix `mvg_8_2018_11_23_a` Manually

In [None]:
# For every data type and every session of every mouse, plot the mean of the
# session's data taken over axis 1.
# NOTE(review): what axis 1 represents depends on the layout returned by
# `get_neuro_data` -- TODO confirm.
for datatype in dataDB.get_data_types():
    for mousename in sorted(dataDB.mice):
        for session in dataDB.get_sessions(mousename):
            # Only the first element of the query result is used.
            dataThis = dataDB.get_neuro_data({'session': session}, datatype=datatype)[0]

            fig, ax = plt.subplots()
            ax.plot(np.mean(dataThis, axis=1))
            ax.set_title('_'.join([datatype, session]))
            plt.show()
            # One figure per session: release it so open figures do not accumulate.
            plt.close(fig)

In [None]:
# Run `metric_by_session` for the 'mean' and 'std' metrics on every
# combination of mouse, data type and trial type. The dataset name is
# "<metric>_<datatype>_<trialType>" and is printed as a progress trace.
for metricName in ['mean', 'std']:  # 'avg_entropy' is currently disabled
    for mousename in sorted(dataDB.mice):
        for datatype in dataDB.get_data_types():
            for trialType in dataDB.get_trial_type_names():
                dataName = '_'.join([metricName, datatype, trialType])
                print(dataName)
                # NOTE(review): 'p' is passed through to the helper unchanged;
                # presumably a dimension specifier -- TODO confirm.
                metric_by_session(dataDB, mc, ds, mousename, metricName, 'p',
                                  datatype=datatype, dataName=dataName, trialType=trialType)

In [None]:
# Display each "<metric>_<datatype>_<trialType>" dataset via
# `imshow_dataset_by_mouse`, passing dropX=21 on every call.
for metricName in ('mean', 'std'):  # 'avg_entropy' is currently disabled
    for datatype in dataDB.get_data_types():
        for trialType in dataDB.get_trial_type_names():
            nameParts = (metricName, datatype, trialType)
            dataName = '_'.join(nameParts)
            imshow_dataset_by_mouse(
                dataDB,
                ds,
                dataName,
                dropX=21,
            )

## 1.2 Temporal Mean

In [None]:
# Run `metric_by_session` for the 'mean' metric with the 'ps' specifier, for
# every mouse and data type. Datasets are named "mean_time_<datatype>".
metricName = 'mean'
for mousename in sorted(dataDB.mice):
    for datatype in dataDB.get_data_types():
        dataName = metricName + '_time_' + datatype

        # Only the 'raw' data type requests z-scoring ('rs'); all others pass None.
        zscoreDim = 'rs' if datatype == 'raw' else None

        metric_by_session(dataDB, mc, ds, mousename, metricName, 'ps',
                          datatype=datatype,
                          dataName=dataName,
                          zscoreDim=zscoreDim)

In [None]:
# Display the "mean_time_<datatype>" dataset of every data type via
# `imshow_dataset_by_session`.
metricName = 'mean'
# Iterate over the same data types as the cell that computes these datasets.
# Calling get_data_types() without arguments avoids depending on a `mousename`
# variable left over from an earlier cell's loop.
for datatype in dataDB.get_data_types():
    dataName = metricName + '_time_' + datatype
    print(dataName)

    imshow_dataset_by_session(dataDB, ds, dataName, limits=None,
                              fig1size=(12, 2), colBased=False, aspect='auto')

# 2. Phase and TrialType Specificity

**Significance**
* 1D Test if phases (PRE, TEX, REW) are different
* 1D Test if trialTypes are different in each phase
* Hierarchical test if TEX and REW are better at discriminating trialType than PRE
* Hierarchical test if bn_trial is better than bn_session at discriminating trialType

**Effect size**
* Compute scatter of pval vs effect size
* Check if there is a large range of effect sizes for significant tests
* Comment if effect sizes meaningful