In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

import statsmodels.api as sm
from statsmodels.formula.api import ols

# Append the repository root to sys.path so that the project-local packages
# imported further down in this cell (`lib...`) can be found.
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"

# Directories expected to lie somewhere inside the repository, in order of preference:
#   1. the directory derived from the file name of the executing frame (the original
#      behaviour); some Jupyter kernels compile cells from a temporary file, in which
#      case this directory is outside the repository and does not contain `rootname`
#   2. the current working directory, used as the fallback for that case
_frame_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
thispath = next((p for p in (_frame_dir, os.getcwd()) if rootname in p), None)
if thispath is None:
    # Same exception type that `str.index` raised here before, but with an actionable message
    raise ValueError(
        "Could not locate the repository root: neither %r nor the working directory %r contains %r. "
        "Run this notebook from inside the repository." % (_frame_dir, os.getcwd(), rootname)
    )

# Keep everything up to and including the first occurrence of the repository directory name
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)

# Re-running the cell must not pile up duplicate entries in sys.path
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath

from lib.sych.data_fc_db_raw import DataFCDatabase
from lib.analysis.channel_similarity import linear_fit_correlation, correlation_by_session

%load_ext autoreload
%autoreload 2

In [None]:
# Configuration dictionary passed to DataFCDatabase.
#
# 'root_path_data' is the directory holding the data. To run the notebook on another
# machine without editing it, set the SYCH_ROOT_PATH_DATA environment variable before
# starting the kernel; when it is unset or empty the original location is used.
#
# Alternatives that were previously kept here as commented-out code:
# tmp_path = root_path_data if 'root_path_data' in locals() else "./"
# params['root_path_data'] = './'
# params['root_path_data'] = gui_fpath('h5path', './')
params = {
    'root_path_data': (
        os.environ.get('SYCH_ROOT_PATH_DATA')
        or '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed'
    ),
}

In [None]:
# Build the database object from the configuration in `params`.
# NOTE(review): DataFCDatabase comes from lib.sych.data_fc_db_raw; presumably it reads the
# data found under params['root_path_data'] - confirm against that module.
dataDB = DataFCDatabase(params)

In [None]:
# Print three attributes of the database, one per line and in this order:
# mice, data types, trial type names
for attrName in ('mice', 'dataTypes', 'trialTypeNames'):
    print(getattr(dataDB, attrName))

# Channel Similarity

* Study effective rank as function of session/phase
* Are some channels effectively indistinguishable?
    * High correlation
    * $I(x: \Delta y) = 0$
    * $\Delta y$ is uncorrelated to behaviour/other channels
* Are orthogonal channel clusters (e.g. PCA) meaningful?
    * Orthogonal w.r.t phase/trialType specificity?

# 0. Plot Correlations

* Apparent clustering:
    - most channels are globally correlated
    - few channels form smaller clusters, frequently anti-correlated to other channels
    - some channels uncorrelated to anything
    - some variance among all parameters (sessions, phases, trialTypes)
* Could try identifying clusters, cross-correlating

**TODO**:
* Check that these results are consistent with correlation violins we did before.
    - In particular, do we see as much negative correlation there?
* Why is there strong negative correlation bn_session?? Check code for bugs
* Double-check we use only Yaro-selected sessions here. In particular, that they don't have optogenetics
* Try trial-based autocorrelation at some point

In [None]:
# Named intervals handed to the analysis calls below; each label maps to a two-element list.
# NOTE(review): presumably [start, end] in seconds within a trial - confirm against
# lib.analysis.channel_similarity.
intervDict = dict(TEX=[3, 3.5], REW=[6, 6.5])

# Trial type names handed to the analysis calls below
trialTypesTrg = set(('iGO', 'iNOGO'))

In [None]:
# Run the correlation analysis on the 'bn_session' data, using the intervals in `intervDict`
# and the trial types in `trialTypesTrg`.
# NOTE(review): correlation_by_session is imported from lib.analysis.channel_similarity;
# presumably it produces one set of correlation plots per session - confirm there.
correlation_by_session(dataDB, 'bn_session', intervDict, trialTypesTrg)

# 1. Testing linear fitness

1. For every channel $A$
    1. For every channel $x_i$ except A, $\Delta x_i = x_i - LinearFit(x_i, x_A)$
    2. Compute $c_{ij} = corr(\Delta x_i, \Delta x_j)$
    3. For every $i$, test Null hypothesis $c_{i, :} = 0$
    
**Results:**
* Very high variance across sessions/phases/trialTypes
* Results are asymmetric, which is expected: if two channels are not very similar, fitting one to the other will not change much
* Most low correlations are in vertical lines, implying that the channels were already very weakly correlated to the rest, so it is just a reflection of that fact

**Conclusion**:
* This approach is not useful for detecting similarities, as it has too many confounding variables

In [None]:
# Run the linear-fit residual correlation test on the 'bn_session' data, using the intervals
# in `intervDict` and the trial types in `trialTypesTrg`.
# NOTE(review): linear_fit_correlation is imported from lib.analysis.channel_similarity;
# presumably it implements the three-step procedure listed in the markdown of this section
# - confirm there.
linear_fit_correlation(dataDB, 'bn_session', intervDict, trialTypesTrg)