In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

import statsmodels.api as sm
from statsmodels.formula.api import ols

# Append base directory
# Locate the repository root by finding `rootname` inside the path of the
# currently executing code, then put that root on sys.path so that the
# project-local `lib` package can be imported below.
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"
# NOTE(review): inside a notebook kernel the current frame has no real source
# file, so the resulting directory depends on the kernel (working directory or
# a temporary path) - confirm that it lies inside the repository.
thispath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# str.index raises ValueError when `rootname` is not part of `thispath`.
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage
from mesostat.stat.connectomics import offdiag_1D

from lib.sych.data_fc_db_raw import DataFCDatabase
from lib.analysis import model_comparison

%load_ext autoreload
%autoreload 2

In [None]:
# Parameters handed to the database constructor.
# Alternative values for 'root_path_data' that were used previously:
#   './'
#   gui_fpath('h5path', './')
params = {
    'root_path_data': '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed',
}

In [None]:
# Database object used by all analysis cells below; built from `params`
# (presumably reads params['root_path_data'] - verify in DataFCDatabase).
dataDB = DataFCDatabase(params)

In [None]:
# Storage handle for the results file 'sych_result_fc.h5'.
# NOTE(review): `ds` is rebound to a different file in section 1.3 before any
# visible use of this handle - confirm it is still needed.
ds = DataStorage('sych_result_fc.h5')

In [None]:
# Metric calculator passed to the model_comparison evaluations below.
# presumably serial=True disables parallel execution - TODO confirm
mc = MetricCalculator(serial=True, verbose=False)

In [None]:
# Overview of what the database exposes: mice, data types, trial type names
# (one per output line).
print(dataDB.mice, dataDB.dataTypes, dataDB.trialTypeNames, sep='\n')

# Functional Connectivity Model Comparison

## 1. Coactivity

### 1.1 Linear coactivity

* Test if pairwise correlation among channels is significant

**Results**:
* P-values (true for both baseline normalizations)
    * Even with Bonferroni correction, at least $80\%$ of channel pairs are significantly correlated.
    * Further, $99.9\%$ of all channel pairs have p-value below $0.01$, and the remaining pairs are only considered insignificant because of the Bonferroni correction.
* Correlations:
    * For session-based normalization, most correlations are in 0.7-1.0 range. Significant negative correlations exist but are very few. Almost no variance across mice/trialTypes/phases
    * For trial-based normalization most correlations are within 0.25-1.0 range, which is a little more healthy. There is some significant variance across mice/trialTypes/phases. However, still, almost all pairs are significantly correlated, which is bad

In [None]:
# Time interval [start, end] for each trial phase, passed to
# model_comparison.corr_evaluation (presumably seconds relative to trial
# onset - TODO confirm).
# Keys of a dict literal must be unique: a repeated key silently keeps only
# the last value, so every phase needs its own label.
# NOTE(review): the [-2, 0] interval is labelled "PRE" here - confirm that
# this is the phase name expected downstream.
intervDict = {
    "PRE" : [-2, 0],
    "TEX" : [3, 3.5],
    "REW" : [6, 6.5]
}

In [None]:
# Evaluate the 'corr' metric on 'raw' data for the intervals in intervDict,
# restricted to the iGO and iNOGO trial types. The result is indexed with
# 'corr' and 'pval' by the plotting cell below.
# NOTE(review): the Spearman evaluation in this notebook uses 'bn_session' and
# no trialTypes filter, so the later corr-vs-spr comparison mixes settings -
# confirm this is intended.
resultsDictCorr = model_comparison.corr_evaluation(dataDB, mc, intervDict, 'corr', 'raw',
                                                   trialTypes={'iGO', 'iNOGO'})

In [None]:
# Plot the correlation values and their p-values, with the Bonferroni option enabled.
model_comparison.plot_fc_explore(resultsDictCorr['corr'], resultsDictCorr['pval'], 'corr', withBonferroni=True)

### 1.2 Monotonic coactivity

* Test if Spearman rank correlation between channels is significant
* Test if Spearman rank correlation between channels is more significant than linear correlation

**Results**:
* No obvious improvement of Spr over Corr.
* Possibly can be blamed on the data, as it is oversynchronized

In [None]:
# Same evaluation as for 'corr', but with the 'spr' metric on 'bn_session'
# data and without a trialTypes argument.
resultsDictSpr = model_comparison.corr_evaluation(dataDB, mc, intervDict, 'spr', 'bn_session')

In [None]:
# Plot the 'spr' values and their p-values, with the Bonferroni option enabled.
model_comparison.plot_fc_explore(resultsDictSpr['corr'], resultsDictSpr['pval'], 'spr', withBonferroni=True)

In [None]:
# Compare the 'corr' and 'spr' results computed in the cells above.
model_comparison.empirical_corr_spr(resultsDictCorr, resultsDictSpr)

### 1.3 BivariateMI

In [None]:
# Rebind `ds` (previously 'sych_result_fc.h5') to the storage file that the
# MI cells below read their datasets from.
ds = DataStorage('extern/sych_result_multiregional_df.h5')

In [None]:
# Naive plots: for every stored dataset and every session inside it, save two
# heatmaps to the working directory -
#   'MI_<key>_<session>.png'   : element 0 of the session entry
#   'pVal_<key>_<session>.png' : -log10 of element 2 of the session entry
def _save_heatmap(matrix, fname):
    """Draw `matrix` with a colorbar, write it to `fname` and close the figure."""
    plt.figure()
    plt.imshow(matrix)
    plt.colorbar()
    plt.savefig(fname)
    plt.close()


dataRows = ds.list_dsets_pd()

for idx, row in dataRows.iterrows():
    key = '_'.join(row[['mousename','datatype', 'name']])
    data = ds.get_data(row['dset'])

    for iSession, dataSession in enumerate(data):
        keyS = f'{key}_{iSession}'

        _save_heatmap(dataSession[0], f'MI_{keyS}.png')
        _save_heatmap(-np.log10(dataSession[2]), f'pVal_{keyS}.png')

In [None]:
# For each mouse, pool the off-diagonal entries of element 0 (MI) and
# element 2 (p-value) over all sessions of every dataset, then plot them.
dataRows = ds.list_dsets_pd()

for mousename in sorted(dataRows['mousename'].unique()):
    print(mousename)
    rowsMouse = dataRows[dataRows['mousename'] == mousename]

    rezMI, rezP = {}, {}
    for idx, row in rowsMouse.iterrows():
        key = '_'.join(row[['datatype', 'name']])
        data = ds.get_data(row['dset'])

        MIs = np.hstack([offdiag_1D(session[0]) for session in data])
        pVals = np.hstack([offdiag_1D(session[2]) for session in data])

        # Missing values count as "no effect": MI -> 0, p-value -> 1
        rezMI[key] = np.where(np.isnan(MIs), 0, MIs)
        rezP[key] = np.where(np.isnan(pVals), 1, pVals)

    model_comparison.plot_fc_explore(rezMI, rezP, 'MI')

### 1.4. Model comparison for coactivity

* Compare Correlation, Spr, MI
    - Compare significances (p-values)
    - Find metric of effect size, invariant of number of measurements, compare
    - Report on differences between p-value-based vs effect-size-based FC

## 2. Model-based analysis of coactivity function

* Exploring functional relation of coactivity
    * Plot phase-space for random selection of channel pairs
    * Comment on any evidence of bifurcations
    * Perform model comparison for fitness by different order polynomials (e.g. AIC)
    * Comment on uniformity of distribution, test uniformification
* Repeat for 3D pairings
* Repeat for single future timestep

**Observations**:
* Channels appear to be highly correlated
* Channels are more correlated for bn_session than bn_trial. However, it is unclear if the differences are due to a shifting baseline
    - Hypothesis 1: Additive baseline
    - Hypothesis 2: Multiplicative baseline

In [None]:
# Example data for the phase-space exploration below: mouse 'mvg_4',
# second-to-last entry of its session list, 'bn_session' data type, 'iGO'
# trials only. Only the first element of the returned collection is kept;
# a later cell unpacks its shape as (nTrial, nTime, nChannel).
sessions = dataDB.get_sessions('mvg_4')
data = dataDB.get_neuro_data({'session' : sessions[-2]}, datatype='bn_session', trialType='iGO')[0]

In [None]:
# Average over axis 1 (the time axis, going by the (nTrial, nTime, nChannel)
# unpacking of data.shape used later in this notebook), leaving one value per
# trial and channel.
dataTr = np.mean(data, axis=1)

# Scatter of the per-trial averages of channel 0 against channel 1
plt.figure()
plt.plot(dataTr[:, 0], dataTr[:, 1], '.')
plt.show()

# Correlation coefficient of the same two columns (displayed as cell output)
np.corrcoef(dataTr[:, 0], dataTr[:, 1])[0, 1]

In [None]:
%matplotlib inline
nTrial, nTime, nChannel = data.shape

ch1, ch2 = np.random.randint(0, nChannel, 2)
print(ch1, ch2)

plt.figure()
for i in range(data.shape[0]):
    plt.plot(data[i, :, ch1], data[i, :, ch2])
plt.show()

In [None]:
from mesostat.visualization.mpl_matrix import imshow

def _bin_indices(values, vmin, vmax, nbin):
    """Map values from [vmin, vmax] onto integer bin indices in [0, nbin - 1]."""
    span = vmax - vmin
    if span == 0:
        # Degenerate range: every value falls into the first bin
        return np.zeros(np.shape(values), dtype=int)

    idxs = ((values - vmin) / (span / nbin)).astype(int)
    # The maximum itself maps to nbin; fold it into the last bin. Clipping works
    # for any sign of the data, unlike inflating the maximum by a factor.
    return np.clip(idxs, 0, nbin - 1)


def phase_space_occupancy(fig, ax, x2D, y2D, nbin=20):
    """
    Show which fraction of trajectories visits each cell of an nbin x nbin grid.

    The grid spans the global min..max of x2D (first grid axis) and of y2D
    (second grid axis). A trajectory counts at most once per cell, no matter
    how many of its points fall into that cell.

    :param fig:   figure handle, forwarded to imshow
    :param ax:    axis handle, forwarded to imshow
    :param x2D:   2D array, one trajectory per row
    :param y2D:   2D array paired row by row with x2D
    :param nbin:  number of bins along each axis
    """
    xmin, xmax = np.min(x2D), np.max(x2D)
    ymin, ymax = np.min(y2D), np.max(y2D)

    rezArr = np.zeros((nbin, nbin))

    for x, y in zip(x2D, y2D):
        visited = np.zeros((nbin, nbin), dtype=bool)
        visited[_bin_indices(x, xmin, xmax, nbin), _bin_indices(y, ymin, ymax, nbin)] = True
        rezArr += visited

    # Convert visit counts into a fraction of trajectories
    rezArr /= len(x2D)
    imshow(fig, ax, rezArr, haveColorBar=True, haveTicks=True)
        

# Occupancy map for a random channel pair. Sampling without replacement
# guarantees two different channels; the same channel twice would only fill
# the diagonal.
ch1, ch2 = np.random.choice(nChannel, size=2, replace=False)
print(ch1, ch2)

fig, ax = plt.subplots()
phase_space_occupancy(fig, ax, data[:, :, ch1], data[:, :, ch2], nbin=50)

plt.show()

In [None]:
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection

def phase_space_boxes(fig, ax, x2D, y2D, nbin=20):
    """
    Draw, for every trajectory, the bounding box it spans in the (x, y) plane.

    :param fig:   figure handle (not used by this function)
    :param ax:    axis that receives the collection of rectangles
    :param x2D:   sequence of 1D arrays, x-values of one trajectory each
    :param y2D:   sequence of 1D arrays paired with x2D
    :param nbin:  not used by this function
    """
    def _bounding_box(x, y):
        left, bottom = np.min(x), np.min(y)
        return Rectangle((left, bottom), np.max(x) - left, np.max(y) - bottom,
                         alpha=1, facecolor='none')

    boxes = [_bounding_box(x, y) for x, y in zip(x2D, y2D)]

    collection = PatchCollection(boxes, facecolor='none', cmap=plt.cm.hsv, alpha=0.5)
    # One scalar per box, evenly spaced on [0, 1], to be mapped through the colormap
    collection.set_array(np.linspace(0, 1, len(boxes)))
    ax.add_collection(collection)

    # Rescale the axis limits to the added rectangles
    ax.autoscale()

# Per-trial bounding boxes for a random channel pair. Sampling without
# replacement guarantees two different channels; the same channel twice would
# only yield squares along the diagonal.
ch1, ch2 = np.random.choice(nChannel, size=2, replace=False)
print(ch1, ch2)

fig, ax = plt.subplots()
phase_space_boxes(fig, ax, data[:, :, ch1], data[:, :, ch2], nbin=50)

plt.show()

## 3. Model comparison of directed functional connectivity

* Compare MAR, Gau-TE, GLM, DCM (average over phase time)
* Compare directed and undirected connectivity

## 4. Directed and undirected links to behaviour