In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

# Append base directory
# Locate the repository root (the directory named `rootname`) by truncating the path
# reported for the current frame, then append it to sys.path - presumably so that the
# project-local imports that follow can be resolved.
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"
# NOTE(review): inside a notebook the current frame is not backed by a regular source file;
# what this resolves to presumably depends on the Jupyter/ipykernel version - confirm.
thispath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# str.index raises ValueError if `rootname` does not occur anywhere in `thispath`
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.sych.data_fc_db_raw import DataFCDatabase
import lib.analysis.coactivity as coactivity

%load_ext autoreload
%autoreload 2

In [None]:
# tmp_path = root_path_data if 'root_path_data' in locals() else "./"
# Configuration dict that is later passed to DataFCDatabase; only 'root_path_data' is set here.
params = {}
# Alternative data locations are kept commented out; the single active assignment selects the path.
# params['root_path_data'] = './'
# params['root_path_data'] = '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed'
# NOTE(review): absolute, machine-specific path - has to be edited before running elsewhere.
params['root_path_data'] = '/media/aleksejs/DataHDD/work/data/yaro/neuronal-raw-pooled'
# params['root_path_data'] = gui_fpath('h5path', './')

In [None]:
# Instantiate the project database class with the `params` dict (which carries 'root_path_data')
dataDB = DataFCDatabase(params)

In [None]:
# Instantiate mesostat's DataStorage on 'sych_result_activity.h5'
# NOTE(review): the file name is relative, so it presumably resolves against the kernel's working directory - confirm
ds = DataStorage('sych_result_activity.h5')

In [None]:
# Metric calculator instance, constructed with serial=True and verbose=False
mc = MetricCalculator(serial=True, verbose=False)

In [None]:
# Print the `mice`, `dataTypes` and `trialTypeNames` attributes of the database, one per line
print(dataDB.mice, dataDB.dataTypes, dataDB.trialTypeNames, sep="\n")

In [None]:
# Named intervals used by the analyses below; every value is a [start, end] pair.
# NOTE(review): units are presumably seconds relative to trial start - confirm.
intervDict = dict(PRE=[-2, 0], TEX=[3, 3.5], REW=[6, 6.5])

# 1. Significance

## 1.1. Correlation plots


## 1.2. PCA exploration

## 1.3. Highly uncorrelated channels

In [None]:
# Query neuronal data for the single mouse 'mvg_4'; the result is indexed like a sequence further down
# (presumably one entry per session - TODO confirm against DataFCDatabase.get_neuro_data)
dataLST = dataDB.get_neuro_data({'mousename' : 'mvg_4'})

In [None]:
from mesostat.utils.signals.filter import zscore, drop_PCA

In [None]:
# Use the last entry returned for this mouse
dataRSP = dataLST[-1]
# Average over indices 60..69 of the second axis
# NOTE(review): the name suggests a (repetition, sample, channel) layout, making 60:70 a time window - confirm
dataRP = np.mean(dataRSP[:, 60:70], axis=1)
# Apply mesostat's drop_PCA with argument 1 (presumably removes the first principal component - confirm)
dataRP = drop_PCA(dataRP, 1)
# Correlation coefficients between the columns of dataRP (np.corrcoef treats rows of its input as variables)
CC = np.corrcoef(dataRP.T)

# Display the correlation matrix on a fixed [-1, 1] colour scale
plt.figure()
plt.imshow(CC, cmap='jet', vmin=-1, vmax=1)
plt.show()

In [None]:
import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.cluster import AffinityPropagation, SpectralClustering, OPTICS

# Compute clustering given distance matrix and distance threshold
def cluster_dist_matrix(M, t, method='hierarchic'):
    """
    Cluster the items described by a square matrix and return zero-based integer labels.

    :param M: square 2D array. For 'affinity', 'spectral' and 'optics' it is handed to
        scikit-learn as a precomputed affinity/metric matrix. For 'hierarchic' see the
        review note in the code.
    :param t: method-specific tuning parameter:
        'hierarchic' - maximal number of clusters (fcluster with criterion='maxclust');
        'affinity'   - damping factor of AffinityPropagation;
        'spectral'   - ignored;
        'optics'     - min_samples of OPTICS.
    :param method: one of 'hierarchic', 'affinity', 'spectral', 'optics'
    :return: 1D integer numpy array of labels, shifted so that the smallest label is 0.
        The shift is applied to every method, so any negative label produced by a
        backend (e.g. a noise marker) ends up as 0 as well.
    :raises ValueError: if `method` is not one of the supported names
    """
    if method == 'hierarchic':
        # NOTE(review): linkage receives a 2D array here, which scipy interprets as one
        # observation vector per row rather than as precomputed distances - confirm this is intended.
        distTril = np.tril(M, 1)
        linkageMatrix = linkage(distTril, method='centroid', metric='euclidean', optimal_ordering=True)
        # fcluster numbers clusters from 1; do not return here, so that the common
        # normalisation below makes the labels zero-based like for all other methods
        rez = fcluster(linkageMatrix, t, criterion='maxclust')
    elif method == 'affinity':
        clustering = AffinityPropagation(affinity='precomputed', damping=t).fit(M)
        rez = clustering.labels_
    elif method == 'spectral':
        clustering = SpectralClustering(affinity='precomputed', assign_labels="discretize", n_init=100).fit(M)
        rez = clustering.labels_
    elif method == 'optics':
        clustering = OPTICS(metric='precomputed', min_samples=t).fit(M)
        rez = clustering.labels_
    else:
        raise ValueError("Unknown method", method)

    # Original numbering may start at something other than 0 for some methods
    rez = np.asarray(rez, dtype=int)
    return rez - rez.min()


def cluster_plot(M, clusters):
    """
    Plot a square matrix with rows and columns reordered so that members of the same
    cluster are adjacent, and draw dashed lines where each cluster ends.

    :param M: square 2D array-like to display (colour scale is fixed to [-1, 1])
    :param clusters: 1D array-like of cluster labels, one per row/column of M
    :return: None, the figure is shown via matplotlib
    """
    clusters = np.asarray(clusters)

    # Stable sort keeps the original relative order of items inside each cluster
    idxs = np.argsort(clusters, kind='stable')
    # Reorder the matrix that was passed in (not a notebook-level global)
    MSort = np.asarray(M)[idxs][:, idxs]

    # np.unique returns labels in sorted order, matching the sorted layout above,
    # so the cumulative counts are the positions at which each cluster ends
    _, nCluster = np.unique(clusters, return_counts=True)
    nClustCum = np.cumsum(nCluster)

    plt.figure()
    plt.imshow(MSort, cmap='jet', vmin=-1, vmax=1)

    # imshow centres cells on integer coordinates, hence the -0.5 to land between cells
    for nLine in nClustCum:
        plt.axvline(x=nLine-0.5, linestyle='--', color='black', alpha=0.3)
        plt.axhline(y=nLine-0.5, linestyle='--', color='black', alpha=0.3)
    plt.show()

In [None]:
# Cluster with the 'affinity' method: correlations clipped to [0, 1] serve as the input matrix
# (negative correlations are thereby treated as zero), and 0.9 is passed as the method parameter `t`
clusters = cluster_dist_matrix(np.clip(CC, 0, 1), 0.9, method='affinity')
print(clusters)
# Show the correlation matrix reordered by cluster label
cluster_plot(CC, clusters)

In [None]:
# Composite 'corr' plot on 'bn_session' data for the intervals in intervDict,
# with trial types iGO/iNOGO and performances naive/expert.
# NOTE(review): the meaning of haveMono is defined in lib.analysis.coactivity - confirm there
coactivity.corr_plot_session_composite(dataDB, mc, intervDict, 'corr', 'bn_session',
                                             trialTypes=['iGO', 'iNOGO'],
                                       performances=['naive', 'expert'],
                                       haveMono=True)

**Drop the first principal component and explore the result**

In [None]:
# Composite 'corr' plot on 'bn_trial' data with nDropPCA=1 and haveMono=False.
# trialTypes is a list, as in the 'bn_session' call: a set of strings has no fixed
# iteration order across kernel restarts, so the order of trial types could change between runs.
coactivity.corr_plot_session_composite(
    dataDB, mc, intervDict, 'corr', 'bn_trial',
    trialTypes=['iGO', 'iNOGO'],
    performances=['naive', 'expert'],
    haveMono=False,
    nDropPCA=1,
)

# 2. Consistency
## 2.1. PCA consistency over mice
### 2.1.1. Angle-based consistency

Tasks
  * Explained variance by phase/session/mouse/trialType
     * Do not separate phases; it is meaningless. Compute PCA over all timesteps, then compare the projections between phases
     * Implement HAC correction

  * Global PCA shifts vs session

Approaches:
  * Evaluate PCA over all data, select the strongest components, plot the components as a function of the confound
  * Evaluate PCA separately for each confound, compare the resulting PCAs
  
**Plots**:
* Cosine-squared matrix $C^2_{ij} = \left(\sum_k R^{1}_{ik}R^{2}_{jk}\right)^2$, where $R^l$ is the PCA-transform of dataset $l$
* Consistency metric $E = \sum_{ij} e^1_i e^2_j C^2_{ij}$, where $e^l$ are the eigenvalues of dataset $l$

**Problem**:
The consistency metric $E$ has all necessary ingredients (angles, eigenvalues), but it is not mathematically clear that it behaves the desired way. Solid theory is required for this metric to be useful.

**Alternative approach**:
Try consistency metric $H(\frac{C^2_{ij}}{N})$. Should be great at measuring the sparsity of basis coupling. The challenge is to include eigenvalue priority into this metric, since spread of weak eigenvalues is not as relevant as spread of strong ones.

In [None]:
# PCA alignment plots by mouse on 'bn_session' data
# NOTE(review): trialType=None presumably means no trial-type filtering - confirm in lib.analysis.coactivity
coactivity.plot_pca_alignment_bymouse(dataDB, datatype='bn_session', trialType=None)

### 2.1.2. Eigenvalue-based consistency

* Let $x_1$, $x_2$ be some datasets
* Let $R_1$, $R_2$ be the corresponding PCA-transforms 
* Find total variances
    - $V_1 = \sum_i eig_i(x_1) = tr(cov(x_1)) = \sum_i cov_{ii}(x_1)$
    - $V_2 = \sum_i eig_i(x_2) = tr(cov(x_2)) = \sum_i cov_{ii}(x_2)$
* Find explained variances
    - $e_1 = eig(cov(x_1)) = diag(cov(R_1 x_1))$
    - $e_2 = eig(cov(x_2)) = diag(cov(R_2 x_2))$
* Find explained variances using wrong bases
    - $e_{12} = diag(cov(R_2 x_1))$
    - $e_{21} = diag(cov(R_1 x_2))$
* Find representation errors in explained variance ratios
    - $\epsilon_1 = \frac{\sum_i |e^1_i - e^{12}_i|}{2 V_1}$
    - $\epsilon_2 = \frac{\sum_i |e^2_i - e^{21}_i|}{2 V_2}$



* TODO: iter trialType=[hit, cr, all]
* TODO: iter perf=[naive,expert,all]

In [None]:
# PCA consistency plots for the intervals in intervDict, with the function's default settings
coactivity.plot_pca_consistency(dataDB, intervDict)

In [None]:
# Same consistency plots with dropFirst=1
# NOTE(review): presumably discards the first principal component before comparing - confirm in lib.analysis.coactivity
coactivity.plot_pca_consistency(dataDB, intervDict, dropFirst=1)

## 2.2. PCA consistency over phases
### 2.2.1. Angle-based consistency

In [None]:
# NOTE(review): this rebinds the notebook-level intervDict without the "PRE" entry used earlier;
# any cell above that is re-run after this one will see only TEX and REW.
intervDict = {
    "TEX" : [3, 3.5],
    "REW" : [6, 6.5]
}

# PCA alignment plots by phase on 'bn_trial' data for the two intervals above
# NOTE(review): trialType=None presumably means no trial-type filtering - confirm
coactivity.plot_pca_alignment_byphase(dataDB, intervDict, datatype='bn_trial', trialType=None)