In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

import statsmodels.api as sm
from statsmodels.formula.api import ols

# Append the project root directory to sys.path so that project-local packages can be imported
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"
# Use the kernel's working directory instead of the file of the current frame:
# recent ipykernel versions compile cells from a temporary file outside the project tree,
# so the frame's file name no longer contains the project root
thispath = os.path.abspath(os.getcwd())
if rootname not in thispath:
    raise ValueError("Working directory '" + thispath + "' is not inside '" + rootname
                     + "'; start the notebook from within the project tree")
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# Keep the cell idempotent: do not add the same entry again when the cell is re-run
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.sych.data_fc_db_raw import DataFCDatabase

%load_ext autoreload
%autoreload 2

In [None]:
# Configuration dict handed to the database constructor.
# Alternative values for 'root_path_data' that were used previously (kept for reference):
#   tmp_path = root_path_data if 'root_path_data' in locals() else "./"
#   './'
#   '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed'
#   gui_fpath('h5path', './')
params = {
    'root_path_data': '/media/aleksejs/DataHDD/work/data/yaro/neuronal-raw-pooled',
}

In [None]:
# Instantiate DataFCDatabase with the configuration dict `params` (provides 'root_path_data')
dataDB = DataFCDatabase(params)

In [None]:
# Metric calculator from mesostat, constructed with serial=True and verbose=False
mc = MetricCalculator(serial=True, verbose=False)

In [None]:
# Show the mice, data types and trial type names exposed by the database, one per line
print(dataDB.mice, dataDB.dataTypes, dataDB.trialTypeNames, sep='\n')

# PCA Analysis


Tasks
  * Explained variance by phase/session/mouse/trialType
     * Do not separate phases; it's meaningless. Compute PCA for all timesteps, then compare the projections between phases
     * Implement HAC correction

  * Global PCA shifts vs session

Approaches:
  * Evaluate PCA over all data, select the strongest components, plot the components as a function of the confound
  * Evaluate PCA separately for each confound, compare the resulting PCAs

In [None]:
from sklearn.decomposition import PCA
from mesostat.visualization.mpl_matrix import imshow


# Named [start, end] time windows, passed as `cropTime` when querying neuronal data
# (units are presumably seconds within a trial - TODO confirm)
intervDict = dict(TEX=[3, 3.5], REW=[6, 6.5])


def _pca_overlap(compA, sqrtVarA, compB, sqrtVarB):
    """
    Compare two PCA bases.

    :param compA:     Components of the first PCA, one component per row
    :param sqrtVarA:  Square roots of the explained variance ratios of the first PCA
    :param compB:     Components of the second PCA, one component per row
    :param sqrtVarB:  Square roots of the explained variance ratios of the second PCA
    :return:          Tuple (matComp, align). matComp[i, j] is the absolute dot product of
                      component i of A and component j of B. align is the sum over all pairs of
                      sqrtVarA[i] * sqrtVarB[j] * matComp[i, j]**2
    """
    matComp = np.abs(compA.dot(compB.T))
    matVar = np.outer(sqrtVarA, sqrtVarB)
    return matComp, np.sum(matVar * matComp**2)


def calc_plot_pca_alignment_bymouse():
    """
    Fit one PCA per mouse on 'iGO' trial data (timesteps pooled as samples) and compare
    the resulting bases between all pairs of mice.

    Produces two figures per outer iteration:
      * Left panel: explained variance ratio per component (log scale) for each mouse.
        Right panel: matrix of pairwise alignment coefficients
      * A grid with the absolute component overlap matrix for every pair of mice

    Reads the module-level `dataDB`. Returns nothing; figures are shown with plt.show().
    The component overlap assumes all mice have the same number of features - TODO confirm.
    """
    mice = sorted(dataDB.mice)
    nMouse = len(mice)

    # NOTE(review): `datatype` is never passed to the data query below, so both iterations
    # currently produce identical figures - confirm how it is meant to be used
    for datatype in ['bn_trial', 'bn_session']:
        fig1, ax1 = plt.subplots(ncols=2, figsize=(8, 4))
        # squeeze=False keeps ax2 two-dimensional even when there is only a single mouse
        fig2, ax2 = plt.subplots(nrows=nMouse, ncols=nMouse, figsize=(4*nMouse, 4*nMouse), squeeze=False)

        compLst = []
        sqrtVarLst = []
        for mousename in mice:
            dataRSP = dataDB.get_neuro_data({'mousename' : mousename}, trialType='iGO')
            dataRSP = np.concatenate(dataRSP, axis=0)  # Join the arrays returned by the query along the first axis
            dataRP = np.concatenate(dataRSP, axis=0)   # Use timesteps as samples

            pca = PCA(n_components=dataRP.shape[1])
            pca.fit(dataRP)
            ax1[0].semilogy(pca.explained_variance_ratio_, label=mousename)

            compLst += [np.copy(pca.components_)]
            # Square roots are stored so that the alignment of a basis with itself sums to 1
            sqrtVarLst += [np.sqrt(pca.explained_variance_ratio_)]

        # Compute pca_alignment coefficient for every pair of mice
        matAlign = np.zeros((nMouse, nMouse))
        for iMouse in range(nMouse):
            for jMouse in range(nMouse):
                matComp, matAlign[iMouse, jMouse] = _pca_overlap(
                    compLst[iMouse], sqrtVarLst[iMouse], compLst[jMouse], sqrtVarLst[jMouse])

                ax2[iMouse, jMouse].imshow(matComp, vmin=0, vmax=1)
                ax2[iMouse, jMouse].set_title(mice[iMouse] + ' vs ' + mice[jMouse])

        ax1[0].set_xlabel('PCA')
        ax1[0].set_ylabel('Explained Variance')

        imshow(fig1, ax1[1], matAlign, limits=[0,1], title='PCA alignment',
               xTicks=mice, yTicks=mice, haveColorBar=True, cmap='jet')

        ax1[0].legend()
        plt.show()

#             for session in dataDB.get_sessions(mousename):


def calc_plot_pca_exp_var():
    """
    For every mouse, fit one PCA per time interval in `intervDict` on 'iGO' trial data
    and compare the PCA bases of the first two intervals.

    Per mouse, one figure is produced:
      * Left panel: explained variance ratio per component (log scale) for each interval
      * Right panel: absolute overlap between the components of the first two intervals

    The variance-weighted sum of the overlap matrix is printed for each mouse.
    Reads the module-level `dataDB` and `intervDict`. Returns nothing.
    """
    # NOTE(review): `datatype` only appears in the figure title and is never passed to the
    # data query, so both iterations plot the same data - confirm how it is meant to be used
    for datatype in ['bn_trial', 'bn_session']:
        for mousename in sorted(dataDB.mice):
            fig, ax = plt.subplots(ncols=2, figsize=(8, 4))
            ax[0].set_title(mousename + '_' + datatype)

            compLst = []
            varLst = []

            for intervName, interv in intervDict.items():
                dataRSP = dataDB.get_neuro_data({'mousename' : mousename}, cropTime=interv, trialType='iGO')
                dataRSP = np.concatenate(dataRSP, axis=0)
                # Average over axis 1 (presumably the timesteps inside the interval - TODO confirm)
                dataRP = np.mean(dataRSP, axis=1)

                pca = PCA(n_components=dataRP.shape[1])
                pca.fit(dataRP)
                ax[0].semilogy(pca.explained_variance_ratio_, label=intervName)

                compLst += [np.copy(pca.components_)]
                varLst += [np.copy(pca.explained_variance_ratio_)]

                # TODO: How to use PCA of X to compute explained variance for Y ???

            # Compare the bases of the first two intervals
            # NOTE(review): the weighting here (variance ratios times |overlap|) differs from
            # calc_plot_pca_alignment_bymouse (sqrt of ratios times overlap squared) - confirm which is intended
            matVar = np.outer(varLst[0], varLst[1])
            matComp = np.abs(compLst[0].dot(compLst[1].T))
            matTot = matVar * matComp
            print(np.sum(matTot))

            img = ax[1].imshow(matComp, vmin=0, vmax=1)
            # Plain matplotlib colorbar; the helper called here before was never imported in this notebook
            fig.colorbar(img, ax=ax[1])
            ax[0].legend()
            plt.show()

#             for session in dataDB.get_sessions(mousename):
                
                
# Run the analysis of this cell: only the cross-mouse PCA alignment is executed,
# the per-interval explained-variance analysis is currently disabled
# calc_plot_pca_exp_var()
calc_plot_pca_alignment_bymouse()