In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

import statsmodels.api as sm
from statsmodels.formula.api import ols

# Append base directory
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"

# Locate the project root from the working directory.  Inside a notebook cell
# the current frame's "file" is a synthetic per-cell name (and on newer
# kernels it lives in a temporary directory), so inspecting the frame is not a
# reliable way to find where the notebook sits.
thispath = os.path.abspath(os.getcwd())
if rootname not in thispath:
    raise ValueError(
        "Cannot locate project root '%s' in working directory '%s'" % (rootname, thispath)
    )

# Keep everything up to and including the first occurrence of the root folder name
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)

# Re-running this cell must not pile up duplicate entries on sys.path
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage
from mesostat.stat.anova import as_pandas, as_pandas_lst, anova_homebrew

from lib.gallerosalas.data_fc_db_aud_raw import DataFCDatabase
from lib.analysis.metric_helper import metric_by_session

%load_ext autoreload
%autoreload 2

In [None]:
# Configuration handed to the database constructor.
# 'root_path_data' points at the raw auditory dataset on the local machine;
# it can alternatively be chosen interactively, e.g.
#   gui_fpath("Path to data collection", './')
params = {
    'root_path_data': '/media/aleksejs/DataHDD/work/data/yasir/yasirdata_aud_raw',
}

In [None]:
# Database handle built from `params`; the cells below query it for the list
# of mice, the sessions of each mouse and the neuronal data itself.
dataDB = DataFCDatabase(params)

In [None]:
# Result storage bound to the given .h5 file name (a relative path).
# NOTE(review): `ds` is not referenced by any of the visible cells - confirm it is still needed.
ds = DataStorage('gallerosalas_result_individual_region.h5')

In [None]:
# Metric calculator requested in serial, non-verbose mode (per its keyword arguments).
# NOTE(review): `mc` is not referenced by any of the visible cells - confirm it is still needed.
mc = MetricCalculator(serial=True, verbose=False)

In [None]:
# Short overview of the database contents.  Each value is fetched lazily so
# the lines are produced one at a time, in exactly this order.
# NOTE(review): the channel count is queried for 'mou_5' only.
summaryItems = (
    ('mice',      lambda: dataDB.mice),
    ('nSessions', lambda: len(dataDB.sessions)),
    ('datatypes', lambda: dataDB.get_data_types()),
    ('nChannel',  lambda: dataDB.get_nchannels('mou_5')),
)
for itemLabel, itemGetter in summaryItems:
    print(itemLabel, itemGetter())

# Analysis of Variance

* Across sessions
    - Explained by performance
* Across channels, trials, timesteps
    - Explained by trial type

Things to understand:
* How to compare different rows?
* What models make sense?
* Try linear mixed models?

In [None]:
# Build one long-format table per mouse.  Every row is tagged with its
# channel, trial type, interval and session; `dfDict` maps mouse name to
# that table and is consumed by the ANOVA cells below.

# Trial types and intervals included in the analysis.
# (All available trial types could be taken from dataDB.get_trial_type_names() instead.)
trialTypeNames = ['Hit', 'CR']
intervNames = ['AUD', 'DEL', 'REW']

dfDict = {}
for mousename in dataDB.mice:
    sessions = dataDB.get_sessions(mousename)

    # Gather the per-condition frames and concatenate once per mouse:
    # DataFrame.append no longer exists in pandas >= 2.0, and growing a frame
    # inside a loop copies all previous rows on every iteration.
    dfParts = []
    for session in sessions:
        for trialType in trialTypeNames:
            for intervName in intervNames:
                # The REW interval is skipped for mou_6
                # NOTE(review): presumably that mouse has no usable reward interval - confirm
                if (mousename == 'mou_6') and (intervName == 'REW'):
                    continue

                data = dataDB.get_neuro_data({'session' : session}, datatype='bn_session',
                                             trialType=trialType, intervName=intervName)[0]
                data = np.mean(data, axis=1)  # Average over timesteps
                dataDF = as_pandas(data, ('trials', 'channels'))
                dataDF['trialType'] = trialType
                dataDF['interval'] = intervName
                dataDF['session'] = session
                dfParts.append(dataDF)

    if dfParts:
        # The trial index itself is not used as a factor in the models
        dfThis = pd.concat(dfParts, ignore_index=True).drop('trials', axis=1)
    else:
        # No data for this mouse: keep an empty table instead of failing on the missing column
        dfThis = pd.DataFrame()
    dfDict[mousename] = dfThis

In [None]:
# Linear model fitted per mouse on the pooled data of all its sessions.
# In the formula language `A*B` stands for both main effects plus their
# interaction, so the starred terms add the trialType:session,
# trialType:channels and interval:channels interactions to the main effects.
model = '''
    rez ~ C(channels)
    + C(trialType)
    + C(interval)
    + C(session)
    + C(trialType)*C(session)
    + C(trialType)*C(channels)
    + C(interval)*C(channels)
'''

# Session-wide: one ANOVA table per mouse, in alphabetical order of mouse names
mouseOrder = sorted(dataDB.mice)
for mousename in mouseOrder:
    print(mousename)
    mouseDF = dfDict[mousename]
    linModel = ols(model, data=mouseDF).fit()
    # typ=1 requests sequential (Type I) sums of squares, so term order in the formula matters
    anovaTable = sm.stats.anova_lm(linModel, typ=1)
    display(anovaTable)

In [None]:
# Across-sessions: fit a main-effects ANOVA separately for every session and
# plot, per mouse, how the mean square of each factor evolves over sessions.
nMice = len(dataDB.mice)

# squeeze=False always returns a 2D array of axes, so indexing by mouse also
# works when there is only a single mouse (otherwise a bare Axes is returned).
fig, ax = plt.subplots(ncols=nMice, figsize=(5*nMice, 5), squeeze=False)
ax = ax[0]

model = '''
rez ~ C(channels)+C(trialType)+C(interval)
'''

# Legend labels for the non-residual ANOVA rows, in the order the terms appear in the formula
names = ['channels', 'trialType', 'interval']

for iMouse, mousename in enumerate(sorted(dataDB.mice)):
    dfThis = dfDict[mousename]
    print(mousename)
    sessions = dataDB.get_sessions(mousename)

    # One vector of mean squares (one entry per factor) for every session
    plotData = []
    for session in sessions:
        dfSession = dfThis[dfThis['session'] == session]
        linModel = ols(model, data=dfSession).fit()
        rezStat = sm.stats.anova_lm(linModel, typ=1)
        rezStat = rezStat.drop('Residual')
        plotData.append(np.array(rezStat['mean_sq']))

    # Transpose to (factor, session) so that each row becomes one curve
    plotData = np.array(plotData).T

    axThis = ax[iMouse]
    axThis.set_title(mousename)  # identify the panel; the printed names are not part of the figure
    axThis.set_ylabel('mean_sq')
    for name, x in zip(names, plotData):
        axThis.semilogy(x, label=name)

    axThis.set_xticks(np.arange(len(sessions)))
    axThis.set_xticklabels(sessions, rotation=90)
    axThis.legend()

# Make sure the output folder exists before writing the figure
os.makedirs('pics', exist_ok=True)
fig.savefig('pics/ANOVA_by_session.png')
plt.show()
plt.close(fig)