In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display
from scipy.signal import periodogram
from scipy.stats import mannwhitneyu, wilcoxon

# Append base directory
import os,sys,inspect
# Locate the repository root by searching the working directory for the repository
# folder name, then make the root importable so that `lib.*` can be found.
rootname = "pub-2020-exploratory-analysis"

# Inside a notebook the current frame's file is not the notebook itself (recent
# ipykernel versions report a temporary file), so the search starts from the
# working directory instead of inspect.getfile(inspect.currentframe()).
thispath = os.getcwd()
if rootname not in thispath:
    raise ValueError("Directory '" + rootname + "' is not part of the working directory " + thispath)

# Everything up to and including the first occurrence of the repository folder name
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)

# Guard against piling up duplicate entries when the cell is re-run
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.sych.data_fc_db import DataFCDatabase
from lib.sych.plot_helper import imshow_dataset_by_mouse, imshow_dataset_by_session
from lib.sych.metric_helper import metric_by_session

# Enable the IPython autoreload extension in mode 2, so that edits to imported
# project modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
# Path settings for the database object. Only the neuronal data root is set.
#
# Alternatives for 'root_path_data' that are currently disabled:
#   gui_fpath("Path to data collection",  './')
#   '/media/alyosha/Data/TE_data/yarodata/neuro'
#
# Further entries that are currently disabled:
#   params['root_path_paw']   = gui_fpath("Path to Paw data",         params['root_path_data'])
#   params['root_path_lick']  = gui_fpath("Path to Lick data",        params['root_path_paw'])
#   params['root_path_whisk'] = gui_fpath("Path to Whisk Data",       params['root_path_lick'])
#   params['root_path_te']    = gui_fpath("Select TE results folder", params['root_path_data'])
params = {
    'root_path_data': '/media/aleksejs/DataHDD/work/data/yaro/neuronal',
}

In [None]:
# Instantiate the project database object with the path settings collected in `params`
dataDB = DataFCDatabase(params)

In [None]:
# Run every reader of the database object, one data source per call.
# NOTE(review): `params` only sets 'root_path_data'; the TE/paw/lick/whisk path
# entries are commented out there - confirm these readers do not require them.
dataDB.read_te_files()
dataDB.read_neuro_files()
dataDB.read_resample_paw_files()
dataDB.read_resample_lick_files()   # Currently does not work. Ask Yaro if we need it
dataDB.read_resample_whisk_files()

In [None]:
# Threshold handed to mark_days_expert_naive.
# NOTE(review): presumably a performance cut-off that separates expert from naive days - confirm
pTHR = 0.7
dataDB.mark_days_expert_naive(pTHR)

# Exploratory Analysis

* Time/Frequency Analysis
    * SNR - via lag-1 autocorrelation (AC1)
    * PSD
    * Recurrence Plot?
* Activity - Testing if one region is more active than another (raw data)
    * Mean
    * Variance
    * Entropy
* Temporal structure
    * Individual temporal specificity (regions more active later vs earlier)
        - Test activity for each timestep vs cycle null model, plot significant bins
    * Individual temporal stability (how consistent is the activity over trials)
        - Test activity for each timestep vs cycle null model, plot -log(p) for ranksum
* Heterogeneous decompositions - Auto-identify across sessions, trials, time, channels
    * TCA

In [None]:
# Result storage object created for the HDF5 file named below; it is handed to
# metric_by_session and to the imshow_* helpers in the cells that follow
ds = DataStorage('sych_result_individual_region.h5')

In [None]:
# Metric calculator shared by all metric_by_session calls below,
# constructed with serial=True and verbose=False
mc = MetricCalculator(serial=True, verbose=False)

# 1. Frequency properties
## 1.1 SNR via AutoCorrelation

**Plan:**
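* AutoCorr[1], the autocorrelation at a lag of one time step, is close to 1 if the data changes slowly from one sample to the next, and close to zero if the data changes very fast (as uncorrelated noise does). It is used here as a proxy for the SNR.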

In [None]:
# Evaluate the 'autocorr_d1' metric once per mouse, visiting the mice in sorted order
for mousename in sorted(dataDB.mice):
    metric_by_session(dataDB, mc, ds, mousename, 'autocorr_d1', 'p')

In [None]:
# Plot the stored 'autocorr_d1' results with the per-mouse helper.
# NOTE(review): limits=[0,1] presumably fixes the displayed value range - confirm
imshow_dataset_by_mouse(dataDB, ds, 'autocorr_d1', limits=[0,1])

## 1.2 PSD-based SNR [TODO]

In [None]:
# Pick entry 5 of dataDB.dataNeuronal as the example data set for the PSD cell.
# NOTE(review): the index 5 is hard-coded - confirm which entry is intended
dataThis = dataDB.dataNeuronal[5]
# stdThis = np.std(dataThis)
# The array is 3D; its axes are unpacked as (trial, time, channel)
nTrial, nTime, nChannel = dataThis.shape

def weirdpsd(f, psd, tau):
    """
    Rescale a power spectrum by the frequency-dependent factor 1 + (f * tau)^2.

    NOTE(review): this looks like compensation for a first-order low-pass
    roll-off with time constant tau - confirm the intended meaning.

    :param f:    frequency, scalar or array
    :param psd:  power spectral density, scalar or array compatible with f
    :param tau:  scale that multiplies the frequency
    :return:     psd * (1 + (f * tau)^2)
    """
    scaledFreq = f * tau
    gain = 1 + scaledFreq ** 2
    return psd * gain

def transfunc(dff):
    """
    Apply the nonlinear map dff -> dff / (1.5 - dff).

    The map is the identity at 0 and 0.5 and diverges as dff approaches 1.5.

    :param dff:  input value, scalar or array
    :return:     dff / (1.5 - dff)
    """
    denominator = 1.5 - dff
    return dff / denominator


# Sampling rate passed to the periodogram.
# NOTE(review): presumably the acquisition rate of the neuronal data in Hz - confirm
samplingRate = 20

# For every channel, average the log10 power spectrum over trials.
# `periodogram` is scipy.signal.periodogram (imported at the top of the notebook).
psdMuLst = []
for iChannel in range(nChannel):
    psdLogLst = []
    for iTrial in range(nTrial):
        freq, psd = periodogram(dataThis[iTrial, :, iChannel], fs=samplingRate)

        # Skip the zero-frequency bin: periodogram removes the mean by default,
        # so that bin is ~0 and its logarithm diverges
        psdLogLst.append(np.log10(psd[1:]))

    f = freq[1:]
    psdMuLst.append(np.mean(psdLogLst, axis=0))

# Average over channels, used as the common baseline in the plot
psdMuAvg = np.mean(psdMuLst, axis=0)

# One curve per channel: deviation of its log-spectrum from the channel average
plt.figure()
for iChannel in range(nChannel):
    plt.plot(f, psdMuLst[iChannel] - psdMuAvg)

plt.xlabel('Frequency')
plt.ylabel('log10 PSD relative to channel average')
plt.title('Trial-averaged log-PSD by channel')
plt.show()

# 2. Activity

## 2.1 Variance-based measures

In [None]:
# Evaluate each activity metric for every mouse; the metric name is printed
# once before its pass over the mice
for metricName in ('mean', 'std', 'avg_entropy'):
    print(metricName)
    for mousename in sorted(dataDB.mice):
        metric_by_session(dataDB, mc, ds, mousename, metricName, 'p')

In [None]:
# Plot the stored 'mean' results with the per-mouse helper
imshow_dataset_by_mouse(dataDB, ds, 'mean')

In [None]:
# Plot the stored 'std' results with the per-mouse helper
imshow_dataset_by_mouse(dataDB, ds, 'std')

In [None]:
# Plot the stored 'avg_entropy' results with the per-mouse helper
imshow_dataset_by_mouse(dataDB, ds, 'avg_entropy')

# 3. Temporal Structure
## 3.1 Mean vs Time

In [None]:
# Remove the entries named 'mean_time_zscore' from the result storage; the next cell
# writes results under the same name.
# NOTE(review): presumably done so that stale results are not mixed with new ones - confirm
ds.delete_by_query({'name' : 'mean_time_zscore'})

In [None]:
# Evaluate the 'mean' metric with dimension string 'ps' for every mouse and store
# it under the name 'mean_time_zscore'
zscoreKwargs = {'dataName': 'mean_time_zscore', 'cropTime': 200, 'zscoreDim': 'rs'}
for mousename in sorted(dataDB.mice):
    metric_by_session(dataDB, mc, ds, mousename, 'mean', 'ps', **zscoreKwargs)

In [None]:
# Plot the stored 'mean_time_zscore' results with the per-session helper;
# no value limits are passed (limits=None)
imshow_dataset_by_session(dataDB, ds, 'mean_time_zscore', limits=None, fig1size=(12, 2), colBased=False, aspect='auto')

## 3.2 Significance vs Time

In [None]:
# Metric defined by user
def temporal_significance(data, settings):
    """
    User-defined metric: signed significance of the activity at every time step.

    The data is centred on its mean over all entries. For each index of the last
    axis, a Wilcoxon signed-rank test is applied to the centred values across the
    first axis. The result is -log10(p), multiplied by the sign of the mean of
    those centred values.

    :param data:      3D array whose middle axis has length 1; the first and last
                      axes are treated as trials and time steps
    :param settings:  not used by this metric
    :return:          1D array with one signed -log10(p) value per time step
    """
    assert data.ndim == 3
    assert data.shape[1] == 1

    # Drop the singleton middle axis, then centre on the mean over all entries
    trialByTime = np.array(data[:, 0])
    nTrial, nTime = trialByTime.shape
    centred = trialByTime - np.mean(trialByTime)

    signedLogP = np.zeros(nTime)
    for iTime, column in enumerate(centred.T):
        _, pval = wilcoxon(column)
        signedLogP[iTime] = np.sign(np.mean(column)) * (-np.log10(pval))
    return signedLogP

In [None]:
# Evaluate the user-defined temporal_significance metric for every mouse through the
# 'generic_metric' entry point and store it under the name 'temporal_significance'
for mousename in sorted(dataDB.mice):
    metric_by_session(
        dataDB, mc, ds, mousename, 'generic_metric', 'p',
        dataName='temporal_significance',
        metricSettings={"metric": temporal_significance},
        cropTime=200,
    )

In [None]:
# Plot the stored 'temporal_significance' results (signed -log10(p) values) with the
# per-session helper.
# NOTE(review): limits=[-10,10] presumably fixes the displayed value range - confirm
imshow_dataset_by_session(dataDB, ds, 'temporal_significance', limits=[-10,10], fig1size=(12, 2), colBased=False, aspect='auto')