In [1]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

# Append base directory
import os,sys,inspect
# Locate the repository root by searching for `rootname` inside the notebook's
# working directory, then make the project packages (mesostat, lib) importable.
rootname = "pub-2020-exploratory-analysis"
# Use the working directory instead of inspect.getfile(inspect.currentframe()):
# recent ipykernel versions compile each cell from a temporary file, so the
# frame's file name no longer lies inside the repository. On older kernels the
# previous expression resolved to the working directory as well.
thispath = os.getcwd()
rootidx = thispath.find(rootname)
if rootidx < 0:
    # Same exception type that str.index raised before, with an actionable message
    raise ValueError("Cannot find '" + rootname + "' in working directory " + thispath)
rootpath = os.path.join(thispath[:rootidx], rootname)
if rootpath not in sys.path:  # re-running the cell must not stack duplicate entries
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.metric.metric import MetricCalculator
from mesostat.utils.qt_helper import gui_fname, gui_fnames, gui_fpath
from mesostat.utils.hdf5_io import DataStorage

from lib.sych.data_fc_db_raw import DataFCDatabase
import lib.analysis.pid as pid

%load_ext autoreload
%autoreload 2

Appended root directory /home/alyosha/work/git/pub-2020-exploratory-analysis


In [2]:
# Settings passed to DataFCDatabase; 'root_path_data' is the data directory.
# Alternative values used on other setups:
#   './'
#   '/media/aleksejs/DataHDD/work/data/yaro/neuronal-raw-pooled'
#   gui_fpath('h5path', './')
params = {
    'root_path_data': '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed',
}

In [3]:
# Build the database object from `params` (data root: params['root_path_data']).
# Progress messages from the constructor are printed as the cell output.
dataDB = DataFCDatabase(params)

Searching for data files
Extracting trial type names
Extracting data types


In [4]:
# Display the `mice` attribute of the database (the mouse identifiers it found).
dataDB.mice

{'mvg_4', 'mvg_7', 'mvg_8', 'mvg_9'}

In [5]:
# HDF5 file name handed to pid.hypotheses_calc_pid and pid.hypotheses_plot_pid
# below; relative, so presumably resolved against the working directory -- TODO confirm.
h5outname = 'sych_result_higher_order_df.h5'

In [6]:
# Metric calculator passed to pid.hypotheses_calc_pid below.
# NOTE(review): serial=False with nCore=4 presumably means parallel execution on 4 cores -- confirm.
mc = MetricCalculator(serial=False, verbose=False, nCore=4)

# TODO
    
Hypothesis - Chain inhibition should increase synergy
* Cpu -> iGP/GP/eGP -> VM/VL

Performance-dependence
* Session-wise changes of redundancy/synergy as function of performance
* Movement-correlations of synergy/redundancy
    - Lick
    - Integral movement

# Hypotheses

In [7]:
# Named time windows as [start, end].
# NOTE(review): units are presumably seconds from trial start -- confirm.
intervDict = dict(TEX=[3, 3.5], REW=[6, 6.5])

# Target groups reused by several hypotheses; every entry gets its own list copy.
_prefrontalTargets = ('PrL', 'LO', 'VO', 'M2', 'Cg1')
_thalamicTargets = ('Po', 'VM')

# Each hypothesis maps a label to (interval key, source regions, target regions).
hypothesesDict = {}

# Feedforward prefrontal
hypothesesDict["H1_TEX"] = ("TEX", ['M1_l', 'S1_bf'], list(_prefrontalTargets))
hypothesesDict["H1a_TEX"] = ("TEX", ['S1_bf', 'S2'], list(_prefrontalTargets))
# M2 is a source here, so it is dropped from the targets
hypothesesDict["H1b_TEX"] = ("TEX", ['M1_l', 'M2'],
                             [region for region in _prefrontalTargets if region != 'M2'])

# Higher order is sensory/motor thalamus:
# test if (M1, S1) has more synergy than (M1, M2) or (S1, S2)
hypothesesDict["H2_TEX"] = ("TEX", ['M1_l', 'S1_bf'], list(_thalamicTargets))
hypothesesDict["H2a_TEX"] = ("TEX", ['S1_bf', 'S2'], list(_thalamicTargets))
hypothesesDict["H2b_TEX"] = ("TEX", ['M1_l', 'M2'], list(_thalamicTargets))

# Thalamus as source
hypothesesDict["H3_TEX"] = ("TEX", ['Po', 'VPM'], ['S1_bf', 'S2'])

# Motor thalamus synchronization
hypothesesDict["H4_TEX"] = ("TEX", ['VM', 'VL', 'LDVL'], ['M1_l', 'M2'])

In [8]:
# Compute PID for every entry of `hypothesesDict`; the hypothesis labels are
# printed once per datatype. `h5outname` is presumably where results are
# stored -- TODO confirm in lib.analysis.pid.
pid.hypotheses_calc_pid(
    dataDB, mc, hypothesesDict, intervDict, h5outname,
    nDropPCA=1,
    datatypes=['bn_session', 'bn_trial'],
    trialType='iGO',
    performance='expert',
)

H1_TEX
H1a_TEX
H1b_TEX
H2_TEX
H2a_TEX
H2b_TEX
H3_TEX
H4_TEX
H1_TEX
H1a_TEX
H1b_TEX
H2_TEX
H2a_TEX
H2b_TEX
H3_TEX
H4_TEX


In [9]:
# Plot the PID results for each hypothesis, restricted to the 'bn_session' datatype.
# NOTE(review): presumably reads what hypotheses_calc_pid stored under `h5outname` -- confirm.
pid.hypotheses_plot_pid(dataDB, hypothesesDict, h5outname, datatypes=['bn_session'])

H1_TEX
H1a_TEX
H1b_TEX
H2_TEX
H2a_TEX
H2b_TEX
H3_TEX
H4_TEX


In [10]:
# Compute and plot the 3D information measure for each hypothesis on the
# 'bn_session' datatype; nBin=4 is presumably the number of discretization
# bins -- TODO confirm in lib.analysis.pid.
pid.hypotheses_calc_plot_info3D(
    dataDB, hypothesesDict, intervDict,
    nBin=4,
    datatypes=['bn_session'],
    trialType='iGO',
    performance='expert',
)

H1_TEX
H1a_TEX
H1b_TEX
H2_TEX
H2a_TEX
H2b_TEX
H3_TEX
H4_TEX


# All-Distribution

**TODO**:
* [ ] Drop low-quality sessions
* [ ] Try composite p-values
* [ ] For Info3D, drop PCA1
* [ ] Consider re-doing analysis with PCA1 dropped
* Fraction Significant triplets per session
    * [+] Do regression on PID instead of Naive vs Expert
    * [ ] Binomial test fraction significant PID's above chance
    * [ ] Test if regression explained by having more iGO trials in expert
* Most significant triplets
    * [ ] Fraction Significant Sessions by mouse
        - For each triplet, find fraction of sessions it is significant in
        - Sorted 1D plot
        - Drop non-significant
        - Extract labels for top 10
    * [ ] Fraction Significant Sessions concat
        - Same as above, but for all mice
        - Sort by all mice
        - Concat barplot/1Dplot, mice with different colors
    * [ ] Plot pvalue vs performance for top10 sessions

In [12]:
# HDF5 file passed to the pid.plot_all_* calls below.
# Alternative values used on other setups:
#   '/media/aleksejs/DataHDD/work/data/yaro/pid/sych_result_multiregional_pid_df3.h5'
#   gui_fname('h5path', './', '(*.h5)')
pwdAllH5 = (
    '/media/alyosha/Data/TE_data/yarodata/sych_preprocessed/'
    'sych_result_multiregional_pid_df3.h5'
)

In [None]:
# Plot the distribution of all results in `pwdAllH5` using the 'cdf' plot style.
# NOTE(review): minTrials=50 presumably excludes sessions with fewer than 50 trials -- confirm.
pid.plot_all_results_distribution(dataDB, pwdAllH5, plotstyle='cdf', minTrials=50)

In [None]:
# Plot the fraction of significant results per session from `pwdAllH5`.
# NOTE(review): minTrials=50 presumably excludes sessions with fewer than 50 trials -- confirm.
pid.plot_all_frac_significant_bysession(dataDB, pwdAllH5, minTrials=50)

In [None]:
# Scatter the fraction of significant results against performance, from `pwdAllH5`.
# TODO: add a linear fit and report the p-value for H0: alpha = 0
# (alpha is presumably the fitted slope -- confirm).
pid.plot_all_frac_significant_performance_scatter(dataDB, pwdAllH5, minTrials=50)

TODO:
* Top 10 most synergetic connections
    - Try magnitude vs avg p-value vs fraction significant
    - Plot colorbars to show fraction of sessions by mouse
* Top 10 most synergy-involved regions
    - Count fraction of significant triplets where this region is target

In [None]:
# Plot the top 20 entries by fraction significant, triplets only
# (haveTriplet=True, haveSinglet=False), with 'VPL' in the target plot list.
# NOTE(review): exact ranking criterion lives in lib.analysis.pid -- confirm.
pid.plot_all_top_n_frac_significant(
    dataDB, pwdAllH5,
    nTop=20,
    minTrials=50,
    haveTriplet=True,
    haveSinglet=False,
    trgPlotList=['VPL'],
)

In [88]:
# Single hypothesis on the "REW" interval: sources LPAG and Cpu_1, target VPL.
hDict = dict(H_ALL_REW=("REW", ['LPAG', 'Cpu_1'], ['VPL']))

# Compute and plot the 3D information measure for it on both datatypes.
pid.hypotheses_calc_plot_info3D(
    dataDB, hDict, intervDict,
    performance='expert',
    datatypes=['bn_session', 'bn_trial'],
    trialType='iGO',
)

H_ALL_REW
H_ALL_REW


## Suppl

In [None]:
# Display the channel labels of mouse 'mvg_4' as a NumPy array (handy for
# looking up the region names used in the hypothesis dictionaries).
np.array(dataDB.get_channel_labels('mvg_4'))