In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

# Append base directory
# The project root is located by searching the current working directory for the
# repository folder name, so the notebook must be started from inside the repository.
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"
# thispath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
thispath = os.getcwd()
rootIdx = thispath.find(rootname)
if rootIdx < 0:
    # Fail with an actionable message instead of the bare "substring not found"
    raise ValueError("Working directory " + thispath + " is not inside " + rootname)
rootpath = os.path.join(thispath[:rootIdx], rootname)
# Re-running this cell must not keep stacking the same entry onto sys.path
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.metric.metric import MetricCalculator
from mesostat.utils.qt_helper import gui_fname, gui_fnames, gui_fpath
from mesostat.utils.hdf5_io import DataStorage
from mesostat.utils.pandas_helper import pd_query

from lib.gallerosalas.data_fc_db_aud_raw import DataFCDatabase
import lib.analysis.triplet_analysis.mousewise as mousewise
import lib.analysis.triplet_analysis.calc_reader_mousewise as calc_reader


# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Configuration handed to DataFCDatabase: location of the data collection.
# Alternatives kept for reference:
#   interactive choice : gui_fpath("Path to data collection",  './')
#   other location     : '/home/alyosha/data/yasirdata_aud_raw/'
params = {
    'root_path_data': '/media/alyosha/Data/TE_data/yasirdata_aud_raw/',
}

In [None]:
# Construct the database object from the configured data location
dataDB = DataFCDatabase(params)

In [None]:
# Run the shortest-distance computation on the database object.
# NOTE(review): what is measured and where the result is stored is defined in
# DataFCDatabase and is not visible from this notebook.
dataDB.calc_shortest_distances()

In [None]:
# Quick overview of the loaded database: mice, number of sessions,
# available data types, and the channel count reported for mouse 'mou_5'.
print('mice', dataDB.mice)
print('nSessions', len(dataDB.sessions))
print('datatypes', dataDB.get_data_types())
print('nChannel', dataDB.get_nchannels('mou_5'))

In [None]:
# Presumably the name of an HDF5 results file.
# NOTE(review): `h5outname` is not referenced by any other visible cell — confirm it is still needed.
h5outname = 'gallerosalas_result_higher_order_df.h5'

In [None]:
# Metric calculator from mesostat.
# NOTE(review): `mc` is not used by any other visible cell, and serial=True is
# passed together with nCore=4 — confirm which of the two settings is intended.
mc = MetricCalculator(serial=True, verbose=False, nCore=4)

In [None]:
# Display the database's channelAreasDF attribute (a bare last expression is rendered as the cell output)
dataDB.channelAreasDF

## All - Distribution - Nosession

In [None]:
# Earlier file layout, kept for reference. Note that `pwdAllH5_2` is still
# referenced by the `pid_joint` cells further down in this notebook.
# pwdPrefix = '/home/alyosha/data/yasirdata_aud_raw/pid/'

# pwdAllH5_2 = pwdPrefix + 'gallerosalas_result_multiregional_pid_all_df.h5'
# pwdAllH5_2_Rand = pwdPrefix + 'gallerosalas_result_multiregional_pid_all_df_rand.h5'

# Folder with the HDF5 result files that calc_reader.summary_df and the
# mousewise plotting routines read in the cells that follow.
prefixPath = '/media/alyosha/Data/TE_data/calculations/pid/yasir-aud/'

pwdH5data = os.path.join(prefixPath, 'pid_gallerosalas_multimouse_nbin_2_data.h5')
pwdH5rand = os.path.join(prefixPath, 'pid_gallerosalas_multimouse_nbin_2_rand.h5')

In [None]:
# Interval names are taken from the database; the trial types are listed by hand.
# In the visible cells both lists are only consumed by the `pid_joint` calls further down.
intervNames = dataDB.get_interval_names()
trialTypes = ['Hit', 'CR', 'Miss', 'FA']

In [None]:
# Summary table of the contents of the data results file, plus a second table
# produced from it by summary_update_data_sizes using the database
# (presumably the same summary extended with data sizes — confirm in calc_reader).
dfSummary = calc_reader.summary_df(pwdH5data)
dfSummaryDataSizes = calc_reader.summary_update_data_sizes(dfSummary, dataDB)

In [None]:
# Summary table of the contents of the file at pwdH5rand
dfSummaryRand = calc_reader.summary_df(pwdH5rand)

### 1. Global Significance Testing

1. For every sweep, get the data size
2. Inside the test, generate an AdversaryDist for each data size and save it to a file
3. Load the file as a dict and provide it as an argument to the test
4. In the test, produce a dataframe: Sweep -> (frac>Shuffle, pValShuffle, frac>Conserv, pValConserv)
5. Plot the table

In [None]:
# File with the adversarial distributions; it is read by
# calc_reader.read_adversarial_distr_file in the following cell.
pwdAdversarial = '/media/alyosha/Data/TE_data/pid_rand_dist.h5'

In [None]:
# Load the adversarial distributions from file (presumably returned as a dict, as the name suggests)
dictAdversarial = calc_reader.read_adversarial_distr_file(pwdAdversarial)

In [None]:
# Violin-plot test of the data results against the results in pwdH5rand.
# The returned table is filtered with pd_query and plotted in later cells.
# NOTE(review): the exact meaning of thrBig=0.2 is defined in mousewise — not visible here.
rezTestShuffle = mousewise.plot_violin_test(pwdH5data, pwdH5rand, dfSummary, dfSummaryRand, thrBig=0.2)

In [None]:
# Violin-plot test of the data results against the adversarial distributions,
# using the summary table that was updated with data sizes.
# The returned table is filtered with pd_query and plotted in later cells.
# NOTE(review): the exact meaning of thrBig=0.2 is defined in mousewise — not visible here.
rezTestAdversarial = mousewise.plot_violin_test_adversarial(pwdH5data, dictAdversarial, dfSummaryDataSizes, thrBig=0.2)

In [None]:
from mesostat.visualization.mpl_matrix import plot_df_2D_outer_product

In [None]:
def inf_update(df, key, val):
    """
    Return a copy of ``df`` in which infinite entries of column ``key`` are replaced by ``val``.

    Both ``+inf`` and ``-inf`` are replaced by the same ``val``. The input dataframe is
    left untouched, so tables produced by earlier cells are never altered behind the
    reader's back.

    :param df:   pandas DataFrame that contains column ``key``
    :param key:  name of the column to clean
    :param val:  finite value substituted for every infinite entry
    :return:     new DataFrame with the replacement applied
    """
    rez = df.copy()
    # One mask covers both signs of infinity
    rez.loc[rez[key].isin([np.inf, -np.inf]), key] = val
    return rez

# Replace infinite '-log10(pval)' entries by a large finite number before plotting
rezTestShuffle = inf_update(rezTestShuffle, '-log10(pval)', 1000)
rezTestAdversarial = inf_update(rezTestAdversarial, '-log10(pval)', 1000)

# Plot significance: one figure per test, one panel per PID atom.
# Both figures share the same layout, so a single loop draws them.
for testLabel, rezTest, figName in [
    ('Shuffle-Sign', rezTestShuffle, 'pid_aud_neglogpval_shuffle_outer2d.svg'),
    ('Adv-Sign', rezTestAdversarial, 'pid_aud_neglogpval_adversarial_outer2d.svg'),
]:
    print(testLabel)
    fig, ax = plt.subplots(ncols=3, figsize=(12,4), tight_layout=True)
    for iPid, pidType in enumerate(['red', 'unique', 'syn']):
        dfQuery = pd_query(rezTest, {'atom': pidType})
        plot_df_2D_outer_product(ax[iPid], dfQuery, ['datatype', 'mousename'], ['trialType', 'intervName'],
                                 '-log10(pval)', vmin=0, vmax=20, orderDict={'intervName': ['PRE', 'AUD', 'DEL', 'REW']})

    plt.savefig(figName)
    plt.show()

In [None]:
# One figure per (test result, plotted column) combination, one panel per PID atom.
# The three figures differ only in label, source table, column and output file,
# so they are described as data and drawn by a single loop.
for testLabel, rezTest, valKey, figName in [
    ('Shuffle-Sign', rezTestShuffle, 'fracSign', 'pid_aud_significant_shuffle_outer2d.svg'),
    ('Adv-Sign', rezTestAdversarial, 'fracSign', 'pid_aud_significant_adversarial_outer2d.svg'),
    ('Shuffle-Big', rezTestShuffle, 'fracBig', 'pid_aud_big_outer2d.svg'),
]:
    print(testLabel)
    fig, ax = plt.subplots(ncols=3, figsize=(12,4), tight_layout=True)
    for iPid, pidType in enumerate(['red', 'unique', 'syn']):
        dfQuery = pd_query(rezTest, {'atom': pidType})
        plot_df_2D_outer_product(ax[iPid], dfQuery, ['datatype', 'mousename'], ['trialType', 'intervName'],
                                 valKey, vmin=0, orderDict={'intervName': ['PRE', 'AUD', 'DEL', 'REW']})

    plt.savefig(figName)
    plt.show()

In [None]:
# CDF plot for the data results file, with summary printing switched on
mousewise.plot_cdf(pwdH5data, dfSummary, printSummary=True)

In [None]:
# Average bar plot keyed by 'phase', over the interval names.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.barplot_avg(dataDB, pwdAllH5_2, dfSummary, 'phase', intervNames)

In [None]:
# Average bar plot keyed by 'trialType', over the trial types.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.barplot_avg(dataDB, pwdAllH5_2, dfSummary, 'trialType', trialTypes)

In [None]:
# Top-singlets plot.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_top_singlets(dataDB, pwdAllH5_2, dfSummary)

In [None]:
# Singlet brain plot keyed by 'phase', over the interval names.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_singlets_brainplot(dataDB, pwdAllH5_2, dfSummary, 'phase', intervNames)

In [None]:
# Singlet brain plot keyed by 'trialType', over the trial types.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_singlets_brainplot(dataDB, pwdAllH5_2, dfSummary, 'trialType',
                                  trialTypes)

In [None]:
# Singlet brain plot, mouse-by-phase "subpre" variant (semantics defined in pid_joint — not visible here).
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_singlets_brainplot_mousephase_subpre(dataDB, pwdAllH5_2, dfSummary)

In [None]:
# Singlet brain plot, mouse-by-phase "submouse" variant (semantics defined in pid_joint — not visible here).
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_singlets_brainplot_mousephase_submouse(dataDB, pwdAllH5_2, dfSummary)

In [None]:
# Triplet plot with nTop=20; the channel-drop option is currently disabled (see trailing comment).
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_triplets(pwdAllH5_2, dfSummary, nTop=20)#, dropChannels=['BC'])

In [None]:
# 2D average plot keyed by 'phase'; channels 16 and 26 are dropped, avgAxis=2.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_2D_avg(dataDB, pwdAllH5_2, dfSummary, 'phase', intervNames,
                      dropChannels=[16, 26], avgAxis=2)

In [None]:
# 2D average plot keyed by 'trialType'; channels 16 and 26 are dropped, avgAxis=2.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_2D_avg(dataDB, pwdAllH5_2, dfSummary, 'trialType', trialTypes,
                      dropChannels=[16, 26], avgAxis=2)

In [None]:
# For each of three target channels, draw the 2D target plot twice:
# once keyed by 'phase' and once keyed by 'trialType'. Channels 16 and 26 are dropped.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
for trgChName in ['SSp-bfd', 'VISrl', 'AUDp']:
    pid_joint.plot_2D_target(dataDB, pwdAllH5_2, dfSummary, trgChName,
                         'phase', intervNames, dropChannels=[16, 26])
    pid_joint.plot_2D_target(dataDB, pwdAllH5_2, dfSummary, trgChName,
                         'trialType', trialTypes, dropChannels=[16, 26])

In [None]:
# For each of three target channels, draw the mouse-by-phase "subpre" variant of the
# 2D target plot (semantics defined in pid_joint — not visible here). Channels 16 and 26 are dropped.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
for trgChName in ['SSp-bfd', 'VISrl', 'AUDp']:
    pid_joint.plot_2D_target_mousephase_subpre(dataDB, pwdAllH5_2, dfSummary,
                                              trgChName, dropChannels=[16, 26])

In [None]:
# For each of three target channels, draw the mouse-by-phase "submouse" variant of the
# 2D target plot (semantics defined in pid_joint — not visible here). Channels 16 and 26 are dropped.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
for trgChName in ['SSp-bfd', 'VISrl', 'AUDp']:
    pid_joint.plot_2D_target_mousephase_submouse(dataDB, pwdAllH5_2, dfSummary,
                                                trgChName, dropChannels=[16, 26])

In [None]:
# Top-pairs plot for unique information with nTop=20 and no channels dropped.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_unique_top_pairs(dataDB, pwdAllH5_2, dfSummary, nTop=20, dropChannels=None)

# Consistency

In [None]:
# Consistency plot by mouse, kind='fisher', limits [0, 1], no channels dropped.
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_consistency_bymouse(pwdAllH5_2, dfSummary, dropChannels=None, kind='fisher', limits=[0, 1])

In [None]:
# Consistency plot by phase, once per datatype ('bn_trial', then 'bn_session').
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_consistency_byphase(pwdAllH5_2, dfSummary, dropChannels=None,
                                   kind='fisher', limits=[0, 1], datatype='bn_trial')
pid_joint.plot_consistency_byphase(pwdAllH5_2, dfSummary, dropChannels=None, 
                                   kind='fisher', limits=[0, 1], datatype='bn_session')

In [None]:
# Consistency plot by trial type, restricted to 'Hit' and 'CR', with fisherThr=0.1;
# drawn once per datatype ('bn_trial', then 'bn_session').
# NOTE(review): `pid_joint` is never imported and `pwdAllH5_2` is only assigned in a
# commented-out line in the visible notebook, so this cell fails with NameError on a
# fresh kernel unless both are defined elsewhere.
pid_joint.plot_consistency_bytrialtype(pwdAllH5_2, dfSummary, dropChannels=None, datatype='bn_trial',
                                       trialTypes=['Hit', 'CR'], kind='fisher', fisherThr=0.1, limits=[0, 1])
pid_joint.plot_consistency_bytrialtype(pwdAllH5_2, dfSummary, dropChannels=None, datatype='bn_session',
                                       trialTypes=['Hit', 'CR'], kind='fisher', fisherThr=0.1, limits=[0, 1])

In [None]:
# Display the summary table of the data results file
dfSummary