In [1]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display

# Append base directory
# The project root is found by searching for `rootname` inside the current
# working directory string, so the notebook has to be started from somewhere
# inside the repository; otherwise `str.index` raises ValueError.
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"
# thispath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
thispath = os.getcwd()
# Keep everything before the first occurrence of rootname, then re-append rootname
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# Presumably needed so that the project-local `lib.*` imports below resolve - TODO confirm
sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.metric.metric import MetricCalculator
from mesostat.utils.qt_helper import gui_fname, gui_fnames, gui_fpath
from mesostat.utils.hdf5_io import DataStorage
from mesostat.utils.pandas_helper import pd_query

from lib.gallerosalas.data_fc_db_aud_raw import DataFCDatabase
from lib.common.visualization import merge_image_sequence_movie
import lib.analysis.triplet_analysis.mousewise as mousewise
import lib.analysis.triplet_analysis.calc_reader_mousewise as calc_reader

%load_ext autoreload
%autoreload 2

Appended root directory /home/alyosha/work/git/pub-2020-exploratory-analysis


In [2]:
# Configuration dictionary handed to DataFCDatabase in the next cell.
# NOTE(review): the active data root is a machine-specific absolute path; switch to
# the gui_fpath line or one of the alternatives below when running elsewhere.
params = {}
# params['root_path_data']  = gui_fpath("Path to data collection",  './')
params['root_path_data'] = '/media/alyosha/Data/TE_data/yasirdata_aud_raw/'
# params['root_path_data'] = '/home/alyosha/data/yasirdata_aud_raw/'
# params['root_path_data'] = '/media/aleksejs/DataHDD/work/data/yasir/yasirdata_aud_raw'

In [3]:
# Build the database handle from the configured data root; construction reads the
# label/area/session files and reports the mice it found (see the cell output).
dataDB = DataFCDatabase(params)

Reading channel label file
Reading channel area file
Reading allen brain map
Reading session structure
Searching for data files
Found mice ['mou_5', 'mou_6', 'mou_7', 'mou_9']


In [4]:
# The return value is discarded, so this is called for its effect on dataDB.
# NOTE(review): presumably precomputes distances between areas for later use - confirm in DataFCDatabase.
dataDB.calc_shortest_distances()

In [5]:
# Sanity check of what was loaded: mice, number of sessions, available datatypes,
# and the channel count of one example mouse.
print('mice', dataDB.mice)
print('nSessions', len(dataDB.sessions))
print('datatypes', dataDB.get_data_types())
print('nChannel', dataDB.get_nchannels('mou_5'))

mice {'mou_9', 'mou_6', 'mou_7', 'mou_5'}
nSessions 4
datatypes ['raw', 'bn_session', 'bn_trial']
nChannel 27


## All - Distribution - Nosession

In [None]:
# Locations of the precomputed HDF5 result files used by every analysis cell below.
# NOTE(review): machine-specific absolute path; "rand" presumably holds the
# shuffled/randomised control results - confirm.
prefixPath = '/media/alyosha/Data/TE_data/calculations/pr2/yasir-aud/'

pwdH5data = prefixPath + 'pr2_gallerosalas_multimouse_data.h5'
pwdH5rand = prefixPath + 'pr2_gallerosalas_multimouse_rand.h5'
# pwdH5data = prefixPath + 'pid_gallerosalas_multimouse_nbin_2_data.h5'
# pwdH5rand = prefixPath + 'pid_gallerosalas_multimouse_nbin_2_rand.h5'

In [None]:
# Interval names come from the database; trial types are restricted to Hit and CR
# (the commented line keeps the full set including Miss and FA).
intervNames = dataDB.get_interval_names()
#trialTypes = ['Hit', 'CR', 'Miss', 'FA']
trialTypes = ['Hit', 'CR']

In [None]:
# Summary table of the real-data result file, plus a second table derived from it and dataDB.
# NOTE(review): judging by its name, summary_update_data_sizes adds the data size of
# each sweep - confirm in calc_reader.
dfSummary = calc_reader.summary_df(pwdH5data)
dfSummaryDataSizes = calc_reader.summary_update_data_sizes(dfSummary, dataDB)

In [None]:
# Same kind of summary table, built from the "rand" result file (pwdH5rand)
dfSummaryRand = calc_reader.summary_df(pwdH5rand)

### 1. Global Significance Testing

1. For every sweep, get the data size
2. Inside the test, generate an AdversaryDist for each data size and save it to a file
3. Load the file as a dict and provide it as an argument to the test
4. In the test, produce a dataframe: Sweep -> (frac>Shuffle, pValShuffle, frac>Conserv, pValConserv)
5. Plot the table

In [None]:
# File with the adversarial distributions that the next cell reads.
# NOTE(review): machine-specific absolute path.
pwdAdversarial = '/media/alyosha/Data/TE_data/pr2_rand_dist.h5'

In [None]:
# Read once here; the result is passed to the adversarial test below
dictAdversarial = calc_reader.read_adversarial_distr_file(pwdAdversarial)

In [None]:
# Test the real results against the "rand" results. The returned table is plotted
# further below via its 'atom', '-log10(pval)', 'fracSign' and 'fracBig' columns.
# NOTE(review): thrBig is presumably the magnitude threshold behind 'fracBig' - confirm.
rezTestShuffle = mousewise.plot_violin_test(pwdH5data, pwdH5rand, dfSummary, dfSummaryRand, thrBig=0.01)

In [None]:
# Test the real results against the adversarial distributions loaded above. The returned
# table is plotted further below via its 'atom', '-log10(pval)' and 'fracSign' columns.
# NOTE(review): thrBig is presumably a magnitude threshold - confirm.
rezTestAdversarial = mousewise.plot_violin_test_adversarial(pwdH5data, dictAdversarial, dfSummaryDataSizes, thrBig=0.01)

In [None]:
from mesostat.visualization.mpl_matrix import plot_df_2D_outer_product

In [None]:
def inf_update(df, key, val):
    """Replace infinite entries of one column with a finite stand-in.

    Both +inf and -inf in column ``key`` are overwritten with ``val``.
    The frame is modified in place and also returned.
    """
    for infValue in (np.inf, -np.inf):
        isInfinite = df[key] == infValue
        df.loc[isInfinite, key] = val
    return df

# Infinite -log10(pval) entries are replaced by a large finite value so that they can be
# drawn; the colour scale below is capped at vmax=20, so they show as saturated cells.
rezTestShuffle = inf_update(rezTestShuffle, '-log10(pval)', 1000)
rezTestAdversarial = inf_update(rezTestAdversarial, '-log10(pval)', 1000)


def plot_neglogpval_panels(dfTest, pidTypes, fname, ncols=3):
    """Draw one '-log10(pval)' heatmap per PID atom and save the figure.

    :param dfTest:    test result table with 'atom' and '-log10(pval)' columns
    :param pidTypes:  atoms to plot, one panel each, left to right
    :param fname:     file name the figure is saved under
    :param ncols:     number of panels allocated; kept at 3 for both figures so that
                      panel sizes match between them
    """
    fig, ax = plt.subplots(ncols=ncols, figsize=(12,4), tight_layout=True)
    for iPid, pidType in enumerate(pidTypes):
        dfQuery = pd_query(dfTest, {'atom': pidType})
        plot_df_2D_outer_product(ax[iPid], dfQuery, ['datatype', 'mousename'], ['trialType', 'intervName'],
                                 '-log10(pval)', vmin=0, vmax=20,
                                 orderDict={'intervName': ['PRE', 'AUD', 'DEL', 'REW']})

    # Panels that received no atom would otherwise be saved as empty framed axes
    for axUnused in ax[len(pidTypes):]:
        axUnused.set_axis_off()

    plt.savefig(fname)
    plt.show()


# Plot significance
print('Shuffle-Sign')
plot_neglogpval_panels(rezTestShuffle, ['red', 'unique', 'syn'], 'pr2_aud_neglogpval_shuffle_outer2d.svg')

# Plot significance
print('Adv-Sign')
plot_neglogpval_panels(rezTestAdversarial, ['red', 'unique'], 'pr2_aud_neglogpval_adversarial_outer2d.svg')

In [None]:
def plot_fraction_panels(dfTest, pidTypes, valueKey, fname, ncols=3):
    """Draw one heatmap of ``valueKey`` per PID atom and save the figure.

    :param dfTest:    test result table with an 'atom' column and a ``valueKey`` column
    :param pidTypes:  atoms to plot, one panel each, left to right
    :param valueKey:  column shown as colour ('fracSign' or 'fracBig' in this notebook)
    :param fname:     file name the figure is saved under
    :param ncols:     number of panels allocated; kept at 3 for all figures so that
                      panel sizes match between them
    """
    fig, ax = plt.subplots(ncols=ncols, figsize=(12,4), tight_layout=True)
    for iPid, pidType in enumerate(pidTypes):
        dfQuery = pd_query(dfTest, {'atom': pidType})
        plot_df_2D_outer_product(ax[iPid], dfQuery, ['datatype', 'mousename'], ['trialType', 'intervName'],
                                 valueKey, vmin=0, orderDict={'intervName': ['PRE', 'AUD', 'DEL', 'REW']})

    # Panels that received no atom would otherwise be saved as empty framed axes
    for axUnused in ax[len(pidTypes):]:
        axUnused.set_axis_off()

    plt.savefig(fname)
    plt.show()


print('Shuffle-Sign')
plot_fraction_panels(rezTestShuffle, ['red', 'unique', 'syn'], 'fracSign',
                     'pr2_aud_significant_shuffle_outer2d.svg')

print('Adv-Sign')
plot_fraction_panels(rezTestAdversarial, ['red', 'unique'], 'fracSign',
                     'pr2_aud_significant_adversarial_outer2d.svg')

print('Shuffle-Big')
plot_fraction_panels(rezTestShuffle, ['red', 'unique', 'syn'], 'fracBig',
                     'pr2_aud_big_outer2d.svg')

In [None]:
# Singlet summary of the 'syn' atom, aggregated with nanmean; the returned table is
# plotted per datatype in a later cell.
# NOTE(review): magThr is presumably a magnitude threshold for keeping entries - confirm.
dfTop1Dmean = mousewise.plot_top_singlets_bymouse_outer2D(dataDB, pwdH5data, dfSummary, 'syn',
                                                          func=np.nanmean, dropna=True, magThr=0.02)

In [None]:
# Same singlet summary of the 'syn' atom, but aggregated with nanmax instead of nanmean;
# the returned table is plotted per datatype in a later cell.
# NOTE(review): magThr is presumably a magnitude threshold for keeping entries - confirm.
dfTop1Dmax = mousewise.plot_top_singlets_bymouse_outer2D(dataDB, pwdH5data, dfSummary, 'syn',
                                                          func=np.nanmax, dropna=True, magThr=0.02)

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(20, 10), tight_layout=True)

# One panel per datatype, showing the nanmean-aggregated singlet table
for i, datatype in enumerate(['bn_trial', 'bn_session']):
    dfQuery = pd_query(dfTop1Dmean, {'datatype': datatype})
    # Rows whose trial type is the literal string 'None' are left out
    dfQuery = dfQuery[dfQuery['trialType'] != 'None']
    ax[i].set_title(datatype)
    # The stimulus interval of this dataset is labelled 'AUD' (as in the other plots
    # of this notebook), so the column order must name 'AUD', not 'TEX'
    plot_df_2D_outer_product(ax[i], dfQuery, ['label'], ['datatype', 'mousename', 'intervName', 'trialType'],
                             'syn', vmin=0, orderDict={'intervName': ['PRE', 'AUD', 'DEL', 'REW']},
                             dropEmpty=True)

#plt.savefig('pid_tex_big_outer2d.svg')
plt.show()

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(20, 10), tight_layout=True)

# One panel per datatype, showing the nanmax-aggregated singlet table
for i, datatype in enumerate(['bn_trial', 'bn_session']):
    dfQuery = pd_query(dfTop1Dmax, {'datatype': datatype})
    # Rows whose trial type is the literal string 'None' are left out
    dfQuery = dfQuery[dfQuery['trialType'] != 'None']
    ax[i].set_title(datatype)
    # The stimulus interval of this dataset is labelled 'AUD' (as in the other plots
    # of this notebook), so the column order must name 'AUD', not 'TEX'
    plot_df_2D_outer_product(ax[i], dfQuery, ['label'], ['datatype', 'mousename', 'intervName', 'trialType'],
                             'syn', vmin=0, orderDict={'intervName': ['PRE', 'AUD', 'DEL', 'REW']},
                             dropEmpty=True)

# plt.savefig('pid_tex_big_outer2d.svg')
plt.show()

In [None]:
# Select triplets per mouse; the result is indexed by datatype ('bn_trial', 'bn_session')
# in the cells below, and each entry is a table with 'label' and 'syn' columns.
# NOTE(review): nTop / thrBig / nConsistent presumably mean "top-N triplets above thrBig that
# recur in nConsistent mice" - confirm in mousewise.
dfSelectedTriplets = mousewise.plot_filter_top_triplets_bymouse(dataDB, pwdH5data, dfSummary,
                                                                nTop=30, thrBig=0.01, nConsistent=4)

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(20, 10), tight_layout=True)

# Rows are triplet labels, columns are (mouse, interval, trial type) combinations
rowKeys = ['label']
colKeys = ['mousename', 'intervName', 'trialType']
intervOrder = {'intervName': ['PRE', 'AUD', 'DEL', 'REW']}

# One heatmap of the selected triplets per datatype
for i, datatype in enumerate(['bn_trial', 'bn_session']):
    axThis = ax[i]
    axThis.set_title(datatype)
    plot_df_2D_outer_product(
        axThis, dfSelectedTriplets[datatype], rowKeys, colKeys, 'syn',
        vmin=0, vmax=0.1, orderDict=intervOrder, dropEmpty=True
    )

plt.savefig('pr2_aud_big_triplets_outer2d.svg')
plt.show()

In [None]:
def plot_triplet_clusters(fig, ax, dataDB, labelsS1, labelsS2, labelsTrg):
    """Draw the three label groups of a triplet as area clusters.

    Each label is translated into its position within the canonical label list
    of ``dataDB``; the resulting index lists are passed to
    ``dataDB.plot_area_clusters`` under the keys 'source1', 'source2' and 'target'.

    :param fig:        figure forwarded to ``dataDB.plot_area_clusters``
    :param ax:         axes forwarded to ``dataDB.plot_area_clusters``
    :param dataDB:     object providing ``map_channel_labels_canon`` and ``plot_area_clusters``
    :param labelsS1:   labels of the first source group
    :param labelsS2:   labels of the second source group
    :param labelsTrg:  labels of the target group
    :raises ValueError: if a label is not among the canonical labels
    """
    canonLabels = list(dataDB.map_channel_labels_canon().values())
    roleLabels = (('source1', labelsS1), ('source2', labelsS2), ('target', labelsTrg))

    clusterDict = {
        role: list(map(canonLabels.index, labels))
        for role, labels in roleLabels
    }

    dataDB.plot_area_clusters(fig, ax, clusterDict, haveLegend=True)

In [None]:
# Hand-picked groups of area labels, organised as datatype -> result key -> three label lists.
# The three lists are unpacked by the plotting loop as
# [source-1 labels, source-2 labels, target labels].
clusterDictDict = {
    'bn_trial': {
        'rez1':  [['MOp', 'MOs', 'SSp-ll'], ['RSPd', 'VISpm'], ['TEa', 'AUDp', 'AUDpo']],
        'rez2':  [['SSp-n'], ['SSp-m'], ['VISa', 'SSp-tr']],
        'rez3':  [['VISpl', 'VISpor'], ['SSp-tr'], ['SSp-m']],
    },
    'bn_session': {
        'rez1':  [['SSp-ll', 'SSp-tr', 'SSp-ul'], ['VISam'], ['VISli']],
        'rez2':  [['SSp-m', 'SSp-n'], ['VISpl'], ['AUDp', 'SSp-ll', 'SSp-tr', 'VISa', 'VISam']],
        'rez3':  [['SSs'], ['MOs', 'VISpl'], ['VISam', 'SSp-tr']],
        'rez4':  [['TEa', 'VISli', 'VISpor'], ['VISl', 'VISpl'], ['SSp-ll']],
        'rez5':  [['VISa'], ['SSp-ll', 'SSp-tr'], ['SSp-m', 'SSp-n']],
        'rez6':  [['VISpl'], ['MOs'], ['SSp-tr', 'VISa',  'VISam']]
    }
}

In [None]:
labelsCanon = list(dataDB.map_channel_labels_canon().values())


def _triplet_to_string(s1, s2, t):
    """Format a triplet the way it is matched against the 'label' column, e.g. '(A, B, C)'."""
    return str(tuple([s1,s2,t])).replace("'", "")


# Condition columns: every summary column except the identifier and value columns.
# Does not depend on the loop variables, so it is computed once.
groupLst = sorted(set(dfSummary.columns) - {'key', 'label', 'syn'})

for datatype, clusterDict in clusterDictDict.items():
    for tripletKey, (s1Lst, s2Lst, trgLst) in clusterDict.items():
        print(datatype, s1Lst, s2Lst, trgLst)

        # One record per condition. Rows are collected in a list and turned into a frame
        # once, because DataFrame.append no longer exists in pandas >= 2.0.
        rowLst = []
        for key, dataCond in dfSelectedTriplets[datatype].groupby(groupLst):
            # Gather the rows of every (source1, source2, target) combination of this cluster
            subLst = []
            for s1 in s1Lst:
                for s2 in s2Lst:
                    for t in trgLst:
                        dfThis = pd_query(dataCond, {'label': _triplet_to_string(s1,s2,t)})
                        if len(dfThis) > 0:
                            subLst.append(dfThis)

            # Every condition is expected to contain at least one triplet of the cluster
            assert len(subLst) > 0
            dfSub = pd.concat(subLst)

            # Cluster value for this condition = mean 'syn' over all matching triplets
            rowThis = dict(zip(groupLst, key))
            rowThis['syn'] = np.mean(dfSub['syn'])
            rowLst.append(rowThis)

        dfTot = pd.DataFrame(rowLst)

        # Left: where the cluster sits on the brain map. Right: its mean 'syn' per mouse and condition.
        fig, ax = plt.subplots(ncols=2, figsize=(8,4))
        plot_triplet_clusters(fig, ax[0], dataDB, s1Lst, s2Lst, trgLst)
        plot_df_2D_outer_product(ax[1], dfTot, ['mousename'],
                                 ['intervName', 'trialType'],
                                 'syn', vmin=0, vmax=0.06, orderDict={'intervName': ['PRE', 'AUD', 'DEL', 'REW']},
                                 dropEmpty=True)

        plt.savefig('large_triplet_brainplot_aud_'+datatype + '_' + tripletKey + '.svg')
        plt.show()
        # Release the figure so that the loop does not accumulate open figures
        plt.close(fig)