In [None]:
import numpy as np
import pandas as pd
from scipy import stats, linalg
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

# Append base directory
import os,sys #,inspect
# Locate the repository root by name inside the current working directory and
# make it importable, so that the project-local `lib.*` imports below resolve.
rootname = "pub-2020-exploratory-analysis"
#thispath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
thispath = os.getcwd()
# Fail with an actionable message instead of str.index's bare "substring not found"
if rootname not in thispath:
    raise ValueError("Expected the working directory to be inside '" + rootname
                     + "', but it is '" + thispath + "'")
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# Keep the cell idempotent: re-running it must not stack duplicate sys.path entries
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)


from mesostat.metric.dim3d.r2 import pr2_quadratic_triplet_decomp_1D
from mesostat.visualization.mpl_colors import base_colors_rgb

import lib.nullmodels.null_models_3D as null3D
import lib.nullmodels.null_test as nulltest

%load_ext autoreload
%autoreload 2

In [None]:
# Dictionary of continuous null-model data generators, keyed by name.
# Later cells iterate over its items and call each value as
# f(n, aX, aY, aZ) (keyword names taken from the calls further down).
contFuncDict = null3D.cont_method_dict()

## Noise-dependence

### Effect sizes for example variance

In [None]:
# Names attached, in order, to the four values returned by the decomposition
decompLabels = ['unq_s1', 'unq_s2', 'red', 'syn']


def f_metric_cont(x, y, z):
    """Run the quadratic triplet decomposition on (x, y, z) and label its outputs.

    Returns a dict pairing each entry of `decompLabels` with the value at the
    same position in the result of `pr2_quadratic_triplet_decomp_1D(x, y, z)`.
    """
    atomValues = pr2_quadratic_triplet_decomp_1D(x, y, z)
    return {label: value for label, value in zip(decompLabels, atomValues)}

In [None]:
# Optional thresholds forwarded as `valThrDict=` to the nulltest plotting helpers.
# When it is None the saved figure names carry no '_withThr' suffix.
# NOTE(review): the commented-out example below uses the keys 'shd_s1_s2' and
# 'syn_s1_s2', which do not match decompLabels ('red', 'syn') - confirm the
# expected key names before enabling it.
valThrDict = None
# valThrDict = {'unq_s1': None, 'unq_s2': None, 'shd_s1_s2': None, 'syn_s1_s2': 0.28}

In [None]:
# Named parameter triplets that are unpacked into each data generator after
# the sample count (presumably the noise levels aX, aY, aZ - TODO confirm order).
taskDict = {
    'yolo': np.array([0,0,0]),
    'norand': np.array([0,0,0.5]),
    'randx': np.array([0.5,0,0.5]),
    'rand': np.array([0.5,0.5,0.5])
}

# The threshold setting does not change inside the loops, so the filename
# suffix is computed once up front.
suffix = '' if valThrDict is None else '_withThr'

for taskName, params in taskDict.items():
    print(taskName)

    # Do continuous tests
    for funcName, func in contFuncDict.items():
        print('-', funcName)

        # Zero-argument sampler; it is consumed within this iteration, so the
        # late binding of `func` and `params` is harmless here.
        f_data   = lambda: func(10000, *params)

        # Same test twice: once as-is and once with haveShuffle=True
        rezDF   = nulltest.run_tests(f_data, f_metric_cont, decompLabels, nTest=100)
        rezDFsh = nulltest.run_tests(f_data, f_metric_cont, decompLabels, nTest=100, haveShuffle=True)

        nulltest.plot_test_summary(rezDF, rezDFsh, suptitle=funcName, haveEff=False, valThrDict=valThrDict)
        plt.savefig(funcName + '_r2_summary_'+taskName+suffix+'.png', dpi=200)
        plt.show()

### Effect of variance

In [None]:
# Sweep the parameter alpha for every continuous generator, every
# parameter-placement strategy and both threshold settings; one figure each.
nSample = 10000

# Optional per-atom thresholds passed as `thrMetricDict=`; None passes no thresholds
thrMetricDictDict = {
    'H0_orig': None,
    'H0_adj': dict(unq_s1=0.195, unq_s2=0.195, red=None, syn=None),
}

# Each strategy turns a scalar alpha into the three arguments that follow the
# sample count in the generator call (presumably aX, aY, aZ - TODO confirm order)
alphaStratDict = {
    'ImpureX': lambda alpha: [alpha, 0, alpha],
    'PureSrc': lambda alpha: [0, 0, alpha],
    'Impure':  lambda alpha: [alpha, alpha, alpha],
}

for fName, f_data in contFuncDict.items():
    for alphaStratName, alphaFunc in alphaStratDict.items():

        def f_data_eff(alpha):
            """Sample nSample points from the current generator with strategy-expanded alpha."""
            return f_data(nSample, *alphaFunc(alpha))

        for h0type, thrMetricDict in thrMetricDictDict.items():
            print(fName, alphaStratName, h0type)

            nulltest.run_plot_param_effect(
                f_data_eff, f_metric_cont, decompLabels,
                nStep=1001, nSkipTest=100, nTest=200, alphaRange=(0, 1),
                thrMetricDict=thrMetricDict, plotAlphaSq=False, fontsize=12,
            )

            suffix = 'n_' + str(nSample) + '_' + alphaStratName + '_' + h0type
            plt.savefig(fName + '_pr2_scatter_vareff_' + suffix + '.png', dpi=300)
            plt.show()

In [None]:
# Parameter sweep with the same alpha applied to aX, aY and aZ,
# one summary figure per continuous generator.
nSample = 10000

for fName, f_data in contFuncDict.items():
    print(fName)

    def f_data_eff(alpha):
        """Sample nSample points with alpha used for all three generator parameters."""
        return f_data(n=nSample, aX=alpha, aY=alpha, aZ=alpha)

    nulltest.run_plot_param_effect_test(
        f_data_eff, f_metric_cont, decompLabels,
        nStep=10, nTest=400, alphaRange=(0, 1), valThrDict=valThrDict,
    )

    # Tag the output file when explicit thresholds are in use
    if valThrDict is None:
        suffix = ''
    else:
        suffix = '_withThr'
    plt.savefig(fName + '_r2_vareff_n' + str(nSample) + suffix + '.png', dpi=200)
    plt.show()

### Effect of number of samples

In [None]:
# Sweep the sample count at a fixed alpha for every continuous generator,
# every parameter-placement strategy and both threshold settings.
alpha = 0.5

# Optional per-atom thresholds passed as `thrMetricDict=`; None passes no thresholds
thrMetricDictDict = {
    'H0_orig': None,
    'H0_adj': dict(unq_s1=0.195, unq_s2=0.195, red=None, syn=None),
}

# Fixed argument triplets per strategy, unpacked after the sample count
# (plain lists here, unlike the alpha-dependent callables of the alpha sweep)
alphaStratDict = {
    'PureSrc': [0, 0, alpha],
    'ImpureX': [alpha, 0, alpha],
    'Impure':  [alpha, alpha, alpha],
}

for fName, f_data in contFuncDict.items():
    for alphaStratName, noiseArgs in alphaStratDict.items():

        def f_data_eff(n):
            """Sample n points from the current generator with the fixed argument triplet."""
            return f_data(n, *noiseArgs)

        for h0type, thrMetricDict in thrMetricDictDict.items():
            print(fName, alphaStratName, h0type)

            nulltest.run_plot_data_effect(
                f_data_eff, f_metric_cont, decompLabels,
                nStep=101, nSkipTest=10, nTest=200, pVal=0.01,
                thrMetricDict=thrMetricDict, fontsize=12,
            )

            suffix = 'sig_' + str(alpha) + '_' + alphaStratName + '_' + h0type
            plt.savefig(fName + '_pr2_scatter_nEff_' + suffix + '.png', dpi=300)
            plt.show()

In [None]:
# Sample-count sweep with the same fixed alpha applied to aX, aY and aZ,
# one summary figure per continuous generator.
alpha=0.5
for fName, f_data in contFuncDict.items():
    print(fName)

    # Sampler with the sample count as its only free argument
    f_data_eff = lambda n: f_data(n=n, aX=alpha, aY=alpha, aZ=alpha)
    nulltest.run_plot_data_effect_test(f_data_eff, f_metric_cont, decompLabels,
                                       nStep=10, nTest=400, valThrDict=valThrDict)
    
    suffix = '' if valThrDict is None else '_withThr'
    # The filename records the alpha used above; the original referenced an
    # undefined name `sig` here, which raised NameError before the figure was saved.
    plt.savefig(fName + '_r2_nEff_sig'+str(alpha)+suffix+'.png', dpi=200)
    plt.show()

### Test relationship between unique information and redundancy for fixed data size

#### 2. Finding max synergy parameters - GridSearch1D

In [None]:
# Colour sequence for the 'tableau' key from mesostat; the 1D scan further
# down picks entries by position (indices 2 and 0).
tableauColors = base_colors_rgb(key='tableau')

In [None]:
# One row per 1D scan. Columns, in the order they are unpacked by the scan loop:
#   labelA, labelB - short tags used to build the output filename prefix
#   atomA, atomB   - decomposition atom names handed to run_plot_1D_scan
#   f_data_1D      - sampler taking (nSample, alpha) and expanding alpha into
#                    the three generator parameters
# NOTE(review): the red/syn row passes 0 as the last generator argument while
# every other row passes alpha for all three - confirm this is intentional.
loopLst = [
    ['red', 'unq', 'red',    'unq_s1', lambda nSample, alpha: null3D.cont_red_noisy(nSample, alpha, alpha, alpha)],
    ['red', 'syn', 'red',    'syn',    lambda nSample, alpha: null3D.cont_red_noisy(nSample, alpha, alpha, 0)],
    ['unq', 'red', 'unq_s1', 'red',    lambda nSample, alpha: null3D.cont_unq_noisy(nSample, alpha, alpha, alpha)],
    ['unq', 'syn', 'unq_s1', 'syn',    lambda nSample, alpha: null3D.cont_unq_noisy(nSample, alpha, alpha, alpha)],
    ['syn', 'red', 'syn',    'red',    lambda nSample, alpha: null3D.cont_xor_noisy(nSample, alpha, alpha, alpha)],
    ['syn', 'unq', 'syn',    'unq_s1', lambda nSample, alpha: null3D.cont_xor_noisy(nSample, alpha, alpha, alpha)]
]

In [None]:
# Sample counts 1000, 2000, ..., 10000
nSampleLst = 1000 * np.arange(1, 11)

for labelA, labelB, atomA, atomB, f_data_1D in loopLst:
    prefix = labelA + 'Cont_r2_1Dscan_' + labelB

    # The two values returned by each scan, collected per sample count
    alphaMaxLst, thrLst = [], []

    for nSample in nSampleLst:
        print(nSample)
        alphaMax, thr = nulltest.run_plot_1D_scan(
            f_data_1D, f_metric_cont, atomA, atomB,
            varLimits=(0, 1), nSample=nSample, nStep=100, nTest=100,
            colorA=tableauColors[2], colorB=tableauColors[0],
        )
        plt.savefig(prefix + '_n_' + str(nSample) + '.png', dpi=200)
        plt.show()

        alphaMaxLst.append(alphaMax)
        thrLst.append(thr)

    # Summary figure: both returned quantities as a function of sample count
    plt.figure()
    plt.plot(nSampleLst, alphaMaxLst, label='param')
    plt.plot(nSampleLst, thrLst, label='thr')
    plt.legend()
    plt.savefig(prefix + '_summary.png', dpi=200)
    plt.show()

#### 3. Determining Unique-Redundancy Relationship

In [None]:
# Generator families for the scatter exploration: the continuous and the
# discrete null-model dictionaries, keyed by a short family tag.
discrDataMethodDict = dict(
    Cont=null3D.cont_method_dict(),
    Discr=null3D.discr_method_dict(),
)

# Ordered (atomA, atomB) pairs to explore. This must be a list: the original
# used a set literal, and a set of lists raises
# "TypeError: unhashable type: 'list'" as soon as the cell runs.
atomCombList = [
    ['red',    'unq_s1'],
    ['red',    'syn'],
    ['unq_s1', 'red'],
    ['unq_s1', 'syn'],
    ['syn',    'red'],
    ['syn',    'unq_s1']
]

In [None]:
# Run the scatter exploration for every generator in every family and for
# every ordered atom pair.
# NOTE(review): f_metric_cont is applied to the 'Discr' generators as well -
# confirm that the continuous metric is intended for discrete data.
for discrKey in discrDataMethodDict:
    dataMethodsDict = discrDataMethodDict[discrKey]
    for fDataLabel in dataMethodsDict:
        f_data_3D = dataMethodsDict[fDataLabel]
        for atomA, atomB in atomCombList:
            nulltest.run_plot_scatter_explore(
                f_data_3D, f_metric_cont, atomA, atomB, 3,
                varLimits=(0, 1), nSample=1000, nTestDim=20,
            )