In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats, linalg
import matplotlib.pyplot as plt
from idtxl.bivariate_pid import BivariatePID
from idtxl.data import Data

from mesostat.metric.dim3d.partialcorr import partial_corr

# Append base directory
import os,sys
# Locate the project root by searching the current working directory for the
# repository folder name, then put it on sys.path so that `src.*` can be imported.
rootname = "conservative-tripartite-testing"
thispath = os.getcwd()
rootpos = thispath.find(rootname)
if rootpos < 0:
    # Fail with an actionable message instead of the bare "substring not found".
    raise ValueError("Cannot locate project root: %r does not appear in the working directory %r"
                     % (rootname, thispath))
rootpath = os.path.join(thispath[:rootpos], rootname)

# Re-running this cell must not keep growing sys.path with duplicate entries.
if rootpath not in sys.path:
    sys.path.append(rootpath)
    print("Appended root directory", rootpath)
else:
    print("Root directory already on sys.path", rootpath)

import src.null_models_3D as null3D
import src.null_test as nulltest

%load_ext autoreload
%autoreload 2

In [None]:
# All figures and threshold tables produced by this notebook are written below this folder.
prefixPath = 'figs/cont_pcorr/'

# Neither plt.savefig nor np.savetxt creates missing folders, so make sure the target exists.
os.makedirs(prefixPath, exist_ok=True)

## PCorr Functions

In [None]:
# Keys of the dictionary returned by `pcorr`; passed to the nulltest helpers as labels.
decompLabels = ['unq_s1', 'unq_s2']


def pcorr(x, y, z):
    """
    Evaluate the two partial-correlation terms between each of x, y and z.

    Both values are delegated to ``partial_corr``. Its third argument is the
    remaining variable wrapped in a one-element array (presumably the set of
    variables conditioned on -- confirm against mesostat's ``partial_corr``).

    :param x: first variable; paired with z for 'unq_s1'
    :param y: second variable; paired with z for 'unq_s2'
    :param z: variable that both x and y are correlated against
    :return: dict with the keys 'unq_s1' and 'unq_s2'
    """
    unqS1 = partial_corr(x, z, np.array([y]))
    unqS2 = partial_corr(y, z, np.array([x]))
    return {'unq_s1': unqS1, 'unq_s2': unqS2}

In [None]:
# Mapping name -> data generator obtained from src.null_models_3D.
# The loops in this notebook call each entry as func(nData, <three parameters>).
contFuncDict = null3D.cont_method_dict()

### Testing binning-dependence

In [None]:
# Per-label value thresholds handed to the nulltest plotting helpers; every label is
# currently mapped to None. Assign `valThrDict = None` instead to drop the '_withThr'
# suffix from the names of the saved figures.
valThrDict = dict.fromkeys(('unq_s1', 'unq_s2'))

In [None]:
# Number of samples requested from each data generator.
nData = 10000

# Parameter triplets; each generator is called as func(nData, *params).
# The task name only ends up in the printed log and in the figure file names.
taskDict = {
    'yolo': np.array([0,0,0]),
    'norand': np.array([0,0,0.5]),
    'randx': np.array([0.5,0,0.5]),
    'rand': np.array([0.5,0.5,0.5])
}

# Loop-invariant pieces: the metric is plain `pcorr` (no wrapper needed) and the
# file name suffix only depends on whether thresholds are in use.
f_metric = pcorr
suffix = '' if valThrDict is None else '_withThr'

for taskName, params in taskDict.items():
    print(taskName)

    # Do continuous tests
    for funcName, func in contFuncDict.items():
        print('-', funcName)

        # Bind the current generator and parameters as defaults so the callable keeps
        # referring to this iteration even if it is evaluated after the loop moves on.
        f_data = lambda func=func, params=params: func(nData, *params)

        # Same test twice: once on the data as generated, once with haveShuffle=True.
        rezDF   = nulltest.run_tests(f_data, f_metric, decompLabels, nTest=100)
        rezDFsh = nulltest.run_tests(f_data, f_metric, decompLabels, nTest=100, haveShuffle=True)

        nulltest.plot_test_summary(rezDF, rezDFsh, suptitle=funcName, haveEff=False, valThrDict=valThrDict)
        plt.savefig(prefixPath + funcName + '_cont_pcorr_summary_'+taskName+suffix+'.svg')
        plt.show()

### Effect of variance

Continuous

In [None]:
def f_metric_cont(x, y, z):
    """Metric callback handed to the nulltest helpers: forwards (x, y, z) to `pcorr` unchanged."""
    return pcorr(x, y, z)

In [None]:
# Do continuous tests
nData = 10000

# Each strategy maps a scalar alpha onto the three parameters given to the generator.
alphaStratDict = {
    'PureSrc': lambda alpha: [0, 0, alpha],
    'ImpureX': lambda alpha: [alpha, 0, alpha],
    'Impure' : lambda alpha: [alpha, alpha, alpha],
}

# Threshold variants to plot: none at all, or one fixed value per label.
thrMetricDictDict = {
    'H0_orig' : None,
    'H0_adj' : dict(unq_s1=0.518, unq_s2=0.518)
}

for fName, f_data in contFuncDict.items():
    for alphaStratName, alphaFunc in alphaStratDict.items():
        # Plot constant thresholds for PureSrc
        avgRand = (alphaStratName == 'PureSrc')

        def f_data_eff(alpha):
            """Generate nData samples with the parameters of the current strategy."""
            return f_data(nData, *alphaFunc(alpha))

        for h0type, thrMetricDict in thrMetricDictDict.items():
            print(fName, alphaStratName, h0type)

            plotKwargs = dict(fontsize=12, nStep=1001, nSkipTest=100, nTest=200, alphaRange=(0, 1),
                              avgRand=avgRand, thrMetricDict=thrMetricDict, plotAlphaSq=False)
            nulltest.run_plot_param_effect(f_data_eff, f_metric_cont, decompLabels, **plotKwargs)

            # File name encodes data size, strategy and threshold variant.
            suffix = '_'.join(['n', str(nData), alphaStratName, h0type])
            figName = ''.join([prefixPath, fName, '_cont_pcorr_scatter_vareff_', suffix, '.svg'])

            plt.savefig(figName)
            plt.show()

In [None]:
nData = 10000

for fName, f_data in contFuncDict.items():
    print(fName)

    def f_data_eff(alpha):
        """Generate nData samples with the same alpha in all three parameter slots."""
        return f_data(nData, *([alpha] * 3))

    nulltest.run_plot_param_effect_test(f_data_eff, f_metric_cont, decompLabels,
                                        nStep=10, nTest=400, alphaRange=(0, 2),
                                        valThrDict=valThrDict)

    # Mark figures that were produced with thresholds enabled.
    if valThrDict is None:
        suffix = ''
    else:
        suffix = '_withThr'

    figName = ''.join([prefixPath, fName, '_cont_pcorr_vareff_n', str(nData), suffix, '.png'])
    plt.savefig(figName, dpi=200)
    plt.show()

### Effect of data size

In [None]:
# Parameter value shared by all strategies below.
alpha=0.25

# Legacy hard-coded thresholds, superseded by the table loaded from file below.
# thrLst = [0.6719351345001467,
#  0.6146967156250432,
#  0.5893340729485583,
#  0.5784473458733254,
#  0.5641776108079606,
#  0.5480398857334239,
#  0.536161629340794,
#  0.5292341868435549,
#  0.5214093689544852,
#  0.5155426964271462]

# thrDict = dict(zip((10**np.linspace(2, 4, 10)).astype(int), thrLst))

# NOTE: Use MRed to validate PCorr - based threshold analysis
# The table is written by the "Redundant Model" threshold scan further down in this
# notebook, so that section has to be executed at least once before this cell.
thrFileName = prefixPath + 'cont_pcorr_mred_1Dscan_unq_summary.csv'
if not os.path.isfile(thrFileName):
    raise FileNotFoundError("Threshold table '" + thrFileName + "' not found: run the 'Redundant Model' "
                            "threshold scan of this notebook first to generate it")

# Rows of the table, in order: data sizes, shuffle thresholds, adjusted thresholds.
nDataLst, thrRandLst, thrAdjLst = np.loadtxt(thrFileName, dtype=float)
thrDict = dict(zip(nDataLst, thrAdjLst))


# Fixed generator parameters per strategy, all derived from the single alpha above.
alphaStratDict = {
    'PureSrc': [0,0,alpha],
    'ImpureX': [alpha,0,alpha],
    'Impure' : [alpha,alpha,alpha],
}

# Threshold variants: none, or the data-size dependent table for both labels.
thrMetricDictDict = {
    'H0_orig' : None,
    'H0_adj' : {'unq_s1': thrDict, 'unq_s2': thrDict}
}


for fName, f_data in contFuncDict.items():
    # Here the strategies hold parameter lists (not callables), hence `alphaParams`.
    for alphaStratName, alphaParams in alphaStratDict.items():
        f_data_eff = lambda n: f_data(n, *alphaParams)

        for h0type, thrMetricDict in thrMetricDictDict.items():
            print(fName, alphaStratName, h0type)

            nulltest.run_plot_data_effect(f_data_eff, f_metric_cont, decompLabels,
                                          nStep=101, nSkipTest=10, nTest=200, pVal=0.01,
                                          thrMetricDict=thrMetricDict, fontsize=12)

            suffix = 'alpha_' + str(alpha) + '_' + alphaStratName + '_' + h0type

            plt.savefig(prefixPath + fName + '_cont_pcorr_scatter_nEff_'+suffix+'.svg')
            plt.show()

In [None]:
# Parameter value used in all three slots of every generator in this cell.
alpha=0.5
for fName, f_data in contFuncDict.items():
    print(fName)

    # Data generator as a function of the number of samples only.
    f_data_eff = lambda n: f_data(n=n, aX=alpha, aY=alpha, aZ=alpha)
    nulltest.run_plot_data_effect_test(f_data_eff, f_metric_cont, decompLabels,
                                       nStep=10, nTest=400, valThrDict=valThrDict)

    suffix = '' if valThrDict is None else '_withThr'
    # The file name previously referenced `sig`, which is not defined anywhere in this
    # notebook (NameError). The parameter actually in use is `alpha`; the '_sig' text
    # is kept so that file names stay in the established pattern.
    plt.savefig(prefixPath + fName + '_cont_pcorr_nEff_sig'+str(alpha)+suffix+'.png', dpi=200)
    plt.show()

# Computing conservative thresholds

In [None]:
from mesostat.visualization.mpl_colors import base_colors_rgb

In [None]:
# Colours returned by mesostat's base_colors_rgb for the 'tableau' key;
# entries 0 and 1 are picked by position when building colorDict.
tableauColors = base_colors_rgb(key='tableau')

In [None]:
# Plot colour per label, passed to nulltest.plot_scan_1D as `colorDict`.
colorDict = dict(
    unq_s1=tableauColors[0],
    unq_s2=tableauColors[1],
)

### Redundant Model

Find noise fraction combination that causes highest FP unique atoms
* NOTE: Scan only over $p_x = p_y$. With mismatched noise fractions, false-positive (FP) unique atoms arise naturally; this is a problem of experimental design rather than of testing, so it is not included in this testing procedure.
* NOTE 2: Actual assumption in this case is that noise is equal in all tested channels. Hence it is ok to simply test $p_x = p_y = p_z$ only

In [None]:
def f_data_2D(nData, a, b):
    """Noisy redundant model where the first two parameters share `a` and the third is `b`."""
    return null3D.cont_red_noisy(nData, a, a, b)


# Scan over the two free parameters (a, b), each within varLimits.
paramArr3D, dataArr3D = nulltest.run_scan_bare(
    f_data_2D, f_metric_cont, 2, decompLabels,
    varLimits=(0, 1), nData=5000, nStep=30, nTest=20,
)

In [None]:
# Print a summary of the scan for 'unq_s1' limited by nMax=10 -- presumably the parameter
# combinations with the ten largest values; confirm in nulltest.print_scan_max.
nulltest.print_scan_max(paramArr3D, dataArr3D, 'unq_s1', decompLabels, nMax=10)

In [None]:
# 30 and (0,1) repeat the nStep and varLimits values of the run_scan_bare call that
# produced dataArr3D -- presumably they have to match; confirm in nulltest.plot_scan_2D.
nulltest.plot_scan_2D(dataArr3D, 'unq_s1', decompLabels, 30, (0,1), fontsize=16)

#### Intermediate Conclusion: Highest FP generated at low (non-zero) source noise, but zero target noise.

In [None]:
# Value passed as nTestResample to every threshold estimate below.
nSample = 1000


def f_data_1D(nData, a):
    """Noisy redundant model with the same parameter `a` in all three slots."""
    return null3D.cont_red_noisy(nData, a, a, a)


# Ten data sizes, log-spaced between 10^2 and 10^4.
nDataLst = (10**np.linspace(2, 4, 10)).astype(int)
thrAdjLst = []
thrRandLst = []

for nData in nDataLst:
    # Scan the shared parameter over [0, 1] at this data size.
    paramArr1D, dataArr1D = nulltest.run_scan_bare(f_data_1D, f_metric_cont, 1, decompLabels,
                                                   varLimits=(0, 1), nData=nData, nStep=100, nTest=20)

    # Threshold for 'unq_s1' at pVal=0.01, first without and then with haveShuffle.
    thrAdj = nulltest.resample_get_thr(f_data_1D, f_metric_cont, 'unq_s1', decompLabels, paramArr1D, dataArr1D,
                                       nData=nData, nTestResample=nSample, pVal=0.01, haveShuffle=False)

    thrShuffle = nulltest.resample_get_thr(f_data_1D, f_metric_cont, 'unq_s1', decompLabels, paramArr1D, dataArr1D,
                                           nData=nData, nTestResample=nSample, pVal=0.01, haveShuffle=True)

    print(nData, thrAdj, thrShuffle)

    # One scan figure per data size; plot_scan_1D receives the target file name itself.
    savename = prefixPath + 'cont_pcorr_mred_1Dscan_unq_n_'+str(nData)+'.svg'
    nulltest.plot_scan_1D(paramArr1D, dataArr1D, ['unq_s1'], 'unq_s1', decompLabels,
                          maxThr=thrAdj, colorDict=colorDict, savename=savename,
                          fontsize=16, xlabel='Noise Fraction', ylabel='Partial Correlation')
    plt.close()

    thrAdjLst.append(thrAdj)
    thrRandLst.append(thrShuffle)

filePrefix = prefixPath + 'cont_pcorr_mred_1Dscan_unq_summary'

# Summary: both thresholds as a function of data size.
plt.figure()
plt.plot(nDataLst, thrAdjLst, label='adjusted', color='purple')
plt.plot(nDataLst, thrRandLst, label='shuffle')
plt.xlabel('Number of data points')
plt.ylabel('Partial correlation threshold (unq_s1)')
plt.legend()
plt.ylim([0, None])
plt.savefig(filePrefix + '.svg')
plt.show()

# Save results to file
# The row order is relied upon by the cell that loads this table with np.loadtxt;
# the header is written as a '#' comment line, which np.loadtxt skips by default.
rezArr = np.array([nDataLst, thrRandLst, thrAdjLst])
np.savetxt(filePrefix + '.csv', rezArr, header='rows: nData, thrShuffle, thrAdj')

### Synergistic Model

In [None]:
def f_data_3D(nData, a, b, c):
    """Noisy XOR model; all arguments are forwarded to null3D.cont_xor_noisy unchanged."""
    return null3D.cont_xor_noisy(nData, a, b, c)


# Scan over the three free parameters (a, b, c), each within varLimits.
paramArr3D, dataArr3D = nulltest.run_scan_bare(
    f_data_3D, f_metric_cont, 3, decompLabels,
    varLimits=(0, 1), nData=5000, nStep=10, nTest=20,
)

In [None]:
# Print a summary of the scan for 'unq_s1' limited by nMax=10 -- presumably the parameter
# combinations with the ten largest values; confirm in nulltest.print_scan_max.
nulltest.print_scan_max(paramArr3D, dataArr3D, 'unq_s1', decompLabels, nMax=10)

In [None]:
# 10 and (0, 1) repeat the nStep and varLimits values of the run_scan_bare call that
# produced dataArr3D -- presumably they have to match. The meaning of the positional 5
# is not visible from this notebook; confirm in nulltest.plot_scan_3D_2D_bytrg.
nulltest.plot_scan_3D_2D_bytrg(paramArr3D, dataArr3D, 'unq_s1', decompLabels, 5, 10, (0, 1),
                               fontsize=16, haveColorbar=True)

#### Intermediate Conclusion: Synergy behaviour completely erratic, likely non-significant

In [None]:
# Value passed as nTestResample to every threshold estimate below.
nSample = 1000


def f_data_1D(nData, a):
    """Noisy XOR model with the same parameter `a` in all three slots."""
    return null3D.cont_xor_noisy(nData, a, a, a)


# Ten data sizes, log-spaced between 10^2 and 10^4.
nDataLst = (10**np.linspace(2, 4, 10)).astype(int)
thrAdjLst = []
thrRandLst = []

for nData in nDataLst:
    # Scan the shared parameter over [0, 1] at this data size.
    paramArr1D, dataArr1D = nulltest.run_scan_bare(f_data_1D, f_metric_cont, 1, decompLabels,
                                                   varLimits=(0, 1), nData=nData, nStep=100, nTest=20)

    # Threshold for 'unq_s1' at pVal=0.01, first without and then with haveShuffle.
    thrAdj = nulltest.resample_get_thr(f_data_1D, f_metric_cont, 'unq_s1', decompLabels, paramArr1D, dataArr1D,
                                       nData=nData, nTestResample=nSample, pVal=0.01, haveShuffle=False)

    thrShuffle = nulltest.resample_get_thr(f_data_1D, f_metric_cont, 'unq_s1', decompLabels, paramArr1D, dataArr1D,
                                           nData=nData, nTestResample=nSample, pVal=0.01, haveShuffle=True)

    print(nData, thrAdj, thrShuffle)

    # One scan figure per data size; plot_scan_1D receives the target file name itself.
    savename = prefixPath + 'cont_pcorr_msyn_1Dscan_unq_n_'+str(nData)+'.svg'
    nulltest.plot_scan_1D(paramArr1D, dataArr1D, ['unq_s1'], 'unq_s1', decompLabels,
                          maxThr=thrAdj, colorDict=colorDict, savename=savename,
                          fontsize=16, xlabel='Noise Fraction', ylabel='Partial Correlation')
    plt.close()

    thrAdjLst.append(thrAdj)
    thrRandLst.append(thrShuffle)

filePrefix = prefixPath + 'cont_pcorr_msyn_1Dscan_unq_summary'

# Summary: both thresholds as a function of data size.
plt.figure()
plt.plot(nDataLst, thrAdjLst, label='adjusted', color='purple')
plt.plot(nDataLst, thrRandLst, label='shuffle')
plt.xlabel('Number of data points')
plt.ylabel('Partial correlation threshold (unq_s1)')
plt.legend()
plt.ylim([0, None])
plt.savefig(filePrefix + '.svg')
plt.show()

# Save results to file
# Rows, in order: data sizes, shuffle thresholds, adjusted thresholds. The header is
# written as a '#' comment line, which np.loadtxt skips by default.
rezArr = np.array([nDataLst, thrRandLst, thrAdjLst])
np.savetxt(filePrefix + '.csv', rezArr, header='rows: nData, thrShuffle, thrAdj')