### Evaluation of the calibration

In [None]:
### imports
import os
import pandas as pd
import numpy as np
#from sklearn import metrics
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [None]:
### parameters
# threshold that p-values are compared against when deriving CI decisions
alpha = 0.05
# k_CMI and k_perm values that make up the rows / columns of the tables
sum_kcmi = [5, 25, 100, 200]
sum_kperm = [5, 25, 100, 200]
# auc function
def calc_auc(y, pred):
    """Return the area under the ROC curve of ``pred`` against labels ``y``.

    The previous body called ``metrics.roc_curve`` / ``metrics.auc``, but the
    import of ``metrics`` is commented out in this file, so the call raised
    ``NameError``. The value is now computed with numpy as the tie-aware
    rank statistic U / (n_pos * n_neg), which equals the trapezoidal ROC AUC.

    Args:
        y: Binary ground truth; entries equal to 1 are the positive class.
            Lists, arrays, Series and one-column DataFrames are accepted.
        pred: Scores or binary decisions, same length as ``y``.

    Returns:
        The AUC as a float, or NaN when ``y`` holds only one class.
    """
    truth = np.asarray(y).ravel()
    score = np.asarray(pred, dtype=float).ravel()

    positive = truth == 1
    n_pos = np.count_nonzero(positive)
    n_neg = truth.size - n_pos
    if n_pos == 0 or n_neg == 0:
        # the ROC curve is undefined without both classes
        return float("nan")

    # mid-ranks: tied scores share the average of the ranks they occupy
    _, inverse, counts = np.unique(
        score, return_inverse=True, return_counts=True
    )
    last_rank = np.cumsum(counts)
    mid_rank = last_rank - (counts - 1) / 2.0
    ranks = mid_rank[inverse]

    # Mann-Whitney U of the positive class, normalised to [0, 1]
    u_stat = ranks[positive].sum() - n_pos * (n_pos + 1) / 2.0
    return u_stat / (n_pos * n_neg)

def calc_typeIrate(y, pred):
    """Return the type I error rate FP / (FP + TN) of binary decisions.

    The previous body called ``metrics.confusion_matrix``, but the import of
    ``metrics`` is commented out in this file, so the call raised
    ``NameError``. The counts are now taken directly with numpy.

    Args:
        y: Binary ground truth (0 = negative, 1 = positive). Lists, arrays,
            Series and one-column DataFrames are accepted.
        pred: Binary decisions (0 / 1), same length as ``y``.

    Returns:
        The share of negatives that were decided as 1, or NaN when ``y``
        contains no negatives.
    """
    truth = np.asarray(y).ravel()
    decision = np.asarray(pred).ravel()

    negatives = truth == 0
    fp = np.count_nonzero(negatives & (decision == 1))
    tn = np.count_nonzero(negatives & (decision == 0))
    if fp + tn == 0:
        # rate is undefined without any negative ground-truth entry
        return float("nan")
    return fp / (fp + tn)

def calc_typeIIrate(y, pred):
    """Return the type II error rate FN / (TP + FN) of binary decisions.

    The previous body called ``metrics.confusion_matrix``, but the import of
    ``metrics`` is commented out in this file, so the call raised
    ``NameError``. The counts are now taken directly with numpy.

    Args:
        y: Binary ground truth (0 = negative, 1 = positive). Lists, arrays,
            Series and one-column DataFrames are accepted.
        pred: Binary decisions (0 / 1), same length as ``y``.

    Returns:
        The share of positives that were decided as 0, or NaN when ``y``
        contains no positives.
    """
    truth = np.asarray(y).ravel()
    decision = np.asarray(pred).ravel()

    positives = truth == 1
    fn = np.count_nonzero(positives & (decision == 0))
    tp = np.count_nonzero(positives & (decision == 1))
    if tp + fn == 0:
        # rate is undefined without any positive ground-truth entry
        return float("nan")
    return fn / (tp + fn)

In [None]:
### preparation steps
# read the calibration results from disk
df = pd.read_csv('./calibration.csv')
# append two integer columns:
#   H_0    - "hasedge" cast to int
#   CItest - 1 where the p-value is at most alpha, otherwise 0
df = df.assign(
    H_0=df["hasedge"].astype(int),
    CItest=(df["pvalue"] <= alpha).astype(int),
)

#### Table A.2

In [None]:
### Table A.2: ROC AUC for every (samples, k_CMI, k_perm) combination as LaTeX.
# Reads the module-level `df`, `sum_kcmi`, `sum_kperm` and `calc_auc`.
# LaTeX fragments are raw strings so that backslashes are emitted verbatim
# without relying on invalid escape sequences (which newer Python versions
# warn about); the printed text is unchanged.
print(r"\begin{table}[!htb]")
print(
    r"\caption{ROC AUC for different combinations of $k_{CMI}$, $k_{perm}$, "
    r"and samples $n$ with fixed $M_{perm}\!=\!1\,000$ derived from CI "
    r"decisions over multiple settings, e.g., sampled with a varying "
    r"dimension of $Z$, $d_Z \in\{1,3,5,7\}$, continuous functions, or "
    r"discrete node ratios (see Table~\ref{tab:app:Calibration:Param}).}"
)
print(r"\label{tab:supplement:CalibrationAUC}")

print(r"\begin{center}")
print(r"\resizebox{0.9\linewidth}{!}{")
# one centred column per k_perm value, after the samples and k_CMI columns
print(
    r"\begin{tabular*}{1\columnwidth}{@{\extracolsep{\fill} } c | c",
    " ".join("c" for _ in sum_kperm),
    "}",
)
print(r"\toprule")
print(
    r"\makecell[c]{samples $n$} & \backslashbox{$k_{CMI}$}{$k_{perm}$} &",
    " & ".join(str(int(k)) for k in sorted(sum_kperm)),
    r" \\ ",
)

for samples in sorted(df["samples"].unique()):
    # AUC of the binary CI decisions for each (kcmi, kperm) pair at this n;
    # selecting the two needed columns keeps the grouping keys out of apply
    subset = df[df.samples == samples]
    grouped = (
        subset.groupby(["kcmi", "kperm"])[["H_0", "CItest"]]
        .apply(lambda g: calc_auc(g["H_0"], g["CItest"]))
        .reset_index(name="AUC")
    )

    # rounded AUC per displayed cell; combinations that are absent from the
    # data are left out (and rendered as "-") instead of raising IndexError
    cells = {}
    for y in sorted(sum_kcmi):
        for x in sorted(sum_kperm):
            if y >= samples or x >= samples:
                continue
            match = grouped[(grouped.kcmi == y) & (grouped.kperm == x)]
            if not match.empty:
                cells[(y, x)] = round(match.iloc[0]["AUC"], 2)

    # best (largest) rounded AUC for this sample size; NaN never counts
    highest = max([0] + [v for v in cells.values() if not np.isnan(v)])

    print(r"\midrule")
    # only k_CMI values below n get a row; count them element-wise instead
    # of comparing the list itself with `samples`
    n_rows = sum(1 for y in sum_kcmi if y < samples)
    line = "\\multirow{" + str(n_rows) + "}{*}{" + str(samples) + "} & "

    for y in sorted(sum_kcmi):
        if y >= samples:
            continue
        line += str(int(y))
        for x in sorted(sum_kperm):
            value = cells.get((y, x))
            if value is None:
                # k_perm not below n, or no data for this combination
                line += " & - "
            elif value == highest:
                line += " & \\textbf{" + str(value) + "}"
            else:
                line += " & " + str(value)
        line += r" \\ "
        print(line)
        # follow-up rows leave the multirow (samples) column empty
        line = " & "


print(r"\bottomrule")
print(r"\end{tabular*}")
print(r"}\end{center}")
print(r"\end{table}")


#### Table A.3

In [None]:
### Table A.3: type I / type II error rates per (samples, k_CMI, k_perm) as LaTeX.
# Reads the module-level `df`, `sum_kcmi`, `sum_kperm`, `calc_typeIrate` and
# `calc_typeIIrate`. LaTeX fragments are raw strings so that backslashes are
# emitted verbatim without relying on invalid escape sequences; the printed
# text is unchanged.
def _print_error_rate_rows(rate_func):
    """Print the table rows of one error-rate section.

    For every distinct value of ``df['samples']`` the value of
    ``rate_func(H_0, CItest)`` is computed per (kcmi, kperm) group and one
    LaTeX row is printed per k_CMI value below the sample size. The smallest
    rounded rate of each sample size is set in bold; cells whose k_perm is
    not below the sample size, or that have no data, are printed as "-".
    """
    for samples in sorted(df["samples"].unique()):
        # selecting the two needed columns keeps the grouping keys out of apply
        subset = df[df.samples == samples]
        grouped = (
            subset.groupby(["kcmi", "kperm"])[["H_0", "CItest"]]
            .apply(lambda g: rate_func(g["H_0"], g["CItest"]))
            .reset_index(name="rate")
        )

        # rounded rate per displayed cell; missing combinations are skipped
        # here instead of raising IndexError on an empty selection
        cells = {}
        for y in sorted(sum_kcmi):
            for x in sorted(sum_kperm):
                if y >= samples or x >= samples:
                    continue
                match = grouped[(grouped.kcmi == y) & (grouped.kperm == x)]
                if not match.empty:
                    cells[(y, x)] = round(match.iloc[0]["rate"], 2)

        # best (smallest) rounded rate for this sample size; NaN never counts
        smallest = min([1] + [v for v in cells.values() if not np.isnan(v)])

        print(r"\midrule")
        # only k_CMI values below n get a row; count them element-wise
        # instead of comparing the list itself with `samples`
        n_rows = sum(1 for y in sum_kcmi if y < samples)
        line = "\\multirow{" + str(n_rows) + "}{*}{" + str(samples) + "} & "

        for y in sorted(sum_kcmi):
            if y >= samples:
                continue
            line += str(int(y))
            for x in sorted(sum_kperm):
                value = cells.get((y, x))
                if value is None:
                    line += " & - "
                elif value == smallest:
                    line += " & \\textbf{" + str(value) + "}"
                else:
                    line += " & " + str(value)
            line += r" \\ "
            print(line)
            # follow-up rows leave the multirow (samples) column empty
            line = " & "


# section headings span the samples column, the k_CMI column and one column
# per k_perm value (6 for the default four k_perm values)
n_columns = len(sum_kperm) + 2

print(r"\begin{table}[!htb]")
print(
    r"\caption{Type I (top) and type II (bottom) error rates for different "
    r"combinations of $k_{CMI}$, $k_{perm}$, and samples $n$ with fixed "
    r"$M_{perm}\!=\!1\,000$ derived from CI decisions over multiple "
    r"settings, e.g., sampled with a varying dimension of $Z$, "
    r"$d_Z \in\{1,3,5,7\}$, continuous functions, or discrete node ratios "
    r"(see Table~\ref{tab:app:Calibration:Param}).}"
)
print(r"\label{tab:supplement:CalibrationTypeI}")
print(r"\begin{center}")

print(
    r"\begin{tabular*}{1\columnwidth}{@{\extracolsep{\fill} } c | c",
    " ".join("c" for _ in sum_kperm),
    "}",
)
print(r"\toprule")
print("\\multicolumn{" + str(n_columns) + "}{c}{Type I Error Rates} " + r"\\ ")
print(r"\midrule")
print(
    r"\makecell[c]{samples\\$n$} & \backslashbox{$k_{CMI}$}{$k_{perm}$} &",
    " & ".join(str(int(k)) for k in sorted(sum_kperm)),
    r" \\ ",
)

_print_error_rate_rows(calc_typeIrate)

print(r"\midrule")
print("\\multicolumn{" + str(n_columns) + "}{c}{Type II Error Rates} " + r"\\ ")

_print_error_rate_rows(calc_typeIIrate)

print(r"\bottomrule")
print(r"\end{tabular*}")
print(r"\end{center}")
print(r"\end{table}")
