### Evaluation of the robustness

In [None]:
### imports
import math
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.font_manager import FontProperties
from matplotlib.text import TextPath
from sklearn import metrics


In [None]:
### parameters
# Significance level of the CI tests: a p-value <= alpha is treated as
# "reject H_0" (edge predicted) when the test decisions are derived below.
alpha = 0.05

In [None]:
### preparations
df = pd.read_csv("./robustness.csv")

# Row masks used below; 'hasedge' itself is never modified.
no_edge = df["hasedge"] == False
has_edge = df["hasedge"] == True
# A p-value of -1.0 marks an entry for which the method failed to compute a result.
failed = df["pvalue"] == -1.0

### Optional penalization of failed computations: a failed entry gets the p-value that leads to
### the wrong decision under the rule
#   pvalue <= alpha -> reject H_0, assume H_1         -> edge (1)
#   pvalue >  alpha -> cannot reject H_0, assume H_0  -> no edge (0)
# i.e. 0.0 (edge predicted) where there is no edge and 1.0 (no edge predicted) where there is one.
df["pvalue"] = np.select(
    [failed & no_edge, failed & has_edge],
    [0.0, 1.0],
    default=df["pvalue"],
)

# Ground-truth label stored in column "H_0":
#   0 -> H_0 true (independent, no edge), 1 -> H_0 false (dependent, edge)
df.loc[no_edge, "H_0"] = 0
df.loc[has_edge, "H_0"] = 1

# CI test decision: 1 (edge) if H_0 is rejected at level alpha, 0 (no edge) otherwise.
df["CItest"] = (df["pvalue"] <= alpha).astype(int)


In [None]:
### further preparations and grouping
## compute error rate type 1 & type 2 over CGMs grouped by samples, sepsetsize and discretenoderatio
def _error_rate(errors, total):
    """Return ``errors / total``, or NaN when ``total`` is zero.

    A zero total means the group contains no case of the relevant true class,
    so the rate is undefined. Returning NaN explicitly avoids the NumPy
    ``RuntimeWarning`` that a 0/0 division of the confusion-matrix counts
    would otherwise emit.
    """
    if total == 0:
        return np.nan
    return errors / total


def computeTypeI(y, pred):
    """Compute the type I error rate, FP / (FP + TN), of binary decisions.

    Parameters
    ----------
    y : array-like
        True labels; only the labels 0 and 1 are counted.
    pred : array-like
        Predicted labels; only the labels 0 and 1 are counted.

    Returns
    -------
    pandas.Series
        Entries ``'error type'`` (the string ``"type I error"``) and
        ``'error rate'`` (NaN if there are no true-0 cases).
    """
    tn, fp, fn, tp = metrics.confusion_matrix(y, pred, labels=[0, 1]).ravel()
    result = {
        'error type': "type I error",
        'error rate': _error_rate(fp, fp + tn),
    }
    return pd.Series(result, index=['error type', 'error rate'])


def computeTypeII(y, pred):
    """Compute the type II error rate, FN / (TP + FN), of binary decisions.

    Parameters
    ----------
    y : array-like
        True labels; only the labels 0 and 1 are counted.
    pred : array-like
        Predicted labels; only the labels 0 and 1 are counted.

    Returns
    -------
    pandas.Series
        Entries ``'error type'`` (the string ``"type II error"``) and
        ``'error rate'`` (NaN if there are no true-1 cases).
    """
    tn, fp, fn, tp = metrics.confusion_matrix(y, pred, labels=[0, 1]).ravel()
    result = {
        'error type': "type II error",
        'error rate': _error_rate(fn, tp + fn),
    }
    return pd.Series(result, index=['error type', 'error rate'])

### Error rates are computed per setting, i.e. per combination of
### (samples, sepsetsize, discretenoderatio). 'cgmid' is deliberately NOT a grouping key, so each
### rate pools all experiments (i.e., CGMs; 100 per the original note) of that setting.
setting_keys = ['samples', 'sepsetsize', 'discretenoderatio']
per_setting = df.groupby(setting_keys)

groupedI = per_setting.apply(
    lambda part: computeTypeI(part[["H_0"]], part[["CItest"]])
).reset_index()
groupedII = per_setting.apply(
    lambda part: computeTypeII(part[["H_0"]], part[["CItest"]])
).reset_index()

# Long format: one row per (setting, error type).
grouped = pd.concat([groupedI, groupedII], ignore_index=True)

## plot-friendly column names (added as copies; the original columns are kept)
grouped = grouped.assign(**{
    '$d_Z$': grouped['sepsetsize'],
    'dnr': grouped['discretenoderatio'],
    'sample sizes $n$': grouped['samples'],
})



### Figure 1 in paper

In [None]:
plt.rcParams.update({'font.size': 14})

# Bold font used to draw the separation-set size d_Z as the marker of each line.
fp = FontProperties(weight="bold")

# One panel per discrete node ratio; inside a panel, one line per d_Z and error type.
dnr_values = [0.0, 0.25, 0.5, 0.75, 1.0]
dz_values = [1, 3, 5, 7]

fig, axs = plt.subplots(ncols=len(dnr_values), sharey=True, figsize=(16, 3.6))

for panel, (ax, dnr) in enumerate(zip(axs, dnr_values)):
    subgroup = grouped[grouped.dnr == dnr]
    for dz in dz_values:
        line = subgroup[subgroup['$d_Z$'] == dz]
        label = TextPath((0, 0), str(dz), prop=fp)
        # Only the very first line plot (first panel, first d_Z) keeps seaborn's default
        # legend handling so that the error types are registered exactly once; every other
        # call suppresses it to avoid duplicated legend entries.
        is_legend_source = panel == 0 and dz == dz_values[0]
        legend_kwargs = {} if is_legend_source else {"legend": False}
        sns.lineplot(
            x="sample sizes $n$",
            y="error rate",
            hue="error type",
            palette="colorblind",
            data=line,
            marker=label,
            markersize=19,
            ax=ax,
            **legend_kwargs,
        )

    # Axis formatting does not depend on d_Z, so it is applied once per panel.
    ax.set_ylabel("error rate")
    ax.set_xlabel("sample sizes $n$")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.set_title('dnr=' + str(dnr), {'fontsize': 12})
    # Replace any per-panel legend by an empty one; the shared legend is added below.
    ax.legend([], [], frameon=False)

# Shared legend built from the labelled artists of the first panel; the anchor is given in
# axes coordinates of that panel, placing the legend above the row of panels.
f1 = axs[0]
f1.legend(loc='upper center', bbox_to_anchor=(2.5, 1.25), frameon=False, ncol=2)
fig.savefig('./case_1_combined.pdf', bbox_inches="tight")


### Figure A.1. in appendix

In [None]:
## create plots: grid of line plots with one row per dnr and one column per d_Z,
## coloured by error type
g = sns.FacetGrid(
    data=grouped,
    row="dnr",
    col="$d_Z$",
    hue="error type",
    palette="colorblind",
)
# error rates are shown on a fixed [0, 1] y-range in every facet
g.set(ylim=(0, 1))
g.map_dataframe(sns.lineplot, x="sample sizes $n$", y="error rate")
g.add_legend(title="", loc='upper center')

# lower the top of the subplot area to 0.9 (figure fraction) so the legend
# placed at 'upper center' has room above the facets
appendix_fig = g.fig
appendix_fig.subplots_adjust(top=0.9)
appendix_fig.savefig('./Robusteness_expanded.pdf', bbox_inches="tight")
