In [8]:
from src.plotsutils import *
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from src.utils import set_seed
import os


# Working directory of the kernel. `os.path.dirname(__name__)` is always the
# empty string (a module name contains no path separator), so the original
# `os.path.abspath(os.path.dirname(__name__))` resolved to exactly this value.
wdir = os.getcwd()
# Folder (relative to the working directory) used for the generated tables.
tab_fold = '../tabs/'

# Seed passed to the project helper `set_seed`.
seed = 42
set_seed(seed)

# CSV file holding the results loaded into `df`.
filename = "../results/all_results.csv"
# Guard clause: stop early with an actionable message when the file is missing.
if not os.path.exists(filename):
    raise FileNotFoundError("No results found. Run train.py and test.py first.")
df = pd.read_csv(filename)

Random seed 42 has been set.


In [9]:

# Add a `<column>_formatted` companion for each p-value column: the value in
# two-decimal scientific notation, wrapped in a LaTeX `\num{...}` macro.
for _pv_col in ("pv_rob", "pv_rob_ATT"):
    df[_pv_col + "_formatted"] = "\\num{" + df[_pv_col].map("{:.2e}".format) + "}"


In [10]:
# Maps result-column names to the (LaTeX) label used when presenting them.
# Raw strings keep every backslash literal: the previous spelling relied on
# "\m" being an *invalid* escape sequence, which Python only tolerates with a
# DeprecationWarning/SyntaxWarning. The runtime values are unchanged.
metric_dict = {
    "coef_rob": r"$\hat{\tau}_{\mathtt{RDD}}$",
    "system_acc": "System Accuracy",
    "system_acc_v2": "System Accuracy",
    "ATT": r"$\hat{\tau}_{\mathtt{ATD}}$",
    "classifier_all_acc": "System Accuracy",
    "pv_rob": r"Estimated $\tau$",
    "acc_system": "System Accuracy",
}


In [11]:
# Order rows by dataset, then method, then target coverage (rebinding `df`
# instead of mutating it in place).
df = df.sort_values(by=["data", "method", "target_coverage"])

In [12]:
# Display the results frame (Jupyter renders the last expression of a cell).
df

Unnamed: 0,data,method,target_coverage,ATT,ci_l_ATT,ci_u_ATT,pv_rob_ATT,acc_hum,acc_ML,acc_system,...,c,p,q,coef_rob,se_rob,pv_rob,ci_rob_l,ci_rob_u,pv_rob_formatted,pv_rob_ATT_formatted
76,cifar10h,ASM,0.0,0.028000,0.014541,0.041459,4.552340e-05,0.957500,0.000000,0.957500,...,,,,,,,,,\num{nan},\num{4.55e-05}
66,cifar10h,ASM,0.1,0.032676,0.017604,0.047748,2.145606e-05,0.953239,1.000000,0.958500,...,-1.000000,1.0,2.0,-0.842888,0.505405,0.095366,-1.833463,0.147688,\num{9.54e-02},\num{2.15e-05}
67,cifar10h,ASM,0.2,0.039052,0.022085,0.056020,6.449956e-06,0.948784,1.000000,0.960000,...,-1.000000,1.0,2.0,-0.022057,0.017692,0.212490,-0.056732,0.012618,\num{2.12e-01},\num{6.45e-06}
68,cifar10h,ASM,0.3,0.048035,0.029052,0.067018,7.062399e-07,0.945415,1.000000,0.962500,...,-0.999998,1.0,2.0,-0.043245,0.010062,0.000017,-0.062966,-0.023523,\num{1.73e-05},\num{7.06e-07}
69,cifar10h,ASM,0.4,0.061962,0.039981,0.083943,3.296803e-08,0.940620,1.000000,0.965500,...,-0.999935,1.0,2.0,-0.039047,0.012408,0.001651,-0.063367,-0.014727,\num{1.65e-03},\num{3.30e-08}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,xray-airspace,SP,0.6,0.130031,0.069609,0.190453,2.465568e-05,0.829721,0.937736,0.896835,...,0.203150,1.0,2.0,-0.171448,0.122025,0.160014,-0.410613,0.067717,\num{1.60e-01},\num{2.47e-05}
204,xray-airspace,SP,0.7,0.157676,0.088377,0.226976,8.214969e-06,0.834025,0.915033,0.892145,...,0.271377,1.0,2.0,-0.113580,0.196773,0.563794,-0.499248,0.272087,\num{5.64e-01},\num{8.21e-06}
205,xray-airspace,SP,0.8,0.217647,0.133259,0.302035,4.304583e-07,0.823529,0.907760,0.890973,...,0.332907,1.0,2.0,0.277976,0.186589,0.136283,-0.087732,0.643683,\num{1.36e-01},\num{4.30e-07}
206,xray-airspace,SP,0.9,0.253012,0.127244,0.378780,8.049209e-05,0.783133,0.881818,0.872216,...,0.418754,1.0,2.0,0.007736,0.133856,0.953914,-0.254617,0.270089,\num{9.54e-01},\num{8.05e-05}


In [15]:
dataset = "synth"


def _strip_leading_zero(cells):
    """Turn "0.123" into ".123" in every string of the Series ``cells``.

    The pattern is passed as an explicit regular expression, so the result does
    not depend on the pandas version (the default of ``regex`` in
    ``Series.str.replace`` changed in pandas 2.0). The look-behind leaves
    numbers such as "10.000" untouched.
    """
    return cells.str.replace(r"(?<!\d)0\.", ".", regex=True)


def _build_section(tab, methods, value_prefix, label, label_coverage,
                   excluded_coverages=(), pvalue_prefix=None):
    """Build one block of table rows: one formatted cell per coverage and method.

    Parameters
    ----------
    tab : pandas.DataFrame
        Flattened pivot with a ``target_coverage`` column and one
        ``<value_prefix>_<method>`` column per method (plus
        ``<pvalue_prefix>_<method>`` columns when ``pvalue_prefix`` is given).
    methods : list of str
        Method names, in output column order.
    value_prefix : str
        Prefix of the numeric columns to format with three decimals.
    label : str
        Text placed in the ``metric`` column of the row whose coverage equals
        ``label_coverage``; all other rows get an empty string.
    label_coverage : float
        Coverage value of the row that carries ``label``.
    excluded_coverages : iterable of float
        Coverage values whose rows are dropped from the section.
    pvalue_prefix : str or None
        Prefix of the pre-formatted p-value columns appended as ``~(<p>)``.

    Returns
    -------
    pandas.DataFrame
        Columns ``metric``, ``target_coverage`` and one string column per method.
    """
    coverage = tab["target_coverage"]
    value_cols = ["{}_{}".format(value_prefix, method) for method in methods]
    # Row-wise best value across methods; missing values are skipped.
    best = tab[value_cols].max(axis=1)

    section = pd.DataFrame({
        "metric": pd.Series("", index=tab.index).mask(coverage == label_coverage, label),
        "target_coverage": coverage,
    })
    for method, value_col in zip(methods, value_cols):
        values = tab[value_col]
        text = values.map("{:.3f}".format)
        if pvalue_prefix is not None:
            pvalues = tab["{}_{}".format(pvalue_prefix, method)].fillna("").astype(str)
            text = text + "~(" + pvalues + ")"
        # Bold the cell holding the row maximum, plain math mode otherwise.
        cells = ("$\\mathbf{" + text + "}$").where(values == best, "$" + text + "$")
        cells = _strip_leading_zero(cells)
        if pvalue_prefix is not None:
            # Shorten exponents: "e-05" -> "e-5".
            cells = cells.str.replace("e-0", "e-", regex=False)
        # A missing estimate yields an empty cell instead of a placeholder number.
        section[method] = cells.mask(values.isna(), "")
    return section[~coverage.isin(list(excluded_coverages))]


def write_latex_table(df, dataset):
    """Write the LaTeX results table of one dataset to ``tab_fold``.

    Rows of ``df`` whose ``data`` column equals ``dataset`` are pivoted by
    ``target_coverage`` (rows) and ``method`` (columns). The table stacks three
    sections, each with the per-row maximum across methods in bold:

    1. ``ATT`` with ``pv_rob_ATT_formatted`` in parentheses (coverage 1 dropped),
    2. ``coef_rob`` with ``pv_rob_formatted`` in parentheses (coverages 0 and 1
       dropped),
    3. ``acc_system``.

    Parameters
    ----------
    df : pandas.DataFrame
        Results with the columns ``data``, ``method``, ``target_coverage``,
        ``ATT``, ``coef_rob``, ``acc_system``, ``pv_rob_formatted`` and
        ``pv_rob_ATT_formatted``.
    dataset : str
        Value of the ``data`` column to select; also used in the caption, the
        label and the output file name.

    Returns
    -------
    None
        The table is written to ``tab_fold + "<dataset>_results.tex"``; the
        folder is created when it does not exist.
    """
    subset = df[df["data"] == dataset]
    methods = sorted(subset["method"].unique())
    tab = pd.pivot_table(
        subset,
        values=["coef_rob", "ATT", "pv_rob_formatted", "pv_rob_ATT_formatted", "acc_system"],
        index="target_coverage",
        columns="method",
        aggfunc={"coef_rob": "mean", "ATT": "mean", "pv_rob_formatted": "first",
                 "pv_rob_ATT_formatted": "first", "acc_system": "mean"},
    )
    # Flatten the (value, method) column MultiIndex into "<value>_<method>".
    tab.columns = ["_".join(col).strip() for col in tab.columns.values]
    tab = tab.reset_index()

    att_rows = _build_section(
        tab, methods, "ATT",
        label="\\multirow{10}{*}{\\rotatebox[origin=c]{90}{$\\htatd$}}",
        label_coverage=0, excluded_coverages=(1,),
        pvalue_prefix="pv_rob_ATT_formatted")
    rdd_rows = _build_section(
        tab, methods, "coef_rob",
        label="\n\\midrule\n\\multirow{9}{*}{\\rotatebox[origin=c]{90}{$\\htrdd$}}",
        label_coverage=0.1, excluded_coverages=(0, 1),
        pvalue_prefix="pv_rob_formatted")
    acc_rows = _build_section(
        tab, methods, "acc_system",
        label="\n\\midrule\n\\multirow{11}{*}{\\rotatebox[origin=c]{90}{Accuracy}}",
        label_coverage=0)

    tab_all = pd.concat([att_rows, rdd_rows, acc_rows], axis=0)
    tab_all["target_coverage"] = _strip_leading_zero(
        tab_all["target_coverage"].map("{:.2f}".format))

    str_latex = tab_all.to_latex(
        index=False, escape=False,
        label="tab: {}-results".format(dataset),
        caption="{} results".format("\\texttt{" + dataset + "}"),
        position="t",
        # One centred column per method, whatever their number.
        column_format="c|c|" + "c" * len(methods))
    # Replace the raw header with one LaTeX macro per method, e.g. "\ASM{}".
    plain_header = "metric & target_coverage & " + " & ".join(methods) + " \\"
    macro_header = (" & $c$ & "
                    + " & ".join("\\{}{{}}".format(method) for method in methods)
                    + " \\")
    str_latex = str_latex.replace(plain_header, macro_header)
    # Scale the tabular to the text width.
    str_latex = str_latex.replace("\\begin{tabular}", "\\resizebox{\\textwidth}{!}{\n\\begin{tabular}")
    str_latex = str_latex.replace("\\end{tabular}", "\\end{tabular}\n}")

    out_path = tab_fold + "{}_results.tex".format(dataset)
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(out_path, "w") as f:
        f.write(str_latex)


In [16]:
# Write one results table per dataset, in the same order as before.
for _dataset_name in ("synth", "galaxyzoo", "xray-airspace", "hatespeech", "cifar10h"):
    write_latex_table(df, _dataset_name)