The choices for each index column are listed below:
- benchmark: 
    - CIFAR10
    - Imagenet100
- data:
    - ID
    - OoD
- dataset:
    - CIFAR10, Textures, SVHN, LSUN-C, LSUN-R, iSUN, Places365, average
    - Imagenet100, NINCO, Textures, iNaturalist, SUN, Places, average
- model: 
    - wrn_40_2
    - resnet50
- variant: 
    - NT
    - DA
    - AT
    - PAT
- detector:
    - Entropy
    - ViM
    - Mahalanobis+ODIN
    - Mahalanobis
    - KLMatching
    - SHE
    - MSP
    - EnergyBased
    - MaxLogit
    - ODIN
    - DICE
    - RMD
- perturb_function:
    - rotation
    - translation
    - scale
    - hue
    - saturation
    - bright_contrast
    - blur  
    - Linf
    - L2
    - average
- severity: 1, 2, 3, 4, 5, average

In [19]:
import pandas as pd
import os
import yaml

# Load the shared experiment configuration; the detector (score function) and
# perturbation function names used below are read from it.
with open('../config.yaml', 'r') as f:
    configs = yaml.safe_load(f)

# Preferred display order for each categorical column. The tables further down
# turn these columns into ordered categoricals so that sorting follows this
# order instead of alphabetical order.
sorters = {
    "benchmark": ["CIFAR10", "Imagenet100", "Imagenet1k"],
    "model": ["wrn_40_2", "resnet50", "swin", "deit", "vit"],
    "variant": ["NT", "DA", "AT", "PAT"],
    "detector": configs["score_functions"],
    "data": ["ID", "OoD"],
    "perturb_function": ["average"] + configs["perturb_functions"],
}

# DAE summary table.
filepath = os.path.join("../results/eval/robustness", "dae_summary.csv")
df_dae = pd.read_csv(filepath).copy()

# Model performance table (provides the "accuracy" column used later).
filepath = os.path.join("../results/eval/performance", "model_performance.csv")
df_mdl = pd.read_csv(filepath).copy()

# Detector performance table: keep only the rows averaged over datasets, drop
# every metric except FPR95, and rename "score_function" to "detector" so the
# table can be merged with the DAE summary on that column.
filepath = os.path.join("../results/eval/performance", "detector_performance.csv")
df_detector = pd.read_csv(filepath).copy()
df_detector = df_detector[df_detector["dataset"] == "average"]
df_detector = df_detector.drop(columns=["dataset", "AUROC", "AUPR_IN", "AUPR_OUT"])
df_detector = df_detector.rename(columns={"score_function": "detector"}).copy()

### 1. Overview of the DAE rate

In [None]:
# Overview of the DAE rate.
# For every (benchmark, model, variant) the table lists the three detectors
# with the lowest FPR95 plus the MSP baseline, together with their ID/OoD DAE
# and the MAE, each rendered as "mean±std".

# Keep only the rows averaged over all perturbation functions whose dataset is
# either "average" or one of the benchmark names. Further conditions (e.g. on
# "variant", "detector" or "data") can be AND-ed in here to narrow the table.
df_filtered = df_dae[
    (df_dae["perturb_function"] == "average")
    & (df_dae["dataset"].isin(["average", "CIFAR10", "Imagenet100", "Imagenet1k"]))
].drop(["dataset", "perturb_function"], axis=1).copy()

# Attach model accuracy and detector FPR95 to every DAE row.
df_perf = pd.merge(left=df_mdl, right=df_detector, on=["benchmark", "model", "variant"], how="inner")
df_perf_dae = pd.merge(left=df_perf, right=df_filtered, on=["benchmark", "model", "variant", "detector"], how="inner")

# One row per detector, with the ID and OoD statistics side by side.
df_rlt = df_perf_dae.pivot(index=["benchmark", "model", "variant", "accuracy", "detector", "FPR95"],
                           columns=["data"])

# Collapse each (mean, std) pair into a single "mean±std" string column.
df_rlt["ID_DAE"] = df_rlt[("dae_mean", "ID")].astype(str) + u"\u00B1" + df_rlt[("dae_std", "ID")].astype(str)
df_rlt["OOD_DAE"] = df_rlt[("dae_mean", "OoD")].astype(str) + u"\u00B1" + df_rlt[("dae_std", "OoD")].astype(str)
df_rlt["MAE"] = df_rlt[("mae_mean", "ID")].astype(str) + u"\u00B1" + df_rlt[("mae_std", "ID")].astype(str)
df_rlt = df_rlt[["ID_DAE", "OOD_DAE", "MAE"]]
df_rlt.columns = df_rlt.columns.droplevel([1])

# Move MAE next to the other model-level keys, then flatten the index so the
# rows can be filtered and grouped by column.
df_rlt.set_index("MAE", append=True, inplace=True)
df_rlt = df_rlt.reorder_levels(["benchmark", "model", "variant", "accuracy", "MAE", "detector", "FPR95"]).reset_index()

# Top-3 detectors by FPR95 per model, plus the MSP baseline.
df_rlt_baseline = df_rlt[df_rlt["detector"] == "MSP"]
# A stable sort keeps FPR95 ties in their existing order, so the top-3 cut is
# reproducible between runs.
df_rlt = df_rlt.sort_values(by="FPR95", kind="mergesort")
df_rlt = df_rlt.groupby(by=["benchmark", "model", "variant", "accuracy", "MAE"]).head(3)
# MSP may already be among the top three; drop the resulting duplicate rows so
# the baseline is listed only once per model.
df_rlt = pd.concat([df_rlt, df_rlt_baseline], axis=0).drop_duplicates().copy()

# Sort by the configured display order rather than alphabetically.
for sort_col in ["benchmark", "model", "variant"]:
    df_rlt[sort_col] = df_rlt[sort_col].astype("category")
    df_rlt[sort_col] = df_rlt[sort_col].cat.set_categories(sorters[sort_col], ordered=True)
df_rlt.sort_values(by=["benchmark", "model", "variant", "FPR95"], inplace=True)
df_rlt.set_index(["benchmark", "model", "variant", "accuracy", "MAE", "detector", "FPR95"], inplace=True, drop=True)
display(df_rlt)
# df_rlt.to_csv("DAE_overview_table.csv")

### 2. Show DA improvement from NT

In [None]:
# Show DA improvement from NT.
# Builds one row per (benchmark, model, detector) with the NT and DA DAE side
# by side, separately for ID and OoD data, each rendered as "mean±std".

# Keep the rows averaged over all perturbation functions whose dataset is
# "average" or one of the benchmark names, and drop the columns that are not
# shown in this table. Further conditions (e.g. on "variant", "detector" or
# "data") can be AND-ed into the filter to narrow the table.
df_filtered = (
    df_dae.loc[
        lambda d: d["perturb_function"].eq("average")
        & d["dataset"].isin(["average", "CIFAR10", "Imagenet100"])
    ]
    .drop(columns=["dataset", "perturb_function", "mae_mean", "mae_std"])
    .copy()
)

# Turn the key columns into ordered categoricals so that sorting follows the
# order configured in `sorters`.
for sort_col in ["benchmark", "model", "detector", "variant", "data"]:
    df_filtered[sort_col] = df_filtered[sort_col].astype(
        pd.CategoricalDtype(categories=sorters[sort_col], ordered=True)
    )
df_filtered = df_filtered.sort_values(by=["benchmark", "model", "detector", "variant", "data"])

# Spread variant and data over the columns: (statistic, variant, data).
df_rlt = df_filtered.pivot(index=["benchmark", "model", "detector"], columns=["variant", "data"])

# Collapse each (mean, std) pair into a single "mean±std" string column.
for out_col, variant_name, data_name in [
    ("ID_NT_DAE", "NT", "ID"),
    ("ID_DA_DAE", "DA", "ID"),
    ("OOD_NT_DAE", "NT", "OoD"),
    ("OOD_DA_DAE", "DA", "OoD"),
]:
    mean_text = df_rlt[("dae_mean", variant_name, data_name)].astype(str)
    std_text = df_rlt[("dae_std", variant_name, data_name)].astype(str)
    df_rlt[out_col] = mean_text + u"\u00B1" + std_text

# Keep only the formatted columns and flatten the column index to one level.
df_rlt = df_rlt[["ID_NT_DAE", "ID_DA_DAE", "OOD_NT_DAE", "OOD_DA_DAE"]]
df_rlt.columns = df_rlt.columns.droplevel([1, 2])

def highlighter(x):
    """Return the per-cell CSS for one row of the NT-vs-DA table.

    ``x`` is indexed by the labels ``ID_NT_DAE``, ``ID_DA_DAE``,
    ``OOD_NT_DAE`` and ``OOD_DA_DAE``; every cell is a ``"mean±std"`` string.
    Only the DA cells are coloured: blue when the DA mean is strictly greater
    than the NT mean of the same data split, red otherwise (ties included).
    The NT cells get an empty style.

    Returns a list of four CSS strings, in the column order listed above.
    """
    def leading_mean(cell):
        # The mean is the part of the cell in front of the "±" sign.
        return float(cell.split(u"\u00B1")[0])

    styles = []
    for prefix in ("ID", "OOD"):
        nt_mean = leading_mean(x[prefix + "_NT_DAE"])
        da_mean = leading_mean(x[prefix + "_DA_DAE"])
        styles.append("")  # NT cell stays unstyled
        styles.append("color: blue;" if nt_mean < da_mean else "color: red;")
    return styles
    
    
# Colour the DA cells row by row; note that `df_rlt` is a Styler (not a
# DataFrame) from here on.
df_rlt = df_rlt.style.apply(func=highlighter, axis="columns")
display(df_rlt)
# df_rlt.to_latex("DA_DAE_table.md")

### 3. Overview of the DAE rate under severity levels 1-5

In [None]:
# Overview of the DAE rate per severity level.
# Same layout as the overall DAE overview table, but built from the
# severity-level results, so every detector contributes one row per severity.

# Read the DAE summary file of the severity-level evaluation.
filepath = os.path.join("../results/eval/severity_levels/robustness", "dae_summary.csv")
df_dae = pd.read_csv(filepath).copy()

# Keep only the rows averaged over all perturbation functions whose dataset is
# either "average" or one of the benchmark names. Further conditions (e.g. on
# "variant", "detector" or "data") can be AND-ed in here to narrow the table.
df_filtered = df_dae[
    (df_dae["perturb_function"] == "average")
    & (df_dae["dataset"].isin(["average", "CIFAR10", "Imagenet100"]))
].drop(["dataset", "perturb_function"], axis=1).copy()

# Attach model accuracy and detector FPR95 to every DAE row. Severity is not a
# merge key, so a detector carries the same FPR95 at every severity level.
df_perf = pd.merge(left=df_mdl, right=df_detector, on=["benchmark", "model", "variant"], how="inner")
df_perf_dae = pd.merge(left=df_perf, right=df_filtered, on=["benchmark", "model", "variant", "detector"], how="inner")

# One row per (detector, severity), with the ID and OoD statistics side by side.
df_rlt = df_perf_dae.pivot(index=["benchmark", "model", "variant", "accuracy", "detector", "FPR95", "severity"],
                           columns=["data"])

# Collapse each (mean, std) pair into a single "mean±std" string column.
df_rlt["ID_DAE"] = df_rlt[("dae_mean", "ID")].astype(str) + u"\u00B1" + df_rlt[("dae_std", "ID")].astype(str)
df_rlt["OOD_DAE"] = df_rlt[("dae_mean", "OoD")].astype(str) + u"\u00B1" + df_rlt[("dae_std", "OoD")].astype(str)
df_rlt["MAE"] = df_rlt[("mae_mean", "ID")].astype(str) + u"\u00B1" + df_rlt[("mae_std", "ID")].astype(str)
df_rlt = df_rlt[["ID_DAE", "OOD_DAE", "MAE"]]
df_rlt.columns = df_rlt.columns.droplevel([1])

# Move MAE next to the other model-level keys, then flatten the index so the
# rows can be filtered and grouped by column.
df_rlt.set_index("MAE", append=True, inplace=True)
df_rlt = df_rlt.reorder_levels(["benchmark", "model", "variant", "accuracy", "MAE", "detector", "FPR95", "severity"]).reset_index()

# Top-3 detectors by FPR95 per model and severity, plus the MSP baseline.
df_rlt_baseline = df_rlt[df_rlt["detector"] == "MSP"]
# A stable sort keeps FPR95 ties in their existing order, so every severity
# level of a model selects the same detectors.
df_rlt = df_rlt.sort_values(by="FPR95", kind="mergesort")
# "severity" must be part of the group key: without it, head(3) could return
# three severity rows of the single best detector instead of three detectors
# whenever the MAE string does not differ between severities.
df_rlt = df_rlt.groupby(by=["benchmark", "model", "variant", "accuracy", "MAE", "severity"]).head(3)
# MSP may already be among the top three; drop the resulting duplicate rows so
# the baseline is listed only once per model and severity.
df_rlt = pd.concat([df_rlt, df_rlt_baseline], axis=0).drop_duplicates().copy()

# Sort by the configured display order rather than alphabetically.
for sort_col in ["benchmark", "model", "variant"]:
    df_rlt[sort_col] = df_rlt[sort_col].astype("category")
    df_rlt[sort_col] = df_rlt[sort_col].cat.set_categories(sorters[sort_col], ordered=True)
df_rlt.sort_values(by=["benchmark", "model", "variant", "FPR95", "severity"], inplace=True)
df_rlt.set_index(["benchmark", "model", "variant", "accuracy", "detector", "FPR95", "severity", "MAE"], inplace=True, drop=True)
display(df_rlt)
