In [9]:
# Imports
import pandas as pd 
import glob
import numpy as np

 # Load the csv
df = pd.read_csv("csv/raw_experimental_results.csv")	
assert df.isnull().values.any() == False
assert len(df) == 1848

# Clear obvious results
def filter_own_oc_scores(df):
    mapping  = dict(
        UnetEncoderClassifier = ['oracle_UnetEncoder', 'oracle_tcv_UnetEncoder'],
        MadryClassifier = ['oracle_MNR-RN50', 'oracle_tcv_MNR-RN50'],
        AlexNetClassifier = ['oracle_AlexNet (pretrained)', 'oracle_tcv_AlexNet (pretrained)'],
        AlexNetRandomInitClassifier = ['oracle_AlexNet (random init)', 'oracle_tcv_AlexNet (random init)'],
        ConvNeXtLClassifier = ['oracle_ConvNeXt-L', 'oracle_tcv_ConvNeXt-L'],
        SIMCLRClassifier = ['oracle_SIMCLR', 'oracle_tcv_SIMCLR'],
        SwinTFLClassifier = ['oracle_SwinTFL', 'oracle_tcv_SwinTFL']
    )
    
    for model_key, value in mapping.items():
        for v in value:
            df.loc[df['classifier'] == model_key, v] = None
    
    return df

df = filter_own_oc_scores(df)

# Dropped SIMCLR oracle due to technical problems 
df = df.drop(columns=["oracle_SIMCLR", "oracle_tcv_SIMCLR"])

# Normalize lp scores
df["lp_norm_1"] /= 1000000.0
df["lp_norm_1_5"] /= 1000.0
df["lp_norm_2"] /= 1000.0

df.head()

Unnamed: 0,run_id,classifier,cone_projection,xzero_prediction,source_class,source_class_name,target_class,target_class_name,target_class_conf_std,target_class_conf_mean,...,oracle_AlexNet (pretrained),oracle_tcv_AlexNet (pretrained),oracle_AlexNet (random init),oracle_tcv_AlexNet (random init),oracle_UnetEncoder,oracle_tcv_UnetEncoder,oracle_ConvNeXt-L,oracle_tcv_ConvNeXt-L,oracle_SwinTFL,oracle_tcv_SwinTFL
0,run_cheetah(293)_to_leopard(288),UnetEncoderClassifier,False,False,293,cheetah,288,leopard,0.177915,0.953398,...,0.3,0.28,0.0,0.0,,,0.26,0.22,0.32,0.28
1,run_cheetah(293)_to_tiger(292),UnetEncoderClassifier,False,False,293,cheetah,292,tiger,0.119551,0.964065,...,0.02,0.02,0.0,0.0,,,0.06,0.06,0.02,0.02
2,run_cheetah(293)_to_golden-retriever(207),UnetEncoderClassifier,False,False,293,cheetah,207,golden retriever,0.271491,0.884884,...,0.04,0.0,0.0,0.0,,,0.06,0.0,0.12,0.02
3,run_cheetah(293)_to_Labrador-retriever(208),UnetEncoderClassifier,False,False,293,cheetah,208,Labrador retriever,0.347169,0.769877,...,0.04,0.0,0.0,0.0,,,0.1,0.0,0.12,0.0
4,run_cheetah(293)_to_guacamole(924),UnetEncoderClassifier,False,False,293,cheetah,924,guacamole,0.427857,0.630766,...,0.14,0.0,0.0,0.0,,,0.22,0.02,0.22,0.02


In [10]:
# Helpful methods
## filter for [source_class, target_class] combinations
def filter_valid_cfs(dy):
    valid_combinations = [
        (293, 288),
        (209, 207),
        (935, 924),
        (973, 970),
        (970, 980),
        (965, 963),
    ]
    dy = dy[dy[['source_class', 'target_class']].apply(tuple, axis=1).isin(valid_combinations)]
    return dy

def filter_non_valid_cfs(dy):
    valid_combinations = [
        (293, 288),
        (209, 207),
        (935, 924),
        (973, 970),
        (970, 980),
        (965, 963),
    ]
    dy = dy[~dy[['source_class', 'target_class']].apply(tuple, axis=1).isin(valid_combinations)]
    return dy

def filter_metrics(dy):
    return dy.drop(columns=["OA ↓", "L1 (m) ↓", "L1.5 (k) ↓", "L2 (k) ↓", "OS RandomNet ↓", "OTA RandomNet ↓", "OS UnetE ↑",  "OS ConvNeXt ↑", "OS SwinTFL ↑", "OS AlexNet ↑","OS Madry ↑"])

def rename(dy):
    dy = dy[["target_class_conf_std", "target_class_conf_mean", "original_class_conf_std", "original_class_conf_mean", "target_class_validity", "original_class_validity", "oracle_MNR-RN50", "oracle_tcv_MNR-RN50", "oracle_AlexNet (pretrained)", "oracle_tcv_AlexNet (pretrained)", "oracle_AlexNet (random init)", "oracle_tcv_AlexNet (random init)", "oracle_UnetEncoder", "oracle_tcv_UnetEncoder", "oracle_ConvNeXt-L", "oracle_tcv_ConvNeXt-L", "oracle_SwinTFL", "oracle_tcv_SwinTFL", "lp_norm_1" , "lp_norm_1_5" , "lp_norm_2", "lpips", "fid"]]
    dy = dy.drop(columns=["target_class_conf_std", "target_class_conf_mean", "original_class_conf_std", "original_class_conf_mean"])
    dy = dy.rename(columns={"target_class_conf_std":"TConfStd ↓", "target_class_conf_mean":"TConf ↑","original_class_conf_std":"OConfStd ↓", "original_class_conf_mean":"OConf ↓", "fid":"FID ↓","target_class_validity": "TA ↑", "original_class_validity": "OA ↓", "lp_norm_1" : "$\mathrm{D_1}$ (m) ↓", "lp_norm_1_5" : "$\mathrm{D_{1.5}}$ (k) ↓", "lp_norm_2": "$\mathrm{D_2}$ (k) ↓", "lpips": "LPIPS ↓", "oracle_MNR-RN50":"OS Madry ↑", "oracle_tcv_MNR-RN50":"OTA Madry ↑", "oracle_AlexNet (pretrained)":"OS AlexNet ↑", "oracle_tcv_AlexNet (pretrained)":"OTA AlexNet ↑", "oracle_AlexNet (random init)":"OS RandomNet ↓", "oracle_tcv_AlexNet (random init)":"OTA RandomNet ↓", "oracle_UnetEncoder":"OS UnetE ↑", "oracle_tcv_UnetEncoder":"OTA UnetE ↑", "oracle_ConvNeXt-L":"OS ConvNeXt ↑", "oracle_tcv_ConvNeXt-L":"OTA ConvNeXt ↑", "oracle_SIMCLR":"OS SIMCLR ↑", "oracle_tcv_SIMCLR":"OTA SIMCLR ↑", "oracle_SwinTFL":"OS SwinTFL ↑", "oracle_tcv_SwinTFL":"OTA SwinTFL ↑" })
    
    dy = dy.rename(index={
        "AlexNetClassifier" : "AlexNet",
        "AlexNetRandomInitClassifier" : "RandomNet",
        "ConvNeXtLClassifier" : "ConvNeXt",
        "MadryClassifier" : "Madry",
        "SIMCLRClassifier" : "SIMCLR",
        "SwinTFLClassifier" : "SwinTFL",
        "UnetEncoderClassifier" : "UnetE"
    })
    return dy

def prettify_df(dy):
    dy = dy[["Madry", "AlexNet", "UnetE", "ConvNeXt", "SwinTFL", "SIMCLR", "RandomNet"]]
    dy = dy.T[["TA ↑", "OTA Madry ↑", "OTA AlexNet ↑", "OTA UnetE ↑", "OTA ConvNeXt ↑", "OTA SwinTFL ↑", "LPIPS ↓", "FID ↓"]].T
    dy.replace('- ± -', value="-", inplace=True)
    dy.index.names = ['metric'] # rename index "level_0" to "metric"
    return dy
    
def build_df(df, filter_valid, cone_projection, x_zero_prediction, rename_df=True, aggregate=True):
        dy = df.copy()
        if filter_valid:
            dy = filter_valid_cfs(dy)
        else:
             dy = filter_non_valid_cfs(dy)
        dy = dy.set_index(['classifier', "source_class_name", "target_class_name"])
        dy = dy[dy["cone_projection"] == cone_projection ]
        dy = dy[dy["xzero_prediction"] == x_zero_prediction ]
        dy = dy.drop(columns=["source_class", "target_class", "run_id", "cone_projection", "xzero_prediction"])
        if rename_df:
            dy = rename(dy)
        if aggregate:
            dy = dy.groupby(['classifier']).agg(["mean", "std"],axis=0)
        return dy
    

def minmax_highlight_df(dy):
    
    def highlight(col):
        s = col.str.split(' ± ')
        s_mean_value = s.str[0].astype('float')
        if '↓' in col.name:
            col[s_mean_value.argmin()] = f"\textbf{{{col[s_mean_value.argmin()]}}}"
        elif '↑' in col.name:
            col[s_mean_value.argmax()] = f"\textbf{{{col[s_mean_value.argmax()]}}}"
        col[s_mean_value.isnull()] = "-"
        return col
    
    return dy.apply(highlight,axis=0)

def df2tex(dy, name):
    dy.to_latex(name, escape=False, column_format="lcccccccc")

# Aggregate the runs over each classifier type with mean and standard deviation
def mean_std(data):
    arr = np.array(data)
    std, mean = np.around(arr.std(),decimals=2), np.around(arr.mean(), decimals=2)
    
    if mean >= 100:
        std, mean = int(std), int(mean)
    
    return f"{mean} ± {std}"

def highlight_max(cell):
    if type(cell) != str:
        cell = float(round(cell,2))
        if cell < 0 :
            return 'color: red'
        elif cell > 0:
            return 'color: green'
        else:
            return 'color: grey'
        
def df_diff(dy1,dy2):
    dy12diff = (dy2-dy1)

    dy12diff = dy12diff.drop(columns=["OA ↓", "L1 (m) ↓", "L1.5 (k) ↓", "L2 (k) ↓", "OS RandomNet ↓", "OTA RandomNet ↓", "OS UnetE ↑",  "OS ConvNeXt ↑", "OS SwinTFL ↑", "OS AlexNet ↑","OS Madry ↑"])# ])#.head(30)
    dy12diff = dy12diff.T
    dy12diff = dy12diff[["Madry", "AlexNet", "UnetE", "ConvNeXt", "SwinTFL", "SIMCLR", "RandomNet"]]

    dy12diff = dy12diff.round(decimals=2)

    def highlight_rg(c):
        zeros = c==0.0
        positive = c>0.0 
        negative = c<0.0
        
        c[zeros] = "\colorgrey 0.00"
        c[positive] = [f"\colorgreen {x:.2f}" for x in c[positive]]
        c[negative] = [f"\colorred {x:.2f}" for x in c[negative]]
        return c
        
    dy12diff.apply(highlight_rg,axis=0)
    dy12diff.fillna('-', inplace=True)
    
    dy12diff = dy12diff.melt(ignore_index=False)
    dy12diff = dy12diff.reset_index().set_index(["level_0", "classifier", "level_1" ])
    dy12diff = dy12diff.unstack()
    dy12diff = dy12diff.apply(lambda x: f"{x['value']['mean']} \colorblack ± {x['value']['std']}", axis=1)# concat mean and std
    dy12diff = dy12diff.reset_index().set_index(["level_0", "classifier"])# back to original form
    dy12diff = dy12diff.unstack()
    dy12diff.columns = dy12diff.columns.droplevel() # remove the "0" column text
    dy12diff.index.names = ['metric'] # rename index "level_0" to "metric"

    dy12diff = dy12diff[["Madry", "AlexNet", "UnetE", "ConvNeXt", "SwinTFL", "SIMCLR", "RandomNet"]]
    dy12diff = dy12diff.T[["TA ↑", "OTA Madry ↑", "OTA AlexNet ↑", "OTA UnetE ↑", "OTA ConvNeXt ↑", "OTA SwinTFL ↑", "LPIPS ↓", "FID ↓"]].T
    dy12diff.replace('- \colorblack ± -', value="-", inplace=True)
    
    return dy12diff

# Table 3 - Cone Projection, XZero Prediction, All Metrics, All Classifiers, Only Good CFs
Settings from DVCE (cone projection and xzero both True), show all metrics for the different classifiers with different model capacities, for data only on one well working target class (1:1) - so 6 data points in total

<!-- RQ: How does the counterfactual generation perform for different base accuracies of the classifiers?
- Exp: different classifiers with different model capacities.
- Hyp: A simpler classifier is easier to fool, whereas a non-robust high-capacity model tends to produce adversarial examples  

RQ: What happens if the classifier is very bad, e.g., random?
- Exp: randomly initialized classifier
- Hyp: the diffusion process is steered to uniform class distribution and no counterfactuals are produced, however the oracles still have some target accuracy and the visual inspection shows that the images were in fact changed towards the target class but not enough to be a valid example for the classifier, this effect is most likely due to the cone projection

RQ: Are there non-robust classifiers that still produce meaningful counterfactuals?
- Exp: different non-robust classifiers
- Hyp: yes, with cone projection and the xzero prediction the samples are of high quality and the changes are mostly class discriminative

RQ: Are there differences when the features of the classifier were extracted in a self-supervised instead or fully supervised manner?
- Exp: implement and use the SIMCLR classifier for the counterfactual generation
- Hyp: the image distribution changes are quite extreme although the individual pixel changes are not changed that much, the classifier provides very unreliable counterfactuals and the oracles are very uncertain -->

In [11]:
table3 = build_df(df,filter_valid=True,cone_projection=True,x_zero_prediction=True, aggregate=False)

table3 = table3.groupby(['classifier']).agg(mean_std)
table3.replace('nan ± nan', value="-", inplace=True)

table3 = table3.T
table3 = table3[["Madry", "AlexNet", "UnetE", "ConvNeXt", "SwinTFL", "SIMCLR", "RandomNet"]]
table3.index.names = ['metric'] # rename index "level_0" to "metric"

df2tex(table3, "table_3.tex")
table3

  dy.to_latex(name, escape=False, column_format="lcccccccc")


classifier,Madry,AlexNet,UnetE,ConvNeXt,SwinTFL,SIMCLR,RandomNet
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TA ↑,0.84 ± 0.1,0.99 ± 0.01,0.99 ± 0.01,0.95 ± 0.04,0.9 ± 0.08,0.45 ± 0.29,0.0 ± 0.0
OA ↓,0.07 ± 0.07,0.0 ± 0.0,0.01 ± 0.01,0.02 ± 0.02,0.05 ± 0.03,0.0 ± 0.0,0.01 ± 0.01
OS Madry ↑,-,0.32 ± 0.14,0.36 ± 0.23,0.29 ± 0.17,0.32 ± 0.15,0.05 ± 0.03,0.0 ± 0.0
OTA Madry ↑,-,0.32 ± 0.14,0.36 ± 0.23,0.28 ± 0.16,0.3 ± 0.16,0.2 ± 0.13,0.21 ± 0.16
OS AlexNet ↑,0.72 ± 0.13,-,0.48 ± 0.21,0.36 ± 0.13,0.42 ± 0.13,0.02 ± 0.03,0.0 ± 0.01
OTA AlexNet ↑,0.72 ± 0.14,-,0.48 ± 0.21,0.36 ± 0.13,0.41 ± 0.15,0.15 ± 0.05,0.23 ± 0.11
OS RandomNet ↓,0.0 ± 0.0,0.0 ± 0.0,0.0 ± 0.0,0.0 ± 0.0,0.0 ± 0.0,0.0 ± 0.0,-
OTA RandomNet ↓,0.0 ± 0.0,0.0 ± 0.0,0.0 ± 0.0,0.0 ± 0.0,0.0 ± 0.0,0.0 ± 0.0,-
OS UnetE ↑,0.72 ± 0.08,0.43 ± 0.15,-,0.43 ± 0.19,0.48 ± 0.19,0.06 ± 0.04,0.0 ± 0.01
OTA UnetE ↑,0.76 ± 0.07,0.43 ± 0.15,-,0.4 ± 0.19,0.45 ± 0.2,0.17 ± 0.09,0.22 ± 0.13


# Table 4 - Cone Projection, XZero Prediction, Selected Metrics, All Classifiers, Only Good CFs


In [12]:
table4 = build_df(df,filter_valid=True,cone_projection=True,x_zero_prediction=True, aggregate=False)
table4 = table4.groupby(['classifier']).agg(mean_std)

table4 = minmax_highlight_df(table4)
table4 = filter_metrics(table4)
table4 = prettify_df(table4.T)

df2tex(table4, "table_4.tex")
table4


KeyError: "['L1 (m) ↓', 'L1.5 (k) ↓', 'L2 (k) ↓'] not found in axis"

# Table 5 - DIFF Table4 without Cone Projection


In [None]:
data1 = build_df(df,filter_valid=True,cone_projection=True,x_zero_prediction=True)
data2 = build_df(df,filter_valid=True,cone_projection=False,x_zero_prediction=True)

table5 = df_diff(data1,data2)
df2tex(table5, "table_5.tex")
table5


  dy.to_latex(name, escape=False, column_format="lcccccccc")


classifier,Madry,AlexNet,UnetE,ConvNeXt,SwinTFL,SIMCLR,RandomNet
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TA ↑,\colorgrey 0.00 \colorblack ± \colorgrey 0.00,\colorred -0.01 \colorblack ± \colorgreen 0.02,\colorred -0.04 \colorblack ± \colorgreen 0.05,\colorred -0.12 \colorblack ± \colorgreen 0.06,\colorred -0.19 \colorblack ± \colorgreen 0.12,\colorgreen 0.12 \colorblack ± \colorred -0.03,\colorgrey 0.00 \colorblack ± \colorgrey 0.00
OTA Madry ↑,-,\colorred -0.29 \colorblack ± \colorred -0.11,\colorred -0.31 \colorblack ± \colorred -0.16,\colorred -0.27 \colorblack ± \colorred -0.14,\colorred -0.29 \colorblack ± \colorred -0.14,\colorred -0.19 \colorblack ± \colorred -0.11,\colorred -0.20 \colorblack ± \colorred -0.15
OTA AlexNet ↑,\colorgrey 0.00 \colorblack ± \colorgreen 0.01,-,\colorred -0.27 \colorblack ± \colorred -0.05,\colorred -0.30 \colorblack ± \colorred -0.07,\colorred -0.30 \colorblack ± \colorred -0.07,\colorred -0.12 \colorblack ± \colorred -0.02,\colorred -0.19 \colorblack ± \colorred -0.07
OTA UnetE ↑,\colorgrey 0.00 \colorblack ± \colorgreen 0.01,\colorred -0.37 \colorblack ± \colorred -0.13,-,\colorred -0.35 \colorblack ± \colorred -0.10,\colorred -0.39 \colorblack ± \colorred -0.14,\colorred -0.15 \colorblack ± \colorred -0.08,\colorred -0.21 \colorblack ± \colorred -0.11
OTA ConvNeXt ↑,\colorgrey 0.00 \colorblack ± \colorred -0.01,\colorred -0.30 \colorblack ± \colorred -0.09,\colorred -0.27 \colorblack ± \colorred -0.07,-,\colorred -0.36 \colorblack ± \colorred -0.04,\colorred -0.06 \colorblack ± \colorred -0.04,\colorred -0.16 \colorblack ± \colorred -0.12
OTA SwinTFL ↑,\colorred -0.01 \colorblack ± \colorgrey 0.00,\colorred -0.29 \colorblack ± \colorred -0.16,\colorred -0.26 \colorblack ± \colorred -0.08,\colorred -0.34 \colorblack ± \colorred -0.12,-,\colorred -0.06 \colorblack ± \colorred -0.05,\colorred -0.14 \colorblack ± \colorred -0.14
LPIPS ↓,\colorgrey 0.00 \colorblack ± \colorgrey 0.00,\colorred -0.04 \colorblack ± \colorgrey 0.00,\colorred -0.02 \colorblack ± \colorgrey 0.00,\colorred -0.04 \colorblack ± \colorgrey 0.00,\colorred -0.04 \colorblack ± \colorred -0.01,\colorred -0.01 \colorblack ± \colorgreen 0.01,\colorred -0.04 \colorblack ± \colorgrey 0.00
FID ↓,\colorgreen 0.01 \colorblack ± \colorgreen 0.01,\colorred -0.12 \colorblack ± \colorred -0.04,\colorgreen 0.42 \colorblack ± \colorred -0.04,\colorgreen 0.17 \colorblack ± \colorgreen 0.34,\colorred -0.07 \colorblack ± \colorgreen 0.37,\colorgreen 1.18 \colorblack ± \colorgreen 1.66,\colorgreen 0.36 \colorblack ± \colorred -0.06


# Table 6 - DIFF Table4 without XZero Prediction


In [None]:
# Third table (no xzero prediction)
data1 = build_df(df,filter_valid=True,cone_projection=True,x_zero_prediction=True)
data2 = build_df(df,filter_valid=True,cone_projection=True,x_zero_prediction=False)

table6 = df_diff(data1,data2)
df2tex(table6, "table_6.tex")
table6

  dy.to_latex(name, escape=False, column_format="lcccccccc")


classifier,Madry,AlexNet,UnetE,ConvNeXt,SwinTFL,SIMCLR,RandomNet
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TA ↑,\colorgrey 0.00 \colorblack ± \colorgrey 0.00,\colorred -0.08 \colorblack ± \colorgreen 0.07,\colorgrey 0.00 \colorblack ± \colorgrey 0.00,\colorred -0.58 \colorblack ± \colorgreen 0.15,\colorred -0.55 \colorblack ± \colorgreen 0.18,\colorred -0.45 \colorblack ± \colorred -0.31,\colorgrey 0.00 \colorblack ± \colorgrey 0.00
OTA Madry ↑,-,\colorred -0.06 \colorblack ± \colorgreen 0.01,\colorred -0.02 \colorblack ± \colorgrey 0.00,\colorred -0.06 \colorblack ± \colorgrey 0.00,\colorred -0.08 \colorblack ± \colorgreen 0.01,\colorgreen 0.02 \colorblack ± \colorgreen 0.03,\colorgrey 0.00 \colorblack ± \colorgrey 0.00
OTA AlexNet ↑,\colorred -0.01 \colorblack ± \colorred -0.02,-,\colorred -0.02 \colorblack ± \colorred -0.02,\colorred -0.07 \colorblack ± \colorgrey 0.00,\colorred -0.12 \colorblack ± \colorred -0.01,\colorgreen 0.09 \colorblack ± \colorgreen 0.06,\colorgreen 0.02 \colorblack ± \colorgreen 0.01
OTA UnetE ↑,\colorgrey 0.00 \colorblack ± \colorgreen 0.02,\colorred -0.09 \colorblack ± \colorred -0.01,-,\colorred -0.15 \colorblack ± \colorred -0.06,\colorred -0.19 \colorblack ± \colorred -0.07,\colorgreen 0.05 \colorblack ± \colorgreen 0.04,\colorgrey 0.00 \colorblack ± \colorgrey 0.00
OTA ConvNeXt ↑,\colorred -0.02 \colorblack ± \colorgrey 0.00,\colorred -0.10 \colorblack ± \colorgreen 0.01,\colorred -0.01 \colorblack ± \colorgrey 0.00,-,\colorred -0.38 \colorblack ± \colorred -0.01,\colorgreen 0.07 \colorblack ± \colorgreen 0.04,\colorred -0.03 \colorblack ± \colorred -0.02
OTA SwinTFL ↑,\colorgreen 0.01 \colorblack ± \colorgrey 0.00,\colorred -0.10 \colorblack ± \colorred -0.04,\colorgreen 0.04 \colorblack ± \colorgrey 0.00,\colorred -0.30 \colorblack ± \colorred -0.06,-,\colorgreen 0.07 \colorblack ± \colorgreen 0.08,\colorred -0.01 \colorblack ± \colorred -0.01
LPIPS ↓,\colorgrey 0.00 \colorblack ± \colorgrey 0.00,\colorred -0.03 \colorblack ± \colorred -0.01,\colorgrey 0.00 \colorblack ± \colorgrey 0.00,\colorred -0.03 \colorblack ± \colorgrey 0.00,\colorred -0.03 \colorblack ± \colorred -0.01,\colorred -0.08 \colorblack ± \colorred -0.04,\colorgrey 0.00 \colorblack ± \colorgrey 0.00
FID ↓,\colorred -0.13 \colorblack ± \colorred -0.03,\colorred -0.18 \colorblack ± \colorred -0.19,\colorred -0.03 \colorblack ± \colorgrey 0.00,\colorgreen 0.37 \colorblack ± \colorgreen 0.17,\colorgreen 0.27 \colorblack ± \colorgreen 0.21,\colorred -1.96 \colorblack ± \colorred -2.66,\colorred -0.03 \colorblack ± \colorred -0.01


# Table 7 - DIFF Table4 for Bad CFs


In [None]:
# Fifth table (only bad CFs, DVCE setting)
data1 = build_df(df,filter_valid=True,cone_projection=True,x_zero_prediction=True)
data2 = build_df(df,filter_valid=False,cone_projection=True,x_zero_prediction=True)

table7 = df_diff(data1,data2)
df2tex(table7, "table_7.tex")
table7

  dy.to_latex(name, escape=False, column_format="lcccccccc")


classifier,Madry,AlexNet,UnetE,ConvNeXt,SwinTFL,SIMCLR,RandomNet
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
TA ↑,\colorred -0.16 \colorblack ± \colorgreen 0.12,\colorred -0.03 \colorblack ± \colorgreen 0.03,\colorred -0.09 \colorblack ± \colorgreen 0.09,\colorred -0.23 \colorblack ± \colorgreen 0.12,\colorred -0.35 \colorblack ± \colorgreen 0.16,\colorgreen 0.08 \colorblack ± \colorred -0.06,\colorgrey 0.00 \colorblack ± \colorgrey 0.00
OTA Madry ↑,-,\colorred -0.14 \colorblack ± \colorgreen 0.05,\colorred -0.19 \colorblack ± \colorred -0.05,\colorred -0.16 \colorblack ± \colorgrey 0.00,\colorred -0.17 \colorblack ± \colorgrey 0.00,\colorred -0.12 \colorblack ± \colorred -0.01,\colorred -0.13 \colorblack ± \colorred -0.02
OTA AlexNet ↑,\colorred -0.23 \colorblack ± \colorgreen 0.10,-,\colorred -0.29 \colorblack ± \colorred -0.03,\colorred -0.23 \colorblack ± \colorgreen 0.04,\colorred -0.24 \colorblack ± \colorgreen 0.02,\colorred -0.11 \colorblack ± \colorgreen 0.02,\colorred -0.17 \colorblack ± \colorgrey 0.00
OTA UnetE ↑,\colorred -0.22 \colorblack ± \colorgreen 0.16,\colorred -0.22 \colorblack ± \colorgreen 0.04,-,\colorred -0.25 \colorblack ± \colorred -0.02,\colorred -0.28 \colorblack ± \colorred -0.03,\colorred -0.11 \colorblack ± \colorgreen 0.03,\colorred -0.15 \colorblack ± \colorgreen 0.01
OTA ConvNeXt ↑,\colorred -0.24 \colorblack ± \colorgreen 0.09,\colorred -0.16 \colorblack ± \colorgreen 0.04,\colorred -0.35 \colorblack ± \colorred -0.05,-,\colorred -0.34 \colorblack ± \colorgreen 0.03,\colorred -0.05 \colorblack ± \colorgreen 0.05,\colorred -0.12 \colorblack ± \colorgreen 0.01
OTA SwinTFL ↑,\colorred -0.21 \colorblack ± \colorgreen 0.11,\colorred -0.18 \colorblack ± \colorred -0.05,\colorred -0.33 \colorblack ± \colorred -0.05,\colorred -0.31 \colorblack ± \colorred -0.01,-,\colorred -0.04 \colorblack ± \colorgreen 0.03,\colorred -0.09 \colorblack ± \colorgrey 0.00
LPIPS ↓,\colorgreen 0.05 \colorblack ± \colorgrey 0.00,\colorgreen 0.06 \colorblack ± \colorgrey 0.00,\colorgreen 0.03 \colorblack ± \colorgrey 0.00,\colorgreen 0.02 \colorblack ± \colorgrey 0.00,\colorgreen 0.04 \colorblack ± \colorred -0.01,\colorgreen 0.02 \colorblack ± \colorred -0.01,\colorgreen 0.02 \colorblack ± \colorgrey 0.00
FID ↓,\colorgreen 0.46 \colorblack ± \colorgreen 0.68,\colorgreen 0.41 \colorblack ± \colorgreen 0.25,\colorgreen 0.61 \colorblack ± \colorgreen 0.45,\colorgreen 0.46 \colorblack ± \colorgreen 0.37,\colorgreen 0.56 \colorblack ± \colorgreen 0.67,\colorgreen 0.34 \colorblack ± \colorgreen 1.36,\colorgreen 0.44 \colorblack ± \colorgreen 0.33
