# Condensed Tables first
## Helpers


In [None]:
import json
# Datasets whose results are aggregated; used as top-level keys of the results dict.
datasets = ["cifar10", "imagenet"]
# Model groups reported separately: True -> defended models, False -> undefended models.
defendeds = [True, False]
# Threat-model key selected inside results[dataset].
threat_model = "Linf"
# Experiment families; these are exactly the values experiment_mapper() can return.
experiments = ["base", "worst_defended", "best_defended"]
import csv
import numpy as np
from global_settings import surrogate_models_used, not_working_models, worst_and_best_model_names, undefended_model_names
from run import attack_list
from utils import get_not_working_model_list


def get_models_to_ignore(dataset):
    """Collect the models that must be left out of the aggregates for ``dataset``.

    A model is excluded when it is listed, for this dataset, in any of:

    * ``surrogate_models_used`` (used as a surrogate in some experiment),
    * ``not_working_models`` (any entry at either nesting level),
    * ``worst_and_best_model_names`` (the values of the per-dataset mapping).

    Returns a list without duplicates, in order of first appearance.
    """

    def _candidates():
        # Surrogates: one mapping per key, each optionally holding this dataset.
        for per_dataset in surrogate_models_used.values():
            if dataset in per_dataset:
                yield from per_dataset[dataset]

        # Not-working models: two levels of nesting below the dataset.
        if dataset in not_working_models:
            for inner in not_working_models[dataset].values():
                for names in inner.values():
                    yield from names

        # Worst/best models: the mapping's values are the model names.
        if dataset in worst_and_best_model_names:
            yield from worst_and_best_model_names[dataset].values()

    ignored = []
    for candidate in _candidates():
        if candidate not in ignored:
            ignored.append(candidate)
    return ignored


def experiment_mapper(e: str):
    """Map a raw experiment name onto its experiment family.

    Names starting with "worst_defended" or "best_defended" map to that
    prefix; every other name maps to "base".
    """
    for family in ("worst_defended", "best_defended"):
        if e.startswith(family):
            return family
    return "base"
    
def get_base_name(name):
    """Return the canonical display name of the attack encoded in ``name``.

    ``name`` is split on spaces, ``-`` and ``+``; the lower-cased tokens are
    matched against the entries of ``attack_list``. When nothing matches but
    both ``vni`` and ``fgsm`` appear as tokens, the attack is taken to be
    ``vnifgsm``.

    Returns:
        The matched attack identifier in upper case. If ``name`` contains
        exactly one ``(``-delimited piece ending in ``)``, that piece is
        appended as ``" (<piece>"`` (the piece already carries the closing
        parenthesis).

    Raises:
        AssertionError: if the tokens do not identify exactly one attack.
        AttributeError: if ``name`` contains more than one parenthesised
            variant.
    """
    # Replacing "-" and "+" by spaces and splitting once yields the same
    # tokens as splitting on each delimiter in turn.
    tokens = [
        token.lower()
        for token in name.replace("-", " ").replace("+", " ").split(" ")
    ]

    attacks_found = [attack for attack in attack_list if attack in tokens]
    if not attacks_found and "vni" in tokens and "fgsm" in tokens:
        attacks_found.append("vnifgsm")

    # Raise explicitly with a real message: the previous
    # ``assert cond, print(...)`` produced an AssertionError whose message was
    # None (print's return value) and wrote the details to stdout instead.
    if len(attacks_found) != 1:
        raise AssertionError(
            f"expected exactly one attack in {name!r}, "
            f"found {attacks_found} among tokens {tokens}"
        )

    base = attacks_found[0].upper()
    variants = [piece for piece in name.split("(") if piece.endswith(")")]
    if not variants:
        return base
    if len(variants) == 1:
        return base + " (" + variants[0]
    raise AttributeError(
        f"expected at most one parenthesised variant in {name!r}, found {variants}"
    )
    
def get_undefended_models(dataset):
    """Return the names of the undefended models for ``dataset``.

    The list is ``undefended_model_names[dataset]`` followed by the name of
    the undefended model that robustbench includes: "Standard" for cifar10,
    "Standard_R50" for any other dataset.
    """
    if dataset == "cifar10":
        robustbench_standard = "Standard"
    else:
        robustbench_standard = "Standard_R50"
    return undefended_model_names[dataset] + [robustbench_standard]


## Condensed Tables

For each attack variant and each dataset, we compute the average ASR of each experiment, together with the number of models included in the aggregation (kept for verification purposes).
Because every attack is now evaluated on the same set of models (i.e. the support is identical), the degradation (diff) can be computed as the difference of the averages rather than as the average of the differences; the two quantities coincide in this specific case.

In [2]:
# Load the raw results. The accesses below assume the layout
# results[dataset][threat_model][attack][model][experiment_name] -> value.
with open("../results/results.json", "r") as f:
    results = json.load(f)

# aggregate_results[experiment][dataset][defended][attack] -> {"adv_acc": mean, "deg": ...}
# counts[experiment][dataset][defended][attack] -> number of models averaged
aggregate_results = {}
counts = {}

# First pass: average the per-model values of every attack, separately for
# each experiment family, dataset and defended/undefended model group.
for experiment in experiments:
    aggregate_results[experiment] = {}
    counts[experiment] = {}
    for dataset in datasets:
        undefended_models = get_undefended_models(dataset)
        models_to_ignore = get_models_to_ignore(dataset)
        aggregate_results[experiment][dataset] = {}
        counts[experiment][dataset] = {}
        for defended in defendeds:
            group_results = aggregate_results[experiment][dataset][defended] = {}
            group_counts = counts[experiment][dataset][defended] = {}
            relevant_results = results[dataset][threat_model]
            attacks = list(relevant_results.keys())
            for attack in attacks:
                adv_acc_sum = 0
                n_models = 0
                for model, per_experiment in relevant_results[attack].items():
                    if model in models_to_ignore:
                        continue
                    # Keep defended models when defended is True and
                    # undefended models when it is False.
                    if defended == (model in undefended_models):
                        continue
                    # Only the first entry belonging to this experiment
                    # family contributes for a given model.
                    for exp, val in per_experiment.items():
                        if experiment_mapper(exp) == experiment:
                            adv_acc_sum += val
                            n_models += 1
                            break
                group_counts[attack] = n_models
                # With no contributing model the average is left at 0.
                group_results[attack] = {
                    "adv_acc": adv_acc_sum / n_models if n_models else 0
                }

# Second pass: the degradation needs the "Benign" average, which is only
# guaranteed to exist once the first pass is complete (Benign may not be the
# first key). The CSV rows are built in the same pass.
csv_table = [["Dataset", "Defended", "Experiment", "Attack", "Deg", "Adv Acc", "Count"]]
for experiment in experiments:
    for dataset in datasets:
        for defended in defendeds:
            relevant_results = results[dataset][threat_model]
            attacks = list(relevant_results.keys())
            for attack in attacks:
                attack_count = counts[experiment][dataset][defended][attack]
                if attack_count == 0:
                    continue
                # We use the base benign performance because the others are meaningless.
                benign_count = counts["base"][dataset][defended]["Benign"]
                # Difference of averages is only valid when both averages
                # were taken over the same number of models.
                assert attack_count == benign_count, (
                    f"{attack} {dataset} {defended} {experiment} "
                    f"{attack_count} {benign_count}"
                )
                entry = aggregate_results[experiment][dataset][defended][attack]
                benign_acc = aggregate_results["base"][dataset][defended]["Benign"]["adv_acc"]
                entry["deg"] = benign_acc - entry["adv_acc"]
                csv_table.append([
                    dataset,
                    defended,
                    experiment,
                    attack,
                    f"{entry['deg'] * 100:.2f}",
                    f"{entry['adv_acc'] * 100:.2f}",
                    attack_count,
                ])

# newline="" lets the csv module control line endings; without it, extra
# blank rows appear on platforms that translate "\n" to "\r\n".
with open("outputs/results.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(csv_table)

            
# print(counts)
# since 
                
            