# ImageNet Results

In [18]:
from online_attacks.utils.logger import Logger
from online_attacks.online_algorithms import AlgorithmType, StochasticSingleRef
from online_attacks.classifiers import DatasetType, MnistModel, CifarModel, load_classifier, load_dataset
from online_attacks.attacks import Attacker, AttackerParams, create_attacker, compute_attack_success_rate
from online_attacks.scripts.online_attack_params import OnlineAttackParams as Params
from online_attacks import datastream
from typing import Iterable, Any
from collections import defaultdict
import numpy as np
from scipy.stats import wilcoxon
from omegaconf import OmegaConf, MISSING
from dataclasses import dataclass
import torch
from torch.nn import CrossEntropyLoss
import os
import seaborn
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.patches as mpatches
import matplotlib.markers as mmarkers


# Single font size applied to every text element of the figures.
fontsize = 12
tex_fonts = {
    # Render all figure text through LaTeX, with STIX fonts
    "text.usetex": True,
    "mathtext.fontset": 'stix',
    "font.family": 'STIXGeneral',
    # Axis labels, base font, legend and tick labels all share `fontsize`
    **{
        size_key: fontsize
        for size_key in (
            "axes.labelsize",
            "font.size",
            "legend.fontsize",
            "xtick.labelsize",
            "ytick.labelsize",
        )
    },
}

# Apply the settings globally: matplotlib rcParams, seaborn theme, numpy print precision.
plt.rcParams.update(tex_fonts)
seaborn.set_style("whitegrid")
np.set_printoptions(precision=2)


def set_size(width, fraction=1, subplots=(1, 1)):
    """Return figure dimensions (in inches) sized for a LaTeX document.

    Parameters
    ----------
    width: float or string
            Document width in points, or one of the predefined document
            types ``'thesis'`` / ``'beamer'``
    fraction: float, optional
            Fraction of the document width the figure should occupy
    subplots: array-like, optional
            ``(rows, columns)`` of the subplot grid; the height is scaled
            by ``rows / columns``
    Returns
    -------
    fig_dim: tuple
            ``(width, height)`` of the figure in inches
    """
    # Resolve the named document classes to their width in points
    width_pt = (
        426.79135 if width == 'thesis'
        else 307.28987 if width == 'beamer'
        else width
    )

    # Conversion factor from points to inches
    inches_per_pt = 1 / 72.27
    # Golden ratio gives an aesthetically pleasing height/width proportion
    golden_ratio = (5**.5 - 1) / 2

    fig_width_in = (width_pt * fraction) * inches_per_pt
    # Scale the height by the grid shape (rows over columns)
    grid_aspect = subplots[0] / subplots[1]
    fig_height_in = fig_width_in * golden_ratio * grid_aspect

    return (fig_width_in, fig_height_in)


class Dataframe:
    """Accumulates evaluation metrics of online algorithms and summarises them.

    Attributes (all set by the code below):
        data: dict mapping ``"<ALGORITHM>.<metric>"`` to a flat list of values
            collected over every aggregated run.
        statistics: dict with the same keys, filled by ``compute_statistics``.
        algorithms: the algorithms passed to the constructor (stored as given).
        num_runs: number of evaluation results successfully loaded so far.
        max_runs: optional cap on ``num_runs``.
        dataframe: long-format ``pandas.DataFrame`` of all results; only
            available after ``aggregate_all`` or ``merge``.
    """
    METRICS = ["fool_rate", "num_indices", "knapsack_ratio", "comp_ratio"]
    ALGORITHM = list(AlgorithmType)

    def __init__(self, list_algorithms=None, list_metrics=None, max_runs=None):
        """Create empty containers for every (algorithm, metric) pair.

        Args:
            list_algorithms: iterable of algorithm enum members (each must
                expose ``.name``); defaults to every ``AlgorithmType``.
            list_metrics: iterable of metric names; defaults to ``METRICS``.
            max_runs: stop loading evaluation results once this many have
                been loaded (``None`` means no limit).
        """
        if list_metrics is None:
            list_metrics = self.METRICS

        if list_algorithms is None:
            list_algorithms = self.ALGORITHM

        self.data = {}
        self.statistics = {}
        self.algorithms = list_algorithms
        for algorithm in list_algorithms:
            for metric in list_metrics:
                field = "%s.%s" % (algorithm.name, metric)
                self.data[field] = []
                self.statistics[field] = {}

        self.num_runs = 0
        self.max_runs = max_runs

    def __getitem__(self, key):
        """Return the list of values stored under ``"<ALGORITHM>.<metric>"``."""
        return self.data[key]

    def aggregate_runs(self, path, params):
        """Load every evaluation found under ``path`` and record its metrics.

        Args:
            path: location handed to ``Logger.list_all_eval``.
            params: experiment configuration; ``online_params.K``,
                ``attacker_type`` and ``dataset`` are read from it.

        Returns:
            A list with one long-format ``DataFrame`` per loaded run.

        Runs that fail to load are skipped (best effort). ``fool_rate`` values
        are divided by ``K``; other metrics are stored unchanged.
        """
        list_dataframes = []
        for run in Logger.list_all_eval(path):
            if self.max_runs is not None and self.num_runs >= self.max_runs:
                break
            try:
                results = Logger.load_eval_results(run)
            except Exception:
                # Unreadable results are skipped on purpose. Catching Exception
                # instead of a bare ``except`` lets KeyboardInterrupt/SystemExit
                # propagate so a long aggregation can still be interrupted.
                continue
            self.num_runs += 1

            list_df = []
            # results["metrics"] is iterated as {algorithm name: {metric: values}}
            for algorithm, _results in results["metrics"].items():
                dataframe = pd.DataFrame()
                for metric, value in _results.items():
                    field = "%s.%s" % (algorithm, metric)
                    if field not in self.data:
                        # Algorithm or metric this instance does not track
                        continue
                    array = np.array(value)
                    if metric == "fool_rate":
                        array = array / params.online_params.K
                    self.data[field] += list(array)
                    dataframe[metric] = array
                dataframe["algorithm"] = algorithm
                dataframe["K"] = params.online_params.K
                dataframe["attacker"] = params.attacker_type
                dataframe["dataset"] = params.dataset
                list_df.append(dataframe)
            dataframe = pd.concat(list_df)
            # Column assignment aligns on the index, so every row receives the
            # RANDOM baseline fool rate that shares its (per-algorithm) row index.
            is_random = dataframe["algorithm"] == AlgorithmType.RANDOM.name
            dataframe["random_fool_rate"] = dataframe.loc[is_random, "fool_rate"]
            list_dataframes.append(dataframe)
        return list_dataframes

    def aggregate_logger(self, logger, filters=None, params=None):
        """Aggregate the runs of every evaluated model listed by ``logger``.

        Args:
            logger: object exposing ``list_all_eval_models()``.
            filters: optional iterable of strings; a model path is kept only
                if at least one of them is a substring of it. ``None`` keeps all.
            params: experiment configuration forwarded to ``aggregate_runs``.

        Returns:
            The concatenated list of per-run ``DataFrame`` objects.
        """
        list_dataframes = []
        for path in logger.list_all_eval_models():
            if filters is None or any(name in path for name in filters):
                list_dataframes += self.aggregate_runs(path, params)
        return list_dataframes

    def aggregate_all(self, path, filters=None, model_eval=None):
        """Aggregate every experiment under ``path`` matching ``filters``.

        Args:
            path: root directory handed to ``Logger.list_all_logger``.
            filters: mapping from a dotted config key to the value the
                experiment's hyper-parameters must have. ``None`` or an empty
                mapping keeps every experiment.
            model_eval: optional path filters forwarded to ``aggregate_logger``.

        Raises:
            ValueError: if no evaluation result could be aggregated.

        The combined results are stored in ``self.dataframe``.
        """
        if filters is None:
            filters = {}

        list_dataframes = []
        for exp_id in Logger.list_all_logger(path):
            logger = Logger(path, exp_id)
            try:
                params = logger.load_hparams()
            except FileNotFoundError:
                print("Can't load hparams with id=%s" % exp_id)
                # Without this, `params` would be unbound (first experiment) or
                # silently reuse the previous experiment's hyper-parameters.
                continue
            params = OmegaConf.merge(Params, params)

            mismatch = any(
                OmegaConf.select(params, key) != value
                for key, value in filters.items()
            )
            if not mismatch:
                list_dataframes += self.aggregate_logger(logger, model_eval, params)

        if not list_dataframes:
            # pd.concat([]) raises a ValueError too, but without any context.
            raise ValueError(
                "No evaluation results found under %r matching filters %r"
                % (path, dict(filters))
            )
        self.dataframe = pd.concat(list_dataframes)

    def compute_statistics(self, size=None):
        """Compute mean, standard deviation and standard error for every key.

        Args:
            size: if given, the values are randomly split into ``size`` groups
                (``np.array_split`` semantics) and the statistics are computed
                over the group means instead of the raw values.

        Returns:
            ``self.statistics``, with ``"mean"``, ``"std"`` and ``"ste"`` set
            for every key.
        """
        for key in list(self.data.keys()):
            array = self.data[key]
            if size is not None:
                # Shuffle a copy: shuffling self.data[key] in place would
                # destroy the per-run pairing between algorithms that
                # compute_wilcoxon / compute_table / average_improvment need.
                shuffled = np.random.permutation(array)
                array = [np.mean(chunk) for chunk in np.array_split(shuffled, size)]
            self.statistics[key]["mean"] = np.mean(array)
            self.statistics[key]["std"] = np.std(array)
            self.statistics[key]["ste"] = self.statistics[key]["std"]/np.sqrt(len(array))
        return self.statistics

    def average_improvment(self, metric="fool_rate"):
        """Print the mean paired difference of ``metric`` against RANDOM.

        The difference is averaged over every run of every algorithm other
        than RANDOM itself. Prints ``nan`` when there is nothing to average.
        """
        results = 0
        num = 0
        baseline = np.array(self.data["%s.%s" % (AlgorithmType.RANDOM.name, metric)])
        for algorithm in self.algorithms:
            # Compare names on both sides: comparing the enum member with the
            # name string never excluded RANDOM, which diluted the average
            # with its all-zero self-difference.
            if algorithm.name != AlgorithmType.RANDOM.name:
                diff = np.array(self.data["%s.%s" % (algorithm.name, metric)]) - baseline
                results += diff.sum()
                num += len(diff)
        print(results/num if num else float("nan"))

    def compute_wilcoxon(self, metric):
        """Run a one-sided Wilcoxon signed-rank test for every algorithm pair.

        Returns:
            ``(w, p)``: two square arrays where entry ``[i, j]`` holds the
            statistic / p-value of testing algorithm ``i`` against algorithm
            ``j`` with ``alternative='greater'``. The diagonal is left at zero.
        """
        w = np.zeros((len(self.algorithms), len(self.algorithms)))
        p = np.zeros((len(self.algorithms), len(self.algorithms)))
        for i, name_1 in enumerate(self.algorithms):
            array_1 = self.data["%s.%s" % (name_1.name, metric)]
            for j, name_2 in enumerate(self.algorithms):
                if i == j:
                    continue
                array_2 = self.data["%s.%s" % (name_2.name, metric)]
                w[i, j], p[i, j] = wilcoxon(array_1, array_2, alternative='greater')
        return w, p

    def compute_table(self, metric):
        """Build a table of how often algorithm ``i`` strictly beats algorithm ``j``.

        Returns:
            A square object array of strings ``"<pct> % (<wins> / <total>)"``.
        """
        table = np.zeros((len(self.algorithms), len(self.algorithms)), dtype=object)
        for i, name_1 in enumerate(self.algorithms):
            array_1 = self.data["%s.%s" % (name_1.name, metric)]
            for j, name_2 in enumerate(self.algorithms):
                array_2 = self.data["%s.%s" % (name_2.name, metric)]
                num = np.greater(array_1, array_2).sum()
                table[i, j] = "%.1f %% (%i / %i)"%(num/len(array_1)*100, num, len(array_1))
        return table

    def get_statistics(self, algorithm=None, metric=None):
        """Return the subset of ``self.statistics`` for an algorithm and/or metric.

        Args:
            algorithm: enum member; only keys whose algorithm part equals
                ``algorithm.name`` are kept.
            metric: string; only keys whose metric part contains it are kept
                (substring match, as before, but no longer tested against the
                algorithm part of the key).

        Returns:
            A new dict (the per-key statistic dicts themselves are shared).
        """
        selected = {}
        for key, value in self.statistics.items():
            key_algorithm, _, key_metric = key.partition(".")
            # Exact comparison: a substring test made STOCHASTIC_VIRTUAL also
            # select STOCHASTIC_VIRTUAL_REF.
            if algorithm is not None and key_algorithm != algorithm.name:
                continue
            if metric is not None and metric not in key_metric:
                continue
            selected[key] = value
        return selected

    def plot(self, ax, s=None, alpha=0.5):
        """Scatter each algorithm's fool rate against the RANDOM baseline.

        Args:
            ax: matplotlib axes to draw on.
            s: marker size forwarded to ``seaborn.scatterplot``.
            alpha: marker opacity forwarded to ``seaborn.scatterplot``.

        RANDOM and OFFLINE rows are excluded; a dashed diagonal is added as
        the "no improvement" reference.
        """
        df = self.dataframe[self.dataframe["algorithm"]!=AlgorithmType.RANDOM.name]
        df = df[df["algorithm"]!=AlgorithmType.OFFLINE.name]
        df = df.rename(columns={"algorithm": "Algorithm", "attacker": "Attacker",
                           "random_fool_rate": "Naive Fool Rate", "fool_rate": "Fool Rate"})
        rename = {AlgorithmType.STOCHASTIC_SINGLE_REF.name: r"\textsc{Single-Ref}",
                  AlgorithmType.STOCHASTIC_MODIFIED_VIRTUAL.name: r"\textsc{Virtual+}",
                  AlgorithmType.STOCHASTIC_OPTIMISTIC.name: r"\textsc{Optimistic}",
                  AlgorithmType.STOCHASTIC_VIRTUAL.name: r"\textsc{Virtual}",
                 Attacker.PGD_ATTACK: "PGD",
                 Attacker.FGSM_ATTACK: "FGSM"}
        df = df.replace(rename)
        g = seaborn.scatterplot(data=df, x="Naive Fool Rate", y="Fool Rate", hue="Algorithm", style="Attacker", ax=ax, s=s, alpha=alpha)
        g.set(ylabel=None)
        # NOTE(review): the diagonal spans 0..100 while aggregate_runs stores
        # fool_rate divided by K — confirm the intended axis range.
        x = np.linspace(0, 100, 2)
        # x and y must be keywords: the first positional parameter of
        # seaborn.lineplot is `data`, and recent seaborn rejects positional x/y.
        seaborn.lineplot(x=x, y=x, color="black", linestyle='--', ax=ax)

    @staticmethod
    def merge(dataframe):
        """Combine several ``Dataframe`` objects into a new one.

        Args:
            dataframe: non-empty sequence of ``Dataframe`` instances that have
                a ``dataframe`` attribute. The algorithms of the first one
                define the keys of the result.

        Returns:
            A new ``Dataframe`` holding the concatenated data and run count.
        """
        new_df = Dataframe(dataframe[0].algorithms)
        list_dataframes = []
        for df in dataframe:
            for key in new_df.data:
                new_df.data[key] += df[key]
            list_dataframes.append(df.dataframe)
            new_df.num_runs += df.num_runs
        new_df.dataframe = pd.concat(list_dataframes)
        return new_df

# Random Eval Results

In [26]:
# Aggregate the evaluation results stored under `path`, keeping only experiments
# whose hyper-parameters have online_params.K == K and the FGSM attacker.
# NOTE(review): absolute, user-specific path — this cell only runs where that directory exists.
path = "/home/mila/b/bosejoey/results_iclr/non_robust/imagenet/fgsm/"
dataframe = Dataframe(AlgorithmType, max_runs=1000)  # load at most 1000 evaluation results
K = 2
dataframe.aggregate_all(path, filters={"online_params.K": K, "attacker_type": Attacker.FGSM_ATTACK})
print(dataframe.num_runs)
# Fills dataframe.statistics (mean / std / ste per "<ALGORITHM>.<metric>" key)
stats = dataframe.compute_statistics()

4


In [27]:
# Print one LaTeX table row per algorithm: "<key> & <mean> $\pm$ <standard error>".
# The format strings are raw strings so that "\p" is not parsed as an (invalid)
# escape sequence; the printed text is unchanged.
# NOTE: `std` holds the standard error ("ste"), not the standard deviation.
stats = dataframe.get_statistics(metric="comp_ratio")
for k in stats:
    mean = stats[k]["mean"]*K
    std = stats[k]["ste"]*K
    print(r"%s & %.3f $\pm$ %.3f"%(k, mean, std))

print("-------------------")
stats = dataframe.get_statistics(metric="knapsack_ratio")
for k in stats:
    # Reported as a percentage
    mean = stats[k]["mean"]*100
    std = stats[k]["ste"]*100
    print(r"%s & %.1f $\pm$ %.1f"%(k, mean, round(std, 1)))

print("-------------------")
stats = dataframe.get_statistics(metric="fool_rate")
for k in stats:
    # Reported as a percentage
    mean = stats[k]["mean"]*100
    std = stats[k]["ste"]*100
    print(r"%s & %.1f $\pm$ %.1f"%(k, mean, round(std, 1)))

OFFLINE.comp_ratio & 0.250 $\pm$ 0.217
STOCHASTIC_VIRTUAL.comp_ratio & 0.000 $\pm$ 0.000
STOCHASTIC_OPTIMISTIC.comp_ratio & 0.000 $\pm$ 0.000
STOCHASTIC_MODIFIED_VIRTUAL.comp_ratio & 0.000 $\pm$ 0.000
STOCHASTIC_VIRTUAL_REF.comp_ratio & 0.000 $\pm$ 0.000
STOCHASTIC_SINGLE_REF.comp_ratio & 0.250 $\pm$ 0.217
RANDOM.comp_ratio & 0.000 $\pm$ 0.000
-------------------
OFFLINE.knapsack_ratio & 68.0 $\pm$ 6.9
STOCHASTIC_VIRTUAL.knapsack_ratio & 26.6 $\pm$ 9.4
STOCHASTIC_OPTIMISTIC.knapsack_ratio & 40.4 $\pm$ 13.5
STOCHASTIC_MODIFIED_VIRTUAL.knapsack_ratio & 42.3 $\pm$ 14.6
STOCHASTIC_VIRTUAL_REF.knapsack_ratio & 18.6 $\pm$ 3.7
STOCHASTIC_SINGLE_REF.knapsack_ratio & 27.3 $\pm$ 17.7
RANDOM.knapsack_ratio & 10.1 $\pm$ 2.6
-------------------
OFFLINE.fool_rate & 100.0 $\pm$ 0.0
STOCHASTIC_VIRTUAL.fool_rate & 75.0 $\pm$ 12.5
STOCHASTIC_OPTIMISTIC.fool_rate & 87.5 $\pm$ 10.8
STOCHASTIC_MODIFIED_VIRTUAL.fool_rate & 87.5 $\pm$ 10.8
STOCHASTIC_VIRTUAL_REF.fool_rate & 75.0 $\pm$ 12.5
STOCHASTIC_SINGLE_

In [129]:
# Aggregate the evaluation results stored under `path`, keeping only MNIST
# experiments with online_params.K == K and the PGD attacker.
# NOTE(review): absolute, user-specific path — this cell only runs where that directory exists.
path = "/checkpoint/hberard/OnlineAttack/results_icml/random-eval-icml-final/different_model_type/pgd"
dataframe = Dataframe(AlgorithmType, max_runs=1000)  # load at most 1000 evaluation results
K = 4
dataframe.aggregate_all(path, filters={"dataset": DatasetType.MNIST, "online_params.K": K, "attacker_type": Attacker.PGD_ATTACK})
print(dataframe.num_runs)
# Fills dataframe.statistics (mean / std / ste per "<ALGORITHM>.<metric>" key)
stats = dataframe.compute_statistics()

899


In [130]:
# Print one LaTeX table row per algorithm: "<key> & <mean> $\pm$ <standard error>".
# The format strings are raw strings so that "\p" is not parsed as an (invalid)
# escape sequence; the printed text is unchanged.
# NOTE: `std` holds the standard error ("ste"), not the standard deviation.
stats = dataframe.get_statistics(metric="comp_ratio")
for k in stats:
    mean = stats[k]["mean"]*K
    std = stats[k]["ste"]*K
    print(r"%s & %.3f $\pm$ %.3f"%(k, mean, std))

print("-------------------")
stats = dataframe.get_statistics(metric="knapsack_ratio")
for k in stats:
    # Reported as a percentage
    mean = stats[k]["mean"]*100
    std = stats[k]["ste"]*100
    print(r"%s & %.1f $\pm$ %.1f"%(k, mean, round(std, 1)))

print("-------------------")
stats = dataframe.get_statistics(metric="fool_rate")
for k in stats:
    # Reported as a percentage
    mean = stats[k]["mean"]*100
    std = stats[k]["ste"]*100
    print(r"%s & %.1f $\pm$ %.1f"%(k, mean, round(std, 1)))

OFFLINE.comp_ratio & 0.171 $\pm$ 0.016
STOCHASTIC_VIRTUAL.comp_ratio & 0.089 $\pm$ 0.010
STOCHASTIC_OPTIMISTIC.comp_ratio & 0.119 $\pm$ 0.013
STOCHASTIC_MODIFIED_VIRTUAL.comp_ratio & 0.132 $\pm$ 0.013
STOCHASTIC_VIRTUAL_REF.comp_ratio & 0.006 $\pm$ 0.002
STOCHASTIC_SINGLE_REF.comp_ratio & 0.119 $\pm$ 0.013
RANDOM.comp_ratio & 0.001 $\pm$ 0.001
-------------------
OFFLINE.knapsack_ratio & 49.6 $\pm$ 1.1
STOCHASTIC_VIRTUAL.knapsack_ratio & 35.5 $\pm$ 0.9
STOCHASTIC_OPTIMISTIC.knapsack_ratio & 39.6 $\pm$ 1.0
STOCHASTIC_MODIFIED_VIRTUAL.knapsack_ratio & 42.7 $\pm$ 1.0
STOCHASTIC_VIRTUAL_REF.knapsack_ratio & 25.0 $\pm$ 0.7
STOCHASTIC_SINGLE_REF.knapsack_ratio & 40.8 $\pm$ 1.0
RANDOM.knapsack_ratio & 21.0 $\pm$ 0.6
-------------------
OFFLINE.fool_rate & 79.3 $\pm$ 1.0
STOCHASTIC_VIRTUAL.fool_rate & 70.0 $\pm$ 1.1
STOCHASTIC_OPTIMISTIC.fool_rate & 72.1 $\pm$ 1.1
STOCHASTIC_MODIFIED_VIRTUAL.fool_rate & 74.5 $\pm$ 1.1
STOCHASTIC_VIRTUAL_REF.fool_rate & 66.1 $\pm$ 1.2
STOCHASTIC_SINGLE_REF.fool

In [138]:
# Pairwise one-sided Wilcoxon tests on the fool rate.
# `table` is the (statistics, p-values) tuple returned by compute_wilcoxon.
table = dataframe.compute_wilcoxon("fool_rate")
table

(array([[0.00e+00, 4.68e+05, 4.81e+05, 3.82e+05, 4.84e+05, 4.92e+05],
        [3.25e+04, 0.00e+00, 4.62e+05, 1.04e+04, 4.89e+05, 5.00e+05],
        [1.98e+04, 1.34e+03, 0.00e+00, 2.38e+03, 4.88e+05, 4.97e+05],
        [8.79e+04, 4.85e+05, 4.78e+05, 0.00e+00, 4.99e+05, 5.00e+05],
        [1.05e+04, 7.20e+01, 2.01e+03, 1.39e+03, 0.00e+00, 4.92e+05],
        [8.76e+03, 6.27e+02, 9.57e+02, 8.19e+02, 8.76e+03, 0.00e+00]]),
 array([[0.00e+000, 7.80e-126, 9.33e-141, 2.98e-064, 4.29e-151, 2.70e-154],
        [1.00e+000, 0.00e+000, 1.41e-157, 1.00e+000, 1.22e-163, 1.06e-164],
        [1.00e+000, 1.00e+000, 0.00e+000, 1.00e+000, 4.28e-161, 8.75e-164],
        [1.00e+000, 2.68e-151, 4.11e-159, 0.00e+000, 1.05e-163, 1.89e-164],
        [1.00e+000, 1.00e+000, 1.00e+000, 1.00e+000, 0.00e+000, 2.71e-154],
        [1.00e+000, 1.00e+000, 1.00e+000, 1.00e+000, 1.00e+000, 0.00e+000]]))

# Random Eval (same architecture)

In [167]:
# Aggregate the same-architecture evaluation results, keeping only MNIST
# experiments with online_params.K == K and the PGD attacker.
# NOTE(review): absolute, user-specific path — this cell only runs where that directory exists.
path = "/checkpoint/hberard/OnlineAttack/results_icml/random-eval-icml-final/same_model_type"
dataframe = Dataframe(AlgorithmType, max_runs=1000)  # load at most 1000 evaluation results
K = 1000
dataframe.aggregate_all(path, filters={"dataset": DatasetType.MNIST, "online_params.K": K, "attacker_type": Attacker.PGD_ATTACK})
# Number of evaluation results that were loaded
dataframe.num_runs

543

In [168]:
# Print one LaTeX table row per algorithm with the fool rate as a percentage:
# "<key> & <mean> $\pm$ <standard error>". The format string is a raw string so
# that "\p" is not parsed as an (invalid) escape sequence; output is unchanged.
stats = dataframe.compute_statistics()
stats = dataframe.get_statistics(metric="fool_rate")
for k in stats:
    mean = stats[k]["mean"]*100
    std = stats[k]["ste"]*100  # standard error ("ste"), despite the name
    print(r"%s & %.1f $\pm$ %.1f"%(k, mean, std))

OFFLINE.fool_rate & 97.4 $\pm$ 0.1
STOCHASTIC_VIRTUAL.fool_rate & 94.4 $\pm$ 0.2
STOCHASTIC_OPTIMISTIC.fool_rate & 93.5 $\pm$ 0.2
STOCHASTIC_MODIFIED_VIRTUAL.fool_rate & 97.0 $\pm$ 0.1
STOCHASTIC_SINGLE_REF.fool_rate & 92.0 $\pm$ 0.3
RANDOM.fool_rate & 90.0 $\pm$ 0.3


In [216]:
# Pairwise one-sided Wilcoxon tests on the fool rate.
# `table` is the (statistics, p-values) tuple returned by compute_wilcoxon.
table = dataframe.compute_wilcoxon("fool_rate")
table

(array([[   0. , 1891. , 1770. ,  231. ,  300. , 3403. ],
        [   0. ,    0. , 1652. ,  315. ,  774. , 3058. ],
        [   0. , 1429. ,    0. ,  301. ,  463.5, 3220.5],
        [   0. , 1701. , 1977. ,    0. ,  511. , 3410.5],
        [   0. , 1854. , 1489.5,  269. ,    0. , 3337. ],
        [   0. ,  428. ,  520.5,   75.5,  149. ,    0. ]]),
 array([[0.00e+00, 1.93e-12, 5.27e-12, 1.34e-05, 7.03e-06, 1.26e-15],
        [1.00e+00, 0.00e+00, 2.84e-01, 1.00e+00, 9.99e-01, 9.21e-10],
        [1.00e+00, 7.16e-01, 0.00e+00, 1.00e+00, 1.00e+00, 2.28e-09],
        [1.00e+00, 6.06e-07, 4.51e-08, 0.00e+00, 4.28e-02, 1.36e-14],
        [1.00e+00, 9.97e-04, 1.31e-04, 9.57e-01, 0.00e+00, 1.62e-13],
        [1.00e+00, 1.00e+00, 1.00e+00, 1.00e+00, 1.00e+00, 0.00e+00]]))

In [237]:
class Test:
    """Scratch class exposing a single static method."""

    @staticmethod
    def test():
        """Print the literal string ``test``."""
        message = "test"
        print(message)

In [239]:
# Calls the static method through an instance; it prints "test".
Test().test()

test


In [6]:
import math
k = 1000
# NOTE(review): as written, the trailing "- 1" sits inside the square root and
# cancels the leading "1 +", so this evaluates int(10 + 0.5*sqrt(4*(k - 96))).
# If the intent was 10 + 0.5*(sqrt(1 + 4*(k - 96)) - 1), the closing parenthesis
# is misplaced — confirm before relying on the value.
int(10 + 0.5*(math.sqrt(1 + 4*(k - 96) - 1)))

40