# Report on Adversarial Examples
This notebook should:
1. Train new models with different parameters.
2. For each model, generate both a **targeted** and an **untargeted** PGD attack.
3. Plot the results.


In [None]:
%load_ext autoreload
%autoreload 2

import subprocess
from pathlib import Path 
from matplotlib import pyplot as plt 
import json 
import optuna 
from model_utils import find_best_model, choose_model
import numpy as np
import torch

# Root folder under which the per-configuration model directories are looked up
prefix = 'use_test_data/'
# Sub-folder, relative to a model directory, in which the attack results are stored
adv_dir = 'attacks/'

## Utilities for reporting accuracies

In [None]:
def load_stats(dir):
    """
    Parse the JSON file located at `dir` and return the decoded content
    """
    with open(dir) as handle:
        return json.load(handle)

def get_stats_regular(which=1, duration=100, model='max', root=prefix, dir=None):
    """ 
    Return the content of `stats.json` of one trained model.

    The folder that is searched is either given directly through `dir`, or derived as
    `<root>/which=<which>_duration=<duration>`. Inside that folder `choose_model` picks the model
    selected by `model` ('max' for latest).
    """
    search_dir = (Path(root) / f'{which=}_{duration=}') if dir is None else dir
    model_dir = Path(choose_model(search_dir, model))
    return load_stats(model_dir / 'stats.json')

def get_stats_attack(which=1, duration=100, model='max', root=prefix, adv_dir=adv_dir, dir=None, attacks=['targeted', 'targeted_increase', 'targeted_decrease', 'untargeted']):
    """ 
    Load adversarial stats of model specified directly using the directory that holds the attack folders,
    or using which, duration and model name ('max' for latest).

    Every entry of the attack directory whose name contains `attack_type='<attack>'` for one of the
    requested `attacks` is read; the text after the last '=' of its name is used as perturbation size.

    Returns a dict of dicts, where the outer dict has attack types as keys and the inner uses
    perturbation sizes (as strings) as keys. The inner values are the decoded `adv_stats.json` files.
    If that file is missing, a placeholder with `acc` = NaN and an all-zero 3x3 `cm` is stored instead.
    """
    if dir is None:
        # dir is not specified so we find it based on other information
        dir = Path(root) / f'{which=}_{duration=}'
        model = choose_model(dir, model)
        dir = Path(model) / adv_dir
    # an explicitly passed directory may be a plain string
    dir = Path(dir)
    # create a dict of dicts based on attack type and perturbation size
    retval = {}
    for p in dir.iterdir():
        # last path component, independent of the platform's path separator
        key = p.name
        perturbation_size = key.split('=')[-1]
        for attack in attacks:
            # the closing quote keeps 'targeted' from matching 'targeted_increase'
            if f"attack_type='{attack}'" not in key:
                continue
            try:
                stats = load_stats(p / 'adv_stats.json')
            except FileNotFoundError:
                # attack folder exists but has no results (yet)
                stats = {'acc': np.nan, 'cm': np.zeros((3, 3))}
            retval.setdefault(attack, {})[perturbation_size] = stats

    return retval

## Train New Models
Each with a specific set of parameters, including data parameters

In [None]:
def make_args(param):
    """
    Flatten a parameter dictionary into command line arguments:
    every item contributes the pair '--<key>', str(<value>), in dictionary order
    """
    return [token for key, value in param.items() for token in ('--' + key, str(value))]

# Grid of data settings: one model configuration is created per (duration, which) pair
durations = [10] # [50, 100, 150]
whichs = [1]
# Values shared by every configuration; they are passed to lstm.py as '--<name> <value>' arguments
n_classes = 5
n_epochs = 30
# hs, dropout and num_layers are only starting points: the optuna search overwrites them per trial
hs = 64 
dropout = 1/3
num_layers = 2
thresholds = 2 # move failure classes closer together
# Time budget in seconds given to each optuna study as its timeout
opt_time_seconds = 1 # 15*60
# Maps the model directory '<prefix>which=<which>_duration=<duration>' to its parameter dictionary
model_params = {
    f'{prefix}{which=}_{duration=}': {
        'duration': duration, 'which': which, 'n_classes': n_classes, 'n_epochs': n_epochs,
        'dropout': dropout, 'hs': hs, 'num_layers': num_layers, 'thresholds': thresholds
    } 
    for duration in durations for which in whichs
}



In [None]:
# create models
# For every configuration, run an optuna study over dropout, num_layers and hs;
# each trial trains one model by calling lstm.py in a subprocess
for name, v in model_params.items():
    which = v['which']
    duration = v['duration']
    def objective(trial):
        # the sampled values overwrite the entries of v in place
        v['dropout'] = trial.suggest_float('dropout', 0, .5)
        v['num_layers'] = trial.suggest_int('num_layers', 1, 4)
        v['hs'] = trial.suggest_int('hs', 64, 256, log=True)
        print('\n\n', v, '\n\n')
        # NOTE(review): the exit status of the training run is not checked; if lstm.py fails, the
        # stats read below presumably belong to an older model -- confirm
        subprocess.run(['python', 'lstm.py'] + make_args(v) + ['--target_directory', name]) 
        # model is left at its default 'max', so the stats of the latest model are read
        acc = get_stats_regular(which=which, duration=duration)['acc']
        # the study is created without a direction (optuna then minimizes), hence the negation
        return -acc # maximize acc

    study = optuna.create_study()
    # queue the default hyperparameters defined above as a trial
    study.enqueue_trial({'dropout': dropout, 'num_layers': num_layers, 'hs': hs})
    study.optimize(objective, timeout=opt_time_seconds)

In [None]:
# model = choose_model(Path(f'{prefix}/which=3_{duration=}/'), 'max')
# print(model)
# subprocess.run(['python', 'test_model.py', '--model_path', model])

In [None]:
# # find the best accuracy in the directories
# best_accs = {}
# for name, v in model_params.items():
#     best_accs[name] = find_best_model(name)
# print(*[v[1] for v in best_accs.values()], sep='\n')


## Generate Adversarial Examples
Save the results with the model in the appropriate folder, both for targeted and untargeted attacks. 

Parameters of the attack:
- Attack type (targeted or untargeted)
- Perturbation size

### Issues:
- There is a bug somewhere: according to `adv_stats.json`, the requested `perturbation_sizes` are not respected.
- We should implement a keyword for fooling a model into predicting a better or worse condition than the real one.

In [None]:
# For the case of breaking the process, it is nice to know the order of the scripts executed
# Generate that order
# order is a list of (model directory, attack folder) pairs in execution order
order = []
attack_types = ['untargeted'] # ['targeted_increase', 'targeted_decrease', 'targeted', 'untargeted']
# NOTE: computed with float arithmetic, so entries are not exact decimals (.1*3 is 0.30000000000000004);
# these exact values end up in the attack folder names and are used as lookup keys when plotting
perturbation_sizes = [.1*(i+1) for i in range(5)]
for name in model_params.keys():
    # same dictionary as the one built when the attacks are run; only its keys are needed here
    attack_params = {
        f'{adv_dir}{attack_type=}_{perturbation_size=}/': {'attack_type': attack_type, 'perturbation_size': perturbation_size} 
        for attack_type in attack_types for perturbation_size in perturbation_sizes
    }
    order.extend([(name, attack_name) for attack_name in attack_params.keys()])

def later_experiment(comp, ref, order=order):
    """
    Decide whether experiment `comp` has to be run when resuming from `ref`.

    `comp` and the entries of `order` are (model directory, attack folder) pairs; they are
    compared through their string representation.

    Returns
    - True if `ref` is None (nothing is skipped)
    - False if `ref` is 'skip_all' (everything is skipped)
    - otherwise True exactly when `ref` occurs in `order` at the position of `comp` or before it,
      i.e. `comp` is `ref` itself or comes later. A `comp` that is not part of `order` is treated
      as coming after every entry.
    """
    if ref is None:
        return True
    if ref == 'skip_all':
        return False

    def same(a, b):
        # compare as strings so that Path and str entries match
        return str(a[0]) == str(b[0]) and str(a[1]) == str(b[1])

    # position of comp; len(order) if it is absent, so that the whole list is searched below
    comp_pos = next((i for i, o in enumerate(order) if same(o, comp)), len(order))
    return any(same(o, ref) for o in order[:comp_pos + 1])

In [None]:
# for each model, make a number of attacks 
# To that end, make a subfolder "attacks" for each of the models
# Every (model, attack) pair is handed to attack.py in a subprocess; pairs that come before `ref`
# in the execution order are skipped, which allows resuming an interrupted run

# ref = (f'{prefix}/which=4_duration=150', "attacks/attack_type='targeted'_perturbation_size=0.01/") 
ref = None
# ref = 'skip_all'

for name in model_params.keys():
    attack_params = {
        f'{adv_dir}{attack_type=}_{perturbation_size=}/': {'attack_type': attack_type, 'perturbation_size': perturbation_size} 
        for attack_type in attack_types for perturbation_size in perturbation_sizes
    }
    for attack_name, v in attack_params.items():
        if later_experiment((name, attack_name), ref):
            print('doing', (name, attack_name))
            # The preferred direction is encoded in the attack type ('targeted_increase' /
            # 'targeted_decrease'). The model directory `name` never contains it, so testing
            # `name` always produced 'None'.
            attack_type = v['attack_type']
            if 'increase' in attack_type:
                preference = 'increase'
            elif 'decrease' in attack_type:
                preference = 'decrease'
            else:
                preference = 'None'
            subprocess.run(['python', 'attack.py'] + make_args(v) + ['--source_directory', name, '--target_directory', attack_name, '--model_name', 'best', '--preference', preference, '--validation_data', 'False'])
        else:
            print('skipped', (name, attack_name))

## Plot the Results

In [None]:
def cost_of_matrix(cm, critical_class, c_over=15.1, c_under=40):
    """ 
    Return the average misclassification cost encoded in a confusion matrix.

    Two off-diagonal regions of `cm` are priced, both leaving out the row and the column of
    `critical_class` itself:
    - first index above `critical_class`, second index below it: counted as underestimates
      (a non-critical class is predicted although the case is critical), priced with `c_under`
    - first index below `critical_class`, second index above it: counted as overestimates
      (a non-critical case is predicted as critical), priced with `c_over`
    The weighted count is divided by the total number of entries in `cm`.
    """
    matrix = np.asarray(cm)
    above = slice(critical_class + 1, None)
    below = slice(None, critical_class)
    underestimated = matrix[above, below].sum()
    overestimated = matrix[below, above].sum()
    weighted = underestimated * c_under + overestimated * c_over
    return weighted / matrix.sum()


def plot_statistics(stat='acc', critical_class=3, attacks=['targeted', 'untargeted', 'targeted_increase', 'targeted_decrease']):
    """
    Plot a statistic of the 'best' models against the perturbation size of the attacks.

    One figure is created per entry of `whichs` and per attack type in `attacks`; it contains one
    line per entry of `durations`. The first point of each line (x = 0) is the value of the
    unattacked model, the remaining points belong to `perturbation_sizes`.
    Every figure is saved below 'plots/<prefix>/'.

    stat: 'acc' plots the stored accuracies, 'cost' plots `cost_of_matrix` of the stored confusion matrices
    critical_class: forwarded to `cost_of_matrix`, only used for stat='cost'
    attacks: attack types to plot

    Raises ValueError if `stat` is neither 'acc' nor 'cost'.
    """
    if stat not in ('acc', 'cost'):
        # fail before any stats are loaded instead of hitting an unbound `y` while plotting
        raise ValueError(f"stat must be 'acc' or 'cost', got {stat!r}")

    def plot_accs(which, attack, filename=None):
        # draw one figure: a line per duration for the given which / attack combination
        fig, ax = plt.subplots(1, 1)
        ax.set_title(f'{attack}_{which=}')
        for duration in durations:
            key = f'{which=}_{duration=}'
            if stat == 'acc':
                y = [regular_accs[key]['acc']] + [adv_accs[key][attack][str(s)]['acc'] for s in perturbation_sizes]
            else:  # stat == 'cost'
                y = [cost_of_matrix(regular_accs[key]['confusion_matrix'], critical_class)] + [
                    cost_of_matrix(adv_accs[key][attack][str(s)]['cm'], critical_class) for s in perturbation_sizes
                ]
            ax.plot(x, y, label=key)
        # ax.set_ylim(.5, 1)
        # Decorate once per figure. A bare ax.grid() toggles the grid, so calling it once per
        # duration switched the grid off again for an even number of durations.
        ax.grid(True)
        ax.set_xlabel('perturbation size')
        ax.set_ylabel(stat)
        ax.legend()
        if filename is not None: 
            Path(filename).parent.mkdir(exist_ok=True, parents=True)
            fig.savefig(filename)
        return fig, ax 
    # get accs
    regular_accs = {}
    adv_accs = {}
    for which in whichs:
        for duration in durations:
            key = f'{which=}_{duration=}'
            regular_accs[key] = get_stats_regular(which, duration, model='best')
            adv_accs[key] = get_stats_attack(which, duration, model='best')

    # plot targeted results, resulting in a plot per "which"
    # x=perturbation_size, y=acc
    # where we have one line for each duration
    x = [0.] + perturbation_sizes 
    for which in whichs:
        for attack in attacks:
            # NOTE: `duration` is the value left over from the loading loop above (the last entry of
            # durations) although the figure covers all durations; kept so that file names do not change
            fig, ax = plot_accs(which, attack, filename=f'plots/{prefix}/{which=}_{duration=}_{attack=}_{stat=}.png')

        # plt.show()

In [None]:
# accuracy against perturbation size, for the attack types configured in attack_types
plot_statistics(stat='acc', attacks=attack_types)

In [None]:
# misclassification cost against perturbation size, with class 2 as the critical class
plot_statistics(stat='cost', critical_class=2, attacks=attack_types)

In [None]:
# check out where we could introduce errors
# Load the stored adversarial examples of the untargeted attack with the last (largest) perturbation size.
# The loaded tensors are overwritten in every iteration, so only those of the last model remain afterwards.
attack = 'untargeted'
perturbation_size = perturbation_sizes[-1]
for which in whichs:
    for duration in durations:
        chosen_model = choose_model(f'{prefix}/{which=}_{duration=}', 'best')
        examples = chosen_model / f'{adv_dir}/attack_type=\'{attack}\'_{perturbation_size=}/examples/'
        # NOTE(review): attack_ids is loaded but not used in the visible code
        attack_ids = torch.load(examples / 'ids.pt')
        # presumably one (engine, time) pair per adversarial example -- TODO confirm against attack.py
        engines_times = torch.load(examples/'ids_times.pt')
        adv_lbl = torch.load(examples / 'adv_lbl.pt')
        print(engines_times) 

In [None]:
# Relies on which, duration, engines_times and adv_lbl left over from the previous cell
# (the values of its last loop iteration)
chosen_model = choose_model(f'{prefix}/{which=}_{duration=}', 'best')
with open(chosen_model /'preds_ys.json') as f:
    preds_ys = json.load(f)

# For every adversarial example, plot the stored ys ('o') and preds ('x') of its engine
# and mark the adversarial label at the attacked time step ('*')
prev_engine = -1
for i, o in enumerate(engines_times):
    engine, time = o 
    # show what has been drawn so far whenever the engine changes; examples of the same engine
    # are thereby drawn on top of each other (assumes equal engines are stored consecutively -- TODO confirm)
    if engine != prev_engine: plt.show()
    preds = preds_ys['preds'][engine]
    ys = preds_ys['ys'][engine]
    plt.plot(np.arange(len(preds)), ys, 'o')
    plt.plot(np.arange(len(preds)), preds, 'x')
    plt.plot(time, adv_lbl[i], '*')
    # presumably chosen to frame the class indices 0..4 (n_classes = 5) -- confirm
    plt.ylim((-.5, 4.5))
    prev_engine = engine 