In [47]:
mnist_results_paths = ['../output/natural-mnist-results.json',
                       '../output/ablation-mnist-results.json',
                       '../output/ablation-mnist-results2.json',
                       '../output/ablation-mnist-results3.json',
                       '../output/ablation-mnist-results-spsa.json']
cifar10_results_paths = ['../output/natural-cifar10-results.json',
                         '../output/natural-cifar10-results2.json',
                         '../output/ablation-cifar10-results.json',
                         '../output/ablation-cifar10-results2.json']

In [48]:
import pandas as pd
import numpy as np
from collections import OrderedDict
import re
import os
from scipy import stats

In [49]:
import json

def read_json(path):
    rows = []
    with open(path) as f:
        for line in f:
            if line.strip():
                row = json.loads(line[:-1])
                rows.append(row)
    df = pd.DataFrame(rows)
    if 'eps' in df.columns:
        df = df.drop_duplicates(['model_path', 'attack_type', 'eps'], keep='last')
    df = df.rename(columns={'attack': 'attack_type'})
    return df

def read_jsons(paths):
    dfs = [read_json(path) for path in paths if os.path.exists(path)]
    return pd.concat(dfs, sort=False).reset_index()

In [50]:
def process(df):
    df['attack_type'] = df.apply(
        lambda r: ('%s(eps=%.1f)' % (r['attack_type'], r['eps'])
                   if 'eps' in r else r['attack_type']), axis=1)
    df['model_name'] = df.model_path.apply(os.path.basename)
    counts = df.groupby(['model_name', 'attack_type']).agg({'accuracies': len})
    duplicates = counts[counts.accuracies >= 2]
    if len(duplicates) > 0:
        print(duplicates.reset_index().values)
    assert len(duplicates) == 0, 'Please remove duplicates'
    return df

# MNIST results

In [51]:
results = process(read_jsons(mnist_results_paths))

In [52]:
results[results.model_name == 'relog-elliptical-maxout_4-max_fit_l1.pkl']

Unnamed: 0,index,model_path,attack_type,accuracies,max_probs,binary_search_steps,max_iterations,learning_rate,batch_size,initial_const,clip_min,clip_max,ord,eps,nb_iter,eps_iter,model_name
4,4,/nfs/home2/minhle/newlogic/output/ablation-mni...,none(eps=nan),"[1.0, 1.0, 1.0, 0.98, 1.0, 0.99, 0.97, 0.99, 0...","[0.999908447265625, 0.9997063279151917, 0.9984...",,,,,,,,,,,,relog-elliptical-maxout_4-max_fit_l1.pkl
9,0,/nfs/home2/minhle/newlogic/output/ablation-mni...,CW(eps=nan),"[0.34, 0.28, 0.36, 0.29, 0.27, 0.34, 0.28, 0.2...","[0.5071215033531189, 0.49341168999671936, 0.50...",1.0,50.0,0.5,100.0,1.0,0.0,1.0,,,,,relog-elliptical-maxout_4-max_fit_l1.pkl
10,1,/nfs/home2/minhle/newlogic/output/ablation-mni...,BIM(eps=0.3),"[0.03, 0.02, 0.02, 0.04, 0.04, 0.0, 0.03, 0.03...","[0.9981439709663391, 0.9998207092285156, 0.948...",,,,,,0.0,1.0,inf,0.3,5.0,0.1,relog-elliptical-maxout_4-max_fit_l1.pkl


In [53]:
results.groupby('model_path').agg({'attack_type': len})

Unnamed: 0_level_0,attack_type
model_path,Unnamed: 1_level_1
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relog-elliptical-maxout_4-max_fit_l1.pkl,3
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relog-elliptical-maxout_4-mse-max_fit_l1-overlay.pkl,6
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relog-elliptical-maxout_4-mse-max_fit_l1.pkl,6
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relog-elliptical-maxout_4.pkl,5
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relog-elliptical-mse-max_fit_l1-overlay.pkl,1
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relog-maxout_4.pkl,5
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relog-minmaxout_2_4.pkl,1
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relog.pkl,5
/nfs/home2/minhle/newlogic/output/ablation-mnist-models/relu.pkl,5


In [54]:
def compute_accuracy_and_ttest(col):
    if col.name in ('index', 'model_name'):
        return list(col)
    formatted_accuracies = []
    for prev, curr in zip([None] + list(col), list(col)):
        if isinstance(curr, (str, float)):
            formatted_accuracies.append(curr)
        elif isinstance(curr, list):
            curr = np.around(curr, 2)
            acc = np.mean(curr)
            acc = '%.2f' % acc
            if isinstance(prev, list):
                prev = np.around(prev, 2)
                _, p_value = stats.ttest_ind(prev, curr, equal_var = False)
                acc += '*' if p_value < 0.05 else '<'
            else:
                acc += '<'
            formatted_accuracies.append(acc)
        else:
            raise ValueError("Unsupported type: " + str(curr))
    return pd.Series(formatted_accuracies)

In [55]:
def format_metrics(df, formatted_names, model_order, attack_order, values_col):
    # filtering
    df = df[df.model_name.isin(formatted_names) &
            df.attack_type.isin(attack_order)].copy()
    # convert to wide format
    df = df.pivot(index='model_name', values=values_col, columns='attack_type')
    # change column order
    df = (df.reindex(sorted(df.columns, key=attack_order.__getitem__), axis=1)
          .reset_index().fillna('-'))
    df['model_order'] = df.model_name.apply(model_order.__getitem__)
    df = df.sort_values(['model_order']).drop(columns='model_order')
    # compute mean accuracy and p-values
    df = df.apply(compute_accuracy_and_ttest)
    # format model names last
    df['model_name'] = df.model_name.apply(formatted_names.__getitem__)    
    return df

def format_table(df, formated_names, model_order, attack_order):
    df = format_metrics(df, formatted_names, model_order, attack_order, values_col='accuracies')
    # since the "sigmoid out" network always give 0.22% as max confidence (I shouldn't have
    # add sigmoid to the network), I decided to drop confidence measurement.
    latex = df.to_latex()
    latex = latex.replace('*', '$^*$')
    latex = latex.replace('<', '\hspace{0.5em}')
    return df, latex

## Ablation table

In [56]:
formatted_names = OrderedDict([
    ('relu.pkl', 'Baseline'),
    ('relog.pkl', '+ ReLog'), 
    ('relog-maxout_4.pkl', '+ MaxOut (k=4)'),
    ('relog-minmaxout_2_4.pkl', '+ MinOut (k=2)'),
    ('relog-elliptical-maxout_4.pkl', '+ Elliptical'),
    ('relog-elliptical-maxout_4-max_fit_l1_1.pkl', '+ MaxFit'),  
    ('relog-elliptical-maxout_4-sigmoid_out-max_fit_l1_1.pkl', '+ MSE training'),  
    ('relog-elliptical-maxout_4-sigmoid_out-max_fit_l1_1-overlay.pkl', '+ Negative examples'),  
])
model_order = {n: i for i, n in enumerate(formatted_names)}

In [57]:
used_attacks = ['none(eps=nan)', # (natural)
                'FGM_inf(eps=0.3)', 'FGM_L2(eps=2.0)', 'CW(eps=nan)',
                'BIM(eps=0.3)', 'SPSA(eps=0.3)']
attack_order = {n: i for i, n in enumerate(used_attacks)}

In [58]:
results.attack_type.unique()

array(['none(eps=nan)', 'CW(eps=nan)', 'BIM(eps=0.3)', 'FGM_inf(eps=0.3)',
       'FGM_L2(eps=2.0)', 'MaxConf(eps=0.3)'], dtype=object)

In [59]:
ablation_table, latex = format_table(results, formatted_names, model_order, attack_order)

In [60]:
ablation_table

attack_type,model_name,none(eps=nan),FGM_inf(eps=0.3),FGM_L2(eps=2.0),CW(eps=nan),BIM(eps=0.3)
4,Baseline,0.99<,0.11<,0.73<,0.02<,0.00<
3,+ ReLog,0.99*,0.30*,0.83*,0.15*,0.02*
1,+ MaxOut (k=4),0.99<,0.39*,0.85*,0.14*,0.05*
2,+ MinOut (k=2),0.99<,-,-,-,-
0,+ Elliptical,0.95*,0.60<,0.62<,0.50<,0.02<


In [61]:
print(latex)

\begin{tabular}{lllllll}
\toprule
attack\_type &      model\_name & none(eps=nan) & FGM\_inf(eps=0.3) & FGM\_L2(eps=2.0) & CW(eps=nan) & BIM(eps=0.3) \\
\midrule
4 &        Baseline &         0.99\hspace{0.5em} &            0.11\hspace{0.5em} &           0.73\hspace{0.5em} &       0.02\hspace{0.5em} &        0.00\hspace{0.5em} \\
3 &         + ReLog &         0.99$^*$ &            0.30$^*$ &           0.83$^*$ &       0.15$^*$ &        0.02$^*$ \\
1 &  + MaxOut (k=4) &         0.99\hspace{0.5em} &            0.39$^*$ &           0.85$^*$ &       0.14$^*$ &        0.05$^*$ \\
2 &  + MinOut (k=2) &         0.99\hspace{0.5em} &                - &               - &           - &            - \\
0 &    + Elliptical &         0.95$^*$ &            0.60\hspace{0.5em} &           0.62\hspace{0.5em} &       0.50\hspace{0.5em} &        0.02\hspace{0.5em} \\
\bottomrule
\end{tabular}



## Check a few entries

In [16]:
def print_mean_acc(attack, model):
    row = results[(results.attack_type == attack) & (results.model_name == model)]
    print(np.mean(row.accuracies.iloc[0]))

In [17]:
print_mean_acc('FGM_L2(eps=2.0)', 'relog-elliptical-maxout_4-sigmoid_out-max_fit_l1_1-overlay.pkl')

IndexError: single positional indexer is out-of-bounds

In [None]:
print_mean_acc('BIM(eps=0.3)', 'relog-elliptical-maxout_4-max_fit_l1_1.pkl')

In [None]:
print_mean_acc('SPSA(eps=0.3)', 'relu.pkl')

# CIFAR-10 results

In [35]:
cifar10_results = process(read_jsons(cifar10_results_paths))

In [36]:
cifar10_results.attack_type.unique()

array(['none(eps=nan)', 'FGM_L2(eps=2.0)', 'FGM_inf(eps=0.3)',
       'BIM(eps=0.3)', 'CW(eps=nan)'], dtype=object)

In [37]:
cifar10_results.model_name.unique()

array(['relu.pkl', 'relog.pkl', 'relog-maxout_4.pkl',
       'relog-minmaxout_2_4-max_fit_l2.pkl', 'relog-minmaxout_2_4.pkl',
       'relog-quadratic-max_fit_l2-maxout_4-sigmoid_out-overlay.pkl',
       'relog-quadratic-max_fit_l2-maxout_4-sigmoid_out-scrambling.pkl',
       'relog-quadratic-max_fit_l2-maxout_4-mse.pkl',
       'relog-quadratic-max_fit_l2-maxout_4-sigmoid_out.pkl',
       'relog-quadratic-max_fit_l2-maxout_4-mse-overlay.pkl',
       'relog-quadratic-max_fit_l2-maxout_4.pkl', 'relog-elliptical.pkl',
       'relog-elliptical-maxfit.pkl'], dtype=object)

In [42]:
formatted_names = OrderedDict([
    ('relu.pkl', 'Baseline'),
    ('relog.pkl', '+ ReLog'), 
    ('relog-elliptical.pkl', '+ Elliptical'),
    ('relog-elliptical-maxfit.pkl', '+ MaxFit (L1)'),
    ('relog-elliptical-maxout-maxfit_l1.pkl', '+ MaxOut'),
    ('relog-maxout-elliptical-max_fit_l1-mse.pkl', '+ MSE training'),
    ('relog-maxout-elliptical-max_fit_l1-mse-overlay.pkl', '+ Negative examples')
])
model_order = {n: i for i, n in enumerate(formatted_names)}

In [43]:
ablation_table, latex = format_table(cifar10_results, formatted_names, model_order, attack_order)

In [44]:
ablation_table

attack_type,model_name,none(eps=nan),FGM_inf(eps=0.3),FGM_L2(eps=2.0),CW(eps=nan),BIM(eps=0.3)
3,Baseline,0.87<,-,-,-,-
2,+ ReLog,0.87<,0.18<,0.44<,0.04<,0.02<
1,+ Elliptical,-,0.10*,0.44<,0.02*,0.01*
0,+ MaxFit (L1),-,0.13*,0.45*,0.05*,0.01<


In [46]:
print(latex)

\begin{tabular}{lllllll}
\toprule
attack\_type &     model\_name & none(eps=nan) & FGM\_inf(eps=0.3) & FGM\_L2(eps=2.0) & CW(eps=nan) & BIM(eps=0.3) \\
\midrule
3 &       Baseline &         0.87\hspace{0.5em} &                - &               - &           - &            - \\
2 &        + ReLog &         0.87\hspace{0.5em} &            0.18\hspace{0.5em} &           0.44\hspace{0.5em} &       0.04\hspace{0.5em} &        0.02\hspace{0.5em} \\
1 &   + Elliptical &             - &            0.10$^*$ &           0.44\hspace{0.5em} &       0.02$^*$ &        0.01$^*$ \\
0 &  + MaxFit (L1) &             - &            0.13$^*$ &           0.45$^*$ &       0.05$^*$ &        0.01\hspace{0.5em} \\
\bottomrule
\end{tabular}

