In [1]:
import os
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd

from adad.torch_utils import NNClassifier, get_correct_examples
from adad.utils import open_csv

In [2]:
# Project root: two directory levels above the current working directory.
_working_dir = Path.cwd().absolute()
PATH_ROOT = _working_dir.parent.parent
print(PATH_ROOT)

/home/lukec/workspace/applicabilityDomain


In [3]:
# Dataset names; each one is used as the prefix of the result file names.
DATASETS = [
    'abalone', 'australian', 'banknote', 'breastcancer',
    'htru2', 'phoneme', 'ringnorm', 'texture',
]
# Adversarial attack identifiers; each one names a sub-directory of a run folder.
ATTACKS = ['fgsm', 'apgd', 'cw2']

In [4]:
def get_clf_metrics(index, dataname):
    """Collect dataset sizes and classifier accuracy for a single run.

    Reads the train/validation/test CSV files and the saved classifier stored
    under ``PATH_ROOT/results/numeric/run_<index>`` and summarises them.

    Parameters
    ----------
    index : int or str
        Run identifier, interpolated into the ``run_<index>`` directory name.
    dataname : str
        Dataset name; prefix of the CSV file names and name of the model path.

    Returns
    -------
    pd.Series
        One row with the keys 'Data', 'Features', 'Classes', 'Train', 'Val',
        'Test', 'Train-Acc', 'Val-Acc' and 'Test-Acc'. Each ``*-Acc`` entry is
        the number of labels returned by ``get_correct_examples`` divided by
        the size of the split, in percent.
    """
    run_dir = os.path.join(PATH_ROOT, 'results', 'numeric', f'run_{index}')

    X_train, y_train, _ = open_csv(os.path.join(run_dir, 'train', f'{dataname}_train.csv'))
    X_val, y_val, _ = open_csv(os.path.join(run_dir, 'validation', f'{dataname}_val.csv'))
    X_test, y_test, _ = open_csv(os.path.join(run_dir, 'test', f'{dataname}_test.csv'))

    model = NNClassifier()
    model.load(os.path.join(run_dir, 'clf', dataname))

    def percent_kept(X, y):
        # Presumably get_correct_examples keeps only the correctly classified
        # samples, which makes this ratio the accuracy -- TODO confirm.
        _, y_kept = get_correct_examples(model.clf, X, y, model.device)
        return len(y_kept) / y.shape[0] * 100.

    metrics = {
        'Data': dataname,
        'Features': X_train.shape[1],
        'Classes': int(len(np.unique(y_train))),
        'Train': int(y_train.shape[0]),
        'Val': int(y_val.shape[0]),
        'Test': int(y_test.shape[0]),
        'Train-Acc': percent_kept(X_train, y_train),
        'Val-Acc': percent_kept(X_val, y_val),
        'Test-Acc': percent_kept(X_test, y_test),
    }
    return pd.Series(metrics)



In [5]:
# One empty, correctly typed metrics table per repeated run (5 runs).
col_names = [
    'Data', 'Features', 'Classes',
    'Train', 'Val', 'Test',
    'Train-Acc', 'Val-Acc', 'Test-Acc',
]
# Columns that hold counts rather than percentages or names.
col_int_names = ['Features', 'Classes', 'Train', 'Val', 'Test']

df_repeated = []
for i in range(5):
    _df = pd.DataFrame({name: [] for name in col_names})
    _df[col_int_names] = _df[col_int_names].astype(int)
    df_repeated.append(_df)

In [6]:
# Fill the per-run tables: one row of classifier metrics per dataset.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# is quadratic, so each table is built in one go from a list of records.
# Assigning (rather than appending) also keeps this cell safe to re-run.
for i, run in enumerate(range(1, 6)):
    records = [get_clf_metrics(run, dataset).to_dict() for dataset in DATASETS]
    # Reuse the column order of the pre-allocated table for this run.
    df_repeated[i] = pd.DataFrame(records, columns=df_repeated[i].columns)

In [7]:
# Average every metric over the repeated runs, one row per dataset.
df_mean = (
    pd.concat(df_repeated)
    .groupby(by='Data')
    .mean()
    .reset_index(level=0)  # turn the 'Data' group key back into a column
)

In [8]:
# Render the averaged metrics as a LaTeX table with two decimals per number.
latex_table = df_mean.to_latex(index=False, float_format="%.2f")
print(latex_table)

\begin{tabular}{lrrrrrrrr}
\toprule
        Data &  Features &  Classes &   Train &     Val &    Test &  Train-Acc &  Val-Acc &  Test-Acc \\
\midrule
     abalone &      7.00 &     2.00 & 2504.00 &  835.00 &  835.00 &      80.72 &    78.78 &     79.38 \\
  australian &     14.00 &     2.00 &  414.00 &  138.00 &  138.00 &      96.52 &    84.35 &     82.03 \\
    banknote &      4.00 &     2.00 &  822.00 &  275.00 &  275.00 &     100.00 &   100.00 &    100.00 \\
breastcancer &     30.00 &     2.00 &  341.00 &  114.00 &  114.00 &     100.00 &    95.61 &     96.14 \\
       htru2 &      8.00 &     2.00 & 2457.00 &  820.00 &  820.00 &      95.28 &    95.15 &     95.02 \\
     phoneme &      5.00 &     2.00 & 2379.00 &  793.00 &  793.00 &      85.66 &    83.05 &     83.88 \\
    ringnorm &     20.00 &     2.00 & 4439.00 & 1480.00 & 1480.00 &      99.86 &    96.77 &     96.70 \\
     texture &     40.00 &    11.00 & 3300.00 & 1100.00 & 1100.00 &      99.38 &    99.09 &     99.18 \\
\bottomrule
\end{tabular}

In [9]:
def get_advx_metrics(run_num, dataname, attack):
    """Get performance metrics for adversarial attack.

    Loads the classifier saved for run ``run_num`` and scores it on every
    adversarial example file matching
    ``PATH_ROOT/results/numeric/run_<run_num>/<attack>/<dataname>_<attack>_*.csv``.

    Parameters
    ----------
    run_num : int or str
        Run identifier, interpolated into the ``run_<run_num>`` directory name.
    dataname : str
        Dataset name; prefix of the adversarial CSV files and model path name.
    attack : str
        Attack identifier; names the sub-directory and is part of the file name.

    Returns
    -------
    pd.DataFrame
        One row per matching file with the columns 'Data', 'Attack', 'Param'
        and 'Accuracy'. The frame is empty (but keeps these columns) when no
        file matches.

    Raises
    ------
    ValueError
        If the last ``_``-separated token of a file stem is not a number.
    """
    # Build every path from the `run_num` argument so the function never
    # depends on a notebook-level loop variable being set by the caller.
    path_run = os.path.join(PATH_ROOT, 'results', 'numeric', f'run_{run_num}')

    clf = NNClassifier()
    clf.load(os.path.join(path_run, 'clf', dataname))

    path_advx_re = os.path.join(path_run, attack, f'{dataname}_{attack}_*.csv')
    paths_advx = sorted(glob(path_advx_re))

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and row-wise growth is quadratic.
    records = []
    for path_advx in paths_advx:
        # Get hyperparameter for adversarial attack; FGSM is epsilon, CW2 is C.
        # It is encoded as the last '_'-separated token of the file stem.
        param = float(Path(path_advx).stem.split('_')[-1])

        X_advx, y_true, _ = open_csv(path_advx)
        records.append({
            'Data': dataname,
            'Attack': attack,
            'Param': param,
            'Accuracy': clf.score(X_advx, y_true),
        })
    return pd.DataFrame(records, columns=['Data', 'Attack', 'Param', 'Accuracy'])

In [10]:
# Adversarial accuracy tables, one per repeated run (runs 1..5).
# Each table is assembled with a single pd.concat: DataFrame.append was removed
# in pandas 2.0, and rebuilding the list from scratch keeps the cell idempotent.
advx_columns = ['Data', 'Attack', 'Param', 'Accuracy']

df_advx_repeated = []
for i, run in enumerate(range(1, 6)):
    frames = []
    for dataname in DATASETS:
        for att in ATTACKS:
            _df = get_advx_metrics(run, dataname, att)
            # Skip dataset/attack pairs with no result files.
            if not _df.empty:
                frames.append(_df)

    if frames:
        df_run = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat raises on an empty list; keep an empty, well-formed table.
        df_run = pd.DataFrame(columns=advx_columns)
    df_advx_repeated.append(df_run)


In [21]:
# Mean adversarial accuracy over the repeated runs for every
# (dataset, attack, attack parameter) combination, expressed in percent.
df_advx_mean = (
    pd.concat(df_advx_repeated)
    .groupby(by=['Data', 'Attack', 'Param'])
    .mean()
    .reset_index()
    .assign(Accuracy=lambda frame: frame['Accuracy'] * 100.)
)
print(df_advx_mean)

        Data Attack  Param   Accuracy
0    abalone   apgd    0.1  63.841583
1    abalone   apgd    0.3  22.453503
2    abalone   apgd    0.6   6.119547
3    abalone   apgd    1.0   0.607465
4    abalone   apgd    1.5   0.000000
..       ...    ...    ...        ...
99   texture   fgsm    0.1  73.742863
100  texture   fgsm    0.3  17.452203
101  texture   fgsm    0.6   8.161906
102  texture   fgsm    1.0   4.970743
103  texture   fgsm    1.5   3.741727

[104 rows x 4 columns]


In [22]:
# Export wide tables (one column per attack parameter). CW2 is written to its
# own file, separately from the other attacks.
is_cw2 = df_advx_mean['Attack'] == 'cw2'
path_output = os.path.join(PATH_ROOT, 'plot_results', 'numeric')
pivot_kwargs = dict(index=['Data', 'Attack'], columns='Param', values='Accuracy')

df_advx_mean_other = df_advx_mean[~is_cw2].pivot(**pivot_kwargs)
df_advx_mean_other.to_csv(os.path.join(path_output, 'advx_metrics_other.csv'))

df_advx_mean_cw2 = df_advx_mean[is_cw2].pivot(**pivot_kwargs)
df_advx_mean_cw2.to_csv(os.path.join(path_output, 'advx_metrics_cw2.csv'))