## MagNet

Testing the divergence-based detector.

In [1]:
import argparse
import datetime
import json
import os
import sys
import time

import numpy as np
import torch
import torch.nn as nn
import torchvision as tv
import torchvision.datasets as datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

In [5]:
# Make the parent directory of the working directory importable so that the
# project-local packages imported in the next cell (defences, models,
# experiments) can be resolved.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate '..' entry to sys.path each time.
if '..' not in sys.path:
    sys.path.append('..')
sys.path

['/home/lukec/workspace/baard_exp2/examples',
 '/home/lukec/.vscode/extensions/ms-toolsai.jupyter-2020.12.414227025/pythonFiles',
 '/home/lukec/.vscode/extensions/ms-toolsai.jupyter-2020.12.414227025/pythonFiles/lib/python',
 '/usr/lib/python36.zip',
 '/usr/lib/python3.6',
 '/usr/lib/python3.6/lib-dynload',
 '',
 '/home/lukec/.local/lib/python3.6/site-packages',
 '/usr/local/lib/python3.6/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/home/lukec/.local/lib/python3.6/site-packages/IPython/extensions',
 '/home/lukec/.ipython',
 '/home/lukec/workspace/baard_exp2/examples',
 '/home/lukec/workspace/baard_exp2/examples',
 '..']

In [6]:
from defences.util import (dataset2tensor, get_correct_examples, get_shape,
                           merge_and_generate_labels, score)
from defences.magnet import (Autoencoder1, Autoencoder2, MagNetDetector,
                             MagNetAutoencoderReformer, MagNetOperator)
from defences.feature_squeezing import (GaussianSqueezer, MedianSqueezer,
                                        DepthSqueezer, FeatureSqueezingTorch)
from models.numeric import NumericModel
from models.mnist import BaseModel
from models.cifar10 import Resnet, Vgg
from experiments.util import load_csv
from experiments.train_pt import validate, predict
from defences.region_based_classifier import RegionBasedClassifier
from defences.lid import LidDetector
from defences.baard import (ApplicabilityStage, BAARDOperator,
                            DecidabilityStage, ReliabilityStage)

In [7]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [17]:
# Name of the directory holding saved models and pre-generated adversarial
# examples; later cells join it onto '..' when building file paths.
OUTPUT_PATH = 'results'
# Root directory handed to the MNIST dataset loaders (one level above the
# working directory).
DATA_PATH = os.path.join('..', 'data')

In [11]:
# The only preprocessing step is the conversion of each image to a tensor.
transforms = tv.transforms.Compose([tv.transforms.ToTensor()])

# Build the train/test datasets and their loaders as matching pairs.
# Loaders are not shuffled, so samples are iterated in dataset order.
dataset_train, dataset_test = (
    datasets.MNIST(DATA_PATH, train=is_train, download=True, transform=transforms)
    for is_train in (True, False))
loader_train, loader_test = (
    DataLoader(split, batch_size=512, shuffle=False)
    for split in (dataset_train, dataset_test))

In [14]:
# MNIST classifier under test, plus the loss function that is passed to
# `validate` whenever the classifier is evaluated.
model = BaseModel(use_prob=True).to(device)
loss = nn.CrossEntropyLoss()

# Restore the pre-trained weights. `map_location=device` remaps the stored
# tensors onto the active device, so a checkpoint that was saved on a GPU
# machine still loads when this notebook runs on a CPU-only host.
pretrained_path = os.path.join('..', OUTPUT_PATH, 'mnist_200.pt')
model.load_state_dict(torch.load(pretrained_path, map_location=device))

<All keys matched successfully>

In [15]:
# Baseline accuracy of the pre-trained classifier on the unfiltered MNIST
# splits. `validate` returns a pair; only its second element (the accuracy,
# printed here as a percentage) is used.
_, acc_train = validate(model, loader_train, loss, device)
print('Accuracy on train set: {:.4f}%'.format(acc_train * 100))

_, acc_test = validate(model, loader_test, loss, device)
print('Accuracy on test set: {:.4f}%'.format(acc_test * 100))

Accuracy on train set: 99.7117%
Accuracy on test set: 98.5200%


In [22]:
# Name prefix of the pre-generated adversarial set. The files loaded further
# down are '<ADV>_x.npy', '<ADV>_adv.npy' and '<ADV>_y.npy'.
ADV = 'mnist_basic_apgd_0.3'

# Rebuild the training set from the output of get_correct_examples and
# re-validate the model on it.
# NOTE: this rebinds dataset_train / loader_train, which previously referred
# to the full MNIST training split.
# (presumably get_correct_examples keeps only the samples the model
# classifies correctly -- confirm against defences.util)
tensor_train_X, tensor_train_y = get_correct_examples(
    model, dataset_train, device=device, return_tensor=True)
dataset_train = TensorDataset(tensor_train_X, tensor_train_y)
loader_train = DataLoader(dataset_train, batch_size=512, shuffle=True)
_, acc_perfect = validate(model, loader_train, loss, device)
print('Accuracy on {} filtered train set: {:.4f}%'.format(
    len(dataset_train), acc_perfect*100))

# Same filtering for the test split; dataset_test / loader_test are rebound
# as well, and acc_perfect is overwritten with the test-set value.
tensor_test_X, tensor_test_y = get_correct_examples(
    model, dataset_test, device=device, return_tensor=True)
dataset_test = TensorDataset(tensor_test_X, tensor_test_y)
loader_test = DataLoader(dataset_test, batch_size=512, shuffle=True)
_, acc_perfect = validate(model, loader_test, loss, device)
print('Accuracy on {} filtered test set: {:.4f}%'.format(
    len(dataset_test), acc_perfect*100))

# Load pre-trained adversarial examples
# Three arrays are read from disk: the benign inputs, their adversarial
# counterparts, and the true labels (y_true is paired with both below).
path_benign = os.path.join('..', OUTPUT_PATH, ADV + '_x.npy')
path_adv = os.path.join('..', OUTPUT_PATH, ADV + '_adv.npy')
path_y = os.path.join('..', OUTPUT_PATH, ADV + '_y.npy')
X_benign = np.load(path_benign)
adv = np.load(path_adv)
y_true = np.load(path_y)

# Sanity check: report the classifier's accuracy on the benign samples and
# then on the adversarial examples, both scored against the same true labels.
# `dataset` and `loader` are scratch names that are rebound for each check.
dataset = TensorDataset(torch.from_numpy(X_benign), torch.from_numpy(y_true))
loader = DataLoader(dataset, batch_size=512, shuffle=False)
_, acc = validate(model, loader, loss, device)
print('Accuracy on {} benign samples: {:.4f}%'.format(
    len(dataset), acc*100))
dataset = TensorDataset(
    torch.from_numpy(adv), torch.from_numpy(y_true))
loader = DataLoader(dataset, batch_size=512, shuffle=False)
_, acc = validate(model, loader, loss, device)
print('Accuracy on {} adversarial examples: {:.4f}%'.format(
    len(dataset), acc*100))

# Do NOT shuffle the indices, so different defences can use the same test set.
# The classifier's predictions on the adversarial examples are kept as a
# NumPy array so they can be sliced with the same indices as adv / y_true.
dataset = TensorDataset(torch.from_numpy(adv))
loader = DataLoader(dataset, batch_size=512, shuffle=False)
pred_adv = predict(model, loader, device).cpu().detach().numpy()

# Find the thresholds using the 2nd half
# n is the split point: [n:] forms the validation split built here, while
# [:n] is used for the test split in a later cell.
n = len(X_benign) // 2
# Merge benign samples and adversarial examples into one set.
# These labels indicate whether a sample is an adversarial example or not.
X_val, labels_val = merge_and_generate_labels(
    adv[n:], X_benign[n:], flatten=False)
# The predictions for benign samples are exactly the same as the true labels.
# Concatenated as (adversarial, benign), mirroring the argument order above --
# assumes merge_and_generate_labels preserves that order; TODO confirm.
pred_val = np.concatenate((pred_adv[n:], y_true[n:]))

# NumPy versions of the filtered training tensors, for use in later cells.
X_train = tensor_train_X.cpu().detach().numpy()
y_train = tensor_train_y.cpu().detach().numpy()

Accuracy on 59827 filtered train set: 100.0000%
Accuracy on 9852 filtered test set: 100.0000%
Accuracy on 2000 benign samples: 100.0000%
Accuracy on 2000 adversarial examples: 0.0000%


In [23]:
# Checkpoint from which the detector is restored.
ENCODER_PATH = os.path.join('..', OUTPUT_PATH, 'autoencoder_mnist_2.pt')

# A single detector is configured here; it lives in a list because the
# operator built later receives the whole collection via `detectors=`.
magnet_detectors = [
    MagNetDetector(
        encoder=Autoencoder2(n_channel=1),
        classifier=model,
        lr=0.001,
        batch_size=256,
        weight_decay=1e-9,
        x_min=0.0,
        x_max=1.0,
        noise_strength=0.025,
        algorithm='prob',
        p=2,
        temperature=10,
        device=device),
]
magnet_detectors[0].load(ENCODER_PATH)

# Report the restored detector's score on the filtered training and test
# inputs (labelled "MSE" in the printed message).
tensor_X_test, _ = dataset2tensor(dataset_test)
X_test = tensor_X_test.cpu().detach().numpy()
print('MSE training set: {:.6f}, test set: {:.6f}'.format(
    magnet_detectors[0].score(X_train),
    magnet_detectors[0].score(X_test)))

MSE training set: 0.000063, test set: 0.000063


In [24]:
# The reformer is given the encoder object held by the first detector rather
# than a newly constructed one.
reformer = MagNetAutoencoderReformer(
    device=device,
    batch_size=256,
    encoder=magnet_detectors[0].encoder)

In [26]:
# Combine the classifier, the list of detectors and the reformer into a single
# MagNetOperator, which is the object scored at the end of the notebook.
detector = MagNetOperator(
    device=device,
    batch_size=256,
    classifier=model,
    reformer=reformer,
    detectors=magnet_detectors)

In [28]:
# Test split: the first half ([:n]) of the adversarial/benign data.
# The same true labels are paired with both the adversarial and the benign
# inputs, hence y_true[:n] appears twice.
y_test = np.concatenate([y_true[:n]] * 2)
# Predictions in (adversarial, benign) order; for the benign half the true
# labels are used as the predictions.
pred_test = np.concatenate((pred_adv[:n], y_true[:n]))
# NOTE: rebinds X_test, which earlier held the filtered clean test inputs.
X_test, labels_test = merge_and_generate_labels(
    adv[:n], X_benign[:n], flatten=False)

In [29]:
# Score the MagNet operator on the merged test split built in the previous cell.
# NOTE(review): `m` is not defined in any cell visible in this notebook, so
# this call raises NameError on a fresh kernel (Restart & Run All). The value
# presumably came from a since-deleted cell; `y_test` and `pred_test`, built in
# the previous cell, are otherwise unused and are the likely candidates --
# confirm against MagNetOperator.score's signature before replacing it.
detector.score(X_test, m, labels_test)

0.9995