In [1]:
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

from art.classifiers import PyTorchClassifier
from art.utils import load_mnist
from art.attacks import DeepFool

In [2]:
# Step 1: Load the MNIST dataset
# load_mnist() yields four items: the training pair, the test pair and the two
# pixel-value bounds. Unpack the outer tuple first, then each (inputs, labels) pair.
train_split, test_split, min_pixel_value, max_pixel_value = load_mnist()
x_train, y_train = train_split
x_test, y_test = test_split

In [3]:
# Step 1a: Reorder the axes from NHWC to PyTorch's NCHW format.
# np.swapaxes(x, 1, 3) also puts the channel axis at position 1, but it does so by
# exchanging it with the height axis, which yields (N, C, W, H): every image ends
# up transposed. An explicit permutation keeps height and width in their order.
x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32)

# Sanity check: shapes of the converted arrays and the pixel value range.
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(min_pixel_value, max_pixel_value)

(60000, 1, 28, 28)
(60000, 10)
(10000, 1, 28, 28)
0.0 1.0


In [4]:
# Step 2: Create the model
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages and two linear layers.

    For a (N, 1, 28, 28) input, each unpadded 5x5 convolution trims 4 pixels per
    spatial dimension and each 2x2 max-pool halves it (28 -> 24 -> 12 -> 8 -> 4),
    so the second pooling stage emits 10 feature maps of 4x4, i.e. 160 features
    per sample, which is the input width of ``fc_1``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv_1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=5, stride=1)
        self.conv_2 = nn.Conv2d(in_channels=4, out_channels=10, kernel_size=5, stride=1)
        self.fc_1 = nn.Linear(in_features=4 * 4 * 10, out_features=100)
        self.fc_2 = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        """Compute the class scores for a batch of images.

        Args:
            x: Tensor of shape (N, 1, 28, 28).

        Returns:
            Tensor of shape (N, 10) holding the output of the last linear layer;
            no softmax is applied here.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce 160
                features per sample (raised by the ``fc_1`` matrix product).
        """
        x = F.relu(self.conv_1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv_2(x))
        x = F.max_pool2d(x, 2, 2)
        # Flatten everything except the batch axis. Unlike view(-1, 160), this can
        # never re-split a wrong-sized input across the batch dimension and hand
        # back a different number of rows than were passed in.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc_1(x))
        x = self.fc_2(x)
        return x

In [5]:
# Instantiate the network defined above.
model = Net()

# Step 2a: Define the loss function and the optimizer
# Cross-entropy on the network outputs, minimised with SGD plus momentum.
sgd_settings = {"lr": 0.01, "momentum": 0.9}
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), **sgd_settings)

In [6]:
# Step 3: Create the ART classifier
# Gather the constructor arguments in one place, then wrap the PyTorch model.
classifier_settings = dict(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    loss=criterion,
    optimizer=optimizer,
    input_shape=(1, 28, 28),
    nb_classes=10,
)
classifier = PyTorchClassifier(**classifier_settings)

In [7]:
# Step 4: Train the ART classifier
# Time the run with perf_counter(): it is monotonic, so the measured interval
# cannot be distorted by system clock adjustments the way a difference of
# time.time() readings can.
since = time.perf_counter()
epochs = 10
classifier.fit(x_train, y_train, batch_size=64, nb_epochs=epochs)
time_elapsed = time.perf_counter() - since
# Split the elapsed seconds into whole minutes and the remaining seconds.
minutes, seconds = divmod(time_elapsed, 60)
print('Time taken for {} epochs: {:2.0f}m {:3.1f}s'.format(epochs, minutes, seconds))

Time taken for 10 epochs:  0m 12.5s


In [8]:
# Step 5: Evaluate the ART classifier on benign test examples
def evaluate(x, y, label='benign test examples'):
    """Print the accuracy of the global ``classifier`` on the samples ``x``.

    Args:
        x: Input samples, passed unchanged to ``classifier.predict``.
        y: One-hot encoded ground-truth labels, one row per sample in ``x``.
        label: Description of the evaluated data inserted into the printed
            message. The default reproduces the original message.

    Returns:
        None. The accuracy is printed as a percentage.
    """
    pred = classifier.predict(x)
    # Score against the labels that were passed in, not the global y_test, so the
    # result stays correct for any (x, y) pair and any number of samples.
    n_correct = np.sum(np.argmax(pred, axis=1) == np.argmax(y, axis=1))
    acc = n_correct / len(y)
    print('Accuracy on {}: {}%'.format(label, acc * 100))

# Report the accuracy of the freshly trained classifier on the unmodified test set.
evaluate(x_test, y_test)

Accuracy on benign test examples: 98.86%


## Adversarial training (TODO item, now added below)

Reference example: https://github.com/IBM/adversarial-robustness-toolbox/blob/master/examples/adversarial_training_cifar10.py

In [9]:
# Build the DeepFool attack around the trained classifier.
print('Create DeepFool attack')
adv_crafter = DeepFool(classifier)

# Craft adversarial counterparts for the training set first, then the test set,
# announcing each stage before it starts.
crafted = []
for banner, clean_inputs in (
    ('Craft attack on training examples', x_train),
    ('Craft attack test examples', x_test),
):
    print(banner)
    crafted.append(adv_crafter.generate(clean_inputs))
x_train_adv, x_test_adv = crafted


Create DeepFool attack
Craft attack on training examples
Craft attack test examples


In [10]:
# Accuracy of the classifier on the DeepFool test examples crafted above.
# NOTE: evaluate() prints its default "benign test examples" label here even
# though the inputs are adversarial.
evaluate(x_test_adv, y_test)

Accuracy on benign test examples: 36.65%


In [11]:
# Data augmentation: expand the training set with the adversarial samples
# Guarded so that re-running this cell cannot append the adversarial samples a
# second time (which would also leave x_train and y_train with different
# lengths): once augmented, x_train is no longer as long as x_train_adv.
if len(x_train) == len(x_train_adv):
    x_train = np.append(x_train, x_train_adv, axis=0)
    # Each adversarial sample reuses the label of the clean sample it was crafted
    # from, so the label array is simply repeated.
    y_train = np.append(y_train, y_train, axis=0)

In [12]:
# Adversarial training: call fit() again on the same classifier object that was
# trained in Step 4, this time on the clean + adversarial training set.
classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128)

In [13]:
# Evaluate the adversarially trained classifier on the adversarial test set.
# x_test_adv was generated before the second fit() call, so these examples were
# crafted against the classifier as it was after the first training run.
# NOTE: evaluate() prints its default "benign test examples" label here as well.
evaluate(x_test_adv, y_test)

Accuracy on benign test examples: 97.25%


In [14]:
# NOTE(review): this repeats the previous cell's call verbatim. Presumably one of
# the two was meant to evaluate the benign x_test instead - confirm.
evaluate(x_test_adv, y_test)

Accuracy on benign test examples: 97.25%
