In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np

from art.attacks import ZooAttack
from art.classifiers import PyTorchClassifier
from art.utils import load_mnist
from art.attacks.evasion import ZooAttack
from art.defences import FeatureSqueezing

In [2]:
class Net(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max-pooling, feed a
    two-layer fully connected head with 10 outputs. ``forward`` returns the raw
    output of the last linear layer (no softmax is applied).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv_1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=5, stride=1)
        self.conv_2 = nn.Conv2d(in_channels=4, out_channels=10, kernel_size=5, stride=1)
        # For a 28x28 input the feature map after both conv/pool stages is 10 x 4 x 4.
        self.fc_1 = nn.Linear(in_features=4 * 4 * 10, out_features=100)
        self.fc_2 = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        """Compute class scores.

        Args:
            x: tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield the
                160 features per sample that ``fc_1`` expects.
        """
        x = F.relu(self.conv_1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv_2(x))
        x = F.max_pool2d(x, 2, 2)
        # Flatten per sample instead of view(-1, 160): a wrongly sized input then
        # fails loudly in fc_1 rather than silently being regrouped into a
        # different number of rows.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc_1(x))
        x = self.fc_2(x)
        return x

In [3]:
# Step 1: Fetch MNIST; the loader returns the train/test splits plus the min/max pixel values
mnist_splits = load_mnist()
(x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = mnist_splits

# Sanity-check array shapes and the pixel range before any preprocessing
for split in (x_train, y_train, x_test):
    print(split.shape)
print(min_pixel_value, max_pixel_value)

(60000, 28, 28, 1)
(60000, 10)
(10000, 28, 28, 1)
0.0 1.0


In [4]:
# Step 1a: Convert images from NHWC to PyTorch's NCHW layout
# np.transpose with axes (0, 3, 1, 2) moves the channel axis to position 1 and keeps
# height before width. np.swapaxes(x, 1, 3) would additionally exchange height and
# width, i.e. feed transposed images to the network.
# NOTE: this cell rebinds x_train / x_test / y_train from their own previous values,
# so re-run the loading cell before re-running this one.
x_train = np.transpose(x_train, (0, 3, 1, 2)).astype(np.float32)
x_test = np.transpose(x_test, (0, 3, 1, 2)).astype(np.float32)
# Collapse the (n_samples, 10) training labels to class indices; y_test is left as is.
y_train = np.argmax(y_train, axis=1)
print(y_train.shape)

(60000,)


In [5]:
# Step 2: Instantiate the Net CNN
model = Net()

# Step 2a: Cross-entropy loss, optimised by SGD with momentum
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=0.01)

In [6]:
# Step 3: Wrap the PyTorch model, loss and optimizer in an ART classifier
pixel_range = (min_pixel_value, max_pixel_value)
classifier = PyTorchClassifier(
    model=model,
    loss=criterion,
    optimizer=optimizer,
    nb_classes=10,
    input_shape=(1, 28, 28),
    clip_values=pixel_range,
)

In [7]:
# Step 4: Fit the wrapped classifier on the training split (10 epochs, batches of 64)
classifier.fit(x_train, y_train, nb_epochs=10, batch_size=64)

In [8]:
# Step 5: Measure accuracy on the unmodified (benign) test set
predictions = classifier.predict(x_test)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(y_test, axis=1)  # y_test stores one row per sample
n_correct = np.sum(predicted_labels == true_labels)
accuracy = n_correct / len(y_test)
print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

Accuracy on benign test examples: 98.75%


In [9]:
# Step 6: Craft adversarial versions of the first N test images with ZooAttack
N = 30

# Attack hyper-parameters, kept together so they are easy to tweak
zoo_settings = dict(
    confidence=0.0,
    targeted=False,
    learning_rate=0.1,
    max_iter=100,
    binary_search_steps=10,
    initial_const=1e-1,
    abort_early=True,
    use_importance=False,
    nb_parallel=128,
    batch_size=1,
    variable_h=0.01,
)
attack = ZooAttack(classifier=classifier, **zoo_settings)

x_test_adv = attack.generate(x=x_test[:N])
print(x_test_adv.shape)

(30, 1, 28, 28)


In [10]:
# Score the classifier on the adversarial examples against the labels of the same N samples
predictions = classifier.predict(x_test_adv)
y_true = y_test[:N]
n_correct = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_true, axis=1))
accuracy = n_correct / len(y_true)
print('Accuracy on adversarial test examples: {}%'.format(accuracy * 100))

Accuracy on adversarial test examples: 76.66666666666667%


In [None]:
# Configure the feature-squeezing preprocessor: enabled for predict, disabled for fit
defence = FeatureSqueezing(
    bit_depth=8,
    clip_values=(min_pixel_value, max_pixel_value),
    apply_predict=True,
    apply_fit=False,
)

In [None]:
# Calling the defence yields an (x, y) pair; the labels are expected to pass through unchanged
squeezed_pair = defence(x_test_adv, y_test[:N])
x_adv_squeezed, y_squeezed = squeezed_pair
print(x_adv_squeezed.shape, y_squeezed.shape)

In [None]:
# Evaluate the classifier on the feature-squeezed adversarial examples.
predictions = classifier.predict(x_adv_squeezed)
# Only the first N test samples were attacked, so accuracy is the fraction of those
# N samples classified correctly: divide by the evaluated sample count (not the size
# of the full test set) and scale by 100 to get a percentage.
y_true = y_test[:N]
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_true, axis=1)) / len(y_true)
print('Accuracy on adversarial test examples: {}%'.format(accuracy * 100))

In [None]:
# Baseline: accuracy on the first N benign (unmodified) test samples.
N = 1000
x = x_test[:N]
y = y_test[:N]
predictions = classifier.predict(x)
# Divide by the number of evaluated samples and scale by 100. Dividing by
# len(y_test) and multiplying by N only yields a percentage for one particular
# combination of N and test-set size.
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y, axis=1)) / len(y)
# These inputs are benign, so the message says so.
print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

In [None]:
# Check how the defence affects benign inputs: squeeze the first N test samples
# and measure accuracy on the result.
N = 1000
x_squeezed, y_squeezed = defence(x_test[:N], y_test[:N])
x = x_squeezed
y = y_squeezed
predictions = classifier.predict(x)
# Divide by the number of evaluated samples and scale by 100. Dividing by
# len(y_test) and multiplying by N only yields a percentage for one particular
# combination of N and test-set size.
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y, axis=1)) / len(y)
# These inputs are squeezed benign samples, not adversarial ones.
print('Accuracy on squeezed benign test examples: {}%'.format(accuracy * 100))