In [1]:
import datetime
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision as tv
import torchvision.datasets as datasets
from art.attacks.evasion import BasicIterativeMethod
from art.estimators.classification import PyTorchClassifier
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

from region_based_classifier import RegionBasedClassifier
from util import dataset2tensor, get_correct_examples

In [2]:
# Enable IPython's autoreload extension in mode 2 (reload all modules before
# running each cell) so that edits to the local helper modules imported above
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Compute device shared by the whole notebook: the GPU when CUDA is available,
# otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device: {}'.format(device))

# Number of CPUs reported by the OS (os.cpu_count() can return None when it
# cannot be determined).
n_threads = os.cpu_count()
print('CPU threads: {}'.format(n_threads))

Device: cuda
CPU threads: 24


In [4]:
# Notebook-wide configuration.
# Root directory handed to datasets.MNIST (also the download target).
PATH_DATA = 'data'
# File the base model's state_dict is saved to / loaded from.
PATH_RESULTS = os.path.join('results', 'mnist_base_model.pt')
# Mini-batch size of the train/test DataLoaders and default batch size of predict().
BATCH_SIZE = 128
# Number of epochs used by the (currently commented-out) training loop.
EPOCHS = 40
# Number of correctly classified test samples drawn for the adversarial attack.
N_ADV = 2000

In [5]:
# Fetch dataset (downloaded into PATH_DATA on first use); ToTensor converts the
# images to float tensors scaled to [0, 1].
transforms = tv.transforms.Compose([tv.transforms.ToTensor()])
dataset_train = datasets.MNIST(PATH_DATA, train=True, download=True, transform=transforms)
dataset_test = datasets.MNIST(PATH_DATA, train=False, download=True, transform=transforms)

# From PyTorch dataset to Numpy array
tensor_X_train, tensor_y_train = dataset2tensor(dataset_train)
X_train = tensor_X_train.cpu().detach().numpy()
y_train = tensor_y_train.cpu().detach().numpy()

# Split model training set into training set and validation set.
# A fixed random_state makes the 5000-sample hold-out identical on every
# "Restart & Run All"; without it each run produced a different split.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=5000, random_state=0)
dataset_train = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.int64))

dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=True)

In [6]:
# Sanity check: number of training samples left after the validation hold-out,
# and the shape of the raw MNIST test images.
print(len(dataset_train))
print(dataset_test.data.size())

55000
torch.Size([10000, 28, 28])


## Train point-based classifier

In [7]:
# Point-based classifier network
class BaseModel(nn.Module):
    """Small CNN that maps 1x28x28 images to 10 class probabilities.

    Layout: conv(1->32, 3x3) -> ReLU -> conv(32->64, 3x3) -> ReLU ->
    max-pool(2) -> flatten -> linear(9216->200) -> ReLU -> linear(200->10)
    -> softmax over the class dimension.

    The attribute names and their registration order are part of the
    contract: the saved ``state_dict`` is keyed by these names, and other
    cells rebuild the network with ``nn.Sequential(*model.children())`` and
    drop the last child (the softmax) to obtain the pre-softmax scores.

    NOTE(review): ``forward`` returns probabilities, yet the notebook pairs
    this model with ``nn.CrossEntropyLoss``, which expects unnormalised
    logits. Confirm this double softmax is intended before retraining.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (no padding: 28 -> 26 -> 24, pooled to 12).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.relu2 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # Classifier head on the flattened 64 * 12 * 12 = 9216 features.
        self.flatten = nn.Flatten(start_dim=1)
        self.fc1 = nn.Linear(in_features=9216, out_features=200)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=200, out_features=10)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities of shape (batch, 10) for image batch ``x``."""
        features = self.pool1(self.relu2(self.conv2(self.relu1(self.conv1(x)))))
        hidden = self.relu3(self.fc1(self.flatten(features)))
        scores = self.fc2(hidden)
        return self.softmax(scores)

In [8]:
# Testing outputs
model = BaseModel().to(device)
model.eval()
it = iter(dataloader_train)
x, y = next(it)
x = x.to(device)
output0 = model(x)
print(output0.size())

seq1 = nn.Sequential(*list(model.children())).to(device)
seq1.eval()
output1 = seq1(x)
print(output1.size())
print(torch.equal(output0, output1))

seq2 = nn.Sequential(*list(model.children())[:-1]).to(device)
seq2.eval()
output2 = seq2(x)
print(output2.size())
print(torch.equal(output0, output2))
print(output2[:3])

torch.Size([128, 10])
torch.Size([128, 10])
True
torch.Size([128, 10])
False
tensor([[-0.0561,  0.0076,  0.0220,  0.0651,  0.0687,  0.0099, -0.0804, -0.0003,
          0.0149,  0.0563],
        [-0.0506,  0.0042,  0.0187,  0.0563,  0.0869,  0.0086, -0.0676, -0.0015,
         -0.0037,  0.0657],
        [-0.0565, -0.0070,  0.0219,  0.0542,  0.0714,  0.0092, -0.0732, -0.0160,
          0.0153,  0.0533]], device='cuda:0', grad_fn=<SliceBackward>)


In [9]:
def train(model, loader, loss, optimizer, device=None):
    """Train ``model`` for one pass over ``loader`` and report running metrics.

    Args:
        model: ``nn.Module`` to optimise; switched to training mode.
        loader: iterable yielding ``(inputs, labels)`` batches.
        loss: callable ``loss(outputs, labels)`` returning a scalar tensor.
            The per-batch value is weighted by the batch size, so it is
            assumed to be a batch mean.
        optimizer: optimizer holding ``model``'s parameters.
        device: device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU for a parameter-free model).

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually
        processed. Metrics are computed from the outputs produced before
        each optimizer step.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    total_loss = 0.
    corrects = 0
    n_seen = 0

    for x, y in loader:
        x = x.to(device)
        y = y.to(device)
        batch_size = x.size(0)

        optimizer.zero_grad()
        outputs = model(x)
        l = loss(outputs, y)
        l.backward()
        optimizer.step()

        # for display
        total_loss += l.item() * batch_size
        preds = outputs.argmax(dim=1)
        corrects += preds.eq(y.view_as(preds)).sum().item()
        n_seen += batch_size

    # Normalise by the samples seen rather than len(loader.dataset): the two
    # differ when the loader skips samples (drop_last=True, custom sampler).
    return total_loss / n_seen, corrects / n_seen

In [10]:
def validate(model, loader, loss, device=device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: ``nn.Module`` to evaluate; switched to eval mode.
        loader: iterable yielding ``(inputs, labels)`` batches.
        loss: callable ``loss(outputs, labels)`` returning a scalar tensor.
            The per-batch value is weighted by the batch size, so it is
            assumed to be a batch mean.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually
        processed.

    Raises:
        ZeroDivisionError: if ``loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.
    corrects = 0
    n_seen = 0

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            batch_size = x.size(0)
            outputs = model(x)
            total_loss += loss(outputs, y).item() * batch_size
            preds = outputs.argmax(dim=1)
            corrects += preds.eq(y.view_as(preds)).sum().item()
            n_seen += batch_size

    # Normalise by the samples seen rather than len(loader.dataset): the two
    # differ when the loader skips samples (drop_last=True, custom sampler).
    return total_loss / n_seen, corrects / n_seen

In [11]:
def predict(model, X, batch_size=BATCH_SIZE, device=device):
    """Return the predicted class index for every row of tensor ``X``.

    The model is put in eval mode and run batch by batch, in order and
    without gradient tracking. The result is an int64 tensor of length
    ``len(X)``, allocated without a device argument and pre-filled with -1.
    """
    model.eval()
    batches = DataLoader(TensorDataset(X), batch_size=batch_size, shuffle=False)
    tensor_pred = torch.full((len(X),), -1, dtype=torch.int64)

    offset = 0
    with torch.no_grad():
        # TensorDataset wraps each sample in a 1-tuple, hence the unpacking.
        for (inputs,) in batches:
            inputs = inputs.to(device)
            stop = offset + inputs.size(0)
            scores = model(inputs)
            tensor_pred[offset:stop] = scores.max(1)[1].type(torch.int64)
            offset = stop

    return tensor_pred

In [12]:
# SGD with momentum over the base model's parameters.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# NOTE(review): nn.CrossEntropyLoss applies log-softmax itself and expects
# unnormalised logits, but BaseModel.forward already ends in a softmax, so the
# loss is computed on a softmax of a softmax. Confirm this is intended.
loss=nn.CrossEntropyLoss()

In [13]:
# DO NOT RUN THIS UNLESS YOU NEED TO RETRAIN!
# since = time.time()
# for epoch in range(EPOCHS):
#     start = time.time()
#     tr_loss, tr_acc = train(model, dataloader_train, loss, optimizer)
#     va_loss, va_acc = validate(model, dataloader_test, loss)
    
#     time_elapsed = time.time() - start
#     print(('{:2d}/{:d}[{:s}] Train Loss: {:.4f} Acc: {:.4f}%, ' +
#         'Test Loss: {:.4f} Acc: {:.4f}%').format(
#             epoch+1, EPOCHS, str(datetime.timedelta(seconds=time_elapsed)),
#             tr_loss, tr_acc*100.,
#             va_loss, va_acc*100.))
    
# time_elapsed = time.time() - since
# print('Total run time: {:.0f}m {:.1f}s'.format(
#     time_elapsed // 60,
#     time_elapsed % 60))

In [14]:
# DO NOT RUN THIS UNLESS YOU NEED TO RETRAIN!
# if not os.path.exists('results'):
#     os.makedirs('results')

# torch.save(model.state_dict(), PATH_RESULTS)

In [15]:
# Load model from saved file.
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a CUDA machine also loads on a CPU-only one.
model.load_state_dict(torch.load(PATH_RESULTS, map_location=device))

# Accuracy of the restored weights on the full test set.
_, acc_test = validate(model, dataloader_test, loss)
print('Test set accuracy: {:.4f}'.format(acc_test*100))

Test set accuracy: 98.2200


## Remove misclassified samples

In [16]:
# Filter the training split down to the samples the loaded model gets right.
# NOTE(review): get_correct_examples comes from util.py; presumably it returns
# the (X, y) tensors of the correctly predicted samples -- verify there.
print('Training set: {}'.format(len(dataset_train)))
tensor_train_X, tensor_train_y = get_correct_examples(model, dataset_train, device=device, return_tensor=True)
dataset_train_perfect = TensorDataset(tensor_train_X, tensor_train_y)
dataloader_train_perfect = DataLoader(dataset_train_perfect, batch_size=512, shuffle=True)
# Re-validate on the filtered data as a check (the printed accuracy is 100%).
_, acc = validate(model, dataloader_train_perfect, loss)
print('Accuracy on {} filtered training examples: {:.4f}%'.format(len(dataloader_train_perfect.dataset), acc*100))

# Same filtering and check for the test split; tensor_test_X / tensor_test_y
# are the pool the adversarial subset is drawn from.
print('Test set: {}'.format(len(dataset_test)))
tensor_test_X, tensor_test_y = get_correct_examples(model, dataset_test, device=device, return_tensor=True)
dataset_test_perfect = TensorDataset(tensor_test_X, tensor_test_y)
dataloader_test_perfect = DataLoader(dataset_test_perfect, batch_size=512, shuffle=True)
_, acc = validate(model, dataloader_test_perfect, loss)
print('Accuracy on {} filtered test examples: {:.4f}%'.format(len(dataloader_test_perfect.dataset), acc*100))

Training set: 55000
Accuracy on 54473 filtered training examples: 100.0000%
Test set: 10000
Accuracy on 9822 filtered test examples: 100.0000%


## Create adversarial examples 

In [17]:
# Wrap the trained model for the Adversarial Robustness Toolbox (ART).
# ART documents device_type as the string 'cpu' or 'gpu'; the torch.device
# object passed before only worked because it fell through ART's "not cpu"
# branch, so translate it explicitly.
classifier = PyTorchClassifier(
    model=model,
    loss=loss,
    input_shape=(1, 28, 28),
    optimizer=optimizer,
    nb_classes=10,
    clip_values=(0.0, 1.0),  # valid pixel range of the ToTensor-scaled images
    device_type='gpu' if device.type == 'cuda' else 'cpu',
)

# Alternative single-step attack (FastGradientMethod is not imported in this notebook):
# attack = FastGradientMethod(estimator=classifier, eps=0.2)
# Iterative attack with eps=0.2; all other parameters keep their ART defaults.
attack = BasicIterativeMethod(estimator=classifier, eps=0.2)

In [18]:
# Prepare dataset for adversarial examples
# Draw up to N_ADV distinct random indices from the correctly classified test
# samples. NOTE(review): no random seed is set in the visible cells, so the
# selected subset changes from run to run.
n = len(dataset_test_perfect)
indices = torch.randperm(n)[:N_ADV]

pt_subset_X = tensor_test_X[indices]  # PyTorch Tensor
pt_subset_y = tensor_test_y[indices]

# NumPy copies of the subset, the array type passed to the ART classifier and attack.
subset_X = pt_subset_X.cpu().detach().numpy()
subset_y = pt_subset_y.cpu().detach().numpy()

In [19]:
# Baseline: accuracy of the wrapped classifier on the clean subset.
subset_pred = np.argmax(classifier.predict(subset_X), axis=1)
accuracy = np.sum(subset_pred == subset_y) / float(len(subset_pred))
print("Model accuracy on clean examples: {:.4f}%".format(accuracy * 100))

# Generate adversarial examples
# NOTE(review): no labels are passed to generate(); presumably ART then uses
# the model's own predictions as the labels to attack -- confirm.
subset_adv = attack.generate(x=subset_X)
subset_pred = np.argmax(classifier.predict(subset_adv), axis=1)

# Accuracy on the perturbed inputs, measured against the true labels.
accuracy = np.sum(subset_pred == subset_y) / float(len(subset_pred))
print("Model accuracy on adversarial examples: {:.4f}%".format(accuracy * 100))

PGD - Batches:   0%|          | 0/63 [00:00<?, ?it/s]Model accuracy on clean examples: 100.0000%
Model accuracy on adversarial examples: 10.5500%


## MagNet

In [20]:
# Keep the first 1000 samples of the attacked subset: the clean inputs, the
# adversarial examples generated from them, and the true labels. The three
# arrays are sliced identically, so they stay index-aligned.
X_benign = subset_X[:1000]
X_adv = subset_adv[:1000]
y_true = subset_y[:1000]

print('Test set (Adversarial examples):', X_adv.shape)
print('Test set (True labels) :', y_true.shape)

# X_val / y_val are the 5000 samples held out from the training data earlier.
print('Validation set (Benign samples):', X_val.shape)
print('Validation set (True labels) :', y_val.shape)

Test set (Adversarial examples): (1000, 1, 28, 28)
Test set (True labels) : (1000,)
Validation set (Benign samples): (5000, 1, 28, 28)
Validation set (True labels) : (5000,)


In [21]:
# Autoencoder with one pooling / upsampling stage (functional-API variant)
class Encoder1(nn.Module):
    """Convolutional autoencoder that halves and then restores the resolution.

    Five 3x3 convolutions (padding 1, so the spatial size is kept), each
    followed by a sigmoid. The feature map is average-pooled by 2 after the
    first convolution and upsampled by 2 (nearest neighbour) after the third.
    For inputs with even height and width the output has the input's shape,
    with values in (0, 1).

    Args:
        n_channel: number of channels of the input and of the output.
    """

    def __init__(self, n_channel=1):
        super(Encoder1, self).__init__()
        self.n_channel = n_channel
        self.conv1 = nn.Conv2d(self.n_channel, 3, 3, padding=1)
        self.conv2 = nn.Conv2d(3, 3, 3, padding=1)
        self.conv3 = nn.Conv2d(3, 3, 3, padding=1)
        self.conv4 = nn.Conv2d(3, 3, 3, padding=1)
        self.conv5 = nn.Conv2d(3, self.n_channel, 3, padding=1)

    def forward(self, x):
        """Return the reconstruction of image batch ``x`` (N, n_channel, H, W)."""
        # torch.sigmoid and F.interpolate replace the deprecated F.sigmoid and
        # F.upsample. F.upsample simply forwarded to interpolate with the
        # default 'nearest' mode, so the result is unchanged.
        x = torch.sigmoid(self.conv1(x))
        x = F.avg_pool2d(x, kernel_size=2)
        x = torch.sigmoid(self.conv2(x))
        x = torch.sigmoid(self.conv3(x))
        x = F.interpolate(x, scale_factor=2)
        x = torch.sigmoid(self.conv4(x))
        x = torch.sigmoid(self.conv5(x))
        return x

In [22]:
# Testing outputs
# Smoke test: run an untrained Encoder1 on one training batch and display the
# output size, which should equal the input batch size (128, 1, 28, 28).
ae = Encoder1(n_channel=1).to(device)
it = iter(dataloader_train)
x, y = next(it)
x = x.to(device)
score = ae(x)
score.size()

torch.Size([128, 1, 28, 28])

In [23]:
class Encoder2(nn.Module):
    """Convolutional autoencoder that keeps the full resolution throughout.

    Three 3x3 convolutions (padding 1, so the spatial size is kept), each
    followed by a sigmoid. The output has the input's shape, with values in
    (0, 1).

    Args:
        n_channel: number of channels of the input and of the output.
    """

    def __init__(self, n_channel=1):
        super(Encoder2, self).__init__()
        self.n_channel = n_channel
        self.conv1 = nn.Conv2d(self.n_channel, 3, 3, padding=1)
        self.conv2 = nn.Conv2d(3, 3, 3, padding=1)
        self.conv3 = nn.Conv2d(3, self.n_channel, 3, padding=1)

    def forward(self, x):
        """Return the reconstruction of image batch ``x`` (N, n_channel, H, W)."""
        # torch.sigmoid replaces F.sigmoid, which is deprecated in PyTorch 1.x
        # (it emits a warning there); the computed values are identical.
        x = torch.sigmoid(self.conv1(x))
        x = torch.sigmoid(self.conv2(x))
        x = torch.sigmoid(self.conv3(x))
        return x

In [24]:
# Testing outputs
# Smoke test: run an untrained Encoder2 on one training batch and display the
# output size, which should equal the input batch size (128, 1, 28, 28).
ae = Encoder2(n_channel=1).to(device)
it = iter(dataloader_train)
x, y = next(it)
x = x.to(device)
score = ae(x)
score.size()

torch.Size([128, 1, 28, 28])

In [25]:
# Autoencoder training hyper-parameters.
# Presumably the number of training epochs; the training loop is not in the
# visible cells -- confirm.
EPOCHS_ENCODER = 100
# Learning rate passed to optim.AdamW.
LR_ENCODER = 0.001
WEIGHT_DECAY = 1e-9  # The `reg_strength` term in `keras.regularizers.l2(reg_strength)`
# Presumably the mini-batch size for autoencoder training; not used in the
# visible cells -- confirm.
BATCH_SIZE_ENCODER = 256

In [26]:
# Instantiate the pooled autoencoder with its optimizer and a mean-squared-error
# reconstruction loss.
# NOTE(review): AdamW applies decoupled weight decay, which is not the same
# update as the L2 penalty of the Keras regularizer that WEIGHT_DECAY is
# modelled on -- confirm that exact equivalence is not required.
encoder1 = Encoder1(n_channel=1).to(device)
optimizer_encoder = optim.AdamW(encoder1.parameters(), lr=LR_ENCODER, weight_decay=WEIGHT_DECAY)
loss_encoder = nn.MSELoss()

In [27]:
class Autoencoder(nn.Module):
    """Convolutional autoencoder with one pooling / upsampling stage.

    Module-based counterpart of ``Encoder1``: five 3x3 convolutions
    (padding 1), each followed by a sigmoid, with a 2x average pool after the
    first convolution and a 2x upsample after the third. Attribute names and
    registration order are kept stable because they define the
    ``state_dict`` keys and the order of ``children()``.

    Args:
        in_channel: number of channels of the input and of the output.
    """

    def __init__(self, in_channel = 1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, 3, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(3, in_channel, kernel_size=3, padding=1)
        self.avg = nn.AvgPool2d(kernel_size=2)
        self.up = nn.Upsample(scale_factor=2)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Return the reconstruction of image batch ``x`` (N, in_channel, H, W)."""
        # Encode at full resolution, then reduce the resolution by half.
        pooled = self.avg(self.sig(self.conv1(x)))
        # Two convolutions at the reduced resolution.
        bottleneck = self.sig(self.conv3(self.sig(self.conv2(pooled))))
        # Restore the resolution and decode back to in_channel channels.
        restored = self.up(bottleneck)
        return self.sig(self.conv5(self.sig(self.conv4(restored))))

In [28]:
# Testing outputs
# Smoke test: run an untrained Autoencoder on one training batch and display
# the output size, which should equal the input batch size (128, 1, 28, 28).
ae = Autoencoder(in_channel=1).to(device)
it = iter(dataloader_train)
x, y = next(it)
x = x.to(device)
score = ae(x)
score.size()  

torch.Size([128, 1, 28, 28])

In [29]:
class Autoencoder2(nn.Module):
    """Convolutional autoencoder that keeps the full resolution throughout.

    Module-based counterpart of ``Encoder2``: three 3x3 convolutions
    (padding 1), each followed by a sigmoid. Attribute names and registration
    order are kept stable because they define the ``state_dict`` keys and the
    order of ``children()``.

    Args:
        in_channel: number of channels of the input and of the output
            (required, no default).
    """

    def __init__(self, in_channel):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channel, 3, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(3, in_channel, kernel_size=3, padding=1)

        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Return the reconstruction of image batch ``x`` (N, in_channel, H, W)."""
        hidden = self.sig(self.conv1(x))
        hidden = self.sig(self.conv2(hidden))
        return self.sig(self.conv3(hidden))

In [30]:
# Testing outputs
# Smoke test: run an untrained Autoencoder2 on one training batch and display
# the output size, which should equal the input batch size (128, 1, 28, 28).
ae = Autoencoder2(in_channel=1).to(device)
it = iter(dataloader_train)
x, y = next(it)
x = x.to(device)
score = ae(x)
score.size()

torch.Size([128, 1, 28, 28])