# Boilerplate

Package installation, loading, and dataloaders. There's also a simple model defined. You can change it to your favourite architecture if you want.

In [None]:
!pip install tensorboardX

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
import matplotlib.pyplot as plt

from torchvision import datasets, transforms
from tensorboardX import SummaryWriter

# Use the GPU only when one is actually present, so the notebook also runs on
# CPU-only machines instead of failing on the first `.to(device)` call.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 64

# Seed the NumPy and PyTorch random number generators.
np.random.seed(42)
torch.manual_seed(42)


## Dataloaders
# Both MNIST splits only convert images to tensors here; normalization is
# applied by the model's first layer rather than by the dataset transform.
_to_tensor = transforms.Compose([transforms.ToTensor()])

train_dataset = datasets.MNIST(
    'mnist_data/', train=True, download=True, transform=_to_tensor
)
test_dataset = datasets.MNIST(
    'mnist_data/', train=False, download=True, transform=_to_tensor
)

# Only the training split is shuffled.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

## Simple NN. You can change this if you want. If you change it, mention the architectural details in your report.
class Net(nn.Module):
    """Two-layer fully connected classifier (784 -> 200 -> 10).

    The forward pass reshapes its input into rows of 28*28 values, applies
    the hidden layer followed by a ReLU, and returns the 10 raw output
    scores (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        n_pixels, n_hidden, n_classes = 28 * 28, 200, 10
        # Attribute names double as state_dict keys, so they stay `fc`/`fc2`.
        self.fc = nn.Linear(n_pixels, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        flat = x.view(-1, self.fc.in_features)
        hidden = F.relu(self.fc(flat))
        return self.fc2(hidden)

class Normalize(nn.Module):
    """Standardize inputs as ``(x - mean) / std``.

    Args:
        mean: Scalar subtracted from every input element.
        std: Scalar every centered element is divided by.

    The defaults are the constants this notebook uses for MNIST. Both values
    are stored as plain attributes (not parameters or buffers), so the layer
    adds nothing to the model's ``state_dict``.
    """

    def __init__(self, mean=0.1307, std=0.3081):
        super().__init__()
        self.mean = mean
        self.std = std

    def forward(self, x):
        return (x - self.mean) / self.std

# Normalization is the network's first "layer", so the model consumes raw
# images. Adversarial examples can then be searched for around the real image
# rather than around its normalized version.
model = nn.Sequential(Normalize(), Net()).to(device)
model.train()

# Implement the Attacks

Functions are given a simple useful signature that you can start with. Feel free to extend the signature as you see fit.

You may find it useful to create a 'batched' version of PGD that you can use to create the adversarial attack.

In [None]:
import torch
import torch.nn.functional as F

def fgsm(model, x, y, eps, targeted=False):
    """Take one Fast Gradient Sign Method step of size ``eps`` from ``x``.

    Args:
        model: Module mapping a batch of inputs to class logits.
        x: Input batch. The result is clamped to [0, 1], so ``x`` is assumed
            to hold un-normalized pixel values in that range.
        y: Class indices. The labels to move away from for an untargeted
            attack, or the labels to move towards for a targeted one.
        eps: Magnitude of the per-element perturbation.
        targeted: If True, step so that the loss w.r.t. ``y`` decreases;
            otherwise step so that it increases.

    Returns:
        A detached tensor with the same shape as ``x``, clamped to [0, 1].

    The model is evaluated in eval mode and its previous train/eval mode is
    restored afterwards. Parameter ``.grad`` fields are left untouched.
    """
    was_training = model.training
    model.eval()
    try:
        # Callers may run the attack from inside ``torch.no_grad()`` (e.g. an
        # evaluation loop); the input gradient still needs autograd.
        with torch.enable_grad():
            x = x.clone().detach().requires_grad_(True)
            loss = F.cross_entropy(model(x), y)
            if targeted:
                loss = -loss
            # Differentiate w.r.t. the input only, so nothing is accumulated
            # into the model parameters' gradients.
            (grad,) = torch.autograd.grad(loss, x)
    finally:
        model.train(was_training)

    x_adv = torch.clamp(x.detach() + eps * grad.sign(), 0, 1)
    return x_adv.detach()

def pgd_untargeted(model, x, y, k, eps, eps_step):
    """Run ``k`` projected gradient steps of an untargeted attack on ``x``.

    Each iteration takes one untargeted ``fgsm`` step of size ``eps_step``,
    clips the accumulated perturbation element-wise to [-eps, eps] around the
    original input, and clamps the result to [0, 1].

    Args:
        model: Module mapping a batch of inputs to class logits.
        x: Input batch; assumed to hold un-normalized pixel values in [0, 1].
        y: Class indices the attack tries to move away from.
        k: Number of iterations. With ``k == 0`` a detached copy of ``x`` is
            returned.
        eps: Maximum absolute perturbation per element.
        eps_step: Step size of each iteration.

    Returns:
        A detached tensor with the same shape as ``x``.

    The model is put in eval mode for the attack and its previous train/eval
    mode is restored afterwards.
    """
    was_training = model.training
    model.eval()
    # Project against a detached copy so no autograd graph is built through
    # the original input.
    x_orig = x.clone().detach()
    x_adv = x_orig
    try:
        # The attack needs input gradients even when the caller is inside a
        # ``torch.no_grad()`` block.
        with torch.enable_grad():
            for _ in range(k):
                x_adv = fgsm(model, x_adv, y, eps_step, targeted=False)
                delta = torch.clamp(x_adv - x_orig, -eps, eps)
                x_adv = torch.clamp(x_orig + delta, 0, 1)
    finally:
        model.train(was_training)

    return x_adv.detach()

# Implement Adversarial Training

In [None]:
from torch.utils.data import DataLoader

def train_model(model, num_epochs, enable_defense=True, attack='pgd', eps=0.1):
    """Train ``model`` on MNIST, optionally on adversarial examples.

    Args:
        model: Network to train in place. It is expected to normalize its own
            input (as the ``nn.Sequential(Normalize(), Net())`` models in
            this notebook do), because the data is fed as raw [0, 1] pixels.
        num_epochs: Number of passes over the training set.
        enable_defense: If True, every batch is replaced by adversarial
            examples generated against the current model before the update.
        attack: ``'pgd'`` (10 steps of size ``eps / 4``) or ``'fgsm'``.
        eps: Perturbation budget passed to the attack.

    Returns:
        The trained model (the same object that was passed in).

    Raises:
        ValueError: If ``enable_defense`` is True and ``attack`` is not one
            of the supported names.
    """
    if enable_defense and attack not in ('pgd', 'fgsm'):
        # Fail loudly instead of silently training without any defense.
        raise ValueError(f"Unknown attack {attack!r}; expected 'pgd' or 'fgsm'")

    # Only convert to tensors here: the model normalizes internally, and the
    # attacks clamp to [0, 1], which is only valid for un-normalized pixels.
    transform = transforms.ToTensor()
    train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    criterion = nn.CrossEntropyLoss()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Move the model before building the optimizer over its parameters.
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            if enable_defense:
                if attack == 'pgd':
                    data = pgd_untargeted(model, data, target, k=10, eps=eps, eps_step=eps/4)
                else:
                    data = fgsm(model, data, target, eps)

            # The attack helpers call model.eval(); switch back to training
            # mode before every parameter update.
            model.train()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            if batch_idx % 100 == 0:
                print(f'Epoch {epoch+1}/{num_epochs}, Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}')

    return model

In [None]:
def test_model_on_attacks(model, attack='pgd', eps=0.1):
    """Measure clean and adversarial accuracy of ``model`` on the MNIST test set.

    Args:
        model: Network to evaluate. It is expected to normalize its own input
            (as the ``nn.Sequential(Normalize(), Net())`` models in this
            notebook do), because the data is fed as raw [0, 1] pixels.
        attack: ``'pgd'`` (10 steps of size ``eps / 4``) or ``'fgsm'``. Any
            other value evaluates the "robust" accuracy on the clean inputs.
        eps: Perturbation budget passed to the attack.

    Returns:
        Tuple ``(standard_accuracy, robust_accuracy)`` in percent. Both
        values are also printed.
    """
    # Only convert to tensors here: the model normalizes internally, and the
    # attacks clamp to [0, 1], which is only valid for un-normalized pixels.
    transform = transforms.ToTensor()
    test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    standard_correct = 0
    robust_correct = 0
    total = 0

    for data, target in test_loader:
        data, target = data.to(device), target.to(device)

        # Adversarial examples require gradients w.r.t. the input, so they
        # are generated outside the no_grad block below.
        if attack == 'pgd':
            adv_data = pgd_untargeted(model, data, target, k=10, eps=eps, eps_step=eps/4)
        elif attack == 'fgsm':
            adv_data = fgsm(model, data, target, eps)
        else:
            adv_data = data

        # Plain inference needs no autograd graph.
        with torch.no_grad():
            standard_predicted = model(data).argmax(dim=1)
            robust_predicted = model(adv_data).argmax(dim=1)

        standard_correct += (standard_predicted == target).sum().item()
        robust_correct += (robust_predicted == target).sum().item()
        total += target.size(0)

    standard_accuracy = 100 * standard_correct / total
    robust_accuracy = 100 * robust_correct / total

    print(f'Standard Accuracy: {standard_accuracy:.2f}%')
    print(f'Robust Accuracy under {attack.upper()} attack (eps={eps}): {robust_accuracy:.2f}%')

    return standard_accuracy, robust_accuracy

# Study Accuracy, Quality, etc.

Compare the various results and report your observations on the submission.

In [None]:
## train the original model
# Baseline: a freshly initialised network trained without adversarial
# examples, then saved to 'weights.pt'.
model = nn.Sequential(Normalize(), Net()).to(device).train()

train_model(model, num_epochs=20, enable_defense=False)
torch.save(model.state_dict(), 'weights.pt')

In [None]:
## PGD attack
# Evaluate the checkpoint stored in 'weights.pt' under PGD for several
# perturbation budgets.
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load('weights.pt'))

for eps in (0.05, 0.1, 0.15, 0.2):
    test_model_on_attacks(model, attack='pgd', eps=eps)

In [None]:
## PGD based adversarial training
# Train a fresh network on PGD adversarial examples and save its weights
# under a name that includes the training budget.
eps = 0.1
model = nn.Sequential(Normalize(), Net())
train_model(model, num_epochs=20, enable_defense=True, attack='pgd', eps=eps)
torch.save(model.state_dict(), f'weights_AT_{eps}.pt')

In [None]:
## PGD attack
# Evaluate the adversarially trained network saved by the preceding training
# cell, not the undefended baseline in 'weights.pt'. The file name mirrors
# the f-string used when saving with eps = 0.1.
at_eps = 0.1
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load(f'weights_AT_{at_eps}.pt'))

for eps in [0.05, 0.1, 0.15, 0.2]:
    test_model_on_attacks(model, attack='pgd', eps=eps)

In [None]:
## FGSM attack
# Evaluate the checkpoint stored in 'weights.pt' under FGSM for several
# perturbation budgets.
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load('weights.pt'))

for eps in (0.05, 0.1, 0.15, 0.2):
    test_model_on_attacks(model, attack='fgsm', eps=eps)

In [None]:
## FGSM based adversarial training
# Train a fresh network on FGSM adversarial examples and save its weights.
# NOTE(review): with eps = 0.1 this writes 'weights_AT_0.1.pt', the same file
# name the PGD adversarial-training cell uses, so that checkpoint is replaced.
eps = 0.1
model = nn.Sequential(Normalize(), Net())
train_model(model, num_epochs=20, enable_defense=True, attack='fgsm', eps=eps)
torch.save(model.state_dict(), f'weights_AT_{eps}.pt')

In [None]:
## FGSM attack
# Evaluate the adversarially trained network saved by the preceding training
# cell, not the undefended baseline in 'weights.pt'. The file name mirrors
# the f-string used when saving with eps = 0.1.
at_eps = 0.1
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load(f'weights_AT_{at_eps}.pt'))

for eps in [0.05, 0.1, 0.15, 0.2]:
    test_model_on_attacks(model, attack='fgsm', eps=eps)

In [None]:
## PGD attack
# Evaluate the most recently saved adversarially trained network under PGD,
# not the undefended baseline in 'weights.pt'. The file name mirrors the
# f-string used when saving with eps = 0.1.
at_eps = 0.1
model = nn.Sequential(Normalize(), Net())
model.load_state_dict(torch.load(f'weights_AT_{at_eps}.pt'))

for eps in [0.05, 0.1, 0.15, 0.2]:
    test_model_on_attacks(model, attack='pgd', eps=eps)