In [1]:
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.nn.functional as functional
import torch.nn.init as init

In [2]:
class SimpleCNN(nn.Module):
    """Small two-convolution classifier intended for single-channel 28x28 images.

    Two 5x5 convolutions with padding 2 feed a 64-unit linear layer followed by
    a 10-unit linear output layer. ``width_multiplier`` scales the channel
    counts: the first convolution produces ``2 * width_multiplier`` channels
    and the second ``4 * width_multiplier``.
    """

    def __init__(self, width_multiplier: int):
        super().__init__()
        self.width_multiplier = width_multiplier
        narrow_channels = 2 * width_multiplier
        wide_channels = 4 * width_multiplier
        self.conv1: nn.Conv2d = nn.Conv2d(1, narrow_channels, 5, padding=2)
        self.conv2: nn.Conv2d = nn.Conv2d(narrow_channels, wide_channels, 5, padding=2)
        self.linear: nn.Linear = nn.Linear(wide_channels * 28 * 28, 64)
        self.output: nn.Linear = nn.Linear(64, 10)

    def forward(self, x: autograd.Variable) -> autograd.Variable:
        """Return the output of the final linear layer (no softmax is applied)."""
        flat_size = 4 * self.width_multiplier * 28 * 28
        hidden = functional.relu(self.conv1(x))
        hidden = functional.relu(self.conv2(hidden))
        # Flatten the feature maps to (batch, flat_size) before the dense layers.
        hidden = functional.relu(self.linear(hidden.view(-1, flat_size)))
        return self.output(hidden)
    
    
# Smoke test: a forward pass on a random batch of two 28x28 single-channel images should run.
net = SimpleCNN(1)
print(net(autograd.Variable(torch.rand(2, 1, 28, 28))))

Variable containing:
 0.1232 -0.0254 -0.0345 -0.0751  0.0528  0.0952  0.0551 -0.0612  0.0660 -0.0427
 0.1223 -0.0272 -0.0351 -0.0742  0.0530  0.0955  0.0543 -0.0609  0.0664 -0.0430
[torch.FloatTensor of size 2x10]



In [3]:
import typing
import numpy.testing as testing

def fsgm(image_batch: torch.FloatTensor,
         label_batch: torch.LongTensor,
         model: typing.Callable[[autograd.Variable], autograd.Variable],
         objective: typing.Callable[[autograd.Variable, autograd.Variable], autograd.Variable],
         eps: float):
    """Perturb every image of a batch, in place, with the fast gradient sign method (FGSM).

    Each sample is attacked separately: the gradient of
    ``objective(model(x), label)`` with respect to the input is computed, the
    input is moved by ``eps`` in the direction of the gradient's sign, and the
    result is clamped to ``[0, 1]``. Elements whose gradient is exactly zero
    are not moved, because ``sign(0) == 0``.

    Args:
        image_batch: Images indexed by sample along dimension 0. Overwritten
            with the perturbed images.
        label_batch: One label per sample, indexed along dimension 0.
        model: Maps a one-sample input batch to an output.
        objective: Maps ``(output, label)`` to a one-element loss.
        eps: Step size applied to the sign of the gradient.

    Returns:
        None. ``image_batch`` is modified in place. The gradients stored on the
        model's parameters are left untouched.
    """
    for i in range(image_batch.shape[0]):
        # Attack a copy so that the explicit write-back below is the only mutation of the batch.
        x = autograd.Variable(image_batch[i:i + 1].clone(), requires_grad=True)
        label = autograd.Variable(label_batch[i:i + 1])
        loss = objective(model(x), label)
        # autograd.grad returns d(loss)/dx directly and, unlike loss.backward(), does not
        # accumulate anything into the .grad buffers of the model's parameters.
        grad, = autograd.grad(loss, x)
        perturbed = torch.clamp(x.data + eps * torch.sign(grad.data), min=0.0, max=1.0)
        image_batch[i] = perturbed[0]


# Sanity check for fsgm: every pixel must move by exactly eps or not move at all.
def test_fsgm():
    """Attack a constant 0.5 image and check each pixel moved by 0.3 or stayed put.

    Pixels that did not change are expected to have a perturbation of zero, so
    the reference value is 0.3 only where a change actually occurred.
    """
    eps = 0.3
    model = SimpleCNN(1)
    clean = torch.zeros((1, 1, 28, 28)) + 0.5
    target = torch.LongTensor([2])
    attacked = clean.clone()
    fsgm(attacked, target, model, nn.CrossEntropyLoss(), eps)
    delta = torch.abs(attacked - clean)
    moved = (delta > 0).float()
    testing.assert_almost_equal(delta.numpy(), (moved * eps).numpy(), 15)

# Repeat the check several times: every call to test_fsgm builds a freshly initialised network.
for i in range(10):
    test_fsgm()

# Lessons from this test: (1) pixels whose gradient is exactly zero are not moved by the sign
# step, so the expected perturbation has to allow for them; (2) because the network is randomly
# initialised, the test should be run several times to catch failures that only show up for
# some initialisations.

In [5]:
def pgd(image_batch: torch.FloatTensor,
        label_batch: torch.LongTensor,
        model: typing.Callable[[autograd.Variable], autograd.Variable],
        objective: typing.Callable[[autograd.Variable, autograd.Variable], autograd.Variable],
        eps: float,
        alpha: float,
        num_steps: int,
        num_restarts: int):
    """Replace each image of a batch, in place, with a PGD adversarial example.

    For every sample, ``num_restarts`` independent trajectories are run. Each
    trajectory starts from a randomly shifted copy of the image and takes
    ``num_steps`` signed-gradient ascent steps of size ``alpha`` on
    ``objective(model(x), label)``, projecting back after each step onto the
    intersection of ``[image - eps, image + eps]`` and ``[0, 1]``. The
    trajectory whose final point has the largest loss is written back.

    Args:
        image_batch: Images indexed by sample along dimension 0. Overwritten
            with the adversarial examples.
        label_batch: One label per sample, indexed along dimension 0.
        model: Maps a one-sample input batch to an output.
        objective: Maps ``(output, label)`` to a one-element loss.
        eps: Radius of the allowed perturbation around each original image.
        alpha: Step size of each signed-gradient step.
        num_steps: Number of gradient steps per trajectory; 0 keeps the random start.
        num_restarts: Number of trajectories per image; must be at least 1.

    Returns:
        None. ``image_batch`` is modified in place. The gradients stored on the
        model's parameters are left untouched.

    Raises:
        ValueError: If ``num_restarts`` is smaller than 1.
    """
    if num_restarts < 1:
        raise ValueError("num_restarts must be at least 1, got {}".format(num_restarts))

    def loss_value(loss: autograd.Variable) -> float:
        """Extract a Python float from a one-element loss, whether 0-dim or of shape (1,)."""
        return float(loss.data.view(-1)[0])

    def pgd_without_restarts(sample_index: int):
        """Run one PGD trajectory and return (perturbed image, loss at that image)."""
        image = image_batch[sample_index]
        label = autograd.Variable(label_batch[sample_index:sample_index + 1])
        x_min = torch.clamp(image - eps, min=0.0)
        x_max = torch.clamp(image + eps, max=1.0)
        # NOTE(review): the start is drawn from [image, image + eps) only, not from the full
        # [image - eps, image + eps] ball. Kept as is because test_multistep_pgd relies on it.
        current = torch.clamp(image + torch.rand(image.shape) * eps, min=0.0, max=1.0)
        for _ in range(num_steps):
            x = autograd.Variable(torch.unsqueeze(current, 0), requires_grad=True)
            loss = objective(model(x), label)
            # autograd.grad yields d(loss)/dx without accumulating into the model's parameter grads.
            grad, = autograd.grad(loss, x)
            stepped = current + alpha * torch.sign(grad.data[0])
            # Project back into the eps-ball around the original image (already clipped to [0, 1]).
            current = torch.min(torch.max(stepped, x_min), x_max)
        # Score the point that is actually returned; this costs one extra forward pass but makes
        # the restart comparison meaningful and keeps num_steps == 0 well defined.
        final_loss = objective(model(autograd.Variable(torch.unsqueeze(current, 0))), label)
        return current, loss_value(final_loss)

    for i in range(image_batch.shape[0]):
        # The best-so-far bookkeeping is per image; sharing it across images would let an earlier
        # image's adversarial example overwrite a later image.
        max_loss = -float("inf")
        best_perturbed_image = None
        for _ in range(num_restarts):
            perturbed_image, loss = pgd_without_restarts(i)
            # Always accept the first candidate so a NaN loss cannot leave the slot empty.
            if best_perturbed_image is None or loss > max_loss:
                max_loss = loss
                best_perturbed_image = perturbed_image
        image_batch[i] = best_perturbed_image

def test_no_runtime_errors():
    """Smoke test: pgd runs end to end on a SimpleCNN with a random two-image batch."""
    model = SimpleCNN(1)
    images = torch.rand((2, 1, 28, 28))
    targets = torch.LongTensor([2, 2])
    model.zero_grad()
    pgd(images, targets, model, nn.CrossEntropyLoss(),
        eps=0.3, alpha=0.6, num_steps=4, num_restarts=2)


# Fixture for a finer-grained test: a model with hand-set weights, so that the images produced
# by PGD can be checked against a known range.
class LinearModel(nn.Module):
    """Module holding a 784 -> 10 linear layer whose weight is 1 on one row and 0 elsewhere.

    Only the weight matrix is overwritten; the layer's bias keeps its default
    initialisation.

    NOTE(review): ``forward`` only flattens its input and never applies
    ``self.linear`` -- confirm whether that is intentional.
    """

    def __init__(self, label: int):
        super().__init__()
        self.linear: nn.Linear = nn.Linear(1 * 28 * 28, 10)
        weights = self.linear.weight.data
        weights.fill_(0)
        # Row `label` is the only non-zero row of the weight matrix.
        weights[label, :].fill_(1)

    def forward(self, x: autograd.Variable):
        """Return ``x`` reshaped to ``(-1, 784)``; the linear layer is not applied."""
        flattened: autograd.Variable = x.view((-1, 1 * 28 * 28,))
        return flattened


def test_linear_model():
    """Check that LinearModel(2) has an all-ones weight row 2 and zeros in every other row."""
    weights = LinearModel(2).linear.weight.data
    row_width = 28 * 28
    expectations = (
        (weights[0:2, :], torch.zeros((2, row_width))),
        (weights[2, :], torch.ones((row_width,))),
        (weights[3:, :], torch.zeros((7, row_width))),
    )
    for actual, expected in expectations:
        testing.assert_almost_equal(actual.numpy(), expected.numpy(), 15)


def dummy_loss_function(output_batch: autograd.Variable, label_batch: autograd.Variable) -> autograd.Variable:
    """Return, for each sample, minus half the sum of squares of its outputs.

    This equals the original ``-0.5 * sum(MSE(output, 0), dim=1)`` with no
    reduction, but squares the output directly. That avoids the deprecated
    ``reduce`` argument of ``nn.MSELoss`` and the CPU float zero target, so the
    function works for any dtype or device of ``output_batch``.

    Args:
        output_batch: Model outputs; dimension 1 is summed over.
        label_batch: Ignored; present only so the signature matches other objectives.

    Returns:
        A tensor with dimension 1 of ``output_batch`` reduced away, holding
        ``-0.5 * sum(output ** 2)`` per sample.
    """
    return -0.5 * torch.sum(output_batch * output_batch, dim=1)


def test_dummy_loss_function():
    """Each of the first two rows of a 10-column identity has one unit entry, so each loss is -0.5."""
    unit_rows = autograd.Variable(torch.eye(2, 10))
    actual = dummy_loss_function(unit_rows, None).data.numpy()
    wanted = (-0.5 * torch.ones((2,))).numpy()
    testing.assert_equal(actual, wanted)


def test_single_step_pgd():
    """One PGD step with alpha=1.0 and eps=0.3 on a constant 0.5 image must yield 0.2 everywhere."""
    clean = torch.zeros(2, 1, 28, 28) + 0.5
    targets = torch.LongTensor([2, 2])
    attacked = clean.clone()
    pgd(attacked, targets, LinearModel(2), dummy_loss_function,
        eps=0.3, alpha=1.0, num_steps=1, num_restarts=20)
    # Every pixel is expected to end on the lower edge of the eps-ball: 0.5 - 0.3.
    expected = torch.zeros(clean.shape) + 0.2
    testing.assert_almost_equal(attacked.numpy(), expected)


def test_multistep_pgd():
    """Ten PGD steps of size 0.01 must leave every pixel strictly between two reference bounds."""
    eps, alpha, steps = 0.3, 0.01, 10
    clean = torch.zeros(2, 1, 28, 28) + 0.5
    targets = torch.LongTensor([2, 2])
    attacked = clean.clone()
    pgd(attacked, targets, LinearModel(2), dummy_loss_function,
        eps=eps, alpha=alpha, num_steps=steps, num_restarts=1)

    # Lower reference bound: the bottom of the eps-ball, clipped at zero.
    lower = torch.clamp(clean - eps, min=0.0)

    # Upper reference bound: start at the top of the eps-ball and shrink it by a factor
    # of (1 - alpha) once per step, never letting it drop below the lower bound.
    upper = clean + eps
    for _ in range(steps):
        upper = upper - alpha * upper
    upper = torch.max(upper, lower)

    testing.assert_array_less(lower, attacked)
    testing.assert_array_less(attacked, upper)


# Run the PGD-related checks: a smoke test, the two fixture checks (loss function and linear
# model), and then the single-step and multi-step behaviour tests.
test_no_runtime_errors()
test_dummy_loss_function()
test_linear_model()
test_single_step_pgd()
test_multistep_pgd()

In [None]:
from torchvision import datasets

# Now we need to import MNIST and transform it. 