In [1]:
import torch
import torch.nn as nn

### Problem 1(h)

In [2]:
# Seed the global RNG first: the layer weights and the input below are both
# drawn from it, so their creation order must stay fixed for reproducibility.
torch.manual_seed(1)

# Model N as described in the question: 10 -> 10 -> 10 -> 3, with a ReLU
# between the linear layers and no bias terms.
N = nn.Sequential(
    nn.Linear(10, 10, bias=False),
    nn.ReLU(),
    nn.Linear(10, 10, bias=False),
    nn.ReLU(),
    nn.Linear(10, 3, bias=False),
)

# Random input. The first dimension is the batch size, the remaining ones are
# the actual data dimensions. Gradient tracking is switched on so that the
# gradient of the loss w.r.t. x can be read from x.grad after backward().
x = torch.rand(1, 10).requires_grad_(True)

t = 1  # target class

epsReal = 0.4  # perturbation budget; depending on the data this may be large or small
eps = epsReal - 1e-7  # small constant to offset floating-point errors

# Single forward pass, reused for the class check and for the loss below.
logits = N(x)

# The network N classifies x as belonging to class 2:
# argmax over the class dimension, then .item() to get a plain Python int.
original_class = logits.argmax(dim=1).item()
print("Original Class: ", original_class)
assert original_class == 2

# Gradient of the loss towards the target class t.
# Note that CrossEntropyLoss() combines the cross-entropy loss with an
# implicit softmax, so it is applied directly to the raw network outputs.
L = nn.CrossEntropyLoss()
loss = L(logits, torch.tensor([t], dtype=torch.long))
loss.backward()

Original Class:  2


In [3]:
# Targeted FGSM-style perturbation: shift every coordinate of x by eps in the
# direction that decreases the loss towards the target class t.
# x.grad is the gradient of that loss w.r.t. x (populated by loss.backward()).
adv_x = x - eps * x.grad.sign()

# Report the class of the perturbed input unconditionally. The attack target
# is t, so the print must not be tied to one hard-coded class id.
new_class = N(adv_x).argmax(dim=1).item()
print("New Class: ", new_class)

# The attack succeeded only if adv_x is classified as the target class t ...
assert new_class == t
# ... and it is not enough that adv_x is classified as t: it must also stay
# 'close' to the original x, i.e. within epsReal in the L-infinity norm.
assert torch.norm(x - adv_x, p=float('inf')) <= epsReal

In [4]:
# Default (L2) norm of the perturbation, shown for comparison with the L-infinity bound.
torch.norm(x - adv_x)

tensor(1.2649, grad_fn=<NormBackward0>)

### Problem 1(i)

In [5]:
import torch
import torch.nn as nn
import numpy as np

In [6]:
# Hyper-parameters for the iterative attack.
FLOAT_SLACK = 1e-7  # small constant to offset floating-point errors

t = 0  # target class
epsReal = 0.5  # nominal L-infinity perturbation budget
eps = epsReal - FLOAT_SLACK  # radius actually used when clipping
alpha = 0.2 - FLOAT_SLACK  # size of each signed-gradient step

In [7]:
# Seed the global RNG so the weight initialisation and the sampled input
# are the same on every run.
torch.manual_seed(1)

# Model N as described in the question: three bias-free linear layers
# (10 -> 10 -> 10 -> 3) with a ReLU after the first two.
N = nn.Sequential(
    nn.Linear(10, 10, bias=False),
    nn.ReLU(),
    nn.Linear(10, 10, bias=False),
    nn.ReLU(),
    nn.Linear(10, 3, bias=False),
)

# Random input: a batch of one sample with 10 features.
x = torch.rand(1, 10)

In [8]:
# The adversarial example starts out as an independent copy of the clean input.
adv_x = torch.clone(x)

In [9]:
# Iterative targeted FGSM: repeatedly take a signed-gradient step of size
# alpha that lowers the cross-entropy towards the target class t, and after
# every step clip the result back into [x - eps, x + eps].

# Start from the clean input inside this cell, so that re-running the cell
# restarts the attack instead of continuing from a previous result.
adv_x = x.clone()

max_iter = 1000  # safety cap: the target may be unreachable within the eps-ball

# Loop-invariant objects are built once instead of on every iteration.
L = nn.CrossEntropyLoss()
target = torch.tensor([t], dtype=torch.long)

pred = N(adv_x).argmax(dim=1).item()
num_iter = 0
while pred != t:
    if num_iter >= max_iter:
        raise RuntimeError(
            f"iterative fgsm did not reach target class {t} within {max_iter} iterations"
        )

    # Fresh leaf tensor each round, so adv_x.grad holds only this step's gradient.
    adv_x = adv_x.clone().detach()
    adv_x.requires_grad_()

    loss = L(N(adv_x), target)
    N.zero_grad()  # drop parameter gradients left over from the previous step
    loss.backward()

    # Signed-gradient step that decreases the loss towards the target class.
    x_grad = adv_x.grad
    x_iter = adv_x - alpha * x_grad.detach().sign()

    # Element-wise projection onto the eps-ball around the original input.
    adv_x = torch.max(torch.min(x_iter, x + eps), x - eps)

    pred = N(adv_x).argmax(dim=1).item()

    num_iter = num_iter + 1
print(f"number of iterations in iterative fgsm till success: {num_iter}")

number of iterations in iterative fgsm till success: 5


In [10]:
# The attack counts as successful only if the perturbed input is classified as the target class t.
final_class = N(adv_x).argmax(dim=1).item()
assert final_class == t

In [11]:
# The perturbation must stay within the L-infinity budget epsReal.
linf_distance = torch.norm(x - adv_x, p=float('inf'))
assert linf_distance <= epsReal

In [12]:
# L-infinity distance between the clean and the adversarial input (largest per-coordinate change).
torch.norm(x - adv_x, p=float("inf"))

tensor(0.5000, grad_fn=<NormBackward0>)

In [13]:
# Default (L2) norm of the perturbation, for comparison with the L-infinity distance.
torch.norm(x - adv_x)

tensor(1.2042, grad_fn=<NormBackward0>)