In [1]:
#import packages
#feel free to import more if you need
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
#evaluate the benign accuracy of a model
def test(model, x,y,batch_size):
    model.eval()
    total=x.shape[0]
    batches=np.ceil(total/batch_size).astype(int)
    success=0
    loss=0
    for i in range(batches):
        start_index=i*batch_size
        end_index=np.minimum((i+1)*batch_size,total)
        x_batch=torch.tensor(x[start_index:end_index]).float()
        y_batch=torch.tensor(y[start_index:end_index]).long()
        output=model(x_batch)
        pred=torch.argmax(output,dim=1)
        loss+=F.cross_entropy(output,y_batch).item()
        success+=(pred==y_batch).sum().item()
    #print ("accuracy: "+str(success/total))
    accuracy = success/total
    return accuracy


In [3]:
#define model architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x=self.conv1(x)
        x=F.max_pool2d(x, 2)
        x = F.relu(x)
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x

In [4]:
#untargeted attack
#you may add parameters as you wish
# Reference (Recitation 3 Code)

def untargeted_attack(x, y, model_1, model_2, min_val, max_val):
    batch_size = 512
    num_batches = (len(x) + batch_size - 1) // batch_size
    accs = []
    models = [model_1, model_2]

    for model in models:
        all_accs = {}
        for eps in range(8, 64, 8):
            epsilon = eps

            adv_batch_x_all = np.zeros((len(x), x.shape[1], x.shape[2], x.shape[3]))
            print(adv_batch_x_all.shape)
            label_all = np.zeros((len(y),))

            for batch_idx in range(num_batches):
                start_idx = batch_idx * batch_size
                end_idx = min(start_idx + batch_size, len(x))
                batch_x = torch.tensor(x[start_idx:end_idx]).float()
                batch_y = torch.tensor(y[start_idx:end_idx]).long()
                adv_batch_x = batch_x.detach().clone()

                for _ in range(20):
                    adv_batch_x.requires_grad = True
                    pred = model(adv_batch_x)
                    #pred_B = model_2(adv_batch_x)
                    model.zero_grad()
                    #model_2.zero_grad()
                    loss = -nn.CrossEntropyLoss()(pred, batch_y)
                    loss.backward()
                    #loss_B = -nn.CrossEntropyLoss()(pred_B, batch_y)
                    #loss_B.backward()
                    grads = adv_batch_x.grad
                    with torch.no_grad():
                        adv_batch_x = adv_batch_x - epsilon * grads.sign()
                        eta = torch.clamp(adv_batch_x - batch_x, min=-epsilon, max=epsilon)
                        adv_batch_x = torch.clamp(batch_x + eta, min=min_val, max=max_val).detach().clone()
                        #adv_batch_x = torch.round(adv_batch_x*255)/255

                adv_batch_x_all[start_idx:end_idx] = adv_batch_x.numpy()
                label_all[start_idx:end_idx] = batch_y.numpy()

            accuracy = test(model_1, adv_batch_x_all, label_all, 512)
            #accuracy_B = test(model_2, adv_batch_x_all, label_all, 512)
            all_accs[eps] = accuracy

        accs.append(all_accs)

    return accs

In [93]:
#targeted attack
#you may add parameters as you wish
def targeted_attack(x, y, model_1, model_2, min_val, max_val):
    #TODO
    
    alpha = 10
    batch_size = 512
    num_batches = (len(x) + batch_size - 1) // batch_size
    accs = []
    models = [model_1, model_2]

    for model in models:
        all_accs = {}
        for eps in range(8, 64, 8):
            #epsilon = eps / 255.
            epsilon = eps

            adv_batch_x_all = np.zeros((len(x), x.shape[1], x.shape[2], x.shape[3]))
            print(adv_batch_x_all.shape)
            label_all = np.zeros((len(y),))

            for batch_idx in range(num_batches):
                start_idx = batch_idx * batch_size
                end_idx = min(start_idx + batch_size, len(x))
                batch_x = torch.tensor(x[start_idx:end_idx]).float()
                batch_y = torch.tensor(y[start_idx:end_idx]).long()
                adv_batch_x = batch_x.detach().clone()
                target = torch.full((end_idx - start_idx,), 8, dtype=torch.long)
                m=torch.zeros(adv_batch_x.shape)
                v=torch.zeros(adv_batch_x.shape)

                for i in range(100):
                    adv_batch_x.requires_grad = True
                    pred = model(adv_batch_x)
                    model.zero_grad()
                    loss = nn.CrossEntropyLoss()(pred, target)
                    loss.backward()
                    grads = adv_batch_x.grad
                    with torch.no_grad():
                        t=i+1
                        m=0.9*m+0.1*grads
                        v=0.999*v+0.001*grads*grads
                        mhat=m/(1.0 - 0.9**t)
                        vhat=v/(1.0 - 0.999**t)
                        grads=mhat / (torch.sqrt(vhat) + 1e-8)
                        adv_batch_x = adv_batch_x - alpha * grads.sign()
                        eta = torch.clamp(adv_batch_x - batch_x, min=-epsilon, max=epsilon)
                        adv_batch_x = torch.clamp(batch_x + eta, min=min_val, max=max_val).detach().clone()

                adv_batch_x_all[start_idx:end_idx] = adv_batch_x.numpy()
                label_all[start_idx:end_idx] = batch_y.numpy()

            accuracy = test(model_1, adv_batch_x_all, label_all, 512)
            #accuracy_B = test(model_2, adv_batch_x_all, label_all, 512)
            all_accs[eps] = accuracy

        accs.append(all_accs)

    return accs

In [94]:
#load MNIST
dataset_train = datasets.MNIST('../data', train=True, download=True)
dataset_test = datasets.MNIST('../data', train=False, download=True)

# reshape MNIST
x_train=dataset_train.data.numpy()
y_train=dataset_train.targets.numpy()
x_test=dataset_test.data.numpy()
y_test=dataset_test.targets.numpy()
x_train=np.reshape(x_train,(60000,28,28,1))
x_test=np.reshape(x_test,(10000,28,28,1))
x_train=np.swapaxes(x_train, 1, 3)
x_test=np.swapaxes(x_test, 1, 3)


#REMINDER: the range of inputs is different from what we used in the recitation
print (x_test.min(),x_test.max())

modelA=Net()
modelA.load_state_dict(torch.load("modelA.zip"))
#accuracy_A_no_attack = test(modelA,x_test,y_test,512)
modelB=Net()
modelB.load_state_dict(torch.load("modelB.zip"))
#accuracy_B_no_attack = test(modelB,x_test,y_test,512)

0 255


<All keys matched successfully>

In [95]:
# Task - 2 - Targeted attack

indices_label_1 = np.where(y_test == 1)[0]
x_test_1s = x_test[indices_label_1]
y_test_1s = y_test[indices_label_1]
all_accs_task_2 = targeted_attack(x_test_1s, y_test_1s, modelA, modelB, x_test_1s.min(), x_test_1s.max())
print(all_accs_task_2)

(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
[{8: 0.9806167400881057, 16: 0.5612334801762114, 24: 0.000881057268722467, 32: 0.0, 40: 0.0, 48: 0.0, 56: 0.0}, {8: 0.9938325991189427, 16: 0.9920704845814978, 24: 0.986784140969163, 32: 0.9700440528634361, 40: 0.9330396475770925, 48: 0.8202643171806168, 56: 0.641409691629956}]


In [98]:
def targeted_attack_improved(x, y, model_1, model_2, min_val, max_val):
    #TODO
    
    #alpha = 1.5
    #weight_decay = 1e-4
    batch_size = 512
    num_batches = (len(x) + batch_size - 1) // batch_size
    accs = []
    models = [model_1, model_2]

    for model in models:
        all_accs = {}
        for eps in range(8, 64, 8):
            #epsilon = eps / 255.
            epsilon = eps

            adv_batch_x_all = np.zeros((len(x), x.shape[1], x.shape[2], x.shape[3]))
            print(adv_batch_x_all.shape)
            label_all = np.zeros((len(y),))

            for batch_idx in range(num_batches):
                start_idx = batch_idx * batch_size
                end_idx = min(start_idx + batch_size, len(x))
                batch_x = torch.tensor(x[start_idx:end_idx]).float()
                batch_y = torch.tensor(y[start_idx:end_idx]).long()
                adv_batch_x = batch_x.detach().clone()
                target = torch.full((end_idx - start_idx,), 8, dtype=torch.long)
                m=torch.zeros(adv_batch_x.shape)
                v=torch.zeros(adv_batch_x.shape)
                alpha = 10

                for i in range(100):
                    adv_batch_x.requires_grad = True
                    pred = model(adv_batch_x)
                    model.zero_grad()
                    loss = nn.CrossEntropyLoss()(pred, target)
                    loss.backward()
                    grads = adv_batch_x.grad
                    with torch.no_grad():
                        t=i+1
                        m=0.9*m+0.1*grads
                        v=0.999*v+0.001*grads*grads
                        mhat=m/(1.0 - 0.9**t)
                        vhat=v/(1.0 - 0.999**t)
                        grads=mhat / (torch.sqrt(vhat) + 1e-8)
                        adv_batch_x = adv_batch_x - alpha * grads.sign()
                        eta = torch.clamp(adv_batch_x - batch_x, min=-epsilon, max=epsilon)
                        adv_batch_x = torch.clamp(batch_x + eta, min=min_val, max=max_val).detach().clone()
                    
                    
                    alpha = alpha / (2 ** (i/20))
                    
                adv_batch_x_all[start_idx:end_idx] = adv_batch_x.numpy()
                label_all[start_idx:end_idx] = batch_y.numpy()

            accuracy = test(model_1, adv_batch_x_all, label_all, 512)
            #accuracy_B = test(model_2, adv_batch_x_all, label_all, 512)
            all_accs[eps] = accuracy

        accs.append(all_accs)

    return accs

In [99]:
# Task - 3 - Targeted attack - Optimized

all_accs_task_3 = targeted_attack_improved(x_test_1s, y_test_1s, modelA, modelB, x_test_1s.min(), x_test_1s.max())
print(all_accs_task_3)

(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
(1135, 1, 28, 28)
[{8: 0.9770925110132158, 16: 0.7066079295154185, 24: 0.02643171806167401, 32: 0.0, 40: 0.0, 48: 0.0, 56: 0.0}, {8: 0.9947136563876652, 16: 0.9955947136563876, 24: 0.9859030837004406, 32: 0.9770925110132158, 40: 0.9550660792951542, 48: 0.9136563876651982, 56: 0.8255506607929516}]
