## General setup and import functions

In [0]:
from __future__ import print_function, division
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import pandas as pd
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import matplotlib.pyplot as plt
import time
import os
import copy
from PIL import Image
import random

plt.ion()   # interactive mode

In [74]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [0]:
# Three per-channel values each; consumed by transforms.Normalize in the
# test-time transform.
cifar10_mean = [
    0.49139968,
    0.48215841,
    0.44653091,
]
cifar10_std = [
    0.24703223,
    0.24348513,
    0.26158784,
]

In [76]:
# Test-time preprocessing: resize to 224, convert to a tensor, then normalize
# with the CIFAR-10 statistics.
transform_test = transforms.Compose(
    [
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=cifar10_mean, std=cifar10_std),
    ]
)

# CIFAR-10 test split, downloaded into ./data when it is not already there.
testset = datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform_test,
    download=True,
)

# Fixed-order batches of 4 so that repeated evaluations see the same sequence.
testloader = DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

Files already downloaded and verified


#### Helper functions

In [0]:
class mymodel(nn.Module):
    """Fully connected classifier with two hidden layers.

    Each sample is flattened and passed through
    ``in_features -> hidden_dim -> hidden_dim // 2 -> n_classes``,
    with a ReLU after each of the first two linear layers.
    """

    def __init__(self, in_features, hidden_dim, n_classes):
        """Create the three linear layers.

        Args:
            in_features: Number of values per flattened input sample.
            hidden_dim: Width of the first hidden layer; the second hidden
                layer uses ``hidden_dim // 2``.
            n_classes: Number of output scores per sample.
        """
        super().__init__()

        half_hidden = hidden_dim // 2
        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, half_hidden)
        self.fc3 = nn.Linear(half_hidden, n_classes)

    def forward(self, images):
        """Return the raw (pre-softmax) class scores for a batch of images."""
        # Collapse everything except the batch dimension into one vector.
        activations = images.view(images.shape[0], -1)
        for hidden_layer in (self.fc1, self.fc2):
            activations = F.relu(hidden_layer(activations))
        return self.fc3(activations)

In [0]:
def calculate_accuracy(model, loader, print_freq=1000, attack=False):
    """Measure the top-1 accuracy of ``model`` over ``loader``.

    Args:
        model: Module mapping a batch of images to per-class scores.
        loader: Iterable yielding ``(images, labels)`` batches.
        print_freq: Running accuracy is printed whenever the number of samples
            seen so far is a multiple of this value.
        attack: When True, every batch is replaced by the output of
            ``create_adversarial`` (called with its default epsilon) before
            being classified.

    Returns:
        Tuple ``(accuracy_percent, total_samples)``; ``(0.0, 0)`` when the
        loader yields no samples.
    """
    model.eval()
    total = 0
    correct = 0
    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        if attack:
            # Crafting the perturbation needs gradients w.r.t. the input, so it
            # must stay outside the no_grad block below.
            images = create_adversarial(model, images, labels)

        # Scoring itself needs no autograd graph.
        with torch.no_grad():
            outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Progress report belongs inside the loop; outside it, it could fire at
        # most once after the final batch.
        if total % print_freq == 0:
            print("Current Accuracy: {} / {} = {}".format(correct, total, correct/total*100))

    if total == 0:
        # Empty loader: avoid ZeroDivisionError and report zero accuracy.
        return 0.0, 0
    return (correct/total)*100, total

In [0]:
# FGSM Gradient attack code
def gradient_attack(image, epsilon, data_grad, clip_min=0, clip_max=1):
    """Apply one signed-gradient (FGSM) step to ``image``.

    Args:
        image: Tensor to perturb.
        epsilon: Step size applied to every element.
        data_grad: Gradient of the loss w.r.t. ``image`` (same shape).
        clip_min: Lower clamp bound for the result, or None for no lower bound.
        clip_max: Upper clamp bound for the result, or None for no upper bound.
            The defaults keep the historical [0, 1] range, which is only the
            valid pixel range for un-normalized inputs; for mean/std-normalized
            inputs pass the normalized range (or None) instead.

    Returns:
        The perturbed tensor, detached from the autograd graph.
    """
    # Move each element by epsilon in the direction that increases the loss.
    perturbed_image = image + epsilon * data_grad.sign()
    # torch.clamp requires at least one bound, so skip it when both are None.
    if clip_min is not None or clip_max is not None:
        perturbed_image = torch.clamp(perturbed_image, min=clip_min, max=clip_max)
    # The adversarial example is a plain input; do not keep the graph alive.
    return perturbed_image.detach()

## Attack functions

### FGSM Attack

In [0]:
# FGSM Attack
def fgsm_attack(model, data, target, loss_fn, epsilon):
    """Craft FGSM adversarial examples against ``model``.

    Args:
        model: Module whose loss gradient w.r.t. the input drives the attack.
        data: Input batch; it is not modified.
        target: Ground-truth labels for ``data``.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar loss.
        epsilon: Step size forwarded to ``gradient_attack``.

    Returns:
        The perturbed batch produced by ``gradient_attack``.
    """
    # Use a detached leaf copy: the caller's tensor keeps its requires_grad
    # flag and .grad, and non-leaf inputs (whose .grad would be None) work too.
    data = data.clone().detach().requires_grad_(True)

    output = model(data)
    loss = loss_fn(output, target)
    model.zero_grad()
    loss.backward()
    data_grad = data.grad
    perturbed_data = gradient_attack(data, epsilon, data_grad)

    return perturbed_data

### Transferability Attack

Load the proxy model, pretrained on CIFAR-10.

In [88]:
# Load the proxy (surrogate) model used to craft the transfer attack.
# NOTE(security): torch.load unpickles the file and can execute arbitrary code;
# only load checkpoints that come from a trusted source.
proxy_model = torch.load("proxy_model.pt", map_location=device)

# The proxy is only used to compute gradients w.r.t. the inputs, never trained:
# switch it to eval mode (so dropout / batch-norm layers, if any, behave as at
# inference) and tell PyTorch not to compute gradients for its parameters.
proxy_model.eval()
for param in proxy_model.parameters():
    param.requires_grad = False



In [0]:
# proxy_acc, data_size = calculate_accuracy(proxy_model, testloader)
# print(proxy_acc, data_size)

81.91000000000001 10000


In [0]:
def transfer_attack(data, target, loss_fn, epsilon):
    """Craft adversarial examples using gradients of the global ``proxy_model``.

    The perturbation is computed against the proxy model and is meant to be
    evaluated on a different (target) model.

    Args:
        data: Input batch; it is not modified.
        target: Ground-truth labels for ``data``.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar loss.
        epsilon: Step size forwarded to ``gradient_attack``.

    Returns:
        The perturbed batch produced by ``gradient_attack``.
    """
    # Use a detached leaf copy: the caller's tensor keeps its requires_grad
    # flag and .grad, and non-leaf inputs (whose .grad would be None) work too.
    data = data.clone().detach().requires_grad_(True)

    # Forward pass through the proxy and loss computation.
    proxy_output = proxy_model(data)
    loss = loss_fn(proxy_output, target)
    # Zero all existing gradients, then backpropagate to the input.
    proxy_model.zero_grad()
    loss.backward()
    data_grad = data.grad

    # Signed-gradient step on the input.
    perturbed_data = gradient_attack(data, epsilon, data_grad)

    return perturbed_data

### CW Attack

In [83]:
pip install foolbox

Collecting foolbox
[?25l  Downloading https://files.pythonhosted.org/packages/d2/fa/4c7e7caf3caa11b63acb84d280e2a589d91aeca27edd5023cc19aa2441fc/foolbox-2.2.1.tar.gz (1.9MB)
[K     |▏                               | 10kB 21.5MB/s eta 0:00:01[K     |▍                               | 20kB 2.2MB/s eta 0:00:01[K     |▌                               | 30kB 3.2MB/s eta 0:00:01[K     |▊                               | 40kB 2.1MB/s eta 0:00:01[K     |▉                               | 51kB 2.6MB/s eta 0:00:01[K     |█                               | 61kB 3.1MB/s eta 0:00:01[K     |█▎                              | 71kB 3.6MB/s eta 0:00:01[K     |█▍                              | 81kB 4.0MB/s eta 0:00:01[K     |█▋                              | 92kB 4.5MB/s eta 0:00:01[K     |█▊                              | 102kB 3.4MB/s eta 0:00:01[K     |██                              | 112kB 3.4MB/s eta 0:00:01[K     |██                              | 122kB 3.4MB/s eta 0:00:01[K 

In [0]:
def cw_attack(model, images, labels):
    """Craft Carlini-Wagner L2 adversarial examples with foolbox (2.x API).

    Args:
        model: PyTorch module under attack.
        images: Batch of input tensors, in the same (possibly normalized)
            value range the model is evaluated on.
        labels: Ground-truth labels for ``images``.

    Returns:
        Tensor with the same shape, dtype and device as ``images``. Samples for
        which the attack produced no adversarial are returned unchanged.
    """
    import foolbox

    # foolbox 2.x attacks take and return NumPy arrays, not torch tensors.
    images_np = images.detach().cpu().numpy()
    labels_np = labels.detach().cpu().numpy()

    # The bounds must enclose the actual input values. The inputs handed to this
    # function are not guaranteed to lie in a fixed pixel range (the notebook
    # normalizes them), so derive the bounds from the batch itself.
    # NOTE(review): a batch with a single constant value gives a degenerate
    # range -- confirm that cannot happen for the data used here.
    lower = float(images_np.min())
    upper = float(images_np.max())
    pymodel = foolbox.models.PyTorchModel(model, bounds=(lower, upper), num_classes=10)

    attack = foolbox.attacks.CarliniWagnerL2Attack(pymodel)
    adversarials = attack(images_np, labels_np)

    perturbed = torch.from_numpy(adversarials).to(device=images.device, dtype=images.dtype)
    # foolbox 2.x marks failed attacks with NaN entries; keep the clean input
    # for those elements so the downstream forward pass stays finite.
    clean = images.detach()
    return torch.where(torch.isnan(perturbed), clean, perturbed)


### Create Adversarial samples from one of the three attacks

In [0]:
def create_adversarial(model, images, labels, epsilon=0.1):
    """Perturb a batch with one randomly selected attack.

    A single uniform draw in [0, 1] picks the attack: below 0.33 FGSM against
    ``model``, below 0.66 Carlini-Wagner against ``model``, otherwise the
    proxy-model transfer attack. ``epsilon`` is forwarded to the FGSM and
    transfer attacks only.

    Returns:
        Whatever the selected attack function returns for this batch.
    """
    criterion = nn.CrossEntropyLoss()
    draw = random.uniform(0,1)

    if draw < 0.33:
        return fgsm_attack(model, images, labels, criterion, epsilon)
    if draw < 0.66:
        return cw_attack(model, images, labels)
    return transfer_attack(images, labels, criterion, epsilon)

### Load the target model and compare accuracies


In [0]:
# Restore the target model from its checkpoint onto the active device.
model = torch.load("mymodel.pt", map_location=device)

# The model is only evaluated and attacked, never trained, so autograd does not
# need to track gradients for its parameters.
for weight in model.parameters():
    weight.requires_grad_(False)

##### Target Model accuracy (no attack)

In [0]:
# Baseline: accuracy of the target model on the unperturbed test set.
model_acc, data_size = calculate_accuracy(model=model, loader=testloader)
print(model_acc, data_size)

90.36 10000


#### Model accuracy (after attack)

**Open questions**

- Do we need the labels from the test dataset, or can we attack without them?
- Should we always attack, rather than only when the model's prediction is correct?

In [0]:
def attack_accuracy(model, loader, print_freq=1000, epsilon=0.1):
    """Measure the top-1 accuracy of ``model`` on adversarially perturbed batches.

    Every batch is passed through ``create_adversarial`` before being
    classified.

    Args:
        model: Module mapping a batch of images to per-class scores.
        loader: Iterable yielding ``(images, labels)`` batches.
        print_freq: Running accuracy is printed whenever the number of samples
            seen so far is a multiple of this value.
        epsilon: Perturbation size forwarded to ``create_adversarial``.

    Returns:
        Tuple ``(accuracy_percent, total_samples)``; ``(0.0, 0)`` when the
        loader yields no samples.
    """
    model.eval()
    total = 0
    correct = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)
        # Crafting needs gradients w.r.t. the input, so it runs outside no_grad.
        images = create_adversarial(model, images, labels, epsilon)

        with torch.no_grad():
            outputs = model(images)

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if total % print_freq == 0:
            print("Current Accuracy: {} / {} = {}".format(correct, total, correct/total*100))

    if total == 0:
        # Empty loader: avoid ZeroDivisionError and report zero accuracy.
        return 0.0, 0
    return (correct/total)*100, total

In [86]:
# Accuracy under attack via calculate_accuracy, i.e. with create_adversarial's
# default epsilon.
# Note from the recorded run: 0.3 FGSM - 0.7 Transfer
attack_acc, data_size = calculate_accuracy(model=model, loader=testloader, attack=True)
print(attack_acc, data_size)

Current Accuracy: 1560 / 10000 = 15.6
15.6 10000


In [96]:
# Accuracy under attack with epsilon = 0.15.
# Note from the recorded run: 0.3 FGSM - 0.7 Transfer
attack_acc, data_size = attack_accuracy(
    model=model,
    loader=testloader,
    print_freq=1000,
    epsilon=0.15,
)
print(attack_acc, data_size)

Current Accuracy: 1341 / 10000 = 13.41
13.41 10000


In [106]:
# Accuracy under attack with epsilon = 0.15.
# Note from the recorded run: 50-50 FGSM-Transfer
attack_acc, data_size = attack_accuracy(
    model=model,
    loader=testloader,
    print_freq=1000,
    epsilon=0.15,
)
print(attack_acc, data_size)

Current Accuracy: 122 / 1000 = 12.2
Current Accuracy: 252 / 2000 = 12.6
Current Accuracy: 407 / 3000 = 13.566666666666666
Current Accuracy: 543 / 4000 = 13.575000000000001
Current Accuracy: 672 / 5000 = 13.44
Current Accuracy: 788 / 6000 = 13.133333333333333
Current Accuracy: 924 / 7000 = 13.200000000000001
Current Accuracy: 1076 / 8000 = 13.450000000000001
Current Accuracy: 1203 / 9000 = 13.366666666666665
Current Accuracy: 1340 / 10000 = 13.4
13.4 10000


In [0]:
# Accuracy under attack with epsilon = 0.15.
# Attack mix: 33-33-33 FGSM-Transfer-CW
attack_acc, data_size = attack_accuracy(
    model=model,
    loader=testloader,
    print_freq=1000,
    epsilon=0.15,
)
print(attack_acc, data_size)