In [1]:
import foolbox
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import json
from models import Model # Define different models to adversarially train
from helpers import adv_train, adv_eval
from advertorch.attacks import LinfPGDAttack
    
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

with open('config.json') as config_file:
    config = json.load(config_file)
    
model = Model()
modeln = "madryCNN"
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Adversary used to train the network
train_adv = LinfPGDAttack(
                model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
                eps=config['epsilon'], nb_iter=config['k'],
                eps_iter=config['a'],  rand_init=config['random_start'], 
                clip_min=0.0, clip_max=1.0, targeted=False)

# Adversary used to test the network (the difference is the k_eval)
test_adv = LinfPGDAttack(
                model, loss_fn=nn.CrossEntropyLoss(reduction="sum"), 
                eps=config['epsilon'], nb_iter=config['k_eval'],
                eps_iter=config['a'],  rand_init=config['random_start'], 
                clip_min=0.0, clip_max=1.0, targeted=False)
# Train the clean version
#adv_train(model, modeln, optimizer, device, config, False)
# Train the adversarial version
#adv_train(model, modeln, optimizer, device, config, True, train_adv)

In [2]:
import os
model = Model()
# Load model from serialized file from training
model.load_state_dict(
    torch.load(os.path.join(config['model_dir'], 'mnist_madryCNN_clntrained.pt')))
model.to(device)
model.eval()
                                   
adv_eval(model, device, test_adv, config['eval_batch_size'], True)



Test set: avg cln loss: 0.0553, cln acc: 9910/10000 (99%)

Test set: avg adv loss: 0.0343, adv acc: 9898/10000 (99%)



In [None]:
# # instantiate the model
# model = Model()
# if torch.cuda.is_available():
#     model = model.cuda()
    
# # Model, bounds of pixel values, # prediction classes, 
# # Elementise subtract from the input the mean and then divide by std
# mean = .1307 # TODO: change later 0,1 for no change
# std = .3081
# fmodel = foolbox.models.PyTorchModel(
#     model, bounds=(0, 1), num_classes=10, preprocessing=(mean, std))

# # get source image and label
# image, label = foolbox.utils.imagenet_example(data_format='channels_first')
# image = image / 255.  # because our model expects values in [0, 1]

# print('label', label)
# print('predicted class', np.argmax(fmodel.predictions(image)))

# # apply attack on source image
# attack = foolbox.attacks.FGSM(fmodel)
# adversarial = attack(image, label)

# print('adversarial class', np.argmax(fmodel.predictions(adversarial)))