In [1]:
%load_ext autoreload
%autoreload 2

from models import *
from helpers import *
from torchattacks import PGD, FGSM

import os, sys
# Register ./configs (relative to the working directory the kernel was started
# in) on the import path, so config modules there can be imported by bare name.
# The membership check keeps re-running this cell from adding duplicates.
current_dir = os.getcwd()
path_to_append = os.path.join(current_dir, "configs")
already_registered = path_to_append in sys.path
if not already_registered:
    sys.path += [path_to_append]

In [2]:
from importlib import import_module
def load_configs_from_file(config_file):
    """Import a config module and return its settings as a fixed-order tuple.

    Args:
        config_file: Importable name of the config module, with or without a
            trailing ``.py`` (e.g. ``"adv_MNIST_pgd_config"``). Dotted module
            paths such as ``"pkg.my_config"`` are imported as written; only a
            trailing ``.py`` is stripped.

    Returns:
        A 13-tuple, in this order: ``model_path, hiddens_config, batch_size,
        epsilon, T, c, lr, lr_sigma, lr_c, perturb_freq, noise_scale,
        grad_threshold, c_reduce_rate``.

    Raises:
        ModuleNotFoundError: if the module cannot be found on ``sys.path``.
        AttributeError: if the module does not define every setting above.
    """
    # Single source of truth for both the required names and the return order.
    field_names = (
        "model_path", "hiddens_config", "batch_size", "epsilon", "T", "c",
        "lr", "lr_sigma", "lr_c", "perturb_freq", "noise_scale",
        "grad_threshold", "c_reduce_rate",
    )

    # Drop only a file extension; splitting on the first "." would truncate
    # a dotted module path ("pkg.cfg") to its top-level package.
    module_name = config_file[:-3] if config_file.endswith(".py") else config_file
    configs = import_module(module_name)

    # Report every missing setting at once instead of failing on the first.
    missing = [name for name in field_names if not hasattr(configs, name)]
    if missing:
        raise AttributeError(
            "config module '{}' is missing required setting(s): {}".format(
                module_name, ", ".join(missing)))

    return tuple(getattr(configs, name) for name in field_names)

"""Can replace with `adv_MNIST_clean_config` or `adv_MNIST_pgd_config`"""
# Active experiment configuration; switch the module name below to
# "adv_MNIST_clean_config" to use the other config.
config_filename = "adv_MNIST_pgd_config"

# MNIST images: one channel, 28 pixels per side.
input_dim = 1
input_size = 28

# Unpack the settings in the exact order load_configs_from_file returns them.
(
    model_path, hiddens_config, batch_size, epsilon, T, c,
    lr, lr_sigma, lr_c, perturb_freq, noise_scale,
    grad_threshold, c_reduce_rate,
) = load_configs_from_file(config_filename)

In [3]:
# Rebuild the classifier from the configured architecture and restore the
# weights stored at `model_path`.
model = CNN(input_size, input_dim,
            hiddens_config)
model.load_state_dict(torch.load(model_path))
# Everything below is inference/attack only, so leave training mode. This is a
# no-op if the network has no dropout / batch-norm style layers.
model.eval()
print ("Classifier loaded!\nEvaluating...")

_, test_loader, _ = load_MNIST_dataset(batch_size = batch_size)

entropy_loss = nn.CrossEntropyLoss()
# Same geometry the CNN was built with (28, 28, 1 for MNIST), taken from the
# config values instead of repeating the literals.
fc_model = convert_CNN_to_FC(model, [input_size, input_size, input_dim])

"""Initializing PGD attacks"""
pgd_atk = PGD(model, eps=epsilon, steps=1000)

# Accuracy over `test_loader`. Gradients are not needed for scoring, so the
# loop runs under no_grad; counts are kept as plain Python ints.
num_correct, num_all = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        pred_logits = model(images.cuda())
        num_all += labels.shape[0]
        num_correct += (torch.argmax(pred_logits, dim = 1) == labels.cuda()).sum().item()
print ("\nValidation accuracy: {:.3f}%".format(num_correct / num_all * 100))

Classifier loaded!
Evaluating...

Validation accuracy: 98.940%


In [4]:
# Draw one sample from the test loader and build the batched views (leading
# batch axis of size 1) once, so the model and the PGD attack see the same input.
image, label = draw_image_randomly(test_loader)
batched_image = image.reshape([1, *image.shape])
batched_label = label.reshape([1, *label.shape])

# Clean prediction: inference only, so no autograd graph is needed.
with torch.no_grad():
    clean_logits = model(batched_image)
    clean_loss = entropy_loss(clean_logits, batched_label)

print ("Randomly draw an image...")
print ("\tClean classification label: {}\n\tGround truth label: {}, objective: {:.3f}\n".format(torch.argmax(clean_logits[0]),
                                                                                                label, clean_loss))

# The attack itself must run with gradients enabled, so it stays outside no_grad.
attacked_image = pgd_atk(batched_image, batched_label)
# One forward pass on the adversarial image; the reported loss and label are
# both derived from these logits so they cannot disagree.
with torch.no_grad():
    pgd_logits = model(attacked_image)[0]
    pgd_loss = entropy_loss(pgd_logits, label)
    pgd_label = torch.argmax(pgd_logits)
print ("Epsilon: {}\n".format(epsilon))
print ("PGD attacking...")
print ("\tClassification label: {}\n\tobjective: {:.3f}\n".format(pgd_label, pgd_loss))

# Non-convex-optimisation attack: takes the flattened image and the
# fully-connected conversion of the classifier, and returns the attacked image
# together with the objective value that is printed below.
attacked_image_ncvx, obj_cvx = adversarial_attack_nonconvexOpt(image.reshape([-1]).cuda(),
                                                 label.cuda(), fc_model,
                                                 epsilon, T, lr, lr_sigma,
                                                 c = c, lr_c = lr_c,
                                                 perturb_freq = perturb_freq,
                                                 noise_scale = noise_scale,
                                                 grad_threshold = grad_threshold,
                                                 c_reduce_rate = c_reduce_rate, set_proper_sigma_freq = 16)
print ("ADR-GD attacking...")
# NOTE(review): this reshape hard-codes [1, 28, 28] while the calls above use
# [1, *image.shape]; confirm the two agree for the CNN's expected input rank.
with torch.no_grad():
    adagd_label = torch.argmax(model(attacked_image_ncvx.reshape([1, 28, 28])))
print ("\tClassification label: {}\n\tobjective: {:.3f}\n".format(adagd_label, obj_cvx))


Randomly draw an image...
	Clean classification label: 6
	Ground truth label: 6, objective: 0.000

Epsilon: 0.25

PGD attacking...
	Classification label: 6
	objective: 0.174

ADR-GD attacking...
	Classification label: 8
	objective: 4.110



In [7]:
# Repeat the PGD vs. ADR-GD comparison on a freshly drawn test sample. The
# batched views (leading batch axis of size 1) are built once so the model and
# the PGD attack see the same input.
image, label = draw_image_randomly(test_loader)
batched_image = image.reshape([1, *image.shape])
batched_label = label.reshape([1, *label.shape])

# Clean prediction: inference only, so no autograd graph is needed.
with torch.no_grad():
    clean_logits = model(batched_image)
    clean_loss = entropy_loss(clean_logits, batched_label)

print ("Randomly draw an image...")
print ("\tClean classification label: {}\n\tGround truth label: {}, objective: {:.3f}\n".format(torch.argmax(clean_logits[0]),
                                                                                                label, clean_loss))

# The attack itself must run with gradients enabled, so it stays outside no_grad.
attacked_image = pgd_atk(batched_image, batched_label)
# One forward pass on the adversarial image; the reported loss and label are
# both derived from these logits so they cannot disagree.
with torch.no_grad():
    pgd_logits = model(attacked_image)[0]
    pgd_loss = entropy_loss(pgd_logits, label)
    pgd_label = torch.argmax(pgd_logits)
print ("Epsilon: {}\n".format(epsilon))
print ("PGD attacking...")
print ("\tClassification label: {}\n\tobjective: {:.3f}\n".format(pgd_label, pgd_loss))

# Non-convex-optimisation attack: takes the flattened image and the
# fully-connected conversion of the classifier, and returns the attacked image
# together with the objective value that is printed below.
attacked_image_ncvx, obj_cvx = adversarial_attack_nonconvexOpt(image.reshape([-1]).cuda(),
                                                 label.cuda(), fc_model,
                                                 epsilon, T, lr, lr_sigma,
                                                 c = c, lr_c = lr_c,
                                                 perturb_freq = perturb_freq,
                                                 noise_scale = noise_scale,
                                                 grad_threshold = grad_threshold,
                                                 c_reduce_rate = c_reduce_rate, set_proper_sigma_freq = 16)
print ("ADR-GD attacking...")
# NOTE(review): this reshape hard-codes [1, 28, 28] while the calls above use
# [1, *image.shape]; confirm the two agree for the CNN's expected input rank.
with torch.no_grad():
    adagd_label = torch.argmax(model(attacked_image_ncvx.reshape([1, 28, 28])))
print ("\tClassification label: {}\n\tobjective: {:.3f}\n".format(adagd_label, obj_cvx))


Randomly draw an image...
	Clean classification label: 4
	Ground truth label: 4, objective: 0.000

Epsilon: 0.25

PGD attacking...
	Classification label: 4
	objective: 0.160

ADR-GD attacking...
	Classification label: 8
	objective: 7.116

