## Generate adversarial examples

author: Christina Funke

In [None]:
import foolbox # pip3 install foolbox==1.8.0
import numpy as np
import torch
import os
import torchvision.models as models
import torch.nn as nn
import sys
sys.path.append(os.path.abspath('../network/'))
import cc_utils
import my_models
from PIL import Image
import matplotlib.pyplot as plt
# Torch device handle for CUDA; not referenced by the cells visible in this notebook excerpt.
DEVICE  = torch.device('cuda')
# Repository root relative to this notebook; prefix for the checkpoint and figure paths below.
TOP_DIR = '../'

In [None]:
class two_classes_from_one_neuron(nn.Module):
    """
    Get two classes from one output neuron, such that one is -logit and the
    other is +logit.

    For an input of shape (N, 1) the output has shape (N, 2): column 0 holds
    -logit and column 1 holds +logit for every row of the batch, so the larger
    of the two columns follows the sign of the original logit.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """
        Duplicate the logit along dim 1 and flip the sign of the first copy.

        The input tensor itself is left unmodified; `repeat` allocates a new
        tensor and only that copy is written to.
        """
        out = x.repeat(1, 2)
        # Negate column 0 for *every* sample, not only for the first row, so
        # that batched inputs are handled the same way as a single image.
        out[:, 0] = -out[:, 0]
        return out

In [None]:
# Restore the trained ResNet-50 weights from the experiment's best checkpoint.
exp_name = "resnet50_lr0.0003_numtrain14000_augment1_unique_batchsize64_optimizerAdam_contrast0_reg0_otf0_cropmargin1_5152019_v0"
checkpoint_path = "{}network/cc_checkpoints/{}/best_prec.pt".format(TOP_DIR, exp_name)
resnet = my_models.load_model("resnet50")
resnet.load_state_dict(torch.load(checkpoint_path))

# Append the head that expands the single logit into two class scores, and
# switch the combined network to evaluation mode.
new_resnet = nn.Sequential(resnet, two_classes_from_one_neuron())
new_resnet.eval()

# Preprocessing: per-channel mean / std scaled to the [0, 255] pixel range and
# expanded to full (3, 256, 256) arrays.
plane_reps = (1, 256, 256)
mean = np.tile((np.array([0.485, 0.456, 0.406]) * 255).reshape(3, 1, 1), plane_reps)
std = np.tile((np.array([0.229, 0.224, 0.225]) * 255).reshape(3, 1, 1), plane_reps)
preprocessing = (mean, std)

# Wrap the network as a foolbox model operating on [0, 255] inputs.
# NOTE(review): channel_axis=2 is kept as in the original; the images fed to
# the attack are channel-first, so this value should be confirmed against the
# foolbox 1.8 documentation.
fmodel = foolbox.models.PyTorchModel(
    new_resnet,
    bounds=(0, 255),
    num_classes=2,
    channel_axis=2,
    preprocessing=preprocessing,
)

# The output directory and the dataset do not depend on the image index, so
# they are set up once instead of being rebuilt on every loop iteration.
fig_dir = TOP_DIR + "figures/"
dataloader, dataset = cc_utils.load_dataset_cc(
    set_num="1", contrast="contrast0", batch_size=1, split="val", prep_method="orig", num_trainimages=None, dat_augment=0, unique="pairs", return_dataset=1,
)

# loop over different images
for n in [2002]:
    # load image: fetch the sample once so image and label come from the same lookup
    sample = dataset[n]
    # drop a 16-pixel border on each side (288 -> 256 for 288x288 inputs, which
    # matches the 256x256 preprocessing arrays)
    image = np.array(sample[0])[16 : 288 - 16, 16 : 288 - 16, :]
    label = sample[1]
    image = image.transpose(2, 0, 1)  # channel-last (H, W, C) -> channel-first (C, H, W)
    image = image.astype("float32")  # was 'uint8'
    print("image:", image.shape, image.min(), image.max())
    print("label: ", label)

    # apply attack on source image
    attack = foolbox.attacks.CarliniWagnerL2Attack(fmodel)
    adversarial = attack(image, label, max_iterations=1000, learning_rate=10e-3)  # 10e-3 == 0.01

    if adversarial is None:
        print("attack failed: adversarial is None: no adversarial found")
    elif np.array_equal(adversarial, image):
        print("attack failed: they are equal: image was misclassified")
    else:
        # back to channel-last (H, W, C) layout for matplotlib
        image_hwc = image.transpose(1, 2, 0)
        adversarial_hwc = adversarial.transpose(1, 2, 0)
        difference = adversarial_hwc - image_hwc

        # plot and save result
        plt.figure()

        plt.subplot(1, 3, 1)
        plt.title("Original")
        plt.imshow(image_hwc / 255)  # division by 255 to convert [0, 255] to [0, 1]
        plt.axis("off")

        plt.subplot(1, 3, 2)
        plt.title("Adversarial")
        plt.imshow(adversarial_hwc / 255)  # division by 255 to convert [0, 255] to [0, 1]
        plt.axis("off")

        plt.subplot(1, 3, 3)
        plt.title("Difference")
        # rescale the perturbation to [0.3, 0.7] around mid-grey so it is visible;
        # the max is non-zero here because the identical-image case is handled above
        plt.imshow(difference / abs(difference).max() * 0.2 + 0.5)
        plt.axis("off")

        # make sure the target directory exists before writing the figure
        os.makedirs(fig_dir, exist_ok=True)
        plt.savefig(fig_dir + "adversarial_n" + str(n) + ".pdf", dpi=512, bbox_inches="tight")

        plt.show()
