In [1]:
import torch, torchvision
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import tqdm
import os, pickle
import common

In [2]:
# Reproducibility
common.set_seed(156)

In [3]:
class NN(torch.nn.Module):
    """Feed-forward classifier with a single hidden layer.

    Computes ``LogSoftmax(B(ReLU(A(x))))`` where ``A`` maps ``ni -> nh`` and
    ``B`` maps ``nh -> no``; the log-softmax is taken over the last dimension.
    """
    def __init__(self, ni, nh, no):
        super().__init__()
        # Attribute names double as the state-dict key prefixes (A.*, B.*)
        self.A = torch.nn.Linear(in_features=ni, out_features=nh)
        self.relu = torch.nn.ReLU()
        self.B = torch.nn.Linear(in_features=nh, out_features=no)
        self.logsoftmax = torch.nn.LogSoftmax(dim=-1)
    def forward(self, x):
        """Return log-probabilities for ``x`` (last dimension of size ``ni``)."""
        hidden = self.relu(self.A(x))
        logits = self.B(hidden)
        return self.logsoftmax(logits)
class DeepNN(torch.nn.Module):
    """Feed-forward classifier with two hidden layers of width ``nh``.

    Layer stack: Linear(ni, nh) -> ReLU -> Linear(nh, nh) -> ReLU ->
    Linear(nh, no) -> LogSoftmax over the last dimension.
    """
    def __init__(self, ni, nh, no):
        super().__init__()
        # Order matters: the position in the Sequential is part of each state-dict key
        layers = [
            torch.nn.Linear(ni, nh),
            torch.nn.ReLU(),
            torch.nn.Linear(nh, nh),
            torch.nn.ReLU(),
            torch.nn.Linear(nh, no),
            torch.nn.LogSoftmax(dim=-1),
        ]
        self.fwd = torch.nn.Sequential(*layers)
    def forward(self, x):
        """Return log-probabilities produced by the three linear layers."""
        return self.fwd(x)
class DeepNNwBN(torch.nn.Module):
    """Two-hidden-layer classifier with batch normalisation before every linear layer.

    Layer stack: BatchNorm1d(ni) -> Linear(ni, nh) -> ReLU -> BatchNorm1d(nh) ->
    Linear(nh, nh) -> ReLU -> BatchNorm1d(nh) -> Linear(nh, no) -> LogSoftmax.
    """
    def __init__(self, ni, nh, no):
        super().__init__()
        # Order matters: the position in the Sequential is part of each state-dict key
        stages = [
            torch.nn.BatchNorm1d(ni),
            torch.nn.Linear(ni, nh),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(nh),
            torch.nn.Linear(nh, nh),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(nh),
            torch.nn.Linear(nh, no),
            torch.nn.LogSoftmax(dim=-1),
        ]
        self.fwd = torch.nn.Sequential(*stages)
    def forward(self, x):
        """Return log-probabilities from the batch-normalised stack."""
        return self.fwd(x)

In [4]:
# MNIST training split, downloaded into the current directory if missing;
# every image is converted to a tensor by ToTensor
train_dataset = torchvision.datasets.MNIST(
    '.',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
# Shuffled mini-batches of 128 training images
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# Point-estimate network: 28*28 inputs, 1024 hidden units, 10 outputs
net = NN(ni=28*28, nh=1024, no=10)

In [6]:
# Load the saved feed-forward model. It is called directly as `model(...)` in the
# cells below, so the file is expected to hold a whole pickled module rather than a state dict.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
FFNN = torch.load("models/FFNN_ep10.pt")

## Generate Adversarial Examples

In [25]:
# MNIST training split (images converted to tensors)
train_dataset = torchvision.datasets.MNIST(
    '.',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

# Shuffled training batches of 128 images
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

# MNIST test split (images converted to tensors)
test_dataset = torchvision.datasets.MNIST(
    '.',
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

# Test batches of 128 images; the shuffle order is fixed by a generator seeded with 156
test_loader = DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=True,
    generator=torch.Generator().manual_seed(156),
)

In [26]:
# Pull one mini-batch from the training loader and flatten every image
# (1 x 28 x 28) into a single 784-element row
images, targets = next(iter(train_loader))
images = torch.flatten(images, start_dim=1)

In [38]:
# Scratch check: draws one integer from [0, 9) -- the upper bound of torch.randint is exclusive.
# Note that running this cell advances the global torch RNG state.
torch.randint(0,9,(1,))

tensor([1])

In [52]:
def false_targets(targets, num_classes=10):
    """Draw, for every label, a uniformly random class that differs from it.

    The previous implementation built ``set(range(10)) - set([target])`` with
    ``target`` being a 0-dim tensor. Tensors hash by identity, so nothing was
    ever removed from the set: the result was drawn from classes 0..8
    regardless of the label (it could equal the true label and was never 9).

    Parameters
    ----------
    targets : torch.Tensor or sequence of int
        True class labels in ``[0, num_classes)``.
    num_classes : int, optional
        Total number of classes (default 10, as for MNIST).

    Returns
    -------
    torch.Tensor
        Integer tensor with the same shape as ``targets``; every entry lies in
        ``[0, num_classes)`` and is different from the corresponding label.

    Raises
    ------
    ValueError
        If ``num_classes`` is smaller than 2 (no alternative class exists).
    """
    if num_classes < 2:
        raise ValueError("num_classes must be at least 2 to pick a different class")
    targets = torch.as_tensor(targets)
    # A shift in [1, num_classes - 1] taken modulo num_classes never maps a label
    # back onto itself and reaches each of the other classes with equal probability.
    shift = torch.randint(1, num_classes, targets.shape, device=targets.device)
    return (targets + shift) % num_classes

In [93]:
# One-step targeted attack on every training image with the feed-forward model.
# NOTE: relies on `otcm` (defined in a later cell) and `false_targets` having been executed.
loss_fcn = torch.nn.NLLLoss()
model = FFNN
EPS = 0.05
success_rate = 0
adv_images_lst = []

# Walk the dataset in its stored order. `train_loader` shuffles, but the saving cell
# pairs these images with `train_loader.dataset.targets` (dataset order), so a shuffled
# pass would attach the wrong label to almost every adversarial image.
ordered_loader = torch.utils.data.DataLoader(
    train_loader.dataset, batch_size=train_loader.batch_size, shuffle=False)

for images, targets in ordered_loader:
    # Drop parameter gradients accumulated by the previous batch
    # (the loss module itself has no parameters to reset).
    model.zero_grad()
    # One random wrong class per image
    falseTargets = false_targets(targets)
    # Fresh flattened leaf tensor so the gradient w.r.t. the pixels is recorded
    adv_images = images.clone().view(-1,28**2)
    adv_images.requires_grad = True
    preds = model(adv_images)
    loss  = loss_fcn(preds,falseTargets)
    loss.backward()
    # Step against the gradient of the loss towards the false class
    new_images = otcm(adv_images, EPS, adv_images.grad.sign())
    # Forward pass on adv. example (no gradient needed)
    with torch.no_grad():
        new_class = torch.argmax(model(new_images), dim=1)
    # Counts predictions that differ from the true label after the perturbation
    success_rate += torch.sum(new_class != targets)
    # Stored as uint8 images in [0, 255]
    adv_images_lst.extend((new_images.reshape(-1,28, 28)*255).type(torch.uint8).detach())
success_rate = success_rate/len(train_loader.dataset.targets)


In [95]:
# Bundle the adversarial training images with the labels stored on the dataset
# (taken in dataset order) and pickle the pair to the working directory
images_FF = dict(
    images=torch.stack(adv_images_lst),
    labels=train_loader.dataset.targets.detach(),
)
with open('train_images_FF0.05.pickle', 'wb') as handle:
    pickle.dump(images_FF, handle, pickle.HIGHEST_PROTOCOL)

In [96]:
# One-step targeted attack on every test image with the feed-forward model.
# NOTE: relies on `otcm` (defined in a later cell) and `false_targets` having been executed.
loss_fcn = torch.nn.NLLLoss()
model = FFNN
EPS = 0.05
success_rate = 0
adv_images_lst = []

# Walk the dataset in its stored order. `test_loader` shuffles, but the saving cell
# pairs these images with `test_loader.dataset.targets` (dataset order), so a shuffled
# pass would attach the wrong label to almost every adversarial image.
ordered_loader = torch.utils.data.DataLoader(
    test_loader.dataset, batch_size=test_loader.batch_size, shuffle=False)

for images, targets in ordered_loader:
    # Drop parameter gradients accumulated by the previous batch
    # (the loss module itself has no parameters to reset).
    model.zero_grad()
    # One random wrong class per image
    falseTargets = false_targets(targets)
    # Fresh flattened leaf tensor so the gradient w.r.t. the pixels is recorded
    adv_images = images.clone().view(-1,28**2)
    adv_images.requires_grad = True
    preds = model(adv_images)
    loss  = loss_fcn(preds,falseTargets)
    loss.backward()
    # Step against the gradient of the loss towards the false class
    new_images = otcm(adv_images, EPS, adv_images.grad.sign())
    # Forward pass on adv. example (no gradient needed)
    with torch.no_grad():
        new_class = torch.argmax(model(new_images), dim=1)
    # Counts predictions that differ from the true label after the perturbation
    success_rate += torch.sum(new_class != targets)
    # Stored as uint8 images in [0, 255]
    adv_images_lst.extend((new_images.reshape(-1,28, 28)*255).type(torch.uint8).detach())
success_rate = success_rate/len(test_loader.dataset.targets)


In [97]:
# Bundle the adversarial test images with the labels stored on the dataset
# (taken in dataset order) and pickle the pair to the working directory
images_FF = dict(
    images=torch.stack(adv_images_lst),
    labels=test_loader.dataset.targets.detach(),
)
with open('test_images_FF0.05.pickle', 'wb') as handle:
    pickle.dump(images_FF, handle, pickle.HIGHEST_PROTOCOL)

In [98]:
# Collect noises (saliencies) for one image across a collection of models.
# NOTE(review): `sampled_models` is not defined anywhere in the visible notebook (this cell
# last failed with a NameError); it must hold an iterable of models before the cell can run.
# NOTE(review): `images`, `targets`, `falseTargets`, `loss_fcn` and `EPS` are taken from the
# kernel state left by earlier cells. The `if` below and the `.view(28, 28)` only work when
# that state holds a single image -- confirm before running.
# EPS = 0.18
saliencies      = []
how_many_fooled = []
# target = torch.tensor([1])
falseTarget = torch.tensor([0])  # NOTE(review): unused below; the loss uses `falseTargets`
smax = torch.nn.LogSoftmax(dim=-1)

for model in sampled_models:
    # Forward pass on a fresh flattened leaf tensor so pixel gradients are recorded
    adv_images = images.clone()
    adv_images = adv_images.view(-1,28**2)
    adv_images.requires_grad = True
    # Compute loss w.r.t. an incorrect class
    # Note that we just have to ensure this class is different from targets
    preds = smax(model(adv_images))
    loss  = loss_fcn(preds,falseTargets)
    loss.backward()
    # The gradient lives on `adv_images`; `images` itself never receives one
    saliency = adv_images.grad.sign()
    # Compute adversarial example
    new_images = otcm(adv_images, EPS, saliency)
    # Forward pass on adv. example
    new_class = torch.argmax(smax(model(new_images)), dim=1)
    if targets != new_class:
        # How many models can this adv. example fool?
        how_many_fooled.append(
            sum(int(torch.argmax(smax(m(new_images)), dim=1) != targets) for m in sampled_models))
        saliencies.append(saliency.view(28, 28))
# print("\nFinished")


NameError: name 'sampled_models' is not defined

In [16]:
loss_fcn = torch.nn.NLLLoss()
def generate_saliency(EPS,false_target,images,targets, model):
    """Craft one-step targeted adversarial examples for ``images`` against one ``model``.

    The previous body could not run: it read ``target`` before assigning it,
    looped over the undefined global ``sampled_models`` while ignoring the loop
    index, computed the loss against the true ``targets`` instead of
    ``false_target``, and returned the undefined name ``how_many_fooled``.

    NOTE: a later cell defines another ``generate_saliency(EPS, target, images)``;
    whichever definition is executed last is the one in effect.

    Parameters
    ----------
    EPS : float
        Step size of the perturbation.
    false_target : int or torch.Tensor
        Class (or one class per image) the perturbation should push towards.
    images : torch.Tensor
        Batch of inputs shaped the way ``model`` expects them.
    targets : torch.Tensor or sequence of int
        True labels, one per image.
    model : callable
        Maps ``images`` to per-class log-probabilities (it feeds ``NLLLoss``).

    Returns
    -------
    saliencies : list of torch.Tensor
        Sign-of-gradient map of every image whose predicted class differs from
        its true label after the perturbation.
    how_many_fooled : int
        Number of such images.
    """
    torch.set_printoptions(sci_mode=False)
    targets = torch.as_tensor(targets).reshape(-1)
    false_target = torch.as_tensor(false_target, dtype=torch.long).reshape(-1)
    if false_target.numel() == 1:
        # A single class is applied to every image of the batch
        false_target = false_target.expand(images.shape[0])
    # Detached copy: a leaf tensor that can record its own gradient
    adv_images = images.clone().detach()
    adv_images.requires_grad = True
    preds = model(adv_images)
    # Loss towards the *false* class; stepping against its gradient raises that class' score
    loss  = loss_fcn(preds,false_target)
    loss.backward()
    saliency = adv_images.grad.sign()
    new_images = otcm(adv_images, EPS, saliency).detach()
    # Forward pass on adv. example
    new_class = torch.argmax(model(new_images), dim=1)
    fooled = new_class != targets
    saliencies = [sal for sal, hit in zip(saliency, fooled) if hit]
    how_many_fooled = int(fooled.sum())
    return saliencies, how_many_fooled
    

In [15]:
def forward_pass(model, images, loss_target = None):
    """Run ``model`` on ``images`` and return the index of the highest-scoring output.

    A log-softmax over the last dimension is applied to the model output first.
    The argmax is taken over the flattened output, so the index is only a class
    id when a single example is passed.

    If ``loss_target`` is truthy it must unpack into ``(loss, target)``; the loss
    of the log-softmax output against ``target`` is then back-propagated, which
    populates ``.grad`` on every tensor in the graph that requires it.
    """
    log_probs = torch.log_softmax(model(images), dim=-1)
    which_class = torch.argmax(log_probs).item()
    if not loss_target:
        return which_class
    criterion, wanted = loss_target
    criterion(log_probs, wanted).backward()
    return which_class

In [15]:
def otcm(images, eps, saliency):
    """Shift ``images`` by ``eps`` against ``saliency`` and clip the result to [0, 1].

    Works on a copy; the input tensor is left untouched.
    """
    perturbed = images.clone()
    perturbed = perturbed - eps*saliency
    return perturbed.clamp(0, 1)

In [17]:
# How many models can an adversarial example fool?
def how_many_can_it_fool(sampled_models, eps, saliency,images):
    fool = 0
    for k in range(len(sampled_models)):
        # Forward pass on sampled model k
        old_class = forward_pass(sampled_models[k], images)
        # One step Target Class Method (OTCM); saliency is noise
        new_images = otcm(images, eps, saliency)
        # Forward pass again on adv. example
        new_class = forward_pass(sampled_models[k], new_images)
        # If we change the class, we fool the model
        fool += int(old_class != new_class)
    return fool/len(sampled_models)

In [18]:
def generate_saliency(EPS,target,images):
    """Collect, per sampled model, the sign-gradient noise that changes its prediction.

    For every model in the global ``sampled_models`` the loss towards ``target``
    is back-propagated to ``images``, the sign of that gradient is applied with
    ``otcm``, and the model is queried again. When the predicted class changes,
    the sign map (viewed as 28 x 28) is kept together with the fraction of all
    sampled models that the same perturbation fools.

    Side effects: switches off scientific notation in torch's print options and
    sets ``requires_grad`` / resets ``grad`` on the ``images`` tensor passed in.

    Returns
    -------
    saliencies : list of torch.Tensor
        One 28 x 28 sign map per model that was fooled.
    how_many_fooled : list of float
        Matching values returned by ``how_many_can_it_fool``.
    """
    torch.set_printoptions(sci_mode=False)
    target = torch.tensor([target])
    saliencies, how_many_fooled = [], []
    for model in sampled_models:
        # Start from a clean gradient slot on the input for this model
        images.grad = None
        images.requires_grad = True
        # Loss w.r.t. the (incorrect) target class; backward fills images.grad
        old_class = forward_pass(model, images, [torch.nn.NLLLoss(), target])
        sign_map = images.grad.sign()
        # Adversarial example and the model's prediction on it
        new_images = otcm(images, EPS, sign_map)
        new_class = forward_pass(model, new_images)
        if old_class == new_class:
            continue
        # How many models can this adv. example fool?
        how_many_fooled.append(how_many_can_it_fool(sampled_models, EPS, sign_map, images))
        saliencies.append(sign_map.view(28, 28))
    return saliencies, how_many_fooled
    

In [19]:
def combine_saliencies(saliencies,success):
    """Merge per-model saliency maps into three flattened candidate perturbations.

    The element-wise statistics are computed in one vectorised call each instead
    of a Python loop over 28 x 28 pixels, which also lifts the restriction to
    28 x 28 maps: any common map shape is accepted.

    Parameters
    ----------
    saliencies : list of torch.Tensor
        Saliency maps of identical shape, one per model. Must be non-empty
        (``torch.stack`` raises on an empty list).
    success : list
        One score per map, in the same order; the first maximum selects the champion.

    Returns
    -------
    combined_med : torch.Tensor
        Flattened element-wise median (50th percentile, linear interpolation).
    combined_mean : torch.Tensor
        Flattened element-wise mean.
    champ : torch.Tensor
        Flattened map whose ``success`` value is the largest.
    """
    # distributional saliency map: (n_models, *map_shape)
    saliencies = torch.stack(saliencies)
    # Same dtype the original zero-initialised result buffers had
    out_dtype = torch.get_default_dtype()
    # choose median perturbation, element-wise across models
    median_map = np.percentile(saliencies.numpy(), 50, axis=0)
    combined_med  = torch.as_tensor(median_map, dtype=out_dtype).flatten()
    combined_mean = saliencies.mean(dim=0).to(out_dtype).flatten()
    champ         = saliencies[success.index(max(success))].flatten()
    return combined_med, combined_mean, champ
    

# Figure 1: clean image | median noise scaled by EPS | perturbed image.
# NOTE(review): `combined_med`, `champ` and `new_images` are not assigned at module level in
# the visible notebook (only inside functions/loops), so this code depends on leftover kernel state.
plt.figure()
plt.subplot(1, 3, 1)
plt.imshow(images.reshape(28, 28).detach().numpy())
plt.subplot(1, 3, 2)
# Noise panel uses a fixed colour range of [-1, 1]
plt.imshow((combined_med*EPS).reshape(28, 28).detach().numpy(), vmin=-1., vmax=1.)
plt.subplot(1, 3, 3)
plt.imshow(new_images.reshape(28, 28).detach().numpy())
plt.show()

# Figure 2: same layout with the champion noise in the middle panel.
# NOTE(review): the right-hand panel shows the same `new_images` as in figure 1.
plt.figure()
plt.subplot(1, 3, 1)
plt.imshow(images.reshape(28, 28).detach().numpy())
plt.subplot(1, 3, 2)
plt.imshow((champ*EPS).reshape(28, 28).detach().numpy(), vmin=-1., vmax=1.)
plt.subplot(1, 3, 3)
plt.imshow(new_images.reshape(28, 28).detach().numpy())
plt.show()

## Generate Adversarial Examples

In [20]:
# MNIST training split (images converted to tensors)
train_dataset = torchvision.datasets.MNIST(
    '.',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
# Shuffled training batches of 128 images; the shuffle order is fixed by a
# generator seeded with 156
train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    generator=torch.Generator().manual_seed(156),
)

In [21]:
# Number of mini-batches the training loader yields per pass (469 in the recorded output)
len(train_loader)

469

In [50]:
# Build adversarial versions (median / mean / champion noise) of the first 8 training
# batches and pickle each batch separately.
# NOTE(review): `combine_saliencies` raises when `generate_saliency` returns no maps,
# i.e. when none of the sampled models is fooled for an image.
EPS = 0.18
SAVE_DIR = "mnist_adv/"
# The pickles below are opened for writing; make sure the directory exists first
os.makedirs(SAVE_DIR, exist_ok=True)
target_len = len(train_dataset.classes)
# NOTE: `targets` is rebound here to the set of class ids (it named a label tensor in earlier cells)
targets    = set(range(10))
counter    = 1
successes  = []
for data in train_loader:
    images_med   = []
    images_mean  = []
    images_champ = []
    tru_labels   = []
    images, labels = data
    images = images.view(-1, 28*28)
    print("\r Batch %s" % counter, end="")
    for i in range(images.shape[0]):
        # the real target
        target_org = labels[i].item()
        # the class the attack should produce: any class except the real one.
        # Drawing a scalar (no `size=`) avoids int() on a 1-element array, which NumPy deprecates.
        target     = int(np.random.choice(list(targets - set([target_org]))))
        # keep the batch dimension: shape (1, 784)
        image      = images[i:i+1,:]
        # generating saliency maps using each sampled network
        temp_sals, success = generate_saliency(EPS,target,image)
        successes.append(success)
        # combining maps into three types
        combined_med, combined_mean, champ = combine_saliencies(temp_sals,success)
        # creating image
        images_med.append(otcm(image, EPS, combined_med))
        images_mean.append(otcm(image, EPS, combined_mean))
        images_champ.append(otcm(image, EPS, champ))
        tru_labels.append(target_org)
    tru_labels   = torch.tensor(tru_labels)

    # Stack to (batch, 28, 28) and store as uint8 images in [0, 255]
    images_med   = (torch.vstack(images_med).reshape(-1,28, 28)*255).type(torch.uint8).detach()
    images_mean  = (torch.vstack(images_mean).reshape(-1,28, 28)*255).type(torch.uint8).detach()
    images_champ = (torch.vstack(images_champ).reshape(-1,28, 28)*255).type(torch.uint8).detach()
    images_med   = {'images': images_med,  'labels': tru_labels}
    images_mean  = {'images': images_mean, 'labels': tru_labels}
    images_champ = {'images': images_champ,'labels': tru_labels}

    # One pickle per noise type and batch
    with open(SAVE_DIR + 'train_images_med_%s.pickle'   % counter, 'wb') as handle:
        pickle.dump(images_med, handle, protocol  = pickle.HIGHEST_PROTOCOL)
    with open(SAVE_DIR + 'train_images_mean_%s.pickle'  % counter, 'wb') as handle:
        pickle.dump(images_mean, handle, protocol = pickle.HIGHEST_PROTOCOL)
    with open(SAVE_DIR + 'train_images_champ_%s.pickle' % counter, 'wb') as handle:
        pickle.dump(images_champ, handle, protocol= pickle.HIGHEST_PROTOCOL)
    counter += 1
    # Stop after 8 batches
    if counter > 8:
        break


 Batch 8

## Generate Test Examples


In [38]:
# MNIST test split (images converted to tensors)
test_dataset = torchvision.datasets.MNIST(
    '.',
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

# Shuffled test loader that yields one image per batch
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)

In [51]:
# Build adversarial versions (median / mean / champion noise) of the first 16 test
# batches and pickle each batch separately.
# The loop previously iterated `train_loader`, so the files named `test_images_*` were
# produced from training images; it now reads from `test_loader`.
# NOTE(review): the preceding cell builds `test_loader` with batch_size=1, so every pickle
# then holds a single image -- raise the batch size there if larger files are wanted.
# NOTE(review): `combine_saliencies` raises when `generate_saliency` returns no maps,
# i.e. when none of the sampled models is fooled for an image.
EPS = 0.18
SAVE_DIR = "mnist_adv/"
# The pickles below are opened for writing; make sure the directory exists first
os.makedirs(SAVE_DIR, exist_ok=True)
target_len = len(test_dataset.classes)
# NOTE: `targets` is rebound here to the set of class ids (it named a label tensor in earlier cells)
targets    = set(range(10))
counter    = 1
successes  = []
for data in test_loader:
    images_med   = []
    images_mean  = []
    images_champ = []
    tru_labels   = []
    images, labels = data
    images = images.view(-1, 28*28)
    print("\r Batch %s" % counter, end="")
    for i in range(images.shape[0]):
        # the real target
        target_org = labels[i].item()
        # the class the attack should produce: any class except the real one.
        # Drawing a scalar (no `size=`) avoids int() on a 1-element array, which NumPy deprecates.
        target     = int(np.random.choice(list(targets - set([target_org]))))
        # keep the batch dimension: shape (1, 784)
        image      = images[i:i+1,:]
        # generating saliency maps using each sampled network
        temp_sals, success = generate_saliency(EPS,target,image)
        successes.append(success)
        # combining maps into three types
        combined_med, combined_mean, champ = combine_saliencies(temp_sals,success)
        # creating image
        images_med.append(otcm(image, EPS, combined_med))
        images_mean.append(otcm(image, EPS, combined_mean))
        images_champ.append(otcm(image, EPS, champ))
        tru_labels.append(target_org)
    tru_labels   = torch.tensor(tru_labels)
    # Stack to (batch, 28, 28) and store as uint8 images in [0, 255]
    images_med   = (torch.vstack(images_med).reshape(-1,28, 28)*255).type(torch.uint8).detach()
    images_mean  = (torch.vstack(images_mean).reshape(-1,28, 28)*255).type(torch.uint8).detach()
    images_champ = (torch.vstack(images_champ).reshape(-1,28, 28)*255).type(torch.uint8).detach()
    images_med   = {'images': images_med,  'labels': tru_labels}
    images_mean  = {'images': images_mean, 'labels': tru_labels}
    images_champ = {'images': images_champ,'labels': tru_labels}

    # One pickle per noise type and batch
    with open(SAVE_DIR + 'test_images_med_%s.pickle'   % counter, 'wb') as handle:
        pickle.dump(images_med, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open(SAVE_DIR + 'test_images_mean_%s.pickle'  % counter, 'wb') as handle:
        pickle.dump(images_mean, handle, protocol=pickle.HIGHEST_PROTOCOL)
    with open(SAVE_DIR + 'test_images_champ_%s.pickle' % counter, 'wb') as handle:
        pickle.dump(images_champ, handle, protocol=pickle.HIGHEST_PROTOCOL)
    counter += 1
    # Stop after 16 batches
    if counter > 16:
        break

 Batch 16