In [None]:
import os
import glob
from PIL import Image
from tqdm import tqdm
import random

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torchvision as tv

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# --- Notebook configuration -------------------------------------------------
# Prefix that is string-concatenated in front of "Train.csv", "Test.csv" and
# "Train/"; a non-empty value therefore needs a trailing path separator.
ROOT_DIR = ""
# Images are resized to IMG_SIZE x IMG_SIZE before being turned into tensors.
IMG_SIZE = 64
# Mini-batch size shared by the training and validation loaders.
BATCH_SIZE = 128
# Dimensionality of the VAE latent code.
LATENT_DIMS = 16
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed every RNG the notebook relies on so that the shuffled train/validation
# split and the sampled latent noise are reproducible across kernel restarts.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Read the annotation tables that sit next to the image folders.
train_csv = pd.read_csv(f"{ROOT_DIR}Train.csv")
test_csv = pd.read_csv(f"{ROOT_DIR}Test.csv")

# Only the image path and its class id are used further down.
wanted_columns = ["Path", "ClassId"]
train_files = train_csv[wanted_columns]
test_files = test_csv[wanted_columns]

In [None]:
# Resize every image to a square of IMG_SIZE pixels, then convert it to a tensor.
tfms = tv.transforms.Compose(
    [
        tv.transforms.Resize((IMG_SIZE, IMG_SIZE)),
        tv.transforms.ToTensor(),
    ]
)

# Recursively collect every .png file below the training folder.
filenames = [
    os.path.join(folder, entry)
    for folder, _, entries in os.walk(ROOT_DIR + "Train/")
    for entry in entries
    if entry.endswith('.png')
]

In [None]:
# Load data into memory

# Build the path -> class-id lookup once instead of scanning the DataFrame for
# every image. It is read from `train_csv` rather than `train_files` because a
# later cell rebinds `train_files` to a plain list, which would make this cell
# fail when re-run.
class_by_path = {
    path: int(class_id)
    for path, class_id in zip(train_csv["Path"], train_csv["ClassId"])
}

file_arr = []
for path in tqdm(filenames):
    # The context manager releases the file handle as soon as the tensor exists.
    with Image.open(path) as image:
        tens = tfms(image)
    # Keep the part of the path after "gtsrb/" (the whole path if that marker is
    # absent); this is the key that is looked up in the CSV "Path" column.
    conv_filename = path.split("gtsrb/")[-1]
    # Raises KeyError when an image on disk has no row in Train.csv.
    class_id = class_by_path[conv_filename]
    file_arr.append([tens, class_id])

In [None]:
# Shuffle in place first so that both splits contain a mix of classes, then
# hold out the last 1000 samples for validation and train on the rest.

random.shuffle(file_arr)

train_files, valid_files = file_arr[:-1000], file_arr[-1000:]

In [None]:
class TSDataset(Dataset):
    """Dataset over an in-memory sequence of ``[x, label]`` pairs.

    Args:
        files: indexable collection whose items hold the sample at position 0
            and its label at position 1.
        transform: stored on the instance but not applied in ``__getitem__``.
    """

    def __init__(self, files, transform=None):
        self.files, self.transform = files, transform

    def __len__(self):
        """Number of stored samples."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return the ``(x, label)`` tuple stored at ``idx``."""
        # Tensor indices are converted to plain Python values before indexing.
        position = idx.tolist() if torch.is_tensor(idx) else idx
        sample = self.files[position]
        return sample[0], sample[1]

In [None]:
# Wrap both in-memory splits; the transform is handed over but TSDataset does
# not apply it (the samples are already tensors).
training_data = TSDataset(files=train_files, transform=tfms)
valid_data = TSDataset(files=valid_files, transform=tfms)

In [None]:
# Training batches are reshuffled on every pass over the data.
train_dataloader = DataLoader(
    training_data,
    shuffle=True,
    batch_size=BATCH_SIZE,
)
# Validation keeps a fixed order (shuffle=False) so that outputs and their
# improvements can be compared sample by sample during training.
valid_dataloader = DataLoader(
    valid_data,
    shuffle=False,
    batch_size=BATCH_SIZE,
)

In [None]:
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, input):
        batch = input.shape[0]
        return input.view(batch, -1)


class UnFlatten(nn.Module):
    """View a ``(batch, size)`` tensor as ``(batch, size, 1, 1)``."""

    def forward(self, input, size=1024):
        target_shape = (input.size(0), size, 1, 1)
        return input.view(*target_shape)

In [None]:
# https://www.kaggle.com/code/muhammad4hmed/anime-vae/notebook

class CVAE(nn.Module):
    """Convolutional conditional variational auto-encoder.

    The encoder maps an image to a flat feature vector of width ``h_dim``, from
    which the mean and log-variance of a ``z_dim``-dimensional Gaussian are
    predicted. A latent sample is concatenated with a learned class embedding,
    projected back to ``h_dim`` and decoded into an image.

    Args:
        image_channels: number of channels of the input and reconstructed image.
        h_dim: width of the flattened encoder output. For 64x64 inputs the four
            stride-2 convolutions yield 256 * 2 * 2 = 1024 features.
        z_dim: dimensionality of the latent code.
    """

    # Size of the label vocabulary and of its embedding.
    _NUM_CLASSES = 43
    _LABEL_DIM = 10

    def __init__(self, image_channels=3, h_dim=1024, z_dim=16):
        super().__init__()

        # nn.Flatten / nn.Unflatten carry no parameters, so the state-dict keys
        # (encoder.0, encoder.2, ..., decoder.1, ...) stay the same as with
        # hand-written reshape layers in those positions.
        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 32, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Flatten(),
        )

        self.signclass_embedding = nn.Embedding(self._NUM_CLASSES, self._LABEL_DIM)

        self.h2mu = nn.Linear(h_dim, z_dim)
        self.h2sigma = nn.Linear(h_dim, z_dim)
        self.z2h = nn.Linear(z_dim + self._LABEL_DIM, h_dim)

        self.decoder = nn.Sequential(
            # Reshape (batch, h_dim) -> (batch, h_dim, 1, 1); follows h_dim
            # instead of assuming a fixed width of 1024.
            nn.Unflatten(1, (h_dim, 1, 1)),
            nn.ConvTranspose2d(h_dim, 128, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=6, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(32, image_channels, kernel_size=6, stride=2),
            nn.Sigmoid(),
        )

    # Enforce latent space well-formedness by injecting random gaussian noise
    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + std * eps`` with ``eps ~ N(0, I)``."""
        std = torch.exp(0.5 * logvar)
        # randn_like draws the noise on mu's own device and dtype, so the model
        # does not depend on a module-level device constant.
        eps = torch.randn_like(mu)
        return mu + std * eps

    def bottleneck(self, h, label):
        """Return ``(z, mu, logvar)`` for encoder features ``h``.

        ``label`` is accepted for interface symmetry but is not used here.
        """
        mu = self.h2mu(h)
        logvar = self.h2sigma(h)
        z = self.reparameterize(mu, logvar)
        return z, mu, logvar

    def encode(self, x, label):
        """Encode images ``x`` and return one latent sample per image."""
        return self.bottleneck(self.encoder(x), label)[0]

    def decode(self, z):
        """Decode ``z``, which must already include the class embedding.

        ``z`` is fed straight into ``z2h`` and therefore needs
        ``z_dim + 10`` features per row.
        """
        return self.decoder(self.z2h(z))

    def extract_model(self):
        """Return ``(label embedding, latent up-projection, decoder)``.

        These are the three sub-modules ``forward`` applies after sampling, in
        the order in which they are applied.
        """
        return self.signclass_embedding, self.z2h, self.decoder

    def forward(self, x, label):
        """Reconstruct ``x`` conditioned on ``label``.

        Returns:
            ``(reconstruction, mu, logvar, z_small, z)`` where ``z_small`` is the
            sampled latent code and ``z`` its ``h_dim``-wide projection after
            concatenation with the class embedding.
        """
        h = self.encoder(x)
        z_small, mu, logvar = self.bottleneck(h, label)
        signclass = self.signclass_embedding(label.long())
        # Labels of shape (batch, 1) embed to (batch, 1, 10); drop that axis.
        signclass = signclass.squeeze(dim=1)
        z_small_cat = torch.cat([z_small, signclass], dim=1)
        z = self.z2h(z_small_cat)
        return self.decoder(z), mu, logvar, z_small, z

# Transfer test

In [None]:
# Benchmark classifier
# https://github.com/poojahira/gtsrb-pytorch

class Net(nn.Module):
    """CNN classifier with a spatial-transformer front end.

    Inputs are resized to 32x32, warped by a learned affine transform and then
    classified by three conv/batch-norm stages followed by two linear layers.

    Args:
        nclasses: number of output classes. Defaults to 43, the class count
            used by the other models in this notebook.
    """

    def __init__(self, nclasses=43):
        super().__init__()

        # CNN layers
        self.conv1 = nn.Conv2d(3, 100, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(100)
        self.conv2 = nn.Conv2d(100, 150, kernel_size=3)
        self.bn2 = nn.BatchNorm2d(150)
        self.conv3 = nn.Conv2d(150, 250, kernel_size=3)
        self.bn3 = nn.BatchNorm2d(250)
        self.conv_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(250*2*2, 350)
        self.fc2 = nn.Linear(350, nclasses)

        # Localization network feeding the affine regressor below.
        self.localization = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
            )

        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 4 * 4, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
            )

        # Initialize the weights/bias with identity transformation
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    # Spatial transformer network forward function
    def stn(self, x):
        """Warp ``x`` with the affine transform predicted from ``x`` itself."""
        xs = self.localization(x)
        xs = xs.view(-1, 10 * 4 * 4)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)
        # align_corners=False is what torch applies when the argument is left
        # out; spelling it out keeps the result and silences the warning.
        grid = F.affine_grid(theta, x.size(), align_corners=False)
        x = F.grid_sample(x, grid, align_corners=False)
        return x

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, nclasses)``."""
        # transform the input
        x = F.interpolate(x, size=(32,32), mode='bilinear', align_corners=False)
        x = self.stn(x)

        # Perform forward pass
        x = self.bn1(F.max_pool2d(F.leaky_relu(self.conv1(x)),2))
        x = self.conv_drop(x)
        x = self.bn2(F.max_pool2d(F.leaky_relu(self.conv2(x)),2))
        x = self.conv_drop(x)
        x = self.bn3(F.max_pool2d(F.leaky_relu(self.conv3(x)),2))
        x = self.conv_drop(x)
        x = x.view(-1, 250*2*2)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [None]:
# Local classifier

class Classifier(nn.Module):
    """Small three-convolution CNN producing log-probabilities for 43 classes."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv3 = nn.Conv2d(20, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 43)

    def forward(self, x):
        """Classify a batch of images; returns ``(batch, 43)`` log-probabilities."""
        # First stage: convolution, 2x2 max-pool, ReLU (no dropout).
        features = F.relu(F.max_pool2d(self.conv1(x), 2))
        # Second and third stage share the same channel-dropout layer.
        for conv in (self.conv2, self.conv3):
            features = F.relu(F.max_pool2d(self.conv2_drop(conv(features)), 2))
        # Flatten to the 320 features expected by the first linear layer.
        flat = features.view(-1, 320)
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [None]:
# Load pre-trained models for classifier and cvae

classifier_local = Classifier()
classifier_bench = Net()
cvae = CVAE()

# NOTE(review): the checkpoint paths are empty placeholders and must be filled
# in before this cell can run.
# map_location remaps the stored tensors onto DEVICE, so checkpoints written on
# a GPU machine also load on a CPU-only one.
classifier_local.load_state_dict(torch.load("", map_location=DEVICE))
classifier_bench.load_state_dict(torch.load("", map_location=DEVICE))
cvae.load_state_dict(torch.load("", map_location=DEVICE))

# Inference mode: disables dropout and makes batch-norm use running statistics.
classifier_local.eval()
classifier_bench.eval()
cvae.eval()

# Trailing semicolons keep the module reprs out of the cell output.
classifier_local.to(DEVICE);
classifier_bench.to(DEVICE);
cvae.to(DEVICE);

In [None]:
# Ensemble architecture (combining cvae and classifier)

class Ensemble(nn.Module):
    """Chain label embedding, latent up-projection, decoder and classifier.

    Args:
        embeddings: module mapping integer labels to embedding vectors.
        upscaler: module applied to the concatenated ``[z, embedding]`` vector.
        decoder: module turning the upscaled vector into an image.
        classifier: module applied to the decoded image in ``forward``.
    """

    def __init__(self, embeddings, upscaler, decoder, classifier):
        super().__init__()
        self.embeddings = embeddings
        self.upscaler = upscaler
        self.decoder = decoder
        self.classifier = classifier

    def _embed_label(self, label):
        """Embed ``label`` and return one embedding row per sample."""
        enc_label = self.embeddings(label.long())
        # Labels of shape (batch, 1) embed to (batch, 1, emb); drop that axis so
        # both (batch,) and (batch, 1) labels give a 2-D result.
        if enc_label.dim() == 3:
            enc_label = enc_label.squeeze(dim=1)
        return enc_label

    def forward(self, z, label):
        """Decode ``z`` conditioned on ``label`` and classify the image."""
        return self.classifier(self.get_img(z, label))

    def get_img(self, z, label):
        """Decode ``z`` conditioned on ``label`` and return the image.

        Accepts labels of shape ``(batch,)`` as well as ``(batch, 1)``.
        """
        x = torch.cat((z, self._embed_label(label)), dim=1)
        x = self.upscaler(x)
        x = self.decoder(x)
        return x

In [None]:
# Load cvae and classifier into ensemble

# Take the three generative parts straight from the CVAE: label embedding,
# latent -> hidden projection and the transposed-convolution decoder.
embeddings, upscaler, decoder = cvae.signclass_embedding, cvae.z2h, cvae.decoder
# NOTE(review): the attack target is assumed to be the local classifier, with
# classifier_bench kept for the transfer test — confirm.
ensemble = Ensemble(embeddings, upscaler, decoder, classifier_local)
ensemble.to(DEVICE);

In [None]:
# Calculate adversarial example
# https://adversarial-ml-tutorial.org/adversarial_training/

def pgd_linf(model, X, y, epsilon, alpha, num_iter):
    """Projected gradient ascent on the loss inside an L-infinity ball.

    Args:
        model: callable invoked as ``model(X + delta, y)`` that returns
            per-class scores of shape ``(batch, classes)``.
        X: input tensor that is perturbed.
        y: integer class labels, shape ``(batch,)`` or ``(batch, 1)``; passed
            unchanged to ``model`` and flattened for the loss.
        epsilon: radius of the L-infinity ball around ``X``.
        alpha: step size of each signed-gradient step.
        num_iter: number of ascent steps.

    Returns:
        The perturbation ``delta`` (same shape as ``X``, detached), with every
        entry in ``[-epsilon, epsilon]``.
    """
    targets = y.reshape(-1).long()
    delta = torch.zeros_like(X, requires_grad=True)
    for _ in range(num_iter):
        loss = F.cross_entropy(model(X + delta, y), targets)
        # Differentiate with respect to delta only: the model's parameter
        # .grad buffers are left untouched and no graph has to be retained.
        (grad,) = torch.autograd.grad(loss, delta)
        with torch.no_grad():
            # Signed ascent step, then projection back onto the epsilon ball.
            delta.add_(alpha * grad.sign()).clamp_(-epsilon, epsilon)
    return delta.detach()

In [None]:
# Pull one training batch and move both tensors to the compute device.
cvae_data, cvae_labels = (part.to(DEVICE) for part in next(iter(train_dataloader)))
# Labels become a column, shape (batch, 1).
cvae_labels = cvae_labels.unsqueeze(dim=1)
cvae_data.shape, cvae_labels.shape

In [None]:
# Calculate adversarial perturbations

# Encode the batch; the second value returned by the CVAE is the posterior mean.
# NOTE(review): using the posterior mean as the attack starting point is
# inferred from the variable names — confirm.
with torch.no_grad():
    mu = cvae(cvae_data, cvae_labels)[1]
mu_local = mu.detach()

# Search for a latent perturbation that raises the classifier's loss.
delta = pgd_linf(ensemble, mu_local, cvae_labels, epsilon=0.2, alpha=2e-2, num_iter=50)
mu_adv = mu_local + delta

# Class probabilities the ensemble assigns to the perturbed latent codes.
with torch.no_grad():
    yp = ensemble(mu_adv, cvae_labels)
prob_orig = F.softmax(yp, dim=1)

# Probability still given to the true class after the attack (lower = more
# successful attack), and the L2 norm of each unperturbed latent code.
danger_array = prob_orig.gather(1, cvae_labels).squeeze(dim=1)
l2_array = torch.linalg.norm(mu_local, dim=1, ord=2)

In [None]:
# Get the 200 most dangerous examples
# Ascending sort: the smallest true-class probabilities come first. Slicing to
# 200 simply returns everything when fewer examples are available.

danger_order = danger_array.argsort(dim=0)
top = danger_order[:200]

ordered_200 = danger_array[top].detach().cpu()

In [None]:
# Save the outputs of the predicted true class probabilities of the 200 most adversarial examples (ordered_200) and the examples themselves
# Perform the same procedure for the other classifier
# Plot the transferred losses

# Investigate extremes

In [None]:
# plot magnitudes
# NOTE(review): the L-infinity norm is taken over the unperturbed latent codes,
# mirroring how l2_array is computed — confirm this is the intended x-axis.
linf_array = torch.linalg.norm(mu_local, dim=1, ord=float("inf"))

# matplotlib needs host-side arrays without autograd history.
fig, ax = plt.subplots()
ax.scatter(
    linf_array.detach().cpu().numpy(),
    danger_array.detach().cpu().numpy(),
    c="blue",
)
ax.set_xlabel('L-Infinity Norm')
# danger_array holds the probability assigned to the true class after the
# attack, so the axis is labelled accordingly.
ax.set_ylabel('True-Class Probability After Attack')
plt.show()

In [None]:
# Sweep one latent dimension at a time while all others stay at zero.
# Ten evenly spaced values from -3.25 to 3.5 (step 0.75), built once on the
# target device; linspace replaces the deprecated end-inclusive torch.range.
n_steps = 10
sd_steps = torch.linspace(-3.25, 3.5, n_steps, device=DEVICE)

mu_range = torch.zeros((LATENT_DIMS * n_steps, LATENT_DIMS), device=DEVICE)

# for every dimension, insert the range -X SDs to X SD
for i in range(LATENT_DIMS):
    mu_range[i * n_steps:(i + 1) * n_steps, i] = sd_steps

In [None]:
# Plot all dimensions (here: 16 along specified range)
# NOTE(review): `labels` and `plot_images` are not defined in the visible part
# of the notebook — confirm they exist before this cell is run.

# Classifier output for every point of the latent sweep.
yp = ensemble(mu_range, labels)

# Decode the same points to images and move the channel axis last
# (N, C, H, W) -> (N, H, W, C) before handing them to the plotting helper.
imgs = (
    ensemble.get_img(mu_range, labels.squeeze(dim=1))
    .detach()
    .cpu()
    .numpy()
    .transpose(0, 2, 3, 1)
)
plot_images(imgs, labels, yp, 16, 10)