In [1]:
import os
import yaml

import foolbox as fb
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf

from adversarial_generators.fgsm import generate_adversarial_images
from model.VGG19 import VGG19
from preprocess.preprocess import load_data

# Config

In [2]:
with open('config/config.yml', 'r') as stream:
    config = yaml.safe_load(stream)

ROOT_DIRECTORY = os.path.dirname(os.path.abspath('__file__'))

# Load weights to model

In [3]:
input_shape = (config["img_height"], config["img_width"], 3)
model_a = VGG19(input_shape = input_shape, num_classes = config["num_classes"])
model_b = VGG19(input_shape = input_shape, num_classes = config["num_classes"])

eps = 0.05
path_to_weight_a = "../weights/{}_weight_a.h5".format(eps)
path_to_weight_b = "../weights/{}_weight_b.h5".format(eps)
model_a.load_weights(path_to_weight_a)
model_b.load_weights(path_to_weight_b)

# Load Data

In [4]:
path_to_set = os.path.join(ROOT_DIRECTORY, config["path_to_data"])
path_to_test_csv = os.path.join(ROOT_DIRECTORY, config["path_to_test_csv"])
(X_test, y_test) = load_data(path_to_test_csv, path_to_set, config["img_width"], config["img_height"])
X_test, _, y_test, _ = train_test_split(X_test, y_test, test_size = 0.5, random_state = 0)

loaded: 0
loaded: 500
loaded: 1000
loaded: 1500
loaded: 2000
loaded: 2500
loaded: 3000
loaded: 3500
loaded: 4000
loaded: 4500
loaded: 5000
loaded: 5500
loaded: 6000
loaded: 6500
loaded: 7000
loaded: 7500
loaded: 8000
loaded: 8500
loaded: 9000
loaded: 9500
loaded: 10000
loaded: 10500
loaded: 11000
loaded: 11500
loaded: 12000
loaded: 12500


# Compile Model

In [5]:
optimizer = tf.keras.optimizers.Adam(lr = config["learning_rate"], decay = config["learning_rate"] / (config["epochs"]))
loss = config["loss_function"]
metrics = config["metrics"]
model_a.compile(optimizer = optimizer, loss = loss, metrics = [metrics])
model_b.compile(optimizer = optimizer, loss = loss, metrics = [metrics])



# Normalize Data

In [6]:
X_test = X_test.astype("float32") / 255.0

# One-Hot Encode Target value

In [7]:
y_test = tf.keras.utils.to_categorical(y_test, config["num_classes"])

# Evaluate accuracy of both models

In [8]:
print('Model A')
model_a.evaluate(X_test, y_test)
print('Model B')
model_b.evaluate(X_test, y_test)

Model A
Model B


[0.047482799738645554, 0.9906571507453918]

# Test transferability

In [9]:
# From A to B
predictions = model_a.predict_on_batch(X_test)

correctly_classified = (np.argmax(predictions, axis = 1) == np.argmax(y_test, axis = 1))

X_correctly_classified, y_correctly_classified = X_test[correctly_classified], y_test[correctly_classified]

adversarial_images = generate_adversarial_images(X_correctly_classified, y_correctly_classified, eps, model_a).numpy()

new_predictions = model_a.predict_on_batch(adversarial_images)
indexes_of_wrong_images = (np.argmax(new_predictions, axis = 1) != np.argmax(y_correctly_classified, axis = 1))

# Get the images wrongly classified by model A
wrong_classified_images = adversarial_images[indexes_of_wrong_images]

model_b.evaluate(wrong_classified_images, y_correctly_classified[indexes_of_wrong_images])



[2.6356091499328613, 0.5927171111106873]

In [10]:
# From B to A
predictions = model_b.predict_on_batch(X_test)

correctly_classified = (np.argmax(predictions, axis = 1) == np.argmax(y_test, axis = 1))

X_correctly_classified, y_correctly_classified = X_test[correctly_classified], y_test[correctly_classified]

adversarial_images = generate_adversarial_images(X_correctly_classified, y_correctly_classified, eps, model_b).numpy()

new_predictions = model_b.predict_on_batch(adversarial_images)
indexes_of_wrong_images = (np.argmax(new_predictions, axis = 1) != np.argmax(y_correctly_classified, axis = 1))

# Get the images wrongly classified by model A
wrong_classified_images = adversarial_images[indexes_of_wrong_images]

model_a.evaluate(wrong_classified_images, y_correctly_classified[indexes_of_wrong_images])



[5.050490379333496, 0.1371428519487381]

# Using foolbox

In [11]:
(X_test, y_test) = load_data(path_to_test_csv, path_to_set, config["img_width"], config["img_height"])
X_test, _, y_test, _ = train_test_split(X_test, y_test, test_size = 0.5, random_state = 0) # Get the initial label instead of one-hot encoded version
X_test = X_test.astype("float32") / 255.0

loaded: 0
loaded: 500
loaded: 1000
loaded: 1500
loaded: 2000
loaded: 2500
loaded: 3000
loaded: 3500
loaded: 4000
loaded: 4500
loaded: 5000
loaded: 5500
loaded: 6000
loaded: 6500
loaded: 7000
loaded: 7500
loaded: 8000
loaded: 8500
loaded: 9000
loaded: 9500
loaded: 10000
loaded: 10500
loaded: 11000
loaded: 11500
loaded: 12000
loaded: 12500


In [12]:
preprocessing = dict()
bounds = (0, 1)
fmodel_a = fb.TensorFlowModel(model_a, bounds=bounds, preprocessing=preprocessing)
fmodel_b = fb.TensorFlowModel(model_b, bounds=bounds, preprocessing=preprocessing)

X_test = tf.convert_to_tensor(X_test)
y_test = tf.convert_to_tensor(y_test)

# Check if model loaded properly
print(fb.utils.accuracy(fmodel_a, X_test, y_test))
print(fb.utils.accuracy(fmodel_b, X_test, y_test))

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
0.6292953491210938
0.9906571507453918


# Projected Gradient Descent

In [13]:
attack = fb.attacks.L2CarliniWagnerAttack(binary_search_steps = 6, steps = 3)

In [14]:
# From A to B
predictions = model_a.predict_on_batch(X_test)

correctly_classified = (np.argmax(predictions, axis = 1) == y_test)

X_correctly_classified, y_correctly_classified = X_test[correctly_classified], y_test[correctly_classified]
raw, adversarial_images, is_adv = attack(fmodel_a, X_correctly_classified, y_correctly_classified, epsilons = eps)

# Get the images wrongly classified by model A
indexes_of_wrong_images = is_adv.numpy()
adversarial_images = adversarial_images.numpy()
wrong_classified_images = adversarial_images[indexes_of_wrong_images]

fb.utils.accuracy(fmodel_b, wrong_classified_images, y_correctly_classified[indexes_of_wrong_images])

0.9861111044883728

In [15]:
# From B to A
predictions = model_b.predict_on_batch(X_test)

correctly_classified = (np.argmax(predictions, axis = 1) == y_test)

X_correctly_classified, y_correctly_classified = X_test[correctly_classified], y_test[correctly_classified]
raw, adversarial_images, is_adv = attack(fmodel_b, X_correctly_classified, y_correctly_classified, epsilons = eps)

# Get the images wrongly classified by model B
indexes_of_wrong_images = is_adv.numpy()
adversarial_images = adversarial_images.numpy()
wrong_classified_images = adversarial_images[indexes_of_wrong_images]

fb.utils.accuracy(fmodel_a, wrong_classified_images, y_correctly_classified[indexes_of_wrong_images])

0.23880596458911896