In [1]:
%matplotlib inline
import sys, os, time
# Select GPU
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import gc
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
# from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
# from tensorflow.keras.applications.mobilenet import preprocess_input, MobileNet
# from tensorflow.keras.applications.resnet50 import preprocess_input, ResNet50

from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input

from tqdm import tqdm

In [2]:
def top_k_accuracy(y_true, y_pred, k=1):
    '''From: https://github.com/chainer/chainer/issues/606
    
    Expects both y_true and y_pred to be one-hot encoded.
    '''
    argsorted_y = np.argsort(y_pred)[:,-k:]
    return np.any(argsorted_y.T == y_true.argmax(axis=1), axis=0).mean()

<h3> Define data paths and load data </h3>

In [3]:
BASE_PATH = "/home/ailie/Repos/BBAttacks/data/"

In [4]:
IMAGES_PATH = os.path.join(BASE_PATH, "x_val_0_10000.npy")
LABELS_PATH = os.path.join(BASE_PATH, "y_val.npy")
SYNSET_WORDS_PATH = os.path.join(BASE_PATH, "synset_words.txt")

In [5]:
keras_idx_to_name = {}
f = open(SYNSET_WORDS_PATH,"r")
idx = 0
for line in f:
    parts = line.split(" ")
    keras_idx_to_name[idx] = " ".join(parts[1:])
    idx += 1
f.close()

In [6]:
x_val_raw = np.load(IMAGES_PATH) # loaded as RGB
# didn't upload imagenet data here as it is too large, but it is publicl available
# and the experiments are reproducible 
x_val = preprocess_input(x_val_raw.copy()) # converted to BGR

In [7]:
y_val = np.load(LABELS_PATH)

<h3> Load model and only keep a small batch of correctly classified images </h3>

In [8]:
# Only keep a few images in memory and drop the others
TOTAL_SAMPLE_SIZE = 5000
x_val = x_val[:TOTAL_SAMPLE_SIZE]
x_val_raw = x_val_raw[:TOTAL_SAMPLE_SIZE]

y_val = y_val[:TOTAL_SAMPLE_SIZE]
y_val_one_hot = to_categorical(y_val, 1000)

In [9]:
gc.collect()

262

In [10]:
K.clear_session()
model = InceptionV3(include_top=True, weights='imagenet')

2022-10-05 13:06:52.902536: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2022-10-05 13:06:54.556558: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:04:00.0 name: NVIDIA GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2022-10-05 13:06:54.556881: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2022-10-05 13:06:54.559400: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-10-05 13:06:54.561513: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2022-10-05 13:06:54.561878: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcu

In [11]:
y_pred = model.predict(x_val, verbose=1)



2022-10-05 13:07:01.849003: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-10-05 13:07:01.996825: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7




In [12]:
for k in [1, 3, 5]:
    top_k_acc = top_k_accuracy(y_val_one_hot, y_pred, k)
    print(f"Top-{k} accuracy: {top_k_acc}")

Top-1 accuracy: 0.7146
Top-3 accuracy: 0.8612
Top-5 accuracy: 0.8976


In [13]:
# Get indices of correctly classified images
correct_indices = []
agreements = np.argmax(y_pred, axis=1) == np.argmax(y_val_one_hot, axis=1)
for x in range(len(agreements)):
    if agreements[x]:
        correct_indices.append(x)

In [14]:
RANDOM_SAMPLE_SIZE = 50
RANDOM_SEED = 1337

import random
random.seed(RANDOM_SEED)
sample_correct_indices = random.choices(correct_indices, k=RANDOM_SAMPLE_SIZE)

In [15]:
# plt.imshow(x_val_raw[458]/255)

In [16]:
# sample_indices = range(500)
y_val_one_hot_sample = y_val_one_hot[sample_correct_indices]
x_val_sample = x_val[sample_correct_indices]
x_val_raw_sample = x_val_raw[sample_correct_indices]

In [17]:
y_pred_sample = model.predict(x_val_sample, verbose=1)
assert top_k_accuracy(y_val_one_hot_sample, y_pred_sample, k=1) == 1



In [18]:
# Check there are enough correctly classified images
assert len(x_val_sample) == RANDOM_SAMPLE_SIZE

<h3> Import attack utils </h3>

In [19]:
from tensorflow.keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Dropout, MaxPooling2D, Dense, Flatten, BatchNormalization
import importlib
import tensorflow as tf

In [20]:
import sys
sys.path.append("/home/ailie/Repos/BBAttacks/attacks/")
sys.path.append("/home/ailie/Repos/BBAttacks/utils/")

import utils
from data_manager import load_data

# Black Box Attacks
import random_noise
import EvoStrategyUniformProbs
import SimbaWrapper

<h3>Evolutionary attack</h3>

In [46]:
evoba_params = {
    "gen_size": 3,
    "px_count": 4
}

In [47]:
importlib.reload(EvoStrategyUniformProbs)
perturbed_images = 0
adv_evo_strategy = {}
failed_indices = []

for index in tqdm(range(RANDOM_SAMPLE_SIZE)):
    img = x_val_raw_sample[index]
    label = np.argmax(y_val_one_hot_sample[index])
    
    adv_evo_strategy[index] = EvoStrategyUniformProbs.AdversarialPerturbationEvoStraegy(
        model=model,
        img=img,
        label=label,
        generation_size=evoba_params["gen_size"], 
        one_step_perturbation_pixel_count=evoba_params["px_count"],
        verbose=False,
        zero_one_scale=False,
        range_scale_int=True,
        preprocess = preprocess_input
    )
    
    no_steps = adv_evo_strategy[index].run_adversarial_attack(steps=10000)
    if adv_evo_strategy[index].is_perturbed() and no_steps > 0:
        perturbed_images += 1
    
    if not adv_evo_strategy[index].is_perturbed():
        failed_indices.append(index)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [10:20<00:00, 12.40s/it]


In [72]:
utils.generate_mlflow_logs(
    strategy_objects=adv_evo_strategy, 
    attack_type=utils.AttackType.EVOBA, 
    unperturbed_images=x_val_raw_sample, 
    run_name="EvoBA", 
    experiment_name="IMAGENET",
    additional_params=evoba_params,
)

Traceback (most recent call last):
  File "/home/ailie/anaconda3/envs/tf_env/lib/python3.7/site-packages/mlflow/store/tracking/file_store.py", line 261, in list_experiments
    experiment = self._get_experiment(exp_id, view_type)
  File "/home/ailie/anaconda3/envs/tf_env/lib/python3.7/site-packages/mlflow/store/tracking/file_store.py", line 344, in _get_experiment
    meta = read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "/home/ailie/anaconda3/envs/tf_env/lib/python3.7/site-packages/mlflow/utils/file_utils.py", line 175, in read_yaml
    raise MissingConfigException("Yaml file '%s' does not exist." % file_path)
mlflow.exceptions.MissingConfigException: Yaml file '/home/ailie/Repos/BBAttacks/mlruns/mlruns/meta.yaml' does not exist.


Ended previous run
Logging run EvoBA under experiment IMAGENET


'SUCCESS'

<Figure size 432x288 with 0 Axes>

In [70]:
evoba_stats = utils.get_evoba_stats(adv_evo_strategy)
utils.print_evoba_stats(evoba_stats)

In [None]:
# importlib.reload(EvoStrategy)
# # perturbed_images = 0
# # adv_evo_strategy = {}
# for index in range(50,100):
#     print()
#     print(index)
#     img = x_val_raw_sample[index]
# #     print(img)
#     label = np.argmax(y_val_one_hot_sample[index])
#     adv_evo_strategy[index] = EvoStrategy.AdversarialPerturbationEvoStraegy(
#         model=model,
#         img=img,
#         label=label,
#         generation_size=20, 
#         one_step_perturbation_pixel_count=1,
#         verbose=True,
#         zero_one_scale=False,
#         range_scale_int=True,
# #         max_rand=int(x_val.max()),
# #         min_rand=int(x_val.min())
#         preprocess = preprocess_input
#     )
#     no_steps = adv_evo_strategy[index].run_adversarial_attack(steps=1000)
#     if adv_evo_strategy[index].stop_criterion() and no_steps > 0:
#         perturbed_images += 1

In [29]:
for index in range(50):
    if not adv_evo_strategy[index].stop_criterion():
        print(index)

AttributeError: 'AdversarialPerturbationEvoStraegy' object has no attribute 'stop_criterion'

In [None]:
count_q = 0
sum_q = 0
query_list = []
fair_indices = []
for index in range(50):
    if(adv_evo_strategy[index].queries > 1):
        count_q +=1
        sum_q += adv_evo_strategy[index].queries
        query_list.append(adv_evo_strategy[index].queries)
        fair_indices.append(index)
        
print("Average queries for EVO algo:", sum_q/count_q)
print("Total count of perturbed images (classified correctly initially):", perturbed_images)

In [None]:
np.median(query_list)

In [None]:
np.mean(query_list)

In [None]:
plt.hist(query_list)

In [None]:
plt.rcParams.update({'font.size': 22})
plt.figure(figsize=(12,8))
plt.title("Query histogram for EvoBA(1, 15) on ImageNet", fontdict={"size":22})
plt.hist(query_list)
plt.show()

In [None]:
import math
from tqdm import tqdm
l0_dists = []
for index_diff in tqdm(fair_indices):
    diff = np.abs(adv_evo_strategy[index_diff].get_best_candidate() - x_val_raw_sample[index_diff])
#     diff = np.reshape(diff, (32, 32, 3))
    diff = (diff!=0)
    l0_dist = np.sum(diff)
    l0_dists.append(l0_dist)
#     print("L2 distance:", math.sqrt(np.sum(np.reshape(diff, (-1))**2)))
#     plt.imshow(np.reshape(adv_evo_strategy[index_diff].get_best_candidate(), (28, 28)))
#     plt.show()
#     print("Prediction:", model.predict(np.array([adv_evo_strategy[index_diff].get_best_candidate()])))

In [None]:
np.median(l0_dists)

In [None]:
plt.hist(queries)

In [None]:
np.median(l2_dists)/255

In [None]:
plt.rcParams.update({'font.size': 22})
plt.figure(figsize=(12,8))
plt.title("L0 histogram for EvoBA(1, 15) on ImageNet", fontdict={"size":22})
plt.hist(l0_dists)
plt.show()

In [None]:
import math
l2_dists = []
for index_diff in tqdm(fair_indices):
    diff = np.abs(adv_evo_strategy[index_diff].get_best_candidate() - x_val_raw_sample[index_diff])
#     diff = np.reshape(diff, (32, 32, 3))
    l2_dist = math.sqrt(np.sum(np.reshape(diff, (-1))**2))
    l2_dists.append(l2_dist)
#     print("L2 distance:", math.sqrt(np.sum(np.reshape(diff, (-1))**2)))
#     plt.imshow(np.reshape(adv_evo_strategy[index_diff].get_best_candidate(), (28, 28)))
#     plt.show()
#     print("Prediction:", model.predict(np.array([adv_evo_strategy[index_diff].get_best_candidate()])))

In [None]:
np.shape(x_val_sample[0])

In [None]:
np.mean(l2_dists)/255

In [None]:
np.mean(l2_dists)/(255 * 224 * 224 * 3)

In [None]:
w=10
h=10
fig=plt.figure(figsize=(20, 9))
fig.tight_layout()
plt.subplots_adjust(top = 0.99, bottom=0.01, hspace=0.1, wspace=0.2)

columns = 5
rows = 2
img_curr = 0
for i in range(1, columns + 1):
    img_indx = fair_indices[imgss[img_curr]]
    initial = (keras_idx_to_name[adv_evo_strategy[img_indx].label])
    if len(initial) > 30:
        initial = initial[:21] + "\n" + initial[21:]
    fig.add_subplot(rows, columns, i)
    plt.title(f"Original, \n {initial}", fontdict={"size":18})
    img_start = adv_evo_strategy[img_indx].img.astype(int)
    plt.imshow(img_start)
    
    fig.add_subplot(rows, columns, i + 5)
    
    img_final = adv_evo_strategy[img_indx].get_best_candidate().astype(int)
    predss = model.predict(np.expand_dims(img_final, axis=0))[0]
    predicted = np.argmax(predss)
    diff = math.sqrt(np.sum((img_final - img_start) **2))
    l2_distance = int((diff/(255)) * 100)/100
    l0_distance = (img_start != img_final).sum()
    final = (keras_idx_to_name[predicted])
    plt.title(f"Perturbed, \n {final} L2 distance:{l2_distance}\n L0 distance:{l0_distance}", fontdict={"size":18})
    plt.imshow(img_final)
    img_curr += 1
plt.show()

In [None]:
for index_diff in tqdm(np.array(fair_indices)[imgss]):
    print(index_diff)
    initial = (keras_idx_to_name[adv_evo_strategy[index_diff].label])
    img = adv_evo_strategy[index_diff].get_best_candidate().astype(int)
    plt.imshow(img.astype(int))
    predicted = (np.argmax(model.predict(np.expand_dims(img, axis=0))[0]))
    final = (keras_idx_to_name[predicted])
    plt.show()

In [None]:
importlib.reload(EvoStrategy)
from tqdm import tqdm
perturbed_images_bf = 0
adv_evo_strategy_bf = {}
for index in tqdm(range(len(x_val_sample))):
    if index % 10 == 0:
        verbose = True
    else:
        verbose = False
    img = x_val_sample[index]
    label = np.argmax(y_val_one_hot_sample[index])
    adv_evo_strategy_bf[index] = EvoStrategy.AdversarialPerturbationBFStraegy(
        model=model,
        img=img,
        label=label,
        generation_size=20, 
        one_step_perturbation_pixel_count=5,
        verbose=verbose
    )
    no_steps_bf = adv_evo_strategy_bf[index].run_adversarial_attack(steps=100)
    if adv_evo_strategy_bf[index].stop_criterion() and no_steps_bf > 0:
        perturbed_images_bf += 1
    adv_evo_strategy_bf[index].active_generation = []
    adv_evo_strategy_bf[index].fitness_scores = []
    gc.collect()

In [None]:
count_q_bf = 0
sum_q_bf = 0
for index in tqdm(range(len(x_val_sample))):
    if(adv_evo_strategy_bf[index].queries > 1):
        count_q_bf +=1
        sum_q_bf += adv_evo_strategy_bf[index].queries
        
print("Average queries for EVO algo:", sum_q_bf/count_q_bf)
print("Total count of perturbed images (classified correctly initially):", perturbed_images_bf)

In [None]:
671/len(x_val_sample)

In [None]:
import math
from tqdm import tqdm
l2_dists = []
for index_diff in tqdm(range(50)):
    diff = np.abs(adv_evo_strategy[index_diff].get_best_candidate() - x_val_sample[index_diff])
#     diff = np.reshape(diff, (32, 32, 3))
    l2_dist = math.sqrt(np.sum(np.reshape(diff, (-1))**2))
    l2_dists.append(l2_dist)
#     print("L2 distance:", math.sqrt(np.sum(np.reshape(diff, (-1))**2)))
#     plt.imshow(np.reshape(adv_evo_strategy[index_diff].get_best_candidate(), (28, 28)))
#     plt.show()
#     print("Prediction:", model.predict(np.array([adv_evo_strategy[index_diff].get_best_candidate()])))

In [None]:
np.shape(x_val_sample[0])

In [None]:
np.mean(l2_dists)/(255 * 224 * 224)

In [None]:
importlib.reload(EvoStrategy)
# TODO: add per channel perturbation, verify that it succeeds faster
# TODO: momentum approach
perturbed_images = 0
adv_evo_strategy = {}
for index in range(100):
    print()
    print(index)
    img = x_val_sample[index]
    label = np.argmax(y_val_one_hot_sample[index])
    adv_evo_strategy[index] = EvoStrategy.AdversarialPerturbationEvoStraegy(
        model=model,
        img=img,
        label=label,
        generation_size=20, 
        one_step_perturbation_pixel_count=10
    )
    no_steps = adv_evo_strategy[index].run_adversarial_attack(steps=50)
    if adv_evo_strategy[index].stop_criterion() and no_steps > 0:
        perturbed_images += 1

In [None]:
count_q = 0
sum_q = 0
for index in range(0,100):
    if(adv_evo_strategy[index].queries > 1):
        count_q +=1
        sum_q += adv_evo_strategy[index].queries
        
print("Average queries for EVO algo:", sum_q/count_q)
print("Total count of perturbed images (classified correctly initially):", perturbed_images)

In [None]:
importlib.reload(EvoStrategy)
import gc
# TODO: add per channel perturbation, verify that it succeeds faster
# TODO: momentum approach
perturbed_images = 0
adv_evo_strategy = {}
for index in range(100):
    print()
    print(index)
    img = x_val_sample[index]
    label = np.argmax(y_val_one_hot_sample[index])
    adv_evo_strategy[index] = EvoStrategy.AdversarialPerturbationEvoStraegy(
        model=model,
        img=img,
        label=label,
        generation_size=50, 
        one_step_perturbation_pixel_count=10
    )
    no_steps = adv_evo_strategy[index].run_adversarial_attack(steps=40)
    if adv_evo_strategy[index].stop_criterion() and no_steps > 0:
        perturbed_images += 1
    gc.collect()

In [None]:
count_q = 0
sum_q = 0
for index in range(0,100):
    if(adv_evo_strategy[index].queries > 1):
        count_q +=1
        sum_q += adv_evo_strategy[index].queries
        
print("Average queries for EVO algo:", sum_q/count_q)
print("Total count of perturbed images (classified correctly initially):", perturbed_images)

In [None]:
print("HERE")

In [None]:
importlib.reload(SimbaWrapper)
simba_wrapper = SimbaWrapper.SimbaWrapper(model, x_val_sample, y_val_one_hot_sample, 0.1, max_queries=2000, max_l0_distance=150)

In [None]:
simba_wrapper.run_simba()

In [None]:
simba_wrapper.X_modified[0].min()

In [None]:
x_val_sample[0].min()