In [None]:
# Mount Google Drive on the Colab runtime (this asks for an authorization code).
# More details here: https://colab.research.google.com/notebooks/io.ipynb
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install the Adversarial Robustness Toolbox (ART), used below to craft the attacks.
# The runtime needs to be restarted after the first install.
# NOTE(review): the package version is not pinned; consider pinning it so reruns are reproducible.
!pip install adversarial-robustness-toolbox



In [None]:
# import library
import numpy as np
import pandas as pd
import random

import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import os
import cv2
from google.colab.patches import cv2_imshow
from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

from art.estimators.classification import KerasClassifier
from art.attacks.evasion import FastGradientMethod,ProjectedGradientDescent,DeepFool

# check GPU is available
print("GPUs Available: ", tf.config.list_physical_devices('GPU'))

# Seed every random number generator used in this notebook
# (Python `random`, NumPy, and both the TF1-compat and TF2 TensorFlow seeds).
random.seed(1)
np.random.seed(1)
tf.compat.v1.set_random_seed(1)
tf.random.set_seed(1)
# Session configuration: a single intra-op and a single inter-op thread
# (presumably to reduce run-to-run nondeterminism), with device placement logged.
config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1,log_device_placement =True)
# Request on-demand GPU memory growth via the allow_growth option.
config.gpu_options.allow_growth = True
# Create a TF1-style session on the default graph and register it with the Keras backend.
sess = tf.compat.v1.Session(graph = tf.compat.v1.get_default_graph(), config = config)
tf.compat.v1.keras.backend.set_session(sess)
# Switch TensorFlow to graph (non-eager) mode.
# NOTE(review): this is called after the session is created; confirm the ordering is intentional.
tf.compat.v1.disable_eager_execution()

GPUs Available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0



In [None]:
# Path to the directory containing the project files (CHANGE THIS PATH TO THE DIRECTORY ON YOUR COMPUTER OR GOOGLE DRIVE).
# NOTE(review): this is a relative path while Drive is mounted at '/content/drive',
# so it presumably only resolves when the working directory is /content - confirm.
PROJECT_ROOT_DIR = 'drive/My Drive/CS504(AML)/HW1/BUSI/'

# Path (relative to PROJECT_ROOT_DIR) of the directory containing the dataset.
# Download the BUSI dataset here: https://scholar.cu.edu.eg/?q=afahmy/pages/dataset
DATA_DIR = 'Dataset_BUSI_with_GT/'

# Load the BUSI images and labels

In [None]:
# Function for loading the dataset
# reference: https://www.pyimagesearch.com/2018/09/10/keras-tutorial-how-to-get-started-with-keras-deep-learning-and-python/
def _load_class_images(class_name, n_images, n_aside, size=(224, 224)):
  """Load one class folder and split it into kept and set-aside images.

  Reads '<class_name> (1).png' ... '<class_name> (n_images).png' from
  PROJECT_ROOT_DIR + DATA_DIR + '<class_name>/', resizes each image to `size`,
  then sets aside `n_aside` randomly chosen images (seeded with 1).

  Returns:
    (kept, aside): two lists of images, each in ascending file-index order.

  Raises:
    FileNotFoundError: if an image cannot be read from disk.
  """
  class_dir = PROJECT_ROOT_DIR + DATA_DIR + class_name + '/'
  images = []
  for i in range(1, n_images + 1):
    path = class_dir + class_name + ' (' + str(i) + ').png'
    image = cv2.imread(path)
    # cv2.imread signals failure by returning None; fail here with the file
    # name instead of letting cv2.resize crash on an empty input.
    if image is None:
      raise FileNotFoundError('Could not read image: ' + path)
    images.append(cv2.resize(image, size))

  # Re-seed before every draw so the aside selection is reproducible per class.
  random.seed(1)
  aside_indices = set(random.sample(range(len(images)), n_aside))

  kept, aside = [], []
  for index, image in enumerate(images):
    (aside if index in aside_indices else kept).append(image)
  return kept, aside


def load_image():
  """Load the BUSI images and split off the adversarial-attack ("aside") set.

  120 images are set aside: 60 benign, 30 malignant and 30 normal. All images
  are resized to 224 x 224 (3 channels as returned by cv2.imread).

  Returns:
    data, labels, data_aside, labels_aside: lists of images and of their
    string class labels ('benign', 'malignant', 'normal'), ordered by class
    and then by file index.

  Raises:
    FileNotFoundError: if any expected image file cannot be read.
  """
  # (class folder / label, number of images in the folder, number to set aside)
  class_specs = (
      ('benign', 437, 60),
      ('malignant', 210, 30),
      ('normal', 133, 30),
  )

  data, labels = [], []
  data_aside, labels_aside = [], []
  for class_name, n_images, n_aside in class_specs:
    kept, aside = _load_class_images(class_name, n_images, n_aside)
    data.extend(kept)
    labels.extend([class_name] * len(kept))
    data_aside.extend(aside)
    labels_aside.extend([class_name] * len(aside))

  return data, labels, data_aside, labels_aside
# Function for image preprocessing 
def preprocess(data,labels,data_aside, labels_aside):
  """Convert images and string labels to NumPy arrays ready for the models.

  Args:
    data: list of images for the train/validation/test pool.
    labels: list of string class labels matching `data`.
    data_aside: list of images set aside for the adversarial attack.
    labels_aside: list of string class labels matching `data_aside`.

  Returns:
    data, targets, data_aside, targets_aside: float32 image arrays divided by
    255.0 and integer-encoded label arrays. Both label arrays are encoded
    with the same encoder, fitted on `labels`, so the integer codes agree
    (for the BUSI classes: 0 = 'benign', 1 = 'malignant', 2 = 'normal').
  """
  # Scale image features to [0, 1] (assumes 8-bit pixel values).
  data = np.array(data, dtype = np.float32) / 255.0
  data_aside = np.array(data_aside, dtype = np.float32) / 255.0

  # One encoder for both label sets keeps the class -> integer mapping consistent.
  label_encoder = preprocessing.LabelEncoder()
  label_encoder.fit(np.array(labels))
  targets = label_encoder.transform(np.array(labels))
  targets_aside = label_encoder.transform(np.array(labels_aside))
  return data, targets, data_aside, targets_aside

In [None]:
# Load the images and labels, then scale the images and integer-encode the labels
data, labels, data_aside, labels_aside = load_image()
data, labels, data_aside, labels_aside = preprocess(data,labels,data_aside, labels_aside)

# Split the (non-aside) data into 80% train+validation and 20% test, shuffling with a fixed seed
(imgs_train, imgs_test, labels_train, labels_test) = train_test_split(data, labels, test_size = 0.2, random_state=42, shuffle = True)
# Split the remaining 80% again with test_size = 0.2: about 64% of the data for training and 16% for validation
(imgs_train, imgs_val, labels_train, labels_val) = train_test_split(imgs_train, labels_train, test_size = 0.2, random_state=42, shuffle = True)

In [None]:
# Report the shapes of the train, validation, test and aside sets
for shape_template, image_array, label_array in (
    ('Images train shape: {} - Labels train shape: {}', imgs_train, labels_train),
    ('Images validation shape: {} - Labels validation shape: {}', imgs_val, labels_val),
    ('Images test shape: {} - Labels test shape: {}', imgs_test, labels_test),
    ('Aside images shape: {} - Aside Labels shape: {}', data_aside, labels_aside),
):
  print(shape_template.format(image_array.shape, label_array.shape))

# Report the pixel statistics of the training images (they should lie in [0, 1])
for stat_caption, stat_value in (
    ('\nMax pixel value', np.max(imgs_train)),
    ('Min pixel value', np.min(imgs_train)),
    ('Average pixel value', np.mean(imgs_train)),
    ('Data type', imgs_train[0].dtype),
):
  print(stat_caption, stat_value)

Images train shape: (422, 224, 224, 3) - Labels train shape: (422,)
Images validation shape: (106, 224, 224, 3) - Labels validation shape: (106,)
Images test shape: (132, 224, 224, 3) - Labels test shape: (132,)
Aside images shape: (120, 224, 224, 3) - Aside Labels shape: (120,)

Max pixel value 1.0
Min pixel value 0.0
Average pixel value 0.32330388
Data type float32


In [None]:
# Set constants (BUSI)
NUM_LABELS = 3                             # Number of class labels (used for the one-hot encoding)
BATCH_SIZE = 4                             # Size of batch
HEIGHT = 224                                 # Height of input image (pixels)
WIDTH = 224                                  # Width of input image (pixels)
N_CHANNEL = 3                               # Number of image channels
OUTPUT_DIM = 3                             # Number of output dimensions

# Set training hyperparameters
NUM_EPOCH = 100                             # Number of epochs to train
LR = 0.01                                 # Learning rate (passed to the SGD optimizer)

INPUT_SHAPE = (HEIGHT, WIDTH, N_CHANNEL)  # Input shape of model
IMG_SHAPE = (HEIGHT, WIDTH, N_CHANNEL)    # Same value as INPUT_SHAPE

In [None]:
# Convert the integer labels to one-hot encoding (to input to the models).
def _one_hot_once(label_array):
  """One-hot encode a 1-D integer label array; return other arrays unchanged.

  The ndim guard makes this cell safe to re-run: without it, a second run
  would one-hot encode the already encoded (N, NUM_LABELS) labels again.
  """
  if np.ndim(label_array) == 1:
    return keras.utils.to_categorical(label_array, NUM_LABELS)
  return label_array

labels_train = _one_hot_once(labels_train)
labels_test = _one_hot_once(labels_test)
labels_val = _one_hot_once(labels_val)
labels_aside = _one_hot_once(labels_aside)
print('Labels train shape: {}'.format(labels_train.shape))

Labels train shape: (422, 3)


# Load ResNet50 model

In [None]:
# Load the saved ResNet50 model from the project directory
model = keras.models.load_model(PROJECT_ROOT_DIR + 'BUSI_dataset_resnet50.h5')
# NOTE(review): the shape passed to build() has no batch dimension and the model
# comes from a saved file - confirm whether this call is needed at all.
model.build((224,224,3))
# SGD optimizer with the learning rate LR
optimizer = keras.optimizers.SGD(learning_rate=LR)
# Compile the loaded model with categorical cross-entropy loss and an accuracy metric
model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])

In [None]:
# Evaluate the ResNet50 model on the clean (unperturbed) aside images;
# `results` holds [loss, accuracy] as configured in compile().
results = model.evaluate(data_aside, labels_aside)
print("orignial resnet50 aside test loss, test acc:", results)

  updates = self.state_updates


orignial resnet50 aside test loss, test acc: [0.9014058637122313, 0.85]


# Create and apply the DeepFool attack with ART on ResNet50 (using the 120 aside images)

In [None]:
# Wrap the Keras model in an ART KerasClassifier so ART attacks can be run against it.
# clip_values=(0, 1) matches the [0, 1] pixel scaling applied in preprocess().
# use_logits=False: presumably the model outputs probabilities rather than logits - confirm.
classifier = KerasClassifier(model = model,clip_values=(0, 1), use_logits=False)

In [None]:
# Create an ART DeepFool attack against the wrapped ResNet50 classifier.
attack_df = DeepFool(classifier = classifier, max_iter = 100, epsilon = 0.1,verbose = False)
# Generate adversarial versions of the aside images
# (despite its name, x_test_adv is derived from data_aside, not from imgs_test).
x_test_adv = attack_df.generate(data_aside)
# Evaluate the ResNet50 model on the adversarial aside images.
loss_test, accuracy_test = model.evaluate(x_test_adv, labels_aside)
# Average perturbation = mean absolute difference between adversarial and clean pixel values.
perturbation = np.mean(np.abs((x_test_adv - data_aside)))
print('Accuracy on adversarial test data: {:4.2f}%'.format(accuracy_test * 100))
print('Average perturbation: {:4.2f}'.format(perturbation))

  updates=self.state_updates,


Accuracy on adversarial test data: 16.67%
Average perturbation: 0.15


# Apply the adversarial aside examples to classical ML models (transferability test)

In [None]:
# Flatten every image set (train, test, clean aside, DeepFool aside) into
# (n_samples, 224*224*3) feature rows; each result is an independent copy,
# so the original image arrays are left untouched.
x_train, x_test, x_aside, x_df = (
    image_set.copy().reshape(image_set.shape[0], 224*224*3)
    for image_set in (imgs_train, imgs_test, data_aside, x_test_adv)
)

In [None]:
# K-nearest neighbours (k = 3) trained on the flattened pixel features
from sklearn.neighbors import KNeighborsClassifier

neigh = KNeighborsClassifier(n_neighbors = 3)
neigh.fit(x_train, np.argmax(labels_train,axis = 1))

# Score on the clean test set, the clean aside set and the DeepFool aside set;
# the one-hot labels are converted back to class indices with argmax.
for split_caption, split_features, split_onehot in (
    ('test accuracy: ', x_test, labels_test),
    ('aside accuracy: ', x_aside, labels_aside),
    ('Deepfool aside accuracy: ', x_df, labels_aside),
):
  print(split_caption, neigh.score(split_features, np.argmax(split_onehot, axis = 1)))

test accuracy:  0.5606060606060606
aside accuracy:  0.5416666666666666
Deepfool aside accuracy:  0.49166666666666664


In [None]:
# Decision tree trained on the flattened pixel features
from sklearn import tree

tr = tree.DecisionTreeClassifier()
tr.fit(x_train, np.argmax(labels_train,axis = 1))

# Score on the clean test set, the clean aside set and the DeepFool aside set;
# the one-hot labels are converted back to class indices with argmax.
for split_caption, split_features, split_onehot in (
    ('test accuracy: ', x_test, labels_test),
    ('aside accuracy: ', x_aside, labels_aside),
    ('Deepfool aside accuracy: ', x_df, labels_aside),
):
  print(split_caption, tr.score(split_features, np.argmax(split_onehot, axis = 1)))

test accuracy:  0.48484848484848486
aside accuracy:  0.525
Deepfool aside accuracy:  0.4083333333333333


In [None]:
# Logistic regression trained on the flattened pixel features
from sklearn.linear_model import LogisticRegression

# Only the non-default settings are passed. The other arguments previously
# spelled out were all library defaults, including `multi_class`, which newer
# scikit-learn releases deprecate; omitting them keeps the same model while
# staying compatible across versions.
lr = LogisticRegression(class_weight='balanced', random_state=0, solver='newton-cg')
lr.fit(x_train, np.argmax(labels_train,axis = 1))
# One-hot labels are converted back to class indices with argmax before scoring.
print('test accuracy: ',lr.score(x_test,np.argmax(labels_test,axis = 1)))
print('aside accuracy: ',lr.score(x_aside,np.argmax(labels_aside,axis = 1)))
print('Deepfool aside accuracy: ',lr.score(x_df, np.argmax(labels_aside,axis = 1)))

test accuracy:  0.6439393939393939
aside accuracy:  0.6166666666666667
Deepfool aside accuracy:  0.5833333333333334


In [None]:
# Support vector machine (default SVC settings) trained on the flattened pixel features
from sklearn import svm

svm_1 = svm.SVC()
svm_1.fit(x_train, np.argmax(labels_train,axis = 1))

# Score on the clean test set, the clean aside set and the DeepFool aside set;
# the one-hot labels are converted back to class indices with argmax.
for split_caption, split_features, split_onehot in (
    ('test accuracy: ', x_test, labels_test),
    ('aside accuracy: ', x_aside, labels_aside),
    ('Deepfool aside accuracy: ', x_df, labels_aside),
):
  print(split_caption, svm_1.score(split_features, np.argmax(split_onehot, axis = 1)))

test accuracy:  0.6742424242424242
aside accuracy:  0.6083333333333333
Deepfool aside accuracy:  0.5416666666666666


In [None]:
# Naive Bayes (Bernoulli variant) trained on the flattened pixel features
# NOTE(review): the features are continuous values in [0, 1]; confirm that a
# Bernoulli model (rather than e.g. a Gaussian one) is the intended choice.
from sklearn.naive_bayes import BernoulliNB

nb = BernoulliNB()
nb.fit(x_train, np.argmax(labels_train,axis = 1))

# Score on the clean test set, the clean aside set and the DeepFool aside set;
# the one-hot labels are converted back to class indices with argmax.
for split_caption, split_features, split_onehot in (
    ('test accuracy: ', x_test, labels_test),
    ('aside accuracy: ', x_aside, labels_aside),
    ('Deepfool aside accuracy: ', x_df, labels_aside),
):
  print(split_caption, nb.score(split_features, np.argmax(split_onehot, axis = 1)))

test accuracy:  0.5757575757575758
aside accuracy:  0.4666666666666667
Deepfool aside accuracy:  0.275


In [None]:
# Bagging classifier: an ensemble of 10 SVCs trained on the flattened pixel features
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier

# The base model is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases (and the old
# name later removed), while the first positional parameter works with both.
bag = BaggingClassifier(SVC(), n_estimators=10, random_state=0)
bag.fit(x_train, np.argmax(labels_train,axis = 1))
# One-hot labels are converted back to class indices with argmax before scoring.
print('test accuracy: ',bag.score(x_test,np.argmax(labels_test,axis = 1)))
print('aside accuracy: ',bag.score(x_aside,np.argmax(labels_aside,axis = 1)))
print('Deepfool aside accuracy: ',bag.score(x_df, np.argmax(labels_aside,axis = 1)))

test accuracy:  0.6893939393939394
aside accuracy:  0.6
Deepfool aside accuracy:  0.5416666666666666


In [None]:
# Random forest (trees limited to depth 2) trained on the flattened pixel features
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(max_depth=2, random_state=0)
rf.fit(x_train, np.argmax(labels_train,axis = 1))

# Score on the clean test set, the clean aside set and the DeepFool aside set;
# the one-hot labels are converted back to class indices with argmax.
for split_caption, split_features, split_onehot in (
    ('test accuracy: ', x_test, labels_test),
    ('aside accuracy: ', x_aside, labels_aside),
    ('Deepfool aside accuracy: ', x_df, labels_aside),
):
  print(split_caption, rf.score(split_features, np.argmax(split_onehot, axis = 1)))

test accuracy:  0.6515151515151515
aside accuracy:  0.575
Deepfool aside accuracy:  0.525


In [None]:
# Extra-trees ensemble (100 trees) trained on the flattened pixel features
from sklearn.ensemble import ExtraTreesClassifier

ec =  ExtraTreesClassifier(n_estimators=100, random_state=0)
ec.fit(x_train, np.argmax(labels_train,axis = 1))

# Score on the clean test set, the clean aside set and the DeepFool aside set;
# the one-hot labels are converted back to class indices with argmax.
for split_caption, split_features, split_onehot in (
    ('test accuracy: ', x_test, labels_test),
    ('aside accuracy: ', x_aside, labels_aside),
    ('Deepfool aside accuracy: ', x_df, labels_aside),
):
  print(split_caption, ec.score(split_features, np.argmax(split_onehot, axis = 1)))

test accuracy:  0.7348484848484849
aside accuracy:  0.6666666666666666
Deepfool aside accuracy:  0.5


In [None]:
# Gradient boosting (100 depth-1 stages, learning rate 1.0) trained on the flattened pixel features
from sklearn.ensemble import GradientBoostingClassifier

gb = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
gb.fit(x_train, np.argmax(labels_train,axis = 1))

# Score on the clean test set, the clean aside set and the DeepFool aside set;
# the one-hot labels are converted back to class indices with argmax.
for split_caption, split_features, split_onehot in (
    ('test accuracy: ', x_test, labels_test),
    ('aside accuracy: ', x_aside, labels_aside),
    ('Deepfool aside accuracy: ', x_df, labels_aside),
):
  print(split_caption, gb.score(split_features, np.argmax(split_onehot, axis = 1)))

test accuracy:  0.6287878787878788
aside accuracy:  0.6166666666666667
Deepfool aside accuracy:  0.39166666666666666


In [None]:
# Load the saved MobileNet model. It lives in a different Drive directory
# (SAVE_ROOT_DIR) than the ResNet50 model loaded from PROJECT_ROOT_DIR.
SAVE_ROOT_DIR = 'drive/My Drive/CS504(AML)/Course Project/'
model_MobileNet = keras.models.load_model(SAVE_ROOT_DIR + 'BUSI_dataset_MobileNet.h5')
# NOTE(review): the shape passed to build() has no batch dimension and the model
# comes from a saved file - confirm whether this call is needed at all.
model_MobileNet.build((224,224,3))
# Adam optimizer (this rebinds the `optimizer` name used earlier for the ResNet50 SGD optimizer)
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
# Compile the loaded model with categorical cross-entropy loss and an accuracy metric
model_MobileNet.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])
# Evaluate MobileNet on the DeepFool adversarial aside images that were crafted
# against ResNet50 (x_test_adv); `results` holds [loss, accuracy].
results = model_MobileNet.evaluate(x_test_adv, labels_aside)
print("Deepfool MobileNet aside test loss, test acc:", results)

  updates = self.state_updates


Deepfool MobileNet aside test loss, test acc: [6.492273410161336, 0.55]
