#### The detection methods include feature squeezing (FS) and MagNet.

##### FS

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Activation, Dropout
import numpy as np
from art.attacks.evasion import DeepFool
from art.estimators.classification import KerasClassifier
from art.utils import load_dataset
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
from art.attacks.evasion import FastGradientMethod, BasicIterativeMethod, CarliniL2Method, CarliniLInfMethod, ProjectedGradientDescent, DeepFool, ThresholdAttack, PixelAttack, SpatialTransformation, SquareAttack, ZooAttack, BoundaryAttack, HopSkipJump

# Load the "mnist" dataset through ART's load_dataset helper. Besides the
# train/test pairs the call returns two more values, bound to min_ and max_
# (presumably the pixel value range -- confirm against the ART documentation).
(x_train, y_train), (x_test, y_test), min_, max_ = load_dataset(str("mnist"))
# Keep only the first 5000 training samples and the first 500 test samples.
x_train, y_train = x_train[:5000], y_train[:5000]
x_test, y_test = x_test[:500], y_test[:500]
# Shape of a single training sample.
im_shape = x_train[0].shape

In [None]:
from keras.models import load_model
# Load the previously saved Keras model from its .h5 file; every later cell
# uses this `model` object for predictions.
model = load_model("./adv_data/models/natural_attack_model.h5")
from scipy import ndimage
import numpy as np

def get_match_pred_vec(Y_pred, Y_label):
    """
    Compare predicted and reference classes sample by sample.
    :param Y_pred: per-class scores, one row per sample.
    :param Y_label: per-class labels, one row per sample; must have as many rows as Y_pred.
    :return: a boolean vector that is True where the arg-max along axis 1 of both rows agrees.
    """
    assert len(Y_pred) == len(Y_label)
    predicted, expected = (np.argmax(rows, axis=1) for rows in (Y_pred, Y_label))
    return predicted == expected


def calculate_accuracy(Y_pred, Y_label):
    """
    Fraction of samples whose predicted class matches the label class.
    :param Y_pred: per-class scores, one row per sample.
    :param Y_label: per-class labels, one row per sample.
    :return: number of matching rows (as decided by get_match_pred_vec) divided by len(Y_label).
    """
    hits = get_match_pred_vec(Y_pred, Y_label)
    return np.sum(hits) / float(len(Y_label))

def reduce_precision_py(x, npp):
    """
    Quantize image values to a fixed number of levels (numpy version).
    :param x: a float tensor, which has been scaled to [0, 1].
    :param npp: number of possible values per pixel, e.g. 256 for an 8-bit gray-scale image or 2 for a binarized image.
    :return: a tensor of the same shape whose values are rounded to the nearest of the npp levels.
    """
    # Zero counts as one of the npp values, so the highest integer level is npp - 1.
    top_level = npp - 1
    # Scale up, round to the nearest integer level, then scale back to [0, 1].
    return np.rint(x * top_level) / top_level

def bit_depth_py(x, bits):
    """
    Reduce an image tensor to the given bit depth.
    :param x: image tensor forwarded unchanged to reduce_precision_py.
    :param bits: number of bits per pixel; 2**bits value levels are kept.
    :return: whatever reduce_precision_py returns for x with 2**bits levels.
    """
    return reduce_precision_py(x, 2 ** bits)

def median_filter_py(x, width, height=-1):
    """
    Median smoothing by SciPy.
    :param x: a 4-D tensor of image(s); the window has size 1 along the first
        and last axes, so only axes 1 and 2 are smoothed (presumably a
        batch x rows x cols x channels layout -- confirm with the caller).
    :param width: the size of the sliding window along axis 1 (number of pixels)
    :param height: the size of the window along axis 2. The same as width by default (-1).
    :return: a modified tensor with the same shape as x.
    """
    if height == -1:
        height = width
    # Use the public scipy.ndimage entry point: the scipy.ndimage.filters
    # sub-namespace is deprecated and only kept as a compatibility shim.
    return ndimage.median_filter(x, size=(1, width, height, 1), mode='reflect')

def get_distance(model, dataset, X1):
    """
    Feature-squeezing score: the largest L1 gap between the model output on the
    raw input and its output on each squeezed version of the input.
    :param model: object exposing predict(); called once on X1 and once per squeezer.
    :param dataset: 'mnist' selects 1-bit depth reduction plus a width-2 median
        filter; any other value selects 5-bit depth reduction, a width-2 median
        filter and non_local_means_color_py(X1, 13, 3, 2).
        NOTE(review): non_local_means_color_py is not defined in the visible
        part of this file -- confirm it exists before using a non-mnist dataset.
    :param X1: batch of inputs to score.
    :return: for each sample, the maximum over squeezers of the absolute
        prediction difference summed over every axis except the first.
    """
    baseline = model.predict(X1)

    # Each squeezer is applied lazily, right before its prediction is taken.
    if dataset == 'mnist':
        squeezers = (
            lambda imgs: bit_depth_py(imgs, 1),
            lambda imgs: median_filter_py(imgs, 2),
        )
    else:
        squeezers = (
            lambda imgs: bit_depth_py(imgs, 5),
            lambda imgs: median_filter_py(imgs, 2),
            lambda imgs: non_local_means_color_py(imgs, 13, 3, 2),
        )
    squeezed_preds = [model.predict(squeeze(X1)) for squeeze in squeezers]

    # Sum over all axes but the first so one distance per sample remains.
    per_sample_axes = tuple(range(1, len(baseline.shape)))
    gaps = np.array([
        np.sum(np.abs(baseline - squeezed), axis=per_sample_axes)
        for squeezed in squeezed_preds
    ])
    return np.max(gaps, axis=0)

def train_fs(model, dataset, X1, train_fpr):
    """
    Choose the detection threshold from a set of legitimate examples.
    :param model: object exposing predict(), forwarded to get_distance.
    :param dataset: dataset name forwarded to get_distance.
    :param X1: examples whose feature-squeezing distances calibrate the threshold.
    :param train_fpr: fraction of X1 allowed to score above the threshold; must lie in [0, 1].
    :return: the distance found at rank ceil(len(X1) * (1 - train_fpr)) of the ascending-sorted distances.
    :raises ValueError: if train_fpr is outside [0, 1].
    """
    # Out-of-range rates would otherwise produce a wrapped or out-of-bounds index.
    if not 0 <= train_fpr <= 1:
        raise ValueError("train_fpr must be in [0, 1], got %r" % (train_fpr,))
    distances = get_distance(model, dataset, X1)
    selected_distance_idx = int(np.ceil(len(X1) * (1 - train_fpr)))
    # With train_fpr == 1 the rank is 0 and "rank - 1" would index -1, i.e.
    # silently pick the LARGEST distance; clamp to the smallest one instead.
    selected_distance_idx = max(selected_distance_idx, 1)
    threshold = sorted(distances)[selected_distance_idx - 1]
    print("Threshold value: %f" % threshold)
    return threshold

def test(model, dataset, X, threshold):
    """
    Run the detector on a batch of inputs.
    :param model: object exposing predict(), forwarded to get_distance.
    :param dataset: dataset name forwarded to get_distance.
    :param X: batch of inputs to classify as flagged / not flagged.
    :param threshold: distance above which (strictly) an input is flagged.
    :return: (flags, scores) -- the element-wise comparison scores > threshold and the scores from get_distance.
    """
    scores = get_distance(model, dataset, X)
    return scores > threshold, scores

In [None]:
import random
# Model outputs on the truncated legitimate test set and the resulting accuracy.
y_pred_all = model.predict(x_test)
accuracy_all = calculate_accuracy(y_pred_all, y_test)
print('Test accuracy on raw legitimate examples %.4f' % (accuracy_all))
# Keep only the test examples whose arg-max prediction equals the arg-max label.
inds_correct = np.where(y_pred_all.argmax(axis=1) == y_test.argmax(axis=1))[0]
X_test = x_test[inds_correct]
Y_test = y_test[inds_correct]
Y_pred = y_pred_all[inds_correct]
# Split the correctly classified examples in two: a random half (rounded down)
# for choosing the threshold, the remaining positions for evaluation.
# NOTE(review): no random seed is set in the visible code, so the split
# presumably changes from run to run -- confirm if reproducibility matters.
indx_train = random.sample(range(len(X_test)), int(len(X_test)/2))
indx_test = list(set(range(0, len(X_test)))-set(indx_train))
print("Number of correctly predict images: %s" % (len(inds_correct)))
x_train_0 = X_test[indx_train]
y_train_0 = Y_test[indx_train]
x_test_0 = X_test[indx_test]
y_test_0 = Y_test[indx_test]
# Compute the detection threshold from the calibration half of the correctly
# classified test data, with 0.05 passed as the train_fpr argument.
threshold = train_fs(model, "mnist", x_train_0, 0.05)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, roc_curve, auc

def get_tpr_fpr(true_labels, pred_labels):
    """
    Detection rates and raw counts for binary labels.
    :param true_labels: array of ground-truth labels (1 = positive, 0 = negative).
    :param pred_labels: array of predicted labels, same length as true_labels.
    :return: (tpr, fpr, TP, AP, FP, AN) where AP / AN are the numbers of actual
        positives / negatives; a rate is np.nan when its denominator is 0.
    """
    flagged = pred_labels == 1

    # Confusion-matrix counts for the flagged samples.
    TP = np.sum(np.logical_and(flagged, true_labels == 1))
    FP = np.sum(np.logical_and(flagged, true_labels == 0))

    # Totals of actual positives and actual negatives.
    AP = np.sum(true_labels)
    AN = np.sum(1 - true_labels)

    if AP > 0:
        tpr = TP / AP
    else:
        tpr = np.nan
    if AN > 0:
        fpr = FP / AN
    else:
        fpr = np.nan

    return tpr, fpr, TP, AP, FP, AN

def evalulate_detection_test(Y_detect_test, Y_detect_pred):
    """
    Summarize detector output against the ground truth.
    :param Y_detect_test: ground-truth detection labels.
    :param Y_detect_pred: predicted detection labels.
    :return: (accuracy, tpr, fpr, tp, ap, fp, an) -- the accuracy_score value
        followed by the six values returned by get_tpr_fpr, in that order.
    """
    overall_accuracy = accuracy_score(Y_detect_test, Y_detect_pred, normalize=True, sample_weight=None)
    rates_and_counts = get_tpr_fpr(Y_detect_test, Y_detect_pred)
    return (overall_accuracy,) + tuple(rates_and_counts)

x_adv_list = ["x_test_adv_fgsm.npy","x_test_adv_pgd.npy","x_test_adv_jsma.npy","x_test_adv_cw2.npy"]

# Class index of every held-out legitimate example. These rows line up
# one-to-one with x_test_0 and with X_test_adv built inside the loop.
y_test_0_class = y_test_0.argmax(axis=1)

# Evaluate the feature-squeezing detector on each stored adversarial set.
for adv_file in x_adv_list:
    # Aliases kept from the original cell; nothing in this loop reads them.
    Y_test_copy=Y_test
    X_test_copy=X_test
    y_test_copy=y_test_0
    x_test_copy=x_test_0
    # NOTE(review): the path is built by plain string concatenation, e.g.
    # "./adv_data/x_advx_test_adv_fgsm.npy" -- confirm whether a "/" separator
    # is missing between the directory and the file name.
    x_test_adv = np.load("./adv_data/x_adv" + adv_file)
    # Quantize to 256 levels, then apply the same two selections used to
    # build x_test_0 (correctly classified -> held-out half).
    X_test_adv = reduce_precision_py(x_test_adv, 256)
    X_test_adv = X_test_adv[inds_correct]
    X_test_adv = X_test_adv[indx_test]
    loss, acc_suc = model.evaluate(X_test_adv, y_test_0, verbose=0)
    X_test_adv_pred = model.predict(X_test_adv)
    # Compare against y_test_0, the labels aligned with X_test_adv. The full
    # y_test has a different number of rows, so comparing against it does not
    # yield a per-sample success/failure mask.
    adv_pred_class = X_test_adv_pred.argmax(axis=1)
    inds_success = np.where(adv_pred_class != y_test_0_class)[0]
    inds_fail = np.where(adv_pred_class == y_test_0_class)[0]
    # inds_fail holds positions inside the held-out half, so the complement
    # must be taken over that half as well.
    inds_all_not_fail = list(set(range(0, len(X_test_adv)))-set(inds_fail))
    X_test_adv_success = X_test_adv[inds_success]
    Y_test_success = y_test_0[inds_success]
    X_test_adv_fail = X_test_adv[inds_fail]
    Y_test_fail = y_test_0[inds_fail]
    # prepare X and Y for detectors: legitimate examples are labelled False,
    # adversarial examples True.
    X_all = np.concatenate([x_test_0, X_test_adv])
    Y_all = np.concatenate([np.zeros(len(x_test_0), dtype=bool), np.ones(len(X_test_adv), dtype=bool)])
    X_success = np.concatenate([x_test_0[inds_success], X_test_adv_success])
    Y_success = np.concatenate([np.zeros(len(inds_success), dtype=bool), np.ones(len(inds_success), dtype=bool)])
    X_fail = np.concatenate([x_test_0[inds_fail], X_test_adv_fail])
    Y_fail = np.concatenate([np.zeros(len(inds_fail), dtype=bool), np.ones(len(inds_fail), dtype=bool)])
    # Run the detector on the combined set and report AUC, accuracy and FPR.
    Y_all_pred, Y_all_pred_score = test(model, "mnist", X_all, threshold)
    acc_all, tpr_all, fpr_all, tp_all, ap_all, fb_all, an_all = evalulate_detection_test(Y_all, Y_all_pred)
    fprs_all, tprs_all, thresholds_all = roc_curve(Y_all, Y_all_pred_score)
    roc_auc_all = auc(fprs_all, tprs_all)
    print("AUC: {:.4f}%, Overall accuracy: {:.4f}%, FPR value: {:.4f}%".format(100*roc_auc_all, 100*acc_all, 100*fpr_all))