In [None]:
#import the required libraries

import numpy as np
import tensorflow as tf
import cv2
from matplotlib import pyplot as plt
from glob import glob 
import os
from sklearn.model_selection import train_test_split
from tensorflow import GradientTape
from tensorflow.train import Checkpoint, CheckpointManager
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.layers import Dense, Convolution2D, MaxPool2D, Flatten, BatchNormalization, Dropout, Input
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
import datetime
import sklearn.metrics.pairwise as pairwise
from tensorflow.keras.utils import to_categorical
import random
from itertools import combinations
from IPython.display import display

In [None]:
from utilities import dir_transformation, encoder_dir_transformation

In [None]:
# Input resolution shared by the malware classifier and the similarity encoder:
# w1 is the image width and h2 the image height (both in pixels).
w1 = 224
h2 = 224

In [None]:
# Sorted file listing of the malware samples (sorting keeps the order stable between runs).
malware_dir = 'Dataset/Malware'
malware_add = sorted(glob(malware_dir + '/*'))

# Sorted file listing of the benign samples.
benign_dir = 'Dataset/Benign'
benign_add = sorted(glob(benign_dir + '/*'))

In [None]:
# Use dir_transformation from utilities.py to load every sample at the fixed size w1 x h2.
malware_images = dir_transformation(malware_add, w1, h2)
benign_images = dir_transformation(benign_add, w1, h2)

# Benign samples come first, malware samples second (the labels below rely on this order).
data = np.array(benign_images + malware_images)

# One-hot labels: [1, 0] marks a benign sample, [0, 1] a malware sample.
labels = np.array([[1, 0] for _ in range(len(benign_images))] + [[0, 1] for _ in range(len(malware_images))])

# Shuffle the dataset with a fixed seed and split it into train and test sets (20% test set).
idx = np.random.RandomState(seed=42).permutation(data.shape[0])
data, labels = data[idx], labels[idx]
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=42)

# The shuffle buffer spans the whole training set, so every epoch is shuffled uniformly
# whatever the dataset size (the buffer used to be hard-coded to 1102 samples).
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels)).shuffle(len(train_data)).batch(150)
test_dataset =  tf.data.Dataset.from_tensor_slices((test_data, test_labels)).batch(256)

In [None]:
#The function that returns an instance of the malware classifier
def classifier():
  """Build the (uncompiled) malware classifier.

  The network stacks five stages of a 3x3 'same' convolution with ReLU followed
  by a 3x3 max-pooling with stride 2, using 8, 4, 2, 2 and 2 filters, and ends
  with a flatten layer and a 2-unit softmax layer. It expects single-channel
  inputs of shape [h2, w1, 1].

  Returns:
    A new Keras Sequential model.
  """
  net = Sequential()

  for stage, n_filters in enumerate((8, 4, 2, 2, 2), start = 1):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': [h2, w1, 1]} if stage == 1 else {}
    net.add(Convolution2D(filters = n_filters, kernel_size = 3, padding = 'same', activation = 'relu', name = f'conv_{stage}', **first_layer_kwargs))
    net.add(MaxPool2D(pool_size = (3, 3), strides = (2, 2), padding = 'same', name = f'maxpool_{stage}'))

  net.add(Flatten(name = 'flatten'))
  net.add(Dense(2, activation = 'softmax', name = 'softmax'))

  return net

In [None]:
# Create an instance of the classifier and compile it with the Adam optimizer (default settings)
# and the categorical cross-entropy loss; accuracy is the only tracked metric.
model = classifier()
optimizer = Adam()
loss = CategoricalCrossentropy()
model.compile(optimizer = optimizer, loss = loss, metrics = ['accuracy'])

In [None]:
# Warm-start from the weights of a previous run when they exist. On a fresh checkout the file
# has not been written yet (it is only saved after training), so loading it unconditionally
# would abort a "Restart & Run All".
classifier_weights_path = 'models/classifier_weights.h5'
if os.path.exists(classifier_weights_path):
    model.load_weights(classifier_weights_path)

In [None]:
# Print a layer-by-layer summary of the classifier architecture.
model.summary()

In [None]:
# Train the classifier for 30 epochs; the held-out test split is also used as validation data.
model.fit(train_dataset, validation_data = test_dataset, epochs = 30)

In [None]:
# Persist the trained classifier weights, creating the output directory when needed.
# makedirs(exist_ok=True) replaces the exists()-then-mkdir() pair and is free of its race.
os.makedirs('models', exist_ok=True)

model.save_weights('models/classifier_weights.h5')

In [None]:
# Generate a copy of the classifier, set its optimizer and loss functions and load the weights of the
# initial classifier into it. clone_model() only copies the architecture, hence the explicit set_weights().
# Note: `optimizer` and `loss` are rebound here to fresh instances for the copy.
model_copy = tf.keras.models.clone_model(model)
optimizer = Adam()
loss = CategoricalCrossentropy()
model_copy.compile(optimizer = optimizer, loss = loss, metrics = ['accuracy'])
model_copy.set_weights(model.get_weights())

In [None]:
# Prepare the dataset for training the encoder. Unlike the classifier data, only the width w1 is fixed
# for every sample here, so each sample may have a different height.
# Note: this rebinds malware_images / benign_images, replacing the fixed-size versions loaded earlier.
malware_images = encoder_dir_transformation(malware_add, w1)
benign_images = encoder_dir_transformation(benign_add, w1)

In [None]:
#Prepare x_i.
# The encoder samples keep their own heights, so they cannot be stacked into a regular array.
# A 1-D object array is filled explicitly because np.array() on a ragged list raises ValueError
# on NumPy >= 1.24.
encoder_images = benign_images + malware_images
data_i = np.empty(len(encoder_images), dtype=object)
for sample_idx, image in enumerate(encoder_images):
  data_i[sample_idx] = image

# Integer class labels: 0 = benign, 1 = malware (benign samples come first in data_i).
data_l = np.array([0 for _ in range(len(benign_images))] + [1 for _ in range(len(malware_images))])
i_height = np.array([i.shape[0] for i in data_i])

# Shuffle with a fixed seed, then split into train and test sets (20% test set).
idx = np.random.RandomState(seed=42).permutation(data_i.shape[0])
data_i, data_l, i_height = data_i[idx], data_l[idx], i_height[idx]
train_data, test_data, train_labels, test_labels = train_test_split(data_i, data_l, test_size=0.2, random_state=42)

# Per-split lookup tables used by the transformation and training functions.
# The heights are recomputed from each split so that they stay aligned with its samples.
data_dict = {'train': train_data, 'test': test_data}
label_dict = {'train': train_labels, 'test': test_labels}
height_dict = {'train': np.array([i.shape[0] for i in train_data]), 'test': np.array([i.shape[0] for i in test_data])}

In [None]:
#The function that performs the swapping transformation.
def swap_transform(train_or_test, i, i_idx, portion_swap_pct):
  """Replace a random band of rows of `i` with rows taken from another sample of the same class.

  Args:
    train_or_test: 'train' or 'test'; selects the split in data_dict, label_dict and height_dict.
    i: The sample to transform; rows are indexed along axis 0. It is not modified.
    i_idx: Position of `i` inside the selected split, used to exclude `i` itself as donor.
    portion_swap_pct: Fraction of the rows of `i` that is replaced.

  Returns:
    A copy of `i` in which ceil(rows * portion_swap_pct) consecutive rows are overwritten with
    the same number of consecutive rows of a randomly chosen donor sample.

  Raises:
    ValueError: If no other sample of the same class has enough rows to donate the band.
  """
  split_labels = label_dict[train_or_test]

  # Number of rows to swap, as a plain int so it can be used directly for slicing.
  portion_length = int(np.ceil(i.shape[0] * portion_swap_pct))

  # Donor candidates: same class as i, not i itself, and at least portion_length rows tall.
  same_class = split_labels == split_labels[i_idx]
  same_class[i_idx] = False
  tall_enough = height_dict[train_or_test] >= portion_length
  p_idx = np.where(same_class & tall_enough)[0]

  if p_idx.shape[0] == 0:
    # Fail with an explicit message instead of an opaque error from np.random.choice on an empty array.
    raise ValueError(f'0 data with shape greater than {portion_swap_pct} of i')
  p_idx = np.random.choice(p_idx, 1)[0]

  swap_sample = data_dict[train_or_test][p_idx]
  # Random start rows of the band in the donor and in i; [0] extracts the scalar from the
  # 1-element result before the int conversion.
  swap_start_pointer = int(np.random.choice(np.arange(swap_sample.shape[0] - portion_length + 1), 1)[0])
  i_start_pointer = int(np.random.choice(np.arange(i.shape[0] - portion_length + 1), 1)[0])

  p = np.copy(i)
  p[i_start_pointer:i_start_pointer + portion_length, ...] = swap_sample[swap_start_pointer:swap_start_pointer + portion_length, ...]

  return p

#The function that performs the noise replacement option.
def noisy_transform(i, noise_pct):
  """Return a copy of `i` in which a random fraction of the entries is replaced by uniform noise.

  Args:
    i: Input array; it is not modified.
    noise_pct: Fraction of the entries to overwrite. int(i.size * noise_pct) distinct
      positions are drawn without replacement.

  Returns:
    An array with the shape of `i` whose selected entries hold values drawn with
    np.random.uniform(low = 0., high = 1.).
  """
  n_noisy = int(i.size * noise_pct)
  noisy_positions = np.random.choice(np.arange(i.size), replace=False, size=n_noisy)

  # Work on a flattened copy so that positions can be addressed with a single index.
  flat = np.copy(i).ravel()
  flat[noisy_positions] = np.random.uniform(low = 0., high = 1., size = noisy_positions.size)
  return flat.reshape(i.shape)

In [None]:
#The function to generate x_i, x_p, x_j and x_n in each transformation.

def batch_transform(train_or_test, data_batch, batch_idx):
  """Build the four aligned batches (x_i, x_p, x_j, x_n) for one mini-batch.

  For every sample i of the batch:
    * p is a transformed version of i (swap or noise, selected by the module-level
      hyperparameters portion_swap_pct and noise_pct),
    * j is a random sample of the split and n a random sample whose label differs from j's.
  All four images are resized to (w1, h2) and receive a trailing channel axis.

  Args:
    train_or_test: 'train' or 'test'; selects the split in data_dict / label_dict.
    data_batch: The samples of the mini-batch.
    batch_idx: Positions of these samples inside the split, aligned with data_batch.

  Returns:
    Four arrays (batch_i, batch_p, batch_j, batch_n) with one entry per sample of data_batch.

  Raises:
    ValueError: If portion_swap_pct and noise_pct are both zero, i.e. no transformation is selected.
  """
  batch_i, batch_p, batch_j, batch_n = [], [], [], []
  for i, i_idx in zip(data_batch, batch_idx):

    # -------------- swap only ---------------
    if portion_swap_pct != 0.0:
      p = swap_transform(train_or_test, i, i_idx, portion_swap_pct)

    # -------------- noise only ---------------
    elif noise_pct != 0.0:
      p = noisy_transform(i, noise_pct)

    # Without this branch `p` would be unbound on the first sample (or silently reuse the
    # previous sample's p), so the misconfiguration is reported explicitly instead.
    else:
      raise ValueError('Exactly one of portion_swap_pct and noise_pct must be non-zero.')

    # j_idx and n_idx are 1-element index arrays, hence the [0] when fetching the samples.
    j_idx = np.random.choice(np.arange(data_dict[train_or_test].shape[0]), 1)
    n_idx = np.random.choice(np.where(label_dict[train_or_test] != label_dict[train_or_test][j_idx])[0], 1)

    j, n = data_dict[train_or_test][j_idx][0], data_dict[train_or_test][n_idx][0]

    batch_i.append(np.expand_dims(cv2.resize(i, (w1, h2), interpolation = cv2.INTER_AREA), axis = -1))
    batch_p.append(np.expand_dims(cv2.resize(p, (w1, h2), interpolation = cv2.INTER_AREA), axis = -1))
    batch_j.append(np.expand_dims(cv2.resize(j, (w1, h2), interpolation = cv2.INTER_AREA), axis = -1))
    batch_n.append(np.expand_dims(cv2.resize(n, (w1, h2), interpolation = cv2.INTER_AREA), axis = -1))

  return np.array(batch_i), np.array(batch_p), np.array(batch_j), np.array(batch_n)

In [None]:
# Evaluate the classifier and its copy on the test set; both were given the same weights above.
print(model.evaluate(test_dataset))
print(model_copy.evaluate(test_dataset))

In [None]:
# Maps the lower-case option name to the distance-metric name that is used during training
# and in the names of the saved results, based on your choice of distance metric.
distance_type_dict = dict(euclidean = 'Euclidean', mahalanobis = 'Mahalanobis')

<h1>The cell below contains the main hyperparameters of our model (<font color='red'>distance_type, portion_swap_pct, noise_pct, EPOCHS, BATCH_SIZE and m_squared</font>). Keep in mind that we trained the encoder under the assumption that exactly one of the two preprocessing options (portion_swap_pct and noise_pct) is non-zero (chosen from [0.05, 0.1, 0.15, 0.2]) and the other one is zero.</h1>

In [None]:
# Choose the distance metric here: euclidean or mahalanobis.
# distance_type = distance_type_dict['euclidean']
distance_type = distance_type_dict['mahalanobis']

# Ratios of the two preprocessing options; exactly one of them should be non-zero.
# portion_swap_pct: fraction of the rows swapped with another sample of the same class.
# noise_pct: fraction of the pixels replaced by uniform noise.
portion_swap_pct = 0.0
noise_pct = 0.1

# The number of epochs and the batch size for training the encoder.
EPOCHS = 100
BATCH_SIZE = 128

# Choose the value of m_squared; m is its square root as a float32 and is used
# (squared again) as the margin of the dissimilar-pair loss.
m_squared = 100
m = np.sqrt(m_squared).astype(np.float32)

# The directory to save the results for the encoder; the name encodes the settings chosen above.
encoder_save_add = f'models/encoder_weights_{distance_type}_{int(100 * noise_pct)}pctNoise_{int(100 * portion_swap_pct)}pctSwap_msqr{m_squared}'

In [None]:
# Set the embedding dimensionality and learning rate.
embedding_dim = 128
learning_rate = 1e-3

# Build the encoder on top of the cloned classifier: every copied layer is frozen and a new,
# trainable Dense embedding layer is attached to the output of the 'flatten' layer.
# NOTE(review): the cell that originally created `encoder` is missing from this notebook, so every
# later cell fails with a NameError on a fresh kernel. This is a reconstruction based on the
# instruction "freeze the parameters for every layer except the embedding layer and then create the
# encoder model" — confirm the embedding head (attachment point, activation) against the
# original model before loading previously saved encoder checkpoints.
for frozen_layer in model_copy.layers:
  frozen_layer.trainable = False
embedding_output = Dense(embedding_dim, name = 'embedding')(model_copy.get_layer('flatten').output)
encoder = Model(inputs = model_copy.input, outputs = embedding_output, name = 'encoder')

# Optimizer that updates the trainable parameters of the encoder.
encoder_opt = Adam(learning_rate = learning_rate)

In [None]:
# Set up the checkpoint manager used to save the encoder (and its optimizer state) later;
# only the most recent checkpoint is kept (max_to_keep=1).
checkpoint = Checkpoint(encoder_optimizer = encoder_opt, encoder = encoder)
manager = CheckpointManager(checkpoint=checkpoint, directory=encoder_save_add, max_to_keep=1)

# Set up the summary writers for training and testing to store the loss values.
# Both write below encoder_plot_add/logs, into sub-directories tagged with the current timestamp.
encoder_plot_add = f'plots/{distance_type}_{int(100 * noise_pct)}pctNoise_{int(100 * portion_swap_pct)}pctSwap_msqr{m_squared}'
encoder_train_summary_writer = tf.summary.create_file_writer(encoder_plot_add + "/logs/train_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
encoder_test_summary_writer = tf.summary.create_file_writer(encoder_plot_add + "/logs/test_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [None]:
# The function used during the training process. It is called for each batch in each epoch.
# Depending on the selected distance metric, the loss function is measured differently. The loss function is recorded using the summary writer.
# Using the gradient of the loss function with respect to the parameters of the encoder, we can optimize the same parameters.

@tf.function
def train_step(xi, xp, xj, xn, epoch):
  """Run one optimisation step of the encoder on a batch of (x_i, x_p, x_j, x_n).

  The loss is the sum of a similar-pair term computed on the embeddings of xi and xp and a
  margin term max(0, m^2 - distance) computed on the embeddings of xj and xn. The three loss
  values are written to encoder_train_summary_writer with `epoch` as the step.

  Args:
    xi, xp, xj, xn: Aligned input batches fed to the encoder.
    epoch: Current epoch index; only used as the summary step.

  NOTE(review): distance_type is a Python global that is branched on inside a tf.function, so
  its value is presumably fixed when the function is traced — re-run this cell after changing it.
  """
  # ------------------- Euclidean Distance -------------------
  if distance_type == 'Euclidean':
    with GradientTape() as tape:
      ei = encoder(xi)
      ep = encoder(xp) 
      ej = encoder(xj)
      en = encoder(xn)

      # Similar pairs: mean squared difference over all embedding entries.
      loss_l2_ip = tf.reduce_mean(tf.square(ei - ep))
      # Dissimilar pairs: hinge on m^2 minus the per-sample mean squared difference.
      loss_l2_jn = tf.reduce_mean(tf.maximum(0.0, tf.math.pow(m, 2) - tf.reduce_mean(tf.square(ej - en), axis = -1)))
      loss_l2_total = loss_l2_ip + loss_l2_jn
      
    gradients = tape.gradient(loss_l2_total, encoder.trainable_variables) 
    encoder_opt.apply_gradients(zip(gradients, encoder.trainable_variables))

    with encoder_train_summary_writer.as_default():
      tf.summary.scalar('l2_loss_ip', loss_l2_ip, step=tf.cast(epoch, 'int64'))
      tf.summary.scalar('l2_loss_jn', loss_l2_jn, step=tf.cast(epoch, 'int64'))
      tf.summary.scalar('l2_loss_total', loss_l2_total, step=tf.cast(epoch, 'int64'))

  # ------------------- Mahalanobis Distance -------------------
  elif distance_type == 'Mahalanobis':
    with GradientTape() as tape:
      ei = encoder(xi)
      ep = encoder(xp) 
      ej = encoder(xj)
      en = encoder(xn)

      # Insert an axis at position 1 so that each embedding can be used in batched matrix products.
      ei = tf.expand_dims(ei, axis = 1)
      ep = tf.expand_dims(ep, axis = 1)
      ej = tf.expand_dims(ej, axis = 1)
      en = tf.expand_dims(en, axis = 1)


      # Sum over the batch of the outer products of the batch-centred ei and ep, scaled by 1 / (batch size - 1);
      # its inverse is the matrix of the quadratic form applied to (ei - ep).
      # NOTE(review): a batch with a single sample makes the scaling factor divide by zero, and
      # tf.linalg.inv needs the matrix to be invertible — confirm the batch size guarantees both.
      cov_matrix_ip = tf.reduce_sum(tf.matmul(tf.math.conj(tf.transpose(ei - tf.reduce_mean(ei, axis = 0), perm = [0, 2, 1])), ep - tf.reduce_mean(ep, axis = 0)), axis = 0) * (1 / (ei.shape[0] - 1))
      cov_matrix_ip = tf.linalg.inv(cov_matrix_ip)
      loss_mahalanobis_ip = tf.reduce_mean(tf.squeeze(tf.matmul(tf.matmul(ei - ep, cov_matrix_ip), tf.math.conj(tf.transpose(ei - ep, perm=[0, 2, 1])))))
      
      # Same construction for the (j, n) pairs, followed by the hinge on m^2.
      cov_matrix_jn = tf.reduce_sum(tf.matmul(tf.math.conj(tf.transpose(ej - tf.reduce_mean(ej, axis = 0), perm = [0, 2, 1])), en - tf.reduce_mean(en, axis = 0)), axis = 0) * (1 / (ej.shape[0] - 1))
      cov_matrix_jn = tf.linalg.inv(cov_matrix_jn)
      loss_mahalanobis_jn = tf.matmul(tf.matmul(ej - en, cov_matrix_jn), tf.math.conj(tf.transpose(ej - en, perm=[0, 2, 1])))
      loss_mahalanobis_jn = tf.reduce_mean(tf.maximum(0.0, tf.math.pow(m, 2) - tf.squeeze(loss_mahalanobis_jn)))

      loss_mahalanobis_total = loss_mahalanobis_ip + loss_mahalanobis_jn

    gradients = tape.gradient(loss_mahalanobis_total, encoder.trainable_variables)
    encoder_opt.apply_gradients(zip(gradients, encoder.trainable_variables))

    with encoder_train_summary_writer.as_default():
      tf.summary.scalar('mahalanobis_loss_ip', loss_mahalanobis_ip, step=tf.cast(epoch, 'int64'))
      tf.summary.scalar('mahalanobis_loss_jn', loss_mahalanobis_jn, step=tf.cast(epoch, 'int64'))
      tf.summary.scalar('mahalanobis_loss_total', loss_mahalanobis_total, step=tf.cast(epoch, 'int64'))


In [None]:
# The function used during the testing process. It is called for each batch in each epoch.
# Depending on the selected distance metric, the loss function is measured differently. The loss function is recorded using the summary writer.

@tf.function
def test_step(xi, xp, xj, xn, epoch):
  """Compute and log the encoder losses on a test batch without updating any parameter.

  The losses are the same as in the training step: a similar-pair term on the embeddings of
  xi and xp plus a margin term max(0, m^2 - distance) on the embeddings of xj and xn. The values
  are written to encoder_test_summary_writer with `epoch` as the step.

  Args:
    xi, xp, xj, xn: Aligned input batches fed to the encoder.
    epoch: Current epoch index; only used as the summary step.
  """
  # No GradientTape is opened here: no gradient is taken during testing, so recording the
  # forward pass would only cost memory.

  # ------------ Euclidean Distance ------------
  if distance_type == 'Euclidean':
    ei = encoder(xi)
    ep = encoder(xp)
    ej = encoder(xj)
    en = encoder(xn)

    # Similar pairs: mean squared difference over all embedding entries.
    loss_l2_ip = tf.reduce_mean(tf.square(ei - ep))
    # Dissimilar pairs: hinge on m^2 minus the per-sample mean squared difference.
    loss_l2_jn = tf.reduce_mean(tf.maximum(0.0, tf.math.pow(m, 2) - tf.reduce_mean(tf.square(ej - en), axis = -1)))
    loss_l2_total = loss_l2_ip + loss_l2_jn

    with encoder_test_summary_writer.as_default():
      tf.summary.scalar('l2_loss_ip', loss_l2_ip, step=tf.cast(epoch, 'int64'))
      tf.summary.scalar('l2_loss_jn', loss_l2_jn, step=tf.cast(epoch, 'int64'))
      tf.summary.scalar('l2_loss_total', loss_l2_total, step=tf.cast(epoch, 'int64'))

  # ------------------- Mahalanobis Distance -------------------
  elif distance_type == 'Mahalanobis':
    ei = encoder(xi)
    ep = encoder(xp)
    ej = encoder(xj)
    en = encoder(xn)

    # Insert an axis at position 1 so that each embedding can be used in batched matrix products.
    ei = tf.expand_dims(ei, axis = 1)
    ep = tf.expand_dims(ep, axis = 1)
    ej = tf.expand_dims(ej, axis = 1)
    en = tf.expand_dims(en, axis = 1)

    # Sum over the batch of the outer products of the batch-centred ei and ep, scaled by
    # 1 / (batch size - 1); its inverse is the matrix of the quadratic form applied to (ei - ep).
    cov_matrix_ip = tf.reduce_sum(tf.matmul(tf.math.conj(tf.transpose(ei - tf.reduce_mean(ei, axis = 0), perm = [0, 2, 1])), ep - tf.reduce_mean(ep, axis = 0)), axis = 0) * (1 / (ei.shape[0] - 1))
    cov_matrix_ip = tf.linalg.inv(cov_matrix_ip)
    loss_mahalanobis_ip = tf.reduce_mean(tf.squeeze(tf.matmul(tf.matmul(ei - ep, cov_matrix_ip), tf.math.conj(tf.transpose(ei - ep, perm=[0, 2, 1])))))

    # Same construction for the (j, n) pairs, followed by the hinge on m^2.
    cov_matrix_jn = tf.reduce_sum(tf.matmul(tf.math.conj(tf.transpose(ej - tf.reduce_mean(ej, axis = 0), perm = [0, 2, 1])), en - tf.reduce_mean(en, axis = 0)), axis = 0) * (1 / (ej.shape[0] - 1))
    cov_matrix_jn = tf.linalg.inv(cov_matrix_jn)
    loss_mahalanobis_jn = tf.matmul(tf.matmul(ej - en, cov_matrix_jn), tf.math.conj(tf.transpose(ej - en, perm=[0, 2, 1])))
    loss_mahalanobis_jn = tf.reduce_mean(tf.maximum(0.0, tf.math.pow(m, 2) - tf.squeeze(loss_mahalanobis_jn)))

    loss_mahalanobis_total = loss_mahalanobis_ip + loss_mahalanobis_jn

    with encoder_test_summary_writer.as_default():
      tf.summary.scalar('mahalanobis_loss_ip', loss_mahalanobis_ip, step=tf.cast(epoch, 'int64'))
      tf.summary.scalar('mahalanobis_loss_jn', loss_mahalanobis_jn, step=tf.cast(epoch, 'int64'))
      tf.summary.scalar('mahalanobis_loss_total', loss_mahalanobis_total, step=tf.cast(epoch, 'int64'))


In [None]:
# This function builds the batches of (x_i, x_p, x_j, x_n) and calls the train_step and test_step functions on them.
# The process is repeated for the number of epochs given by the argument epochs. At the end of each epoch, the dataset is shuffled.

def _run_split_batches(split, step_fn, epoch):
  """Feed every mini-batch of `split` ('train' or 'test') through `step_fn`."""
  num_samples = data_dict[split].shape[0]
  for start in range(0, num_samples, BATCH_SIZE):
    # The last batch may be shorter than BATCH_SIZE.
    stop = min(start + BATCH_SIZE, num_samples)
    if distance_type == 'Mahalanobis' and stop - start < 2:
      # The Mahalanobis loss scales by 1 / (batch size - 1); a single-sample batch would
      # divide by zero, so it is skipped.
      continue
    data_batch = data_dict[split][start:stop]
    batch_i, batch_p, batch_j, batch_n = batch_transform(split, data_batch, range(start, stop))
    step_fn(batch_i, batch_p, batch_j, batch_n, epoch)


def _shuffle_split(split):
  """Apply one fresh random permutation to the samples, labels and heights of `split`."""
  idx = np.random.RandomState().permutation(data_dict[split].shape[0])
  data_dict[split], label_dict[split], height_dict[split] = data_dict[split][idx], label_dict[split][idx], height_dict[split][idx]


def train_test(epochs):
  """Train and evaluate the encoder for `epochs` epochs.

  Every epoch runs train_step on all training batches, then test_step on all test batches,
  and finally reshuffles both splits (data_dict, label_dict and height_dict are rebound).

  Args:
    epochs: Number of epochs to run.
  """
  for epoch in tf.range(epochs):
    # `epoch` is a tensor (as expected by the step functions); convert it for a readable log line.
    print(f'Epoch {int(epoch)}')

    _run_split_batches('train', train_step, epoch)
    _run_split_batches('test', test_step, epoch)

    _shuffle_split('train')
    _shuffle_split('test')
  

In [None]:
# Train and test the encoder for the configured number of epochs.
train_test(EPOCHS)

In [None]:
# Save the encoder (and its optimizer state) as a checkpoint in the directory selected before (encoder_save_add).
manager.save()

In [None]:
# Show the training and testing loss functions with Tensorboard.
%reload_ext tensorboard
%tensorboard --logdir encoder_plot_add

In [None]:
# Resize every variable-height encoder sample to (w1, h2) and append a channel axis,
# then wrap the images (without labels) in batched datasets.
train_data_resized = np.array([cv2.resize(sample, (w1, h2), interpolation = cv2.INTER_AREA)[..., np.newaxis] for sample in train_data])
test_data_resized = np.array([cv2.resize(sample, (w1, h2), interpolation = cv2.INTER_AREA)[..., np.newaxis] for sample in test_data])

train_dataset_noLabel = tf.data.Dataset.from_tensor_slices(train_data_resized).batch(150)
test_dataset_noLabel = tf.data.Dataset.from_tensor_slices(test_data_resized).batch(250)

In [None]:
import pandas as pd

# Adapted from the github (https://github.com/schoyc/blackbox-detection) of Chen, Steven, Nicholas Carlini, and David Wagner.
# "Stateful detection of black-box adversarial attacks." Proceedings of the 1st ACM Workshop on Security and Privacy on Artificial Intelligence. 2020.
# Function calculate_thresholds() returns a pair of lists: one with the K values and one with the thresholds. The threshold is chosen
# dynamically for each K, such that 0.1% of the training data would be detected as adversarial.

def calculate_thresholds(training_data, K, encoder=lambda x: x, P=1000, up_to_K=False):
  """Compute the detection threshold(s) from the k-nearest-neighbour distances of the training data.

  Args:
    training_data: Iterable of batches; each batch is passed to `encoder`.
    K: Largest neighbourhood size to evaluate.
    encoder: Callable mapping a batch to a 2-D array of encodings (identity by default).
    P: Number of rows processed per chunk, bounding the size of the distance matrix.
    up_to_K: If True, thresholds are computed for k = 0..K, otherwise only for k = K.

  Returns:
    (K_S, THRESHOLDS): the evaluated k values and, for each of them, the 0.1th percentile of
    the mean distance over the first k + 1 columns of the sorted distance matrix.
  """
  data = np.concatenate([encoder(batch) for batch in training_data])

  # Stepping over range(0, n, P) also covers the final partial chunk. The earlier
  # `range(n // P)` silently dropped it and left nothing to concatenate when n < P.
  distances = []
  for start in range(0, data.shape[0], P):
    # Distances from the rows of this chunk to every row of the data, sorted ascending per row.
    distance_mat = pairwise.pairwise_distances(data[start:start + P, :], Y=data)
    distance_mat = np.sort(distance_mat, axis=-1)
    distance_mat_K = distance_mat[:, :K]

    distances.append(distance_mat_K)
  distance_matrix = np.concatenate(distances, axis=0)

  start_k = 0 if up_to_K else K

  THRESHOLDS = []
  K_S = []
  for k in range(start_k, K + 1):
    # NOTE(review): only K columns are kept above, so for k == K the slice [:k+1] yields the
    # same K columns as for k == K - 1 — kept as in the reference implementation.
    dist_to_k_neighbors = distance_matrix[:, :k + 1]
    avg_dist_to_k_neighbors = dist_to_k_neighbors.mean(axis=-1)

    threshold = np.percentile(avg_dist_to_k_neighbors, 0.1)

    K_S.append(k)
    THRESHOLDS.append(threshold)

  return K_S, THRESHOLDS

In [None]:
%matplotlib inline

K = 200
Ks, Thrs = calculate_thresholds(train_dataset_noLabel, K, encoder, up_to_K=True)

plt.figure(figsize=(20,5))
plt.plot(Ks, Thrs)
plt.xticks(range(0, K, 5))
plt.show()

In [None]:
from collections import OrderedDict

class Detector(object):
  """Stateful detector that flags queries whose encodings are too close to earlier queries.

  Every processed query is encoded and stored. Once at least K encodings are stored, a new
  query is flagged when the mean Euclidean distance to its K nearest stored encodings is
  below the detection threshold.
  """

  def __init__(self, K, threshold=None, training_data=None, chunk_size=1000, weights_path=encoder_save_add):
    """Create a detector.

    Args:
      K: Number of nearest neighbours used for the detection score.
      threshold: Explicit detection threshold; overridden when training_data is given.
      training_data: Iterable of batches used to calculate the threshold with calculate_thresholds().
      chunk_size: Number of buffered encodings after which the buffer is moved to memory as one array.
      weights_path: Passed to _init_encoder(), which currently does not use it.

    Raises:
      ValueError: If neither threshold nor training_data is provided.
    """
    self.K = K
    self.threshold = threshold
    self.training_data = training_data

    if self.threshold is None and self.training_data is None:
      raise ValueError("Must provide explicit detection threshold or training data to calculate threshold!")

    self._init_encoder(weights_path)

    if self.training_data is not None:
      _, self.thresholds = calculate_thresholds(self.training_data, self.K, self.encode, up_to_K=False)
      self.threshold = self.thresholds[-1]

    self.num_queries = 0
    self.buffer = []   # encodings not yet moved to memory
    self.memory = []   # list of arrays, each holding one full chunk of encodings
    self.chunk_size = chunk_size

    self.history = [] # Tracks number of queries (t) when attack was detected
    self.history_by_attack = []
    self.detected_dists = [] # Tracks knn-dist that was detected

  def _init_encoder(self, weights_path):
    """Bind the module-level `encoder` model; `weights_path` is accepted but not used."""
    self.encode = lambda x : encoder.predict(x)
    self.encoder = encoder

  def process(self, queries):
    """Encode a batch of queries and process the encodings one by one."""
    queries = self.encode(queries)
    for query in queries:
      self.process_query(query)

  def process_query(self, query):
    """Store one encoded query and decide whether it looks like part of an attack.

    Args:
      query: Encoding of a single query.

    Returns:
      True if the mean distance to the K nearest stored encodings is below the threshold,
      False otherwise (always False while fewer than K encodings are stored).
    """
    # Warm-up: not enough stored encodings for a K-nearest-neighbour score yet.
    if len(self.memory) == 0 and len(self.buffer) < self.K:
      self.buffer.append(query)
      self.num_queries += 1
      return False

    k = self.K
    all_dists = []

    # Distances to the encodings still in the buffer ...
    if len(self.buffer) > 0:
      queries = np.stack(self.buffer, axis=0)
      dists = np.linalg.norm(queries - query, axis=-1)
      all_dists.append(dists)

    # ... and to the encodings of every chunk already moved to memory.
    for queries in self.memory:
      dists = np.linalg.norm(queries - query, axis=-1)
      all_dists.append(dists)

    dists = np.concatenate(all_dists)
    # np.partition places the k smallest distances in the first k positions.
    k_nearest_dists = np.partition(dists, k - 1)[:k, None]
    k_avg_dist = np.mean(k_nearest_dists)

    self.buffer.append(query)
    self.num_queries += 1

    if len(self.buffer) >= self.chunk_size:
      self.memory.append(np.stack(self.buffer, axis=0))
      self.buffer = []

    is_attack = k_avg_dist < self.threshold
    if is_attack:
      self.history.append(self.num_queries)
      self.detected_dists.append(k_avg_dist)

    # Report the decision on this path as well; it used to fall through and return None.
    return bool(is_attack)

  def clear_memory(self):
    """Forget all stored encodings; the query counter and detection history are kept."""
    self.buffer = []
    self.memory = []

  def get_detections(self):
    """Return the number of queries between each pair of consecutive detections."""
    history = self.history
    return [history[i + 1] - history[i] for i in range(len(history) - 1)]

In [None]:
# Evaluate the classifier and its copy on the test set again, now that the encoder has been trained.
print(model.evaluate(test_dataset))
print(model_copy.evaluate(test_dataset))

In [None]:
# Keep only the train samples that the malware classifier classifies correctly.
train_model_pred = np.argmax(model.predict(train_dataset_noLabel), axis = -1)
train_wrong_idx = np.where(train_model_pred != train_labels)[0]
right_train_data = np.delete(train_data_resized, train_wrong_idx, axis = 0)
right_train_labels = np.delete(train_labels, train_wrong_idx, axis = 0)
print('indices of removed train samples', train_wrong_idx)
model.evaluate(right_train_data, to_categorical(right_train_labels))

# Keep only the test samples that the malware classifier classifies correctly.
test_model_pred = np.argmax(model.predict(test_dataset_noLabel), axis = -1)
test_wrong_idx = np.where(test_model_pred != test_labels)[0]
right_test_data = np.delete(test_data_resized, test_wrong_idx, axis = 0)
right_test_labels = np.delete(test_labels, test_wrong_idx, axis = 0)
print('indices of removed test samples', test_wrong_idx)
model.evaluate(right_test_data, to_categorical(right_test_labels))

In [None]:
# We use Foolbox to implement FGSM attacks. (https://github.com/bethgelab/foolbox)
import foolbox as fb

def generate_adv_samples(model, data, labels, num_sample, eps_range):
  """Generate FGSM adversarial versions of one randomly chosen sample.

  Args:
    model: The Keras model to attack.
    data: Array of candidate samples; one of them is picked at random.
    labels: Integer labels aligned with `data`.
    num_sample: Number of adversarial samples (one per epsilon) to generate.
    eps_range: [start, stop] of the epsilon range; `stop` itself is excluded.

  Returns:
    A list with `num_sample` clipped adversarial samples and an array of length `num_sample`
    filled with the label of the chosen sample.
  """
  #------------- FGSM ----------------
  random_idx = np.random.choice(np.arange(data.shape[0]), 1)[0]

  # Slices (rather than a single index) keep the leading batch axis of length 1.
  fb_data = tf.constant(data[random_idx:random_idx + 1], dtype = tf.float32)
  fb_label = tf.constant(labels[random_idx:random_idx + 1])

  # linspace yields exactly num_sample evenly spaced epsilons. np.arange with a float step can
  # return one element more or less, which would desynchronise the samples and the labels.
  epsilons = np.linspace(eps_range[0], eps_range[1], num_sample, endpoint = False)

  preprocessing = dict()
  bounds = (0, 1)
  fmodel = fb.TensorFlowModel(model, bounds=bounds, preprocessing=preprocessing)
  raw, clipped, is_adv = fb.attacks.FGSM()(fmodel, fb_data, fb_label, epsilons = epsilons)

  # `clipped` holds one batch per epsilon; [0, ...] takes the single sample of each batch.
  return [c[0,...].numpy() for c in clipped], np.ones((num_sample)) * labels[random_idx]

In [None]:
#We measure True Positive, False Positive, True Negative, False Negative and accuracy using this function.
def perf_measure(y_actual, y_hat):
  """Count the confusion-matrix entries of binary predictions and compute the accuracy.

  Args:
    y_actual: Ground-truth labels, indexable by position.
    y_hat: Predicted labels; only predictions equal to 1 or 0 are counted.

  Returns:
    ((TP, FP, TN, FN), accuracy), where accuracy is (TP + TN) divided by the number of counted predictions.
  """
  counts = {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0}

  for position, predicted in enumerate(y_hat):
    agrees = y_actual[position] == predicted
    if predicted == 1:
      counts['TP' if agrees else 'FP'] += 1
    elif predicted == 0:
      counts['TN' if agrees else 'FN'] += 1

  TP, FP, TN, FN = counts['TP'], counts['FP'], counts['TN'], counts['FN']
  accuracy = (TP + TN) / (TP + TN + FP + FN)

  return (TP, FP, TN, FN), accuracy

In [None]:
# Run adversarial attack for a specific number of rounds and return the average detection rate for the encoder
# and the average misclassification rate caused by the attack on the malware classifier.

def report_attack_results(k, round_num, num_samples):
  """Measure how well the detector catches FGSM attacks for a given K.

  In every round a fresh detector first processes all (benign) training samples and then
  `num_samples` adversarial samples generated from one correctly classified training sample.

  Args:
    k: Number of nearest neighbours used by the detector.
    round_num: Number of attack rounds to average over.
    num_samples: Number of adversarial samples generated per round.

  Returns:
    (mean detection rate, mean misclassification rate) over all rounds, where the detection
    rate is the fraction of adversarial queries flagged by the detector and the
    misclassification rate is the fraction of adversarial samples the classifier gets wrong.
  """
  round_detections = []
  missclass = []

  benign_queries = train_data_resized
  # The threshold only depends on k and the training data, so it is calculated in the first
  # round and reused afterwards instead of being recomputed for every new detector.
  threshold = None

  for r_idx in range(round_num):
    if threshold is None:
      detector = Detector(K=k, training_data=train_dataset_noLabel)
      threshold = detector.threshold
    else:
      detector = Detector(K=k, threshold=threshold)

    suspicious_queries, suspicious_labels = generate_adv_samples(model, right_train_data, right_train_labels, num_samples, [0.01, 0.3])
    suspicious_queries = np.array(suspicious_queries)

    detector.process(benign_queries)
    detector.process(suspicious_queries)

    # detector.history holds the query counter of every detection; counters above the number
    # of benign queries belong to the adversarial samples.
    np_hist = np.array(detector.history)
    suspicious_detection = np.where(np_hist > benign_queries.shape[0])[0].shape[0]
    round_detections.append(suspicious_detection/num_samples)

    predictions = np.argmax(model.predict(suspicious_queries), axis = -1)
    wrong_pred = np.where(predictions != suspicious_labels)[0]
    missclass.append(wrong_pred.shape[0]/num_samples)

  return np.array(round_detections).mean(), np.array(missclass).mean()

In [None]:
# Run the report_attack_results function for a set of K values (5, 8, ..., 50).
# num_samples is the number of generated adversarial samples and round_num the number of
# attack iterations that are averaged for each K.
print(f'======== Encoder with {distance_type} distance metric, {100*portion_swap_pct}% swapping, {100*noise_pct}% noise and m^2 = {m_squared} ==========')
num_samples, round_num = 1000, 10

detection_value = misclass_value = None

for k in range(5, 51, 3):
  print(f'----------- {k} ----------')
  detection_value, misclass_value = report_attack_results(k, round_num, num_samples)
  print('detection rate', detection_value, 'misclassification rate', misclass_value)