In [1]:
import pandas as pd
import numpy as np
import joblib
from keras.models import load_model

from adversarialdefence.mutation_testing import AdversarialDetectorThroughMutation
from adversarialdefence.model_mut_operators import ModelMutationOperators

from adversarialdefence.utils import ModelUtils, GeneralUtils

### ***Data Loading***

In [2]:
# Read the preprocessed CICIDS2017 CSV, discard its first column
# (presumably a saved row index — TODO confirm) and display the resulting shape.
df = pd.read_csv('../csv/CICIDS2017_improved-preprocessed.csv')
df = df.drop(columns=df.columns[0])
df.shape

(1715326, 49)

In [3]:
# Partition the dataset on its 'Label' column: rows equal to 0 are kept as
# benign, rows equal to 1 as anomalous.
df_benign = df.loc[df['Label'].eq(0)]
df_anomalous = df.loc[df['Label'].eq(1)]

# Report the size of each class.
print(f'Number of benign samples: {df_benign.shape[0]}')
print(f'Number of anomalous samples: {df_anomalous.shape[0]}')

Number of benign samples: 1432918
Number of anomalous samples: 282408


### ***Load Models and Scalers***

In [4]:
# Restore the two saved Keras models (autoencoder first, then the DNN).
autoencoder, dnn = (
    load_model(weights_path)
    for weights_path in (
        '../modelli/autoencoder_best_weights_96-96.hdf5',
        '../modelli/DNN_best_weights_99.hdf5',
    )
)







In [5]:
# Restore the persisted scalers used with the autoencoder and the DNN.
# SECURITY: joblib.load unpickles the file and can therefore execute arbitrary
# code — only load scaler files that come from a trusted source.
std_scaler_aut = joblib.load('../modelli/std_scaler_aut.bin')
std_scaler_dnn = joblib.load('../modelli/std_scaler_dnn.bin')
# Feature column names (every column except 'Label'). They are read from the
# column index directly, so the full DataFrame is not copied just for its header.
columns = df.columns.drop('Label')

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


### ***Load white-box (WB) adversarial samples***

In [6]:
# Folder that holds the CSV files of pre-generated adversarial samples.
base_advs_csv_path = '../csv/'

# Load the FGSM, DeepFool and Carlini files in that order; every call yields a
# (DataFrame, X, y) triple.
(
    (df_fgsm, X_fgsm, y_fgsm),
    (df_deepFool, X_deepFool, y_deepFool),
    (df_carlini, X_carlini, y_carlini),
) = (
    GeneralUtils.get_data_with_advs(base_advs_csv_path + csv_name)
    for csv_name in ('fgsm.csv', 'deepFool.csv', 'carlini.csv')
)

# The Carlini file carries one extra leading column (presumably a saved index —
# TODO confirm); remove it from both the feature frame and the full frame.
X_carlini = X_carlini.drop(columns=X_carlini.columns[0])
df_carlini = df_carlini.drop(columns=df_carlini.columns[0])

In [7]:
# Binary predictions of the DNN on each white-box sample set.
preds_fgsm_dnn, preds_deepFool_dnn, preds_carlini_dnn = (
    ModelUtils.binary_preds_supervised(dnn, X_wb)
    for X_wb in (X_fgsm, X_deepFool, X_carlini)
)

# Binary predictions of the autoencoder on the same sample sets.
preds_fgsm_aut, preds_deepFool_aut, preds_carlini_aut = (
    ModelUtils.binary_preds_unsupervised(autoencoder, X_wb)
    for X_wb in (X_fgsm, X_deepFool, X_carlini)
)

In [8]:
# Select the adversarial rows for the DNN from each attack frame, based on the
# DNN's predictions (selection rule lives in GeneralUtils.get_advs_samples).
advs_fgsm_dnn, advs_deepFool_dnn, advs_carlini_dnn = (
    GeneralUtils.get_advs_samples(attack_preds, attack_df)
    for attack_preds, attack_df in (
        (preds_fgsm_dnn, df_fgsm),
        (preds_deepFool_dnn, df_deepFool),
        (preds_carlini_dnn, df_carlini),
    )
)

# Same selection, driven by the autoencoder's predictions.
advs_fgsm_aut, advs_deepFool_aut, advs_carlini_aut = (
    GeneralUtils.get_advs_samples(attack_preds, attack_df)
    for attack_preds, attack_df in (
        (preds_fgsm_aut, df_fgsm),
        (preds_deepFool_aut, df_deepFool),
        (preds_carlini_aut, df_carlini),
    )
)

In [9]:
# Stack the per-attack adversarial frames of each model and shuffle the rows.
# NOTE(review): the shuffle is unseeded, so the row order differs between runs.
df_advs_dnn = (
    pd.concat((advs_fgsm_dnn, advs_deepFool_dnn, advs_carlini_dnn), ignore_index=True)
    .sample(frac=1)
)
df_advs_aut = (
    pd.concat((advs_fgsm_aut, advs_deepFool_aut, advs_carlini_aut), ignore_index=True)
    .sample(frac=1)
)

print(f'Number of adversarial samples for dnn: {df_advs_dnn.shape[0]}')
print(f'Number of adversarial samples for aut: {df_advs_aut.shape[0]}')

Number of adversarial samples for dnn: 358351
Number of adversarial samples for aut: 270813


### ***Check Mutation Testing results***

In [10]:
# Draw 1000 benign rows and separate the 'Label' column from the features.
# NOTE(review): the draw is unseeded, so a different sample is used on every run.
X_benign = df_benign.sample(n=1000)
y_benign = X_benign['Label']
X_benign = X_benign.drop(columns='Label')

# Scale the same benign features once with each model's own scaler.
X_benign_dnn, X_benign_aut = (
    scaler.transform(X_benign) for scaler in (std_scaler_dnn, std_scaler_aut)
)

In [11]:
# Number of adversarial rows drawn per white-box attack.
N_WB_ADVS_SAMPLES = 1000

# Attack name -> feature-only sample for the DNN; the scaled benign sample is
# appended last under 'BENIGN' as the non-adversarial control.
wb_attacks_dnn_dict = {
    attack_name: advs_df.sample(n=N_WB_ADVS_SAMPLES).drop('Label', axis=1)
    for attack_name, advs_df in (
        ('FGSM', advs_fgsm_dnn),
        ('DEEPFOOL', advs_deepFool_dnn),
        ('CARLINI', advs_carlini_dnn),
    )
}
wb_attacks_dnn_dict['BENIGN'] = X_benign_dnn

# Same structure for the autoencoder.
wb_attacks_aut_dict = {
    attack_name: advs_df.sample(n=N_WB_ADVS_SAMPLES).drop('Label', axis=1)
    for attack_name, advs_df in (
        ('FGSM', advs_fgsm_aut),
        ('DEEPFOOL', advs_deepFool_aut),
        ('CARLINI', advs_carlini_aut),
    )
}
wb_attacks_aut_dict['BENIGN'] = X_benign_aut

In [12]:
def get_mutated_models(orig_model, num_mutation, mutation_ratio, mutation_operator):
    """Build a list of mutated variants of ``orig_model``.

    Parameters
    ----------
    orig_model
        Model handed unchanged to the selected ``ModelMutationOperators`` method.
    num_mutation : int
        Number of mutated models to generate.
    mutation_ratio
        Passed unchanged to the selected mutation operator.
    mutation_operator : str
        ``'NAI'``, ``'GF'`` or ``'WS'``; selects
        ``ModelMutationOperators.<name>_mut``.

    Returns
    -------
    list
        The ``num_mutation`` objects returned by the selected operator.

    Raises
    ------
    ValueError
        If ``mutation_operator`` is not one of the supported names. The check
        runs before any model is generated, so an invalid name is reported
        even when ``num_mutation`` is 0.
    """
    supported_operators = ('NAI', 'GF', 'WS')
    if mutation_operator not in supported_operators:
        raise ValueError('Mutation operator not valid. Choose between [NAI, GF, WS]')

    # Resolve the operator once instead of re-testing its name on every iteration.
    mutate = getattr(ModelMutationOperators, f'{mutation_operator}_mut')
    return [mutate(orig_model, mutation_ratio) for _ in range(num_mutation)]

def get_LCR_metrics(X, mutated_models, orig_model, model_type):
    """Compute the average and maximum label change rate (LCR) over ``X``.

    The LCR of a sample is the fraction of ``mutated_models`` whose binary
    prediction for that sample differs from the prediction of ``orig_model``.

    Parameters
    ----------
    X
        Samples to evaluate; must expose ``shape[0]`` (number of samples).
    mutated_models : sequence
        Mutated variants of ``orig_model``.
    orig_model
        The unmutated reference model.
    model_type : str
        ``'SUPERVISED'`` selects ``ModelUtils.binary_preds_supervised``; any
        other value selects ``ModelUtils.binary_preds_unsupervised``.

    Returns
    -------
    tuple
        ``(mean LCR over all samples, maximum per-sample LCR)``.

    Raises
    ------
    ValueError
        If ``mutated_models`` is empty or ``X`` has no rows (the rates would
        be undefined).
    """
    n_samples = X.shape[0]
    n_models = len(mutated_models)
    if n_models == 0:
        raise ValueError('mutated_models must contain at least one model')
    if n_samples == 0:
        raise ValueError('X must contain at least one sample')

    if model_type == 'SUPERVISED':
        predict = ModelUtils.binary_preds_supervised
    else:
        predict = ModelUtils.binary_preds_unsupervised

    # The reference labels do not depend on the mutant, so predict them once.
    # Assumes one label per sample (any trailing singleton axis is flattened)
    # and a deterministic prediction helper — TODO confirm against ModelUtils.
    orig_labels = np.asarray(predict(orig_model, X)).reshape(-1)

    # Per-sample count of mutants that disagree with the original model.
    num_deflected_models = np.zeros(n_samples)
    for mutated_model in mutated_models:
        mutated_labels = np.asarray(predict(mutated_model, X)).reshape(-1)
        num_deflected_models += (orig_labels != mutated_labels)

    # Sum the integer-valued counts first and divide once, which avoids
    # accumulating rounding error over the individual per-sample ratios.
    LCR_avg = num_deflected_models.sum() / (n_models * n_samples)
    LCR_max = num_deflected_models.max() / n_models
    return LCR_avg, LCR_max

In [35]:
# Mutation rates and operators explored for the DNN; `mut_operators` is also
# read by the autoencoder mutation-testing cell.
mut_rates_dnn = [0.25, 0.3, 0.4]
mut_operators = ['GF', 'NAI', 'WS']

print('Mutation testing on DNN\n')
for mut_operator in mut_operators:
    for mut_rate in mut_rates_dnn:
        # 200 mutated DNNs are generated for every (operator, rate) pair.
        mutated_models = get_mutated_models(dnn, 200, mut_rate, mut_operator)
        print(f'Mutation operator: {mut_operator}; mutation rate: {mut_rate}')
        for attack, X_attack in wb_attacks_dnn_dict.items():
            # get_LCR_metrics returns (average, maximum); unpack it so the
            # value printed under the "LCR_avg" label is the average alone
            # rather than the whole tuple.
            LCR_avg, LCR_max = get_LCR_metrics(X_attack, mutated_models, dnn, 'SUPERVISED')
            print(f'  - LCR_avg on {attack} adversarial samples: {LCR_avg}')

Mutation testing on DNN

Mutation operator: GF; mutation rate: 0.25
  - LCR_avg on FGSM adversarial samples: 0.12895000000000015
  - LCR_avg on DEEPFOOL adversarial samples: 0.0007849999999999999
  - LCR_avg on CARLINI adversarial samples: 0.09023500000000001
  - LCR_avg on non adversarial samples: 0.0014399999999999997
Mutation operator: GF; mutation rate: 0.3
  - LCR_avg on FGSM adversarial samples: 0.1292100000000001
  - LCR_avg on DEEPFOOL adversarial samples: 0.0008100000000000001
  - LCR_avg on CARLINI adversarial samples: 0.08964000000000001
  - LCR_avg on non adversarial samples: 0.0016299999999999995
Mutation operator: GF; mutation rate: 0.4
  - LCR_avg on FGSM adversarial samples: 0.14793999999999985
  - LCR_avg on DEEPFOOL adversarial samples: 0.000885
  - LCR_avg on CARLINI adversarial samples: 0.10297999999999985
  - LCR_avg on non adversarial samples: 0.0018849999999999995
Mutation operator: NAI; mutation rate: 0.25
  - LCR_avg on FGSM adversarial samples: 0.1843249999999

In [36]:
# Mutation rates explored for the autoencoder; the operator list
# (`mut_operators`) comes from the DNN mutation-testing cell.
mut_rates_aut = [0.025, 0.03, 0.04]
print('Mutation testing on AUTOENCODER')
for mut_operator in mut_operators:
    for mut_rate in mut_rates_aut:
        # 200 mutated autoencoders are generated for every (operator, rate) pair.
        mutated_models = get_mutated_models(autoencoder, 200, mut_rate, mut_operator)
        print(f'Mutation operator: {mut_operator}; mutation rate: {mut_rate}')
        for attack, X_attack in wb_attacks_aut_dict.items():
            # get_LCR_metrics returns (average, maximum); unpack it so the
            # value printed under the "LCR_avg" label is the average alone
            # rather than the whole tuple.
            LCR_avg, LCR_max = get_LCR_metrics(X_attack, mutated_models, autoencoder, 'UNSUPERVISED')
            print(f'  - LCR_avg on {attack} adversarial samples: {LCR_avg}')

Mutation testing on AUTOENCODER
Mutation operator: GF; mutation rate: 0.025
  - LCR_avg on FGSM adversarial samples: 0.9980750000000012
  - LCR_avg on DEEPFOOL adversarial samples: 0.987200000000004
  - LCR_avg on CARLINI adversarial samples: 0.9728700000000013
  - LCR_avg on non adversarial samples: 0.4132800000000009
Mutation operator: GF; mutation rate: 0.03
  - LCR_avg on FGSM adversarial samples: 0.9992400000000005
  - LCR_avg on DEEPFOOL adversarial samples: 0.9954000000000031
  - LCR_avg on CARLINI adversarial samples: 0.9851950000000013
  - LCR_avg on non adversarial samples: 0.44126500000000074
Mutation operator: GF; mutation rate: 0.04
  - LCR_avg on FGSM adversarial samples: 0.999895
  - LCR_avg on DEEPFOOL adversarial samples: 0.9999250000000001
  - LCR_avg on CARLINI adversarial samples: 0.9975950000000007
  - LCR_avg on non adversarial samples: 0.5256499999999995
Mutation operator: NAI; mutation rate: 0.025
  - LCR_avg on FGSM adversarial samples: 0.9967650000000026
  - L

### ***Detection on WB adversarial samples***

In [12]:
# Path handed to both detectors as their last constructor argument
# (presumably the folder where mutated models are stored — TODO confirm).
MUTATED_MODELS_BASE_PATH = '../modelli/mutation/'
# One mutation-based adversarial detector per model (supervised DNN, unsupervised
# autoencoder). NOTE(review): the meaning of the numeric argument (0.5 / 0.05) is
# defined by AdversarialDetectorThroughMutation and is not visible here — confirm.
adv_detector_dnn = AdversarialDetectorThroughMutation(dnn, 'DNN', 'SUPERVISED', 0.5, MUTATED_MODELS_BASE_PATH)
adv_detector_aut = AdversarialDetectorThroughMutation(autoencoder, 'AUT', 'UNSUPERVISED', 0.05, MUTATED_MODELS_BASE_PATH)

In [37]:
# Fit each detector on the benign sample scaled with its own scaler; the value
# returned by `fit` is kept as LCR_th_* (presumably an LCR threshold — confirm).
# NOTE(review): the semantics of 500 and of 0.3 / 0.006 are defined by
# AdversarialDetectorThroughMutation.fit and are not visible in this notebook.
LCR_th_dnn = adv_detector_dnn.fit(X_benign_dnn, 500, 0.3)
LCR_th_aut = adv_detector_aut.fit(X_benign_aut, 500, 0.006)

  saving_api.save_model(


In [13]:
def perform_detection(X, detector, max_iter, detection_sensibility, attack, n_step):
    """Run ``detector.detect`` ``n_step`` times on ``X`` and print the average
    number of samples flagged as adversarial.

    Parameters
    ----------
    X
        Samples to analyse; must expose ``shape[0]`` (number of samples).
    detector
        Object with a ``detect(X, max_iter, detection_sensibility)`` method that
        returns an iterable of per-sample flags (presumably booleans — confirm
        against AdversarialDetectorThroughMutation).
    max_iter, detection_sensibility
        Passed unchanged to ``detector.detect``.
    attack : str
        Name shown in the printed summary.
    n_step : int
        Number of repeated detection runs to average over.

    Raises
    ------
    ValueError
        If ``n_step`` is not positive or ``X`` has no rows (the average and the
        ratio would be undefined).
    """
    if n_step <= 0:
        raise ValueError('n_step must be a positive integer')
    n_samples = X.shape[0]
    if n_samples == 0:
        raise ValueError('X must contain at least one sample')

    num_detected_list = []
    for _ in range(n_step):
        detect_status = detector.detect(X, max_iter, detection_sensibility)
        # Count only the entries that compare equal to True, exactly as before.
        num_detected_list.append(sum(1 for detected in detect_status if detected == True))

    avg_detected = sum(num_detected_list) / n_step
    # Report the actual number of runs instead of a hard-coded "5".
    print(f'Average number of samples detected as adversarial on {n_step} attempts for {attack}: {avg_detected} / {n_samples} - Ratio: {(avg_detected/n_samples) * 100}')

In [42]:
# Run the DNN detector on every white-box sample set and on the benign control.
for attack, X_advs in wb_attacks_dnn_dict.items():
    perform_detection(X_advs, adv_detector_dnn, 200, 0.2, attack, 5)

Average number of samples detected as adversarial on 5 attempts for FGSM: 909.6 / 1000 - Ratio: 90.96000000000001
Average number of samples detected as adversarial on 5 attempts for DEEPFOOL: 267.6 / 1000 - Ratio: 26.76
Average number of samples detected as adversarial on 5 attempts for CARLINI: 913.0 / 1000 - Ratio: 91.3
Average number of samples detected as adversarial on 5 attempts for BENIGN: 123.6 / 1000 - Ratio: 12.36


In [43]:
# Run the autoencoder detector on every white-box sample set and on the benign control.
for attack, X_advs in wb_attacks_aut_dict.items():
    perform_detection(X_advs, adv_detector_aut, 200, 0.7, attack, 5)

Average number of samples detected as adversarial on 5 attempts for FGSM: 874.0 / 1000 - Ratio: 87.4
Average number of samples detected as adversarial on 5 attempts for DEEPFOOL: 631.2 / 1000 - Ratio: 63.12000000000001
Average number of samples detected as adversarial on 5 attempts for CARLINI: 716.4 / 1000 - Ratio: 71.63999999999999
Average number of samples detected as adversarial on 5 attempts for BENIGN: 204.6 / 1000 - Ratio: 20.46


### ***Detection on black-box (BB) adversarial samples***

In [14]:
# Load the black-box adversarial CSVs (HopSkipJump, ZOO, Boundary, query-efficient)
# in that order; every call yields a (DataFrame, X, y) triple.
(
    (df_hsj, X_hsj, y_hsj),
    (df_zoo, X_zoo, y_zoo),
    (df_boundary, X_boundary, y_boundary),
    (df_query_eff, X_query_eff, y_query_eff),
) = (
    GeneralUtils.get_data_with_advs(base_advs_csv_path + csv_name)
    for csv_name in ('hsj_new.csv', 'zoo_new.csv', 'boundary_new.csv', 'query_eff_new.csv')
)

In [15]:
# Binary predictions of the DNN on each black-box sample set.
preds_hsj_dnn, preds_zoo_dnn, preds_boundary_dnn, preds_query_eff_dnn = (
    ModelUtils.binary_preds_supervised(dnn, X_bb)
    for X_bb in (X_hsj, X_zoo, X_boundary, X_query_eff)
)

# Binary predictions of the autoencoder on the same sample sets.
preds_hsj_aut, preds_zoo_aut, preds_boundary_aut, preds_query_eff_aut = (
    ModelUtils.binary_preds_unsupervised(autoencoder, X_bb)
    for X_bb in (X_hsj, X_zoo, X_boundary, X_query_eff)
)

In [16]:
# Select the adversarial rows for the DNN from each black-box attack frame,
# based on the DNN's predictions.
advs_hsj_dnn, advs_zoo_dnn, advs_boundary_dnn, advs_query_eff_dnn = (
    GeneralUtils.get_advs_samples(attack_preds, attack_df)
    for attack_preds, attack_df in (
        (preds_hsj_dnn, df_hsj),
        (preds_zoo_dnn, df_zoo),
        (preds_boundary_dnn, df_boundary),
        (preds_query_eff_dnn, df_query_eff),
    )
)

# Same selection, driven by the autoencoder's predictions.
advs_hsj_aut, advs_zoo_aut, advs_boundary_aut, advs_query_eff_aut = (
    GeneralUtils.get_advs_samples(attack_preds, attack_df)
    for attack_preds, attack_df in (
        (preds_hsj_aut, df_hsj),
        (preds_zoo_aut, df_zoo),
        (preds_boundary_aut, df_boundary),
        (preds_query_eff_aut, df_query_eff),
    )
)

In [17]:
# Stack the per-attack black-box adversarial frames of each model and shuffle the rows.
# NOTE(review): the shuffle is unseeded, so the row order differs between runs.
df_advs_BB_dnn = (
    pd.concat(
        (advs_hsj_dnn, advs_zoo_dnn, advs_boundary_dnn, advs_query_eff_dnn),
        ignore_index=True,
    )
    .sample(frac=1)
)
df_advs_BB_aut = (
    pd.concat(
        (advs_hsj_aut, advs_zoo_aut, advs_boundary_aut, advs_query_eff_aut),
        ignore_index=True,
    )
    .sample(frac=1)
)

print(f'Number of adversarial samples BB for dnn: {df_advs_BB_dnn.shape[0]}')
print(f'Number of adversarial samples BB for aut: {df_advs_BB_aut.shape[0]}')

Number of adversarial samples BB for dnn: 127254
Number of adversarial samples BB for aut: 83283


In [18]:
# Split the shuffled black-box frames into features and labels without
# touching the source frames.
X_advs_BB_aut = df_advs_BB_aut.drop(columns='Label')
y_advs_BB_aut = df_advs_BB_aut['Label'].copy()

X_advs_BB_dnn = df_advs_BB_dnn.drop(columns='Label')
y_advs_BB_dnn = df_advs_BB_dnn['Label'].copy()

# Draw a fresh benign sample of 1000 rows. NOTE(review): this rebinds X_benign,
# y_benign, X_benign_dnn and X_benign_aut, which were already defined by the
# earlier benign-sampling cell, and the draw is unseeded.
X_benign = df_benign.sample(n=1000)
y_benign = X_benign['Label']
X_benign = X_benign.drop(columns='Label')

# Scale the new benign features once with each model's own scaler.
X_benign_dnn, X_benign_aut = (
    scaler.transform(X_benign) for scaler in (std_scaler_dnn, std_scaler_aut)
)

In [19]:
# Upper bound on the number of rows drawn per black-box attack. A frame with
# fewer rows is used in full instead of making `sample` raise; this also lets
# the small autoencoder 'BOUNDARY' set go through the same path as the others.
N_BB_ADVS_SAMPLES = 1000

# Attack name -> feature-only sample for the DNN.
bb_attacks_dnn_dict = {
    attack_name: advs_df.sample(n=min(N_BB_ADVS_SAMPLES, len(advs_df))).drop('Label', axis=1)
    for attack_name, advs_df in (
        ('HOPSKIPJUMP', advs_hsj_dnn),
        ('BOUNDARY', advs_boundary_dnn),
        ('ZOO', advs_zoo_dnn),
        ('QUERY_EFF', advs_query_eff_dnn),
    )
}

# Attack name -> feature-only sample for the autoencoder.
bb_attacks_aut_dict = {
    attack_name: advs_df.sample(n=min(N_BB_ADVS_SAMPLES, len(advs_df))).drop('Label', axis=1)
    for attack_name, advs_df in (
        ('HOPSKIPJUMP', advs_hsj_aut),
        ('BOUNDARY', advs_boundary_aut),
        ('ZOO', advs_zoo_aut),
        ('QUERY_EFF', advs_query_eff_aut),
    )
}

In [51]:
# Run the DNN detector on every black-box sample set.
for attack, X_advs in bb_attacks_dnn_dict.items():
    perform_detection(X_advs, adv_detector_dnn, 200, 0.2, attack, 5)

Average number of samples detected as adversarial on 5 attempts for HOPSKIPJUMP: 993.0 / 1000 - Ratio: 99.3
Average number of samples detected as adversarial on 5 attempts for BOUNDARY: 985.8 / 1000 - Ratio: 98.58
Average number of samples detected as adversarial on 5 attempts for ZOO: 776.6 / 1000 - Ratio: 77.66000000000001
Average number of samples detected as adversarial on 5 attempts for QUERY_EFF: 112.8 / 1000 - Ratio: 11.28


In [55]:
# Run the autoencoder detector on every black-box sample set.
for attack, X_advs in bb_attacks_aut_dict.items():
    perform_detection(X_advs, adv_detector_aut, 200, 0.7, attack, 5)

Average number of samples detected as adversarial on 5 attempts for HOPSKIPJUMP: 851.2 / 1000 - Ratio: 85.12
Average number of samples detected as adversarial on 5 attempts for BOUNDARY: 66.4 / 191 - Ratio: 34.76439790575917
Average number of samples detected as adversarial on 5 attempts for ZOO: 681.4 / 1000 - Ratio: 68.14
Average number of samples detected as adversarial on 5 attempts for QUERY_EFF: 680.2 / 1000 - Ratio: 68.02


### ***Attempt 2: DNN detector refit with 0.1 (was 0.3) and detection sensibility 0.02 (was 0.2)***

In [None]:
# Refit the DNN detector on the scaled benign sample, changing only the last
# argument (0.1 here, 0.3 in the first fit). NOTE(review): the semantics of that
# argument are defined by AdversarialDetectorThroughMutation.fit — confirm.
LCR_th_dnn = adv_detector_dnn.fit(X_benign_dnn, 500, 0.1)

In [20]:
# Repeat the white-box detection with the DNN detector at sensibility 0.02.
for attack, X_advs in wb_attacks_dnn_dict.items():
    perform_detection(X_advs, adv_detector_dnn, 200, 0.02, attack, 5)

Average number of samples detected as adversarial on 5 attempts for FGSM: 814.0 / 1000 - Ratio: 81.39999999999999
Average number of samples detected as adversarial on 5 attempts for DEEPFOOL: 20.8 / 1000 - Ratio: 2.08
Average number of samples detected as adversarial on 5 attempts for CARLINI: 702.8 / 1000 - Ratio: 70.28
Average number of samples detected as adversarial on 5 attempts for BENIGN: 24.4 / 1000 - Ratio: 2.44


In [21]:
# Repeat the black-box detection with the DNN detector at sensibility 0.02.
for attack, X_advs in bb_attacks_dnn_dict.items():
    perform_detection(X_advs, adv_detector_dnn, 200, 0.02, attack, 5)

Average number of samples detected as adversarial on 5 attempts for HOPSKIPJUMP: 989.0 / 1000 - Ratio: 98.9
Average number of samples detected as adversarial on 5 attempts for BOUNDARY: 380.8 / 1000 - Ratio: 38.080000000000005
Average number of samples detected as adversarial on 5 attempts for ZOO: 522.8 / 1000 - Ratio: 52.279999999999994
Average number of samples detected as adversarial on 5 attempts for QUERY_EFF: 6.0 / 1000 - Ratio: 0.6
