In [None]:
## Import Libraries
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import random
import torch
from collections import Counter
from tensorflow.keras import Sequential,layers, losses, optimizers
from tensorflow.keras.layers import Dense
from keras.layers import LeakyReLU

In [None]:
## Loading adversarial training data
# Source file holding the samples the novelty detector is fitted on.
training_csv = "Novelty_Detector_Training_Data.csv"
df_train = pd.read_csv(training_csv)

In [None]:
# Keep every column except those whose header begins with "Unnamed"
# (presumably stray index columns written by an earlier to_csv -- confirm).
unnamed_train_cols = df_train.columns.str.contains('^Unnamed')
df_train = df_train.loc[:, ~unnamed_train_cols]

In [None]:
# The first 500 columns are the model inputs (the autoencoder is built with
# input_dims = 500).
adv_data = df_train.iloc[:, :500]

In [None]:
## AE model architecture
tf.random.set_seed(2)
input_dims = 500

# One entry per Dense layer, in stacking order: (units, activation).
#   None    -> plain linear Dense layer
#   'relu'  -> Dense layer with its built-in ReLU activation
#   'leaky' -> linear Dense layer followed by a separate LeakyReLU(alpha=0.05)
layer_plan = [
    (input_dims, None),          # input layer, declares the input shape
    (400, 'leaky'),
    (300, 'leaky'),
    (input_dims // 2, 'relu'),
    (input_dims // 4, 'relu'),
    (input_dims // 8, 'relu'),   # narrowest layer (500 // 8 = 62 units)
    (input_dims // 4, 'relu'),
    (input_dims // 2, None),
    (300, 'leaky'),
    (400, 'leaky'),
    (input_dims, 'relu'),        # output has the same width as the input
]

model = Sequential()
for position, (units, activation) in enumerate(layer_plan):
    dense_kwargs = {}
    if position == 0:
        # Only the first layer carries the input shape.
        dense_kwargs['input_shape'] = (input_dims, )
    if activation == 'relu':
        dense_kwargs['activation'] = 'relu'
    # Layers are created and added one at a time, in plan order.
    model.add(Dense(units, **dense_kwargs))
    if activation == 'leaky':
        model.add(LeakyReLU(alpha=0.05))

In [None]:
## Compilation and training
# The same frame is passed as both input and target, so the network is trained
# to reproduce its input; mean absolute error is the reconstruction loss.
model.compile(loss='mae', optimizer='adam')
model.fit(x=adv_data, y=adv_data, epochs=500, batch_size=256, shuffle=True)

In [None]:
## Saving the stage-3 base line model
# Persist the trained autoencoder to an HDF5 file in the working directory.
baseline_model_file = 'Novelty_detector_baseline.h5'
model.save(baseline_model_file)

In [None]:
## Loading the stage-3 adversarial model
from tensorflow.keras.models import load_model

# Rebind `model` to the network stored in the baseline file. compile=False
# loads it without compiling; the cells shown below only call predict().
saved_model_file = 'Novelty_detector_baseline.h5'
model = load_model(saved_model_file, compile=False)

In [None]:
## pbRe metric
def recon_metrices(data, reconstructed_data):
    """Return one per-byte reconstruction error score for every sample (row).

    For each row the element-wise absolute error
    ``|data - reconstructed_data|`` is computed. Positions where either the
    original or the reconstructed value is exactly 0 are ignored (their error
    is forced to 0). The row's score is the sum of the remaining errors
    divided by the number of positions whose error is non-zero.

    Parameters
    ----------
    data : pandas.DataFrame or array-like, shape (n_samples, n_features)
        Original samples.
    reconstructed_data : array-like, shape (n_samples, n_features)
        Model output for ``data``.

    Returns
    -------
    list of float
        One score per row, in row order. A row without any non-zero error
        (perfect reconstruction, or no comparable position) scores 0.0
        instead of NaN, so averages and percentiles computed from the list
        stay finite.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.

    Notes
    -----
    Neither input is modified. The denominator counts positions with a
    non-zero error, not all compared positions.
    NOTE(review): confirm that this is the intended normalisation.
    """
    original = np.asarray(data, dtype=np.float64)
    rebuilt = np.asarray(reconstructed_data, dtype=np.float64)
    if original.shape != rebuilt.shape:
        raise ValueError(
            "data and reconstructed_data must have the same shape, got "
            f"{original.shape} and {rebuilt.shape}"
        )

    # Fresh array, so masking below never touches the caller's data.
    abs_err = np.abs(original - rebuilt)
    # Ignore positions where either side is exactly zero.
    abs_err[(original == 0) | (rebuilt == 0)] = 0.0

    totals = abs_err.sum(axis=1)
    counts = np.count_nonzero(abs_err, axis=1)

    # Divide only where there is something to average; other rows keep 0.0.
    scores = np.zeros_like(totals)
    np.divide(totals, counts, out=scores, where=counts > 0)
    return scores.tolist()

In [None]:
## Constructing the threshold from reconstruction errors on the adversarial training data

In [None]:
# Reconstruct the training samples with the reloaded autoencoder.
recon_adv = model.predict(x=adv_data)

In [None]:
# Per-sample reconstruction error scores for the training samples.
mae_byte_list = recon_metrices(data=adv_data, reconstructed_data=recon_adv)

In [None]:
## Thresholds for 99,95 and 90 percentile values
# Mean of the per-sample scores in mae_byte_list.
mae_byte_avg = sum(mae_byte_list) / len(mae_byte_list)
print('mae_byte_avg', mae_byte_avg)

# Each percentile gets its own, correctly named variable. Previously all three
# values were written to `mae_byte_per_95`, which therefore ended up holding
# the 90th percentile.
mae_byte_per_99, mae_byte_per_95, mae_byte_per_90 = np.percentile(mae_byte_list, [99, 95, 90])
print('mae_byte_99_percentile', mae_byte_per_99)
print('mae_byte_95_percentile', mae_byte_per_95)
print('mae_byte_90_percentile', mae_byte_per_90)

In [None]:
## Loading the test dataset
test_csv = "Adv_all_DNN_truerandom.csv"
df_test = pd.read_csv(test_csv)

In [None]:
# Keep every column except those whose header begins with "Unnamed"
# (presumably stray index columns written by an earlier to_csv -- confirm).
unnamed_test_cols = df_test.columns.str.contains('^Unnamed')
df_test = df_test.loc[:, ~unnamed_test_cols]

In [None]:
## Testing the baseline model against different adversarial samples and CICIDS2018 data
## that leaked from the previous stages

In [None]:
# First 500 columns of the test frame are used as the model input.
adv_data_test = df_test.iloc[:, :500]

In [None]:
# Reconstruct the test samples with the autoencoder.
recon_adv_test = model.predict(x=adv_data_test)

In [None]:
# Per-sample reconstruction error scores for the test samples.
mae_byte_list_adv = recon_metrices(data=adv_data_test, reconstructed_data=recon_adv_test)

In [None]:
# Summary statistics of the per-sample scores on the test samples.
mae_byte_avg = sum(mae_byte_list_adv) / len(mae_byte_list_adv)
print('mae_byte_avg', mae_byte_avg)

# Each percentile gets its own, correctly named variable. Previously all three
# values were written to `mae_byte_per_95`, which therefore ended up holding
# the 90th percentile.
mae_byte_per_99, mae_byte_per_95, mae_byte_per_90 = np.percentile(mae_byte_list_adv, [99, 95, 90])
print('mae_byte_99_percentile', mae_byte_per_99)
print('mae_byte_95_percentile', mae_byte_per_95)
print('mae_byte_90_percentile', mae_byte_per_90)

In [None]:
## Classification
# Share of test samples whose score exceeds the cut-off.
# NOTE(review): 0.09 is hard-coded; presumably read off the training
# percentiles printed earlier -- confirm.
count_99 = sum(1 for sample_error in mae_byte_list_adv if sample_error > 0.09)
print(count_99 / len(mae_byte_list_adv))

In [None]:
# Show the loaded test frame as this cell's output for a quick visual check.
df_test

In [None]:
# Second set of adversarial examples, read from its CSV file.
adv_all_csv = "adv_examples_all_allDNN_bigger.csv"
adv_all = pd.read_csv(adv_all_csv)

In [None]:
# Select the 500 model-input columns. As for df_train and df_test, columns
# whose header starts with "Unnamed" are removed first, so a stray index
# column cannot occupy position 0 and shift the features by one. If the CSV
# has no such column the filter is a no-op. `adv_all` itself is left unchanged.
adv_all_named = adv_all.loc[:, ~adv_all.columns.str.contains('^Unnamed')]
adv_data_RL = adv_all_named.iloc[:, 0:500]

In [None]:
# Reconstruct the second adversarial set with the autoencoder.
recon_adv_RL = model.predict(x=adv_data_RL)

In [None]:
# Per-sample reconstruction error scores for the second adversarial set.
mae_byte_list_adv_RL = recon_metrices(data=adv_data_RL, reconstructed_data=recon_adv_RL)

In [None]:
# Summary statistics of the per-sample scores on the second adversarial set.
mae_byte_avg = sum(mae_byte_list_adv_RL) / len(mae_byte_list_adv_RL)
print('mae_byte_avg', mae_byte_avg)

# Each percentile gets its own, correctly named variable. Previously all three
# values were written to `mae_byte_per_95`, which therefore ended up holding
# the 90th percentile.
mae_byte_per_99, mae_byte_per_95, mae_byte_per_90 = np.percentile(mae_byte_list_adv_RL, [99, 95, 90])
print('mae_byte_99_percentile', mae_byte_per_99)
print('mae_byte_95_percentile', mae_byte_per_95)
print('mae_byte_90_percentile', mae_byte_per_90)

In [None]:
## Classification
# Share of samples in the second adversarial set whose score exceeds the cut-off.
# NOTE(review): 0.07 is hard-coded and differs from the 0.09 used for the
# other datasets -- confirm this is intended.
count_99 = sum(1 for sample_error in mae_byte_list_adv_RL if sample_error > 0.07)
print(count_99 / len(mae_byte_list_adv_RL))

In [None]:
# Labelled 2018 dataset; the following cell splits it by its 'Label' column.
cicids_2018_csv = "all_group_train_normalized_2018.csv"
df_2018 = pd.read_csv(cicids_2018_csv)

In [None]:
def _class_features(label):
    """Return the 500 model-input columns of the df_2018 rows with this 'Label'.

    The two label columns are dropped, then any column whose header starts
    with "Unnamed" (as is done for df_train and df_test, so a stray index
    column cannot shift the features), and finally the first 500 columns
    are kept.
    """
    rows = df_2018[df_2018['Label'] == label]
    rows = rows.drop(['Label_binary', 'Label'], axis=1)
    rows = rows.loc[:, ~rows.columns.str.contains('^Unnamed')]
    return rows.iloc[:, 0:500]


infiltration_attack = _class_features('Infiltration')
print(infiltration_attack.shape)
# 'Port Scan' extraction is currently disabled:
# portscan_attack = _class_features('Port Scan')
# print(portscan_attack.shape)
ddos_attack = _class_features('ddos')
print(ddos_attack.shape)
bruteforce_attack = _class_features('Brute Force')
print(bruteforce_attack.shape)
dos_attack = _class_features('DoS')
print(dos_attack.shape)
web_attack = _class_features('Web Attack')
print(web_attack.shape)
benign_data = _class_features('Benign')
print(benign_data.shape)

In [None]:
# Reconstruct every class subset with the autoencoder. The generator runs the
# predictions in the listed order, which pairs each result with its target name.
recon_inf, recon_dos, recon_ddos, recon_web, recon_brute, recon_ben = (
    model.predict(class_frame)
    for class_frame in (
        infiltration_attack,
        dos_attack,
        ddos_attack,
        web_attack,
        bruteforce_attack,
        benign_data,
    )
)

In [None]:
# Per-sample reconstruction error scores for the benign samples.
# NOTE: this rebinds `mae_byte_list`, which earlier held the training-set scores.
mae_byte_list = recon_metrices(data=benign_data, reconstructed_data=recon_ben)

In [None]:
## Classification
# Share of benign samples whose score exceeds the cut-off.
# NOTE(review): 0.09 is hard-coded; presumably read off the training
# percentiles printed earlier -- confirm.
count_99 = sum(1 for sample_error in mae_byte_list if sample_error > 0.09)
print(count_99 / len(mae_byte_list))