In [None]:
## Import Libraries
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import random
import torch
import seaborn as sns
from collections import Counter
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential,layers, losses, optimizers
from tensorflow.keras.layers import Dense

In [None]:
## Loading the CICIDS 2017 dataset
# Read the testing CSV; later cells split it by its 'Label' column into benign
# rows and the individual attack classes.
df_test1 = pd.read_csv("Anomaly_detector_testing_data.csv")

In [None]:
## Test benign data
# Keep only the rows labelled 'Benign'; their reconstruction errors are used
# further down to derive the percentile thresholds.
df_test = df_test1[df_test1['Label'] == 'Benign']

In [None]:
# Keep the first 200 columns, matching the 200-wide model input.
# NOTE(review): the training frame is divided by 255 before fitting, but no
# scaling is applied here — confirm the CSV values are already normalized.
df_test = df_test.iloc[:, 0:200]

In [None]:
## Loading benign data from CICIDS 2017 dataset
# read_pickle can execute arbitrary code while unpickling, so only load this
# file when it comes from a trusted source.
df1 = pd.read_pickle("Anomaly_detector_training_data.pkl")

In [None]:
## Normalizing the values
# Take the first 200 columns of the training frame and divide them by 255.
# presumably the columns hold raw byte values in [0, 255], which this maps
# to [0, 1] — TODO confirm
benign_data = df1.iloc[:,0:200]/255

In [None]:
# Show the normalized training frame as the cell output.
benign_data

In [None]:
from keras.layers import LeakyReLU

In [None]:
## Architecture of the baseline model
# Fully connected autoencoder: 200 inputs are narrowed to a 6-unit bottleneck
# and then widened back to 200 outputs.
# Seed TensorFlow's global RNG before any layer is created.
tf.random.set_seed(2)
input_dims = 200
model = Sequential()
# Encoder widths: 200 -> 150 -> 150 -> 100 -> 50 -> 25 -> 12 -> 6.
# The first Dense layer is given no activation argument.
model.add(Dense(input_dims, input_shape = (input_dims, )))
model.add(Dense(150))
model.add(LeakyReLU(alpha=0.05))
model.add(Dense(150))
model.add(LeakyReLU(alpha=0.05))
model.add(Dense(input_dims // 2, activation='relu'))
model.add(Dense(input_dims // 4, activation='relu'))
model.add(Dense(input_dims // 8, activation='relu'))
model.add(Dense(12, activation='relu'))
# Bottleneck: the narrowest layer of the network.
model.add(Dense(6, activation='relu'))
# Decoder widths: 12 -> 25 -> 50 -> 100 -> 150 -> 150 -> 200.
model.add(Dense(12, activation='relu'))
model.add(Dense(input_dims // 8, activation='relu'))
model.add(Dense(input_dims // 4, activation='relu'))
# NOTE(review): no activation is given here, whereas the encoder layer of the
# same width uses relu — confirm this asymmetry is intended.
model.add(Dense(input_dims // 2))
model.add(Dense(150))
model.add(LeakyReLU(alpha=0.05))
model.add(Dense(150))
model.add(LeakyReLU(alpha=0.05))
# relu on the output layer keeps every reconstructed value >= 0.
model.add(Dense(input_dims, activation='relu'))

In [None]:
## Compilation and fitting
# Train the autoencoder to reproduce its own input: benign_data is passed as
# both the input and the target, with mean absolute error as the loss.
model.compile(loss='mae', optimizer='adam')
model.fit(
    x=benign_data,
    y=benign_data,
    epochs=500,
    batch_size=256,
    shuffle=True,
)

In [None]:
## Saving the baseline autoencoder model
# Write the fitted model to an .h5 file in the current working directory.
model.save('Anomaly_detector_baseline.h5')

In [None]:
## Loading the autoencoder baseline model
from tensorflow.keras.models import load_model

# Reload the saved model, replacing the in-memory `model`. compile=False is
# passed; the cells below only call predict() on the loaded model.
model = load_model('Anomaly_detector_baseline.h5', compile = False)

In [None]:
### Threshold Construction

In [None]:
## Pbre metric
def recon_metrices(data, reconstructed_data):
    """Compute one mean absolute reconstruction error per sample.

    For every row the element-wise absolute difference between the input and
    its reconstruction is taken. Positions where either the input or the
    reconstruction is exactly 0 are excluded, and the remaining error is
    averaged over the positions whose error is non-zero.

    Parameters
    ----------
    data : pandas.DataFrame or array-like, shape (n_samples, n_features)
        Original inputs.
    reconstructed_data : array-like, same shape as ``data``
        Reconstructions of ``data`` (e.g. the output of ``model.predict``).

    Returns
    -------
    list of float
        One value per row of ``data``. A row without any non-zero error
        (all-zero input, all-zero reconstruction, or a perfect
        reconstruction) yields 0.0. The earlier implementation divided 0 by 0
        for such rows and returned NaN, which turned every mean and
        percentile computed from the list into NaN as well.
    """
    original = np.asarray(data)
    reconstructed = np.asarray(reconstructed_data)

    # Positions where either side is exactly zero do not contribute.
    ignored = (original == 0) | (reconstructed == 0)
    errors = np.where(ignored, 0, np.absolute(original - reconstructed))

    totals = errors.sum(axis=1)
    # The denominator is the number of positions with a non-zero error,
    # exactly as in the original per-row loop.
    counts = np.count_nonzero(errors, axis=1)

    # Divide only where the denominator is positive; other rows keep 0.0.
    per_sample = np.divide(
        totals,
        counts,
        out=np.zeros(totals.shape, dtype=float),
        where=counts > 0,
    )
    return per_sample.tolist()

In [None]:
# Give the benign test frame the first 200 column names of the training frame
# so both frames carry the same column labels.
df_test.columns = df1.columns[0:200]

In [None]:
# Reconstruct the benign test rows with the loaded autoencoder.
recon_ben = model.predict(df_test)

In [None]:
# Per-sample reconstruction error of the benign test rows; the percentile
# thresholds in the following cells are computed from this list.
mae_byte_list = recon_metrices(df_test,recon_ben)

In [None]:
## Thresholds on 99,95,90 percentile of benign data
# Candidate thresholds taken from the benign reconstruction errors.
# Each percentile now has its own variable. Previously all three were written
# to `mae_byte_per_95`, which therefore ended up holding the 90th percentile.
mae_byte_avg = sum(mae_byte_list) / len(mae_byte_list)
print('mae_byte_avg', mae_byte_avg)
mae_byte_per_99, mae_byte_per_95, mae_byte_per_90 = np.percentile(mae_byte_list, [99, 95, 90])
print('mae_byte_99_percentile', mae_byte_per_99)
print('mae_byte_95_percentile', mae_byte_per_95)
print('mae_byte_90_percentile', mae_byte_per_90)

In [None]:
## Thresholds on 98 percentile of benign data
mae_byte_avg = sum(mae_byte_list) / len(mae_byte_list)
print('mae_byte_avg', mae_byte_avg)
# Store the 98th percentile under a name that says what it holds.
mae_byte_per_98 = np.percentile(mae_byte_list, 98)
# Legacy alias: this value used to be stored only as `mae_byte_per_95`.
mae_byte_per_95 = mae_byte_per_98

In [None]:
## CICIDS2017 Testing

In [None]:
# Draw a reproducible 20% sample (random_state = 2) of the normalized training
# frame and renumber its index from 0.
# NOTE(review): these rows come from benign_data, the frame the model was
# fitted on, so the error measured here is on training data, not held-out data.
test_ben = benign_data.sample(frac = 0.2, random_state = 2).reset_index(drop = True)

In [None]:
# Reconstruct the sampled benign rows; this replaces the earlier `recon_ben`
# that was computed from df_test.
recon_ben = model.predict(test_ben)

In [None]:
# Per-sample error for the sampled benign rows; this replaces the
# `mae_byte_list` that the percentile thresholds were computed from.
mae_byte_list = recon_metrices(test_ben,recon_ben)

In [None]:
## Classification
# Fraction of the sampled benign rows whose error exceeds the hard-coded
# cut-off 0.089.
# NOTE(review): the other classification cells use 0.09 or 0.13 — confirm
# that 0.089 is intended here.
count_99 = sum(1 for err in mae_byte_list if err > 0.089)
print(count_99 / len(mae_byte_list))

In [None]:
## Testing the baseline autoencoder model against attack and benign data from CICIDS 2017 and CICIDS 2018

In [None]:
# Build one feature frame per attack class from df_test1: keep the rows with
# the given label, drop both label columns, keep the first 200 columns left.
(
    infiltration_attack,
    portscan_attack,
    ddos_attack,
    bruteforce_attack,
    dos_attack,
    web_attack,
) = [
    df_test1.loc[df_test1['Label'] == label]
    .drop(columns=['Label_binary', 'Label'])
    .iloc[:, :200]
    for label in ('Infiltration', 'Port Scan', 'DDoS', 'Brute Force', 'DoS', 'Web Attack')
]
print(infiltration_attack.shape)
print(portscan_attack.shape)
print(ddos_attack.shape)
print(bruteforce_attack.shape)
print(dos_attack.shape)
print(web_attack.shape)

In [None]:
# Reconstruct every attack subset with the autoencoder.
# NOTE(review): the attack frames are passed as loaded, with no division by
# 255 — confirm the CSV values are already on the training scale.
inf_ = model.predict(infiltration_attack)
port_ = model.predict(portscan_attack)
dos_ = model.predict(dos_attack)
ddos_ = model.predict(ddos_attack)
brute_ = model.predict(bruteforce_attack)
web_ = model.predict(web_attack)

In [None]:
# Per-sample reconstruction error for the web-attack subset.
# NOTE(review): the result is stored as `mae_byte_list_inf` although it is
# computed from web_attack/web_, not the infiltration subset — confirm which
# attack class is meant.
mae_byte_list_inf  = recon_metrices(web_attack,web_)

In [None]:
## Classification
# Fraction of samples in mae_byte_list_inf whose error exceeds the hard-coded
# cut-off 0.09.
count_99 = sum(1 for err in mae_byte_list_inf if err > 0.09)
print(count_99 / len(mae_byte_list_inf))

In [None]:
## CICIDS 2018 Testing

In [None]:
# Load the 2018 data set; the cells below split it by its 'Label' column.
# presumably already normalized, going by the file name — TODO confirm
df_test_2018 = pd.read_csv("all_group_train_normalized_2018.csv")

In [None]:
# Show the loaded 2018 frame as the cell output.
df_test_2018

In [None]:
# Rebuild the per-class feature frames from the 2018 data, reusing (and thus
# overwriting) the variable names of the 2017 subsets, plus a benign subset.
# NOTE(review): the DDoS label is spelled 'ddos' here but 'DDoS' for the 2017
# data — confirm it matches the labels in the 2018 CSV.
(
    infiltration_attack,
    portscan_attack,
    ddos_attack,
    bruteforce_attack,
    dos_attack,
    web_attack,
    benign_2018,
) = [
    df_test_2018.loc[df_test_2018['Label'] == label]
    .drop(columns=['Label_binary', 'Label'])
    .iloc[:, :200]
    for label in ('Infiltration', 'Port Scan', 'ddos', 'Brute Force', 'DoS', 'Web Attack', 'Benign')
]
print(infiltration_attack.shape)
print(portscan_attack.shape)
print(ddos_attack.shape)
print(bruteforce_attack.shape)
print(dos_attack.shape)
print(web_attack.shape)
print(benign_2018.shape)

In [None]:
# Reconstruct the 2018 subsets with the autoencoder.
# NOTE(review): portscan_attack is built for 2018 but not reconstructed in
# this cell — confirm that is intended.
inf_ = model.predict(infiltration_attack)
dos_ = model.predict(dos_attack)
ddos_ = model.predict(ddos_attack)
brute_ = model.predict(bruteforce_attack)
web_ = model.predict(web_attack)
benign_ = model.predict(benign_2018)

In [None]:
# Per-sample reconstruction error for the 2018 web-attack subset.
# NOTE(review): stored as `mae_byte_list_inf` although it is computed from
# web_attack/web_ — confirm which attack class is meant.
mae_byte_list_inf  = recon_metrices(web_attack,web_)

In [None]:
## Classification
# Fraction of samples in mae_byte_list_inf (2018 data) whose error exceeds the
# hard-coded cut-off 0.09.
count_99 = sum(1 for err in mae_byte_list_inf if err > 0.09)
print(count_99 / len(mae_byte_list_inf))

In [None]:
## Testing against different adversarial data

In [None]:
# Load one set of adversarial examples.
# NOTE(review): adv_all is not referenced again in the visible part of this
# notebook — confirm it is still needed.
adv_all = pd.read_csv("adv_examples_all_allDNN_bigger.csv")

In [None]:
# Load the adversarial examples that are evaluated in the cells below.
adv_random = pd.read_csv("Adv_all_DNN_truerandom.csv")

In [None]:
# Drop every column whose name starts with 'Unnamed'.
# presumably these are index columns written out by an earlier to_csv — TODO confirm
adv_random = adv_random.loc[:, ~adv_random.columns.str.contains('^Unnamed')]

In [None]:
# Scale the first 200 columns with the same divisor (255) that is applied to
# the training data. The original divided by 256, which put these inputs on a
# slightly different scale from the one the model was fitted on.
adv_ = adv_random.iloc[:,0:200]/255

In [None]:
# Reconstruct the scaled adversarial examples with the autoencoder.
recon_adv = model.predict(adv_)

In [None]:
# Per-sample reconstruction error for the adversarial examples.
mae_byte_list_adv = recon_metrices(adv_,recon_adv)

In [None]:
# Summary statistics of the adversarial reconstruction errors.
# They are stored under `_adv` names. The original cell wrote them to
# `mae_byte_avg` and `mae_byte_per_95`, which overwrote the benign statistics
# computed earlier and left the 90th percentile in a variable named for the 95th.
mae_byte_avg_adv = sum(mae_byte_list_adv) / len(mae_byte_list_adv)
print('mae_byte_avg', mae_byte_avg_adv)
mae_byte_per_99_adv, mae_byte_per_95_adv, mae_byte_per_90_adv = np.percentile(
    mae_byte_list_adv, [99, 95, 90]
)
print('mae_byte_99_percentile', mae_byte_per_99_adv)
print('mae_byte_95_percentile', mae_byte_per_95_adv)
print('mae_byte_90_percentile', mae_byte_per_90_adv)

In [None]:
## Classification
# Fraction of adversarial examples whose error exceeds the hard-coded
# cut-off 0.09.
count_99 = sum(1 for err in mae_byte_list_adv if err > 0.09)
print(count_99 / len(mae_byte_list_adv))

In [None]:
# Rebuild the per-class feature frames from df_test1; the 2018 cell reused
# these variable names, so they are reassigned here.
(
    infiltration_attack,
    portscan_attack,
    ddos_attack,
    bruteforce_attack,
    dos_attack,
    web_attack,
) = [
    df_test1.loc[df_test1['Label'] == label]
    .drop(columns=['Label_binary', 'Label'])
    .iloc[:, :200]
    for label in ('Infiltration', 'Port Scan', 'DDoS', 'Brute Force', 'DoS', 'Web Attack')
]
print(infiltration_attack.shape)
print(portscan_attack.shape)
print(ddos_attack.shape)
print(bruteforce_attack.shape)
print(dos_attack.shape)
print(web_attack.shape)

In [None]:
# Reconstruct four of the six attack subsets; bruteforce_attack and web_attack
# are not reconstructed in this cell.
recon_inf = model.predict(infiltration_attack)
recon_port = model.predict(portscan_attack)
recon_dos = model.predict(dos_attack)
recon_ddos = model.predict(ddos_attack)

In [None]:
# Per-sample reconstruction error for each reconstructed attack subset, each
# stored under a name that matches its attack class.
mae_byte_list_inf = recon_metrices(infiltration_attack,recon_inf)
mae_byte_list_port = recon_metrices(portscan_attack,recon_port)
mae_byte_list_dos = recon_metrices(dos_attack,recon_dos)
mae_byte_list_ddos = recon_metrices(ddos_attack,recon_ddos)

In [None]:
## Classification
# Fraction of infiltration samples whose error exceeds the hard-coded
# cut-off 0.13.
# NOTE(review): the other attack classes below are compared against 0.09 —
# confirm that 0.13 is intended for infiltration.
count_99 = sum(1 for err in mae_byte_list_inf if err > 0.13)
print(count_99 / len(mae_byte_list_inf))

In [None]:
## Classification
# Fraction of port-scan samples whose error exceeds the hard-coded
# cut-off 0.09.
count_99 = sum(1 for err in mae_byte_list_port if err > 0.09)
print(count_99 / len(mae_byte_list_port))

In [None]:
## Classification
# Fraction of DoS samples whose error exceeds the hard-coded cut-off 0.09.
count_99 = sum(1 for err in mae_byte_list_dos if err > 0.09)
print(count_99 / len(mae_byte_list_dos))

In [None]:
## Classification
# Fraction of DDoS samples whose error exceeds the hard-coded cut-off 0.09.
count_99 = sum(1 for err in mae_byte_list_ddos if err > 0.09)
print(count_99 / len(mae_byte_list_ddos))