In [None]:
## Importing Libraries
from sklearn.neural_network import MLPClassifier
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import pickle
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import joblib
import pickle
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from tensorflow.keras import Sequential,layers, losses, optimizers
from tensorflow.keras.layers import Dense
import tensorflow as tf
import time

In [None]:
## Load CICIDS 2017 training dataset
# NOTE(review): df_train is not referenced by any other visible cell, and the
# CICIDS 2017 work below is done on df_test, which is read from this same CSV —
# confirm whether this load is still needed.
df_train = pd.read_csv("Malicious_Packet_Detector_Training_Data.csv")

In [None]:
## Loading CICIDS 2017 test dataset
# df_test is the frame all CICIDS 2017 subsets and X/Y splits below are derived from.
# NOTE(review): the path is the "Training_Data" CSV (the same file df_train reads) —
# confirm this is the intended file for the "test" frame.
df_test = pd.read_csv("Malicious_Packet_Detector_Training_Data.csv")

In [None]:
## Subsetting attack packets from CICIDS 2017 dataset
# Every subset is built the same way: keep the rows of df_test carrying one 'Label'
# value, remove both label columns, then keep the first 200 remaining columns.
infiltration_attack = df_test.loc[df_test['Label'] == 'Infiltration'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(infiltration_attack.shape)

portscan_attack = df_test.loc[df_test['Label'] == 'Port Scan'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(portscan_attack.shape)

ddos_attack = df_test.loc[df_test['Label'] == 'DDoS'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(ddos_attack.shape)

bruteforce_attack = df_test.loc[df_test['Label'] == 'Brute Force'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(bruteforce_attack.shape)

dos_attack = df_test.loc[df_test['Label'] == 'DoS'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(dos_attack.shape)

web_attack = df_test.loc[df_test['Label'] == 'Web Attack'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(web_attack.shape)

benign_data = df_test.loc[df_test['Label'] == 'Benign'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(benign_data.shape)

In [None]:
## Loading CICIDS 2018 data
# NOTE(review): the file and variable are named "testing", yet benign rows from this
# frame are later concatenated into the data the autoencoder is re-fit on, and the
# same frame is also used for evaluation — confirm that overlap is intended.
df_test_2018 = pd.read_csv("Malicious_Packet_Detector_Testing_Data_2018.csv")

In [None]:
# Shuffle all CICIDS 2018 rows and renumber the index from 0.
# A fixed random_state keeps the row order identical across Restart & Run All,
# so every downstream subset is reproducible.
df_test_2018 = df_test_2018.sample(frac = 1, random_state = 42).reset_index(drop = True)

In [None]:
# Features: the first 1525 columns (positions 0..1524).
# Position 1525 is skipped — presumably the multi-class 'Label' column; confirm.
X_train_2018 = df_test_2018.iloc[:,0:1525]
# Target: the single column at position 1526, kept as a one-column DataFrame
# (the label-encoding cell addresses it as 'Label_binary').
# .copy() gives an independent frame, so the later in-place .loc encoding does not
# write through a slice of df_test_2018 (avoids SettingWithCopyWarning).
Y_train_2018 = df_test_2018.iloc[:,1526:1527].copy()

In [None]:
# Features: the first 1525 columns (positions 0..1524).
# Position 1525 is skipped — presumably the multi-class 'Label' column; confirm.
X_train_2017 = df_test.iloc[:,0:1525]
# Target: the single column at position 1526, kept as a one-column DataFrame
# (the label-encoding cell addresses it as 'Label_binary').
# .copy() gives an independent frame, so the later in-place .loc encoding does not
# write through a slice of df_test (avoids SettingWithCopyWarning).
Y_train_2017 = df_test.iloc[:,1526:1527].copy()

In [None]:
# Encode the CICIDS 2017 binary target in place: 'Benign' -> 0, then 'Malicious' -> 1.
for label_text, label_code in (('Benign', 0), ('Malicious', 1)):
    row_mask = Y_train_2017['Label_binary'] == label_text
    Y_train_2017.loc[row_mask, 'Label_binary'] = label_code

In [None]:
# Encode the CICIDS 2018 binary target in place: 'Benign' -> 0, then 'Malicious' -> 1.
for label_text, label_code in (('Benign', 0), ('Malicious', 1)):
    row_mask = Y_train_2018['Label_binary'] == label_text
    Y_train_2018.loc[row_mask, 'Label_binary'] = label_code

In [None]:
# Stack the 2017 rows on top of the 2018 rows, features and targets separately.
# The index is not reset, so each part keeps its own row labels.
X_train_total = pd.concat(
    [X_train_2017, X_train_2018],
    axis=0,
)
Y_train_total = pd.concat(
    [Y_train_2017, Y_train_2018],
    axis=0,
)

In [None]:
# Convert the combined frames to float32 NumPy arrays.
X_train_ = X_train_total.to_numpy().astype('float32')
Y_train_ = Y_train_total.to_numpy().astype('float32')

In [None]:
## Subsetting attack packets from CICIDS 2018 dataset
# Same recipe as for 2017: keep the rows of df_test_2018 carrying one 'Label' value,
# remove both label columns, then keep the first 200 remaining columns.
infiltration_attack_2018 = df_test_2018.loc[df_test_2018['Label'] == 'Infiltration'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(infiltration_attack_2018.shape)

portscan_attack_2018 = df_test_2018.loc[df_test_2018['Label'] == 'Port Scan'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(portscan_attack_2018.shape)

# NOTE(review): this label is lowercase 'ddos' while the 2017 cell filters on 'DDoS' —
# confirm the 2018 CSV really uses this spelling, otherwise the subset is empty.
ddos_attack_2018 = df_test_2018.loc[df_test_2018['Label'] == 'ddos'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(ddos_attack_2018.shape)

bruteforce_attack_2018 = df_test_2018.loc[df_test_2018['Label'] == 'Brute Force'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(bruteforce_attack_2018.shape)

dos_attack_2018 = df_test_2018.loc[df_test_2018['Label'] == 'DoS'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(dos_attack_2018.shape)

web_attack_2018 = df_test_2018.loc[df_test_2018['Label'] == 'Web Attack'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(web_attack_2018.shape)

benign_data_2018 = df_test_2018.loc[df_test_2018['Label'] == 'Benign'].drop(columns=['Label_binary', 'Label']).iloc[:, :200]
print(benign_data_2018.shape)

In [None]:
# Benign packets from both years, 2017 rows first; this frame is what the retrained
# autoencoder is fit on (as both input and target) in the training cell.
benign_data_total = pd.concat([benign_data, benign_data_2018])

In [None]:
## Loading the baseline stage 2 AE model
# The layers of this model are reused (same objects) when the retrained model is assembled below.
stage2_AE = tf.keras.models.load_model('Stage2_AE.h5')

In [None]:
## the pbRe reconstruction metric
def recon_metrices(data, reconstructed_data):
    """Return one mean-absolute-error score per row over the comparable positions.

    For every row the absolute difference between ``data`` and
    ``reconstructed_data`` is taken element-wise. Positions where either the
    original or the reconstructed value is exactly 0 are excluded (their error
    is forced to 0). The row score is the sum of the remaining errors divided by
    the number of non-zero errors in that row.

    Parameters
    ----------
    data : pandas.DataFrame or array-like, 2-D
        Original rows.
    reconstructed_data : array-like, 2-D
        Reconstructed rows, broadcast-compatible with ``data``.

    Returns
    -------
    list of float
        One score per row. A row with no non-zero error gets 0.0, so NaN never
        leaks into later averages or percentiles.
    """
    original = np.asarray(data)
    reconstructed = np.asarray(reconstructed_data)

    abs_err = np.absolute(original - reconstructed)
    # Ignore positions where either side is exactly zero.
    excluded = (original == 0) | (reconstructed == 0)
    abs_err = np.where(excluded, 0, abs_err)

    # Accumulate in float64 so long rows of float32 errors do not lose precision.
    row_totals = abs_err.sum(axis=1, dtype=np.float64)
    row_counts = np.count_nonzero(abs_err, axis=1)

    # Divide only where the denominator is non-zero; other rows stay at 0.0.
    mae_byte = np.zeros_like(row_totals)
    np.divide(row_totals, row_counts, out=mae_byte, where=row_counts > 0)
    return mae_byte.tolist()

In [None]:
# Reconstruct the CICIDS 2017 infiltration packets with the baseline autoencoder;
# the frame is converted to a float32 array before prediction.
inf = stage2_AE.predict(infiltration_attack.values.astype('float32'))

In [None]:
# Per-row reconstruction error of the baseline model on the infiltration packets.
mae_byte_list = recon_metrices(infiltration_attack,inf)

In [None]:
## Classification
# Share of rows whose error score is strictly above the threshold.
# NOTE(review): the 0.09 threshold is hard-coded; its derivation is not shown in this notebook.
count_99 = sum(1 for score in mae_byte_list if score > 0.09)
print(count_99 / len(mae_byte_list))

In [None]:
### Retraining with CICIDS 2018 data

In [None]:
## Modifiable layers
# Positions in stage2_AE.layers that are NOT frozen when the retrained model is
# assembled; every other layer gets trainable = False there.
mod_layers = [6,7,8,10,11,12]

In [None]:
## Modifiable neurons in modifiable layers
# Every third index, starting at 0, up to (but excluding) the given bound.
layer6_ind = list(range(0, 50, 3))
layer7_ind = list(range(0, 25, 3))
layer8_ind = list(range(0, 12, 3))
layer9_ind = list()
layer10_ind = list(range(0, 11, 3))
layer11_ind = list(range(0, 25, 3))
layer12_ind = list(range(0, 50, 3))

In [None]:
## Initializing a new autoencoder model
# Starts empty; layers are appended one by one in the architecture cell.
retrained_stage2 = tf.keras.Sequential()

In [None]:
## Architecture of the retrained model
# Walks the baseline model's layers in order and appends each one to retrained_stage2.
# The layer objects themselves are reused, so the trainable flag set here is set on
# the very objects held by stage2_AE.
# Two new Dense(50, relu) layers are inserted: 'Trainable1' right after layer 6 and
# 'Trainable2' right after layer 12.
# NOTE(review): `kernel[ind]` presumably yields a new tensor slice rather than a handle
# to part of the variable, so assigning `_trainable = False` on it likely does not
# freeze that row during fit() — confirm; per-row freezing normally needs gradient masking.
# NOTE(review): this cell is not idempotent — re-running it appends the layers again
# unless retrained_stage2 is re-initialized first.
for i, layer in enumerate(stage2_AE.layers):
    if i not in mod_layers:
        # Layers outside mod_layers are frozen as a whole.
        layer.trainable = False
        retrained_stage2.add(layer)
    else:
        retrained_stage2.add(layer)
        if i == 6:
            for ind in range(0,50):
                if ind in layer6_ind:
                    retrained_stage2.layers[-1].kernel[ind]._trainable = False
            # New trainable layer inserted after baseline layer 6.
            retrained_stage2.add(Dense(50, activation='relu', name = 'Trainable1'))
        if i == 7:
            for ind in range(0,25):
                if ind in layer7_ind:
                    retrained_stage2.layers[-1].kernel[ind]._trainable = False
                    
        if i == 8:
            for ind in range(0,12):
                if ind in layer8_ind:
                    retrained_stage2.layers[-1].kernel[ind]._trainable = False
        
        if i == 10:
            # NOTE(review): loop bound is 6 but layer10_ind was built up to 11,
            # so only indices 0 and 3 are ever visited — confirm the intended bound.
            for ind in range(0,6):
                if ind in layer10_ind:
                    retrained_stage2.layers[-1].kernel[ind]._trainable = False
        
        if i == 11:
            # NOTE(review): loop bound is 12 but layer11_ind was built up to 25 — confirm.
            for ind in range(0,12):
                if ind in layer11_ind:
                    retrained_stage2.layers[-1].kernel[ind]._trainable = False
        if i == 12:
            # NOTE(review): loop bound is 25 but layer12_ind was built up to 50 — confirm.
            for ind in range(0,25):
                if ind in layer12_ind:
                    retrained_stage2.layers[-1].kernel[ind]._trainable = False
            # New trainable layer inserted after baseline layer 12.
            retrained_stage2.add(Dense(50, activation='relu', name = 'Trainable2'))


In [None]:
# Show the assembled layer stack (reused baseline layers plus the two inserted Dense layers).
retrained_stage2.summary()

In [None]:
## Compiling and Training
# The timer brackets both compile() and fit(); the autoencoder is trained to
# reproduce its own input (benign packets from both years).
start_time = time.time()
retrained_stage2.compile(loss='mae', optimizer='adam')
retrained_stage2.fit(
    x=benign_data_total,
    y=benign_data_total,
    epochs=500,
    batch_size=256,
    shuffle=True,
)
end_time = time.time()

In [None]:
## Saving the retrained model
# Written to the working directory; the next cell reloads this same file.
retrained_stage2.save('Retrained_Anomaly_Detector.h5')

In [None]:
## Loading the retrained model
# NOTE(review): this import sits mid-notebook; the baseline model is loaded through
# tf.keras.models.load_model instead — consider moving the import to the first cell.
from tensorflow.keras.models import load_model

# Rebinds retrained_stage2 to the model read back from disk; all evaluation below uses it.
retrained_stage2 = load_model('Retrained_Anomaly_Detector.h5')

In [None]:
# Elapsed wall-clock seconds of the compile + fit cell.
# Depends on start_time / end_time from that cell, so it fails on a kernel where
# training was skipped and only the saved model was loaded.
time_taken = end_time - start_time
print(time_taken)

In [None]:
## CICIDS2017 and CICIDS2018 Testing retrained model

In [None]:
# Reconstruct each CICIDS 2017 subset with the retrained autoencoder.
# Here the DataFrames are passed to predict() directly, whereas the baseline cell
# converted to a float32 array first.
inf_ = retrained_stage2.predict(infiltration_attack)
port_ = retrained_stage2.predict(portscan_attack)
dos_ = retrained_stage2.predict(dos_attack)
ddos_ = retrained_stage2.predict(ddos_attack)
web_ = retrained_stage2.predict(web_attack)
brute_ = retrained_stage2.predict(bruteforce_attack)
ben_ = retrained_stage2.predict(benign_data)

In [None]:
# Per-row reconstruction error of the retrained model on CICIDS 2017 benign packets.
# Rebinds mae_byte_list, replacing the scores computed earlier.
mae_byte_list = recon_metrices(benign_data,ben_)

In [None]:
# Mean of the per-row error scores currently held in mae_byte_list.
mae_byte_avg = sum(mae_byte_list) / len(mae_byte_list)
print('mae_byte_avg', mae_byte_avg)

# 99th, 95th and 90th percentiles, printed in that order.
# The one name mae_byte_per_95 is reused for all three, so it ends up holding the 90th.
for pct_label, pct in (
    ('mae_byte_99_percentile', 99),
    ('mae_byte_95_percentile', 95),
    ('mae_byte_90_percentile', 90),
):
    mae_byte_per_95 = np.percentile(mae_byte_list, pct)
    print(pct_label, mae_byte_per_95)

In [None]:
# Reconstruct each CICIDS 2018 subset with the retrained autoencoder.
# NOTE(review): portscan_attack_2018 is built earlier but has no prediction here — confirm that is intended.
inf_2018 = retrained_stage2.predict(infiltration_attack_2018)
dos_2018 = retrained_stage2.predict(dos_attack_2018)
ddos_2018 = retrained_stage2.predict(ddos_attack_2018)
web_2018 = retrained_stage2.predict(web_attack_2018)
brute_2018 = retrained_stage2.predict(bruteforce_attack_2018)
ben_2018 = retrained_stage2.predict(benign_data_2018)

In [None]:
# Per-row reconstruction error of the retrained model on CICIDS 2018 benign packets.
# Rebinds mae_byte_list, replacing the 2017 benign scores.
mae_byte_list = recon_metrices(benign_data_2018,ben_2018)

In [None]:
## Classification
# Share of rows whose error score is strictly above the threshold.
# NOTE(review): the 0.22 threshold is hard-coded; its derivation is not shown in this notebook.
count_99 = sum(1 for score in mae_byte_list if score > 0.22)
print(count_99 / len(mae_byte_list))

In [None]:
# Print a count and a rate per CICIDS 2018 class from the retrained model's outputs.
# NOTE(review): accuracy_calc is not defined or imported in any visible cell, so this
# cell raises NameError on a fresh kernel — confirm where it is meant to come from.
# Judging by the unpacking, it is expected to return a (count, accuracy) pair.
print('Infiltration')
cnt_inf, acc_inf = accuracy_calc(inf_2018)
print(cnt_inf)
print(acc_inf)
print('DoS')
cnt_dos, acc_dos = accuracy_calc(dos_2018)
print(cnt_dos)
print(acc_dos)
print('DDoS')
cnt_ddos, acc_ddos = accuracy_calc(ddos_2018)
print(cnt_ddos)
print(acc_ddos)
print('Web')
cnt_web, acc_web = accuracy_calc(web_2018)
print(cnt_web)
print(acc_web)
print('Brute')
cnt_brute, acc_brute = accuracy_calc(brute_2018)
print(cnt_brute)
print(acc_brute)
print('Benign')
cnt_ben, acc_ben = accuracy_calc(ben_2018)
print(cnt_ben)
# For the benign class the complement of the returned rate is printed.
print(1-acc_ben)

In [None]:
## CICIDS2017 Testing Retrained Model

In [None]:
# Reconstruct the CICIDS 2017 subsets with the retrained model.
# retrained_stage2 is the only retrained model this notebook builds or loads; the
# name retrained_stage1 is never defined here and raised NameError on a fresh kernel.
# Note that `inf` rebinds the baseline reconstruction computed earlier.
inf = retrained_stage2.predict(infiltration_attack)
dos = retrained_stage2.predict(dos_attack)
ddos = retrained_stage2.predict(ddos_attack)
web = retrained_stage2.predict(web_attack)
brute = retrained_stage2.predict(bruteforce_attack)
ben = retrained_stage2.predict(benign_data)

In [None]:
# Print a count and a rate per CICIDS 2017 class from the model outputs of the previous cell.
# NOTE(review): accuracy_calc is not defined or imported in any visible cell, so this
# cell raises NameError on a fresh kernel — confirm where it is meant to come from.
# Judging by the unpacking, it is expected to return a (count, accuracy) pair.
print('Infiltration')
cnt_inf, acc_inf = accuracy_calc(inf)
print(cnt_inf)
print(acc_inf)
print('DoS')
cnt_dos, acc_dos = accuracy_calc(dos)
print(cnt_dos)
print(acc_dos)
print('DDoS')
cnt_ddos, acc_ddos = accuracy_calc(ddos)
print(cnt_ddos)
print(acc_ddos)
print('Web')
cnt_web, acc_web = accuracy_calc(web)
print(cnt_web)
print(acc_web)
print('Brute')
cnt_brute, acc_brute = accuracy_calc(brute)
print(cnt_brute)
print(acc_brute)
print('Benign')
cnt_ben, acc_ben = accuracy_calc(ben)
print(cnt_ben)
# For the benign class the complement of the returned rate is printed.
print(1-acc_ben)