In [1]:
## Importing Libraries
import pandas as pd
import numpy as np
import joblib
import pickle
from sklearn.utils import shuffle
from tensorflow.keras import Sequential,layers, losses, optimizers
from tensorflow.keras.layers import Dense
import tensorflow as tf
from tensorflow.keras.models import load_model
import time

In [None]:
## Loading the CICIDS2017 training dataset
# NOTE(review): df_train is not referenced again in the visible part of this
# notebook — the retraining inputs below are sliced from df_test instead.
df_train = pd.read_csv("Malicious_Packet_Detector_Training_Data.csv")

In [None]:
## Loading the CICIDS2017 testing dataset
# Later cells filter this frame by its 'Label' column to get per-attack
# subsets, and also slice X_train_2017 / Y_train_2017 out of it.
df_test = pd.read_csv("Malicious_Packet_Detector_Test_Data.csv")

In [None]:
## Extracting attacks from the testing data
def _features_2017(label):
    """Return the feature columns of the df_test rows whose 'Label' equals `label`.

    Both label columns are dropped and the first 1525 remaining columns are kept.
    """
    matching_rows = df_test.loc[df_test['Label'] == label]
    return matching_rows.drop(columns=['Label_binary', 'Label']).iloc[:, :1525]


# One feature frame per traffic class; the shape is printed as a quick sanity check.
infiltration_attack = _features_2017('Infiltration')
print(infiltration_attack.shape)
portscan_attack = _features_2017('Port Scan')
print(portscan_attack.shape)
ddos_attack = _features_2017('DDoS')
print(ddos_attack.shape)
bruteforce_attack = _features_2017('Brute Force')
print(bruteforce_attack.shape)
dos_attack = _features_2017('DoS')
print(dos_attack.shape)
web_attack = _features_2017('Web Attack')
print(web_attack.shape)
benign_data = _features_2017('Benign')
print(benign_data.shape)

In [None]:
## Loading the CICIDS 2018 data. The file is named as test data, but this frame
## is also the source of X_train_2018 / Y_train_2018 used for retraining below.
df_test_2018 = pd.read_csv("Malicious_Packet_Detector_Test_Data_2018.csv")

In [None]:
## Shuffle the CICIDS 2018 rows and renumber the index from 0.
## A fixed random_state keeps the row order identical across kernel restarts.
df_test_2018 = df_test_2018.sample(frac = 1, random_state = 42).reset_index(drop = True)

In [None]:
## Loading the botnet data from CICIDS2017 emulating Zero day attack
# `bot` feeds the retraining set; `bot_test` is only used to evaluate the
# retrained model on botnet rows.
bot = pd.read_csv("botnet_train_normalized.csv")
bot_test = pd.read_csv("botnet_test_normalized.csv")

In [None]:
# Feature columns (label dropped, first 1525 columns kept) of the rows labelled
# 'Botnet' in the botnet train and test files.
botnet = bot.loc[bot['Label'] == 'Botnet'].drop(columns=['Label']).iloc[:, :1525]
botnet_test = bot_test.loc[bot_test['Label'] == 'Botnet'].drop(columns=['Label']).iloc[:, :1525]

In [None]:
# Botnet training inputs: the first 1525 columns are features, column 1525 is the label.
X_train_bot = bot.iloc[:,0:1525]
# .copy() makes the label frame independent of `bot`; it is renamed and
# relabelled in place by later cells, which would otherwise act on a slice.
Y_train_bot = bot.iloc[:,1525:1526].copy()

In [None]:
# CICIDS 2018 retraining inputs: the first 1525 columns are features; column 1526
# is taken as the label (a later cell addresses it as 'Label_binary').
# NOTE(review): these rows come from df_test_2018, which is also the frame the
# 2018 evaluation subsets are filtered from — confirm the overlap is intended.
X_train_2018 = df_test_2018.iloc[:,0:1525]
# .copy() so the in-place relabelling below edits an independent frame.
Y_train_2018 = df_test_2018.iloc[:,1526:1527].copy()

In [None]:
# CICIDS 2017 retraining inputs: the first 1525 columns are features; column 1526
# is taken as the label (a later cell addresses it as 'Label_binary').
# NOTE(review): these rows come from df_test (not df_train), and the retrained
# model is later evaluated on subsets of df_test — confirm this is intended.
X_train_2017 = df_test.iloc[:,0:1525]
# .copy() so the in-place relabelling below edits an independent frame.
Y_train_2017 = df_test.iloc[:,1526:1527].copy()

In [None]:
# Rename the single botnet label column to 'Label_binary', the column name the
# 2017/2018 label frames are addressed by, so the frames can be stacked together.
Y_train_bot.columns = ['Label_binary']

In [None]:
# Encode the botnet string labels in place: 'Benign' -> 0, 'Botnet' -> 1.
for label_text, label_code in (('Benign', 0), ('Botnet', 1)):
    Y_train_bot.loc[Y_train_bot['Label_binary'] == label_text, 'Label_binary'] = label_code

In [None]:
# Encode the 2017 string labels in place: 'Benign' -> 0, 'Malicious' -> 1.
for label_text, label_code in (('Benign', 0), ('Malicious', 1)):
    Y_train_2017.loc[Y_train_2017['Label_binary'] == label_text, 'Label_binary'] = label_code

In [None]:
# Encode the 2018 string labels in place: 'Benign' -> 0, 'Malicious' -> 1.
for label_text, label_code in (('Benign', 0), ('Malicious', 1)):
    Y_train_2018.loc[Y_train_2018['Label_binary'] == label_text, 'Label_binary'] = label_code

In [None]:
# Stack the 2017, 2018 and botnet sets row-wise into one retraining set
# (features and labels are concatenated in the same order so rows stay aligned).
# NOTE(review): the botnet frames are listed twice, so their rows appear twice
# in the result — presumably deliberate oversampling; confirm.
X_train_total = pd.concat([X_train_2017,X_train_2018,X_train_bot,X_train_bot])
Y_train_total = pd.concat([Y_train_2017,Y_train_2018, Y_train_bot,Y_train_bot])

In [None]:
# Convert the combined frames to float32 NumPy arrays (the inputs later passed to fit).
X_train_ = X_train_total.to_numpy().astype('float32')
Y_train_ = Y_train_total.to_numpy().astype('float32')

In [None]:
## Extracting attacks from the CICIDS2018 dataset
def _features_2018(label):
    """Return the feature columns of the df_test_2018 rows whose 'Label' equals `label`.

    Both label columns are dropped and the first 1525 remaining columns are kept.
    """
    matching_rows = df_test_2018.loc[df_test_2018['Label'] == label]
    return matching_rows.drop(columns=['Label_binary', 'Label']).iloc[:, :1525]


# One feature frame per traffic class; the shape is printed as a quick sanity check.
# NOTE(review): the DDoS label is spelled 'ddos' here but 'DDoS' for the 2017
# data — confirm it matches the 2018 file, otherwise this frame is empty.
infiltration_attack_2018 = _features_2018('Infiltration')
print(infiltration_attack_2018.shape)
portscan_attack_2018 = _features_2018('Port Scan')
print(portscan_attack_2018.shape)
ddos_attack_2018 = _features_2018('ddos')
print(ddos_attack_2018.shape)
bruteforce_attack_2018 = _features_2018('Brute Force')
print(bruteforce_attack_2018.shape)
dos_attack_2018 = _features_2018('DoS')
print(dos_attack_2018.shape)
web_attack_2018 = _features_2018('Web Attack')
print(web_attack_2018.shape)
benign_data_2018 = _features_2018('Benign')
print(benign_data_2018.shape)

In [2]:
## Prediction Function
def predictions(y_test):
    """Threshold each score at 0.5 and wrap the resulting label in a list.

    y_test: iterable of scores; each item must support `> 0.5`.
    Returns a list holding one single-element list per score: [1] when the
    score is strictly greater than 0.5, otherwise [0].
    """
    return [[1] if score > 0.5 else [0] for score in y_test]

In [3]:
## Calculates detection accuracy
def accuracy_calc(inf):
    """Count the scores not flagged as malicious and the fraction that were flagged.

    A score is "flagged" when it is strictly greater than 0.5. No ground-truth
    labels are consulted, so the returned fraction is a detection rate only
    when every input sample is an attack; for benign samples it is the
    false-alarm rate.

    inf: model scores, e.g. an array of shape (n, 1) or a flat sequence.
    Returns (cnt, acc): cnt is the number of scores that are NOT flagged,
    acc is 1 - cnt / n. For empty input, (0, nan) is returned instead of
    raising ZeroDivisionError (a label filter that matches no rows produces
    such an input).
    """
    scores = np.asarray(inf).ravel()
    total = scores.size
    if total == 0:
        return 0, float('nan')
    # `~(scores > 0.5)` rather than `scores <= 0.5` so NaN scores count as
    # not flagged, matching a plain `score > 0.5` test.
    cnt = int(np.count_nonzero(~(scores > 0.5)))
    acc = 1 - (cnt / total)
    return cnt, acc

In [4]:
## Loading the Stage 1 baseline DNN
# compile = False: the saved model is loaded without compiling it; this
# notebook only calls predict() on it and reuses its layers.
stage1_DNN = tf.keras.models.load_model('Malicious_Packet_Detector_Baseline.h5', compile = False)

In [None]:
## Testing the Baseline DNN against unseen CICIDS 2017 test data

In [23]:
# Baseline scores for the 2017 infiltration samples (features cast to float32).
infiltration_features = infiltration_attack.values.astype('float32')
inf = stage1_DNN.predict(infiltration_features)

In [29]:
# Print the number of infiltration samples not flagged, then the flagged fraction.
cnt_inf, acc_inf = accuracy_calc(inf)
print(cnt_inf, acc_inf, sep='\n')

27
0.9997628271009567


In [None]:
# Baseline scores for the 2017 port-scan samples (features cast to float32).
portscan_features = portscan_attack.values.astype('float32')
port = stage1_DNN.predict(portscan_features)

In [None]:
# Print the number of port-scan samples not flagged, then the flagged fraction.
cnt_port, acc_port = accuracy_calc(port)
print(cnt_port, acc_port, sep='\n')

In [None]:
# Baseline scores for the 2017 DoS samples (features cast to float32).
dos_features = dos_attack.values.astype('float32')
dos = stage1_DNN.predict(dos_features)

In [None]:
# Print the number of DoS samples not flagged, then the flagged fraction.
cnt_dos, acc_dos = accuracy_calc(dos)
print(cnt_dos, acc_dos, sep='\n')

In [None]:
# Baseline scores for the 2017 DDoS samples (features cast to float32).
ddos_features = ddos_attack.values.astype('float32')
ddos = stage1_DNN.predict(ddos_features)

In [None]:
# Print the number of DDoS samples not flagged, then the flagged fraction.
cnt_ddos, acc_ddos = accuracy_calc(ddos)
print(cnt_ddos, acc_ddos, sep='\n')

In [None]:
# Baseline scores for the 2017 web-attack samples (features cast to float32).
web_features = web_attack.values.astype('float32')
web = stage1_DNN.predict(web_features)

In [None]:
# Print the number of web-attack samples not flagged, then the flagged fraction.
cnt_web, acc_web = accuracy_calc(web)
print(cnt_web, acc_web, sep='\n')

In [None]:
# Baseline scores for the 2017 brute-force samples (features cast to float32).
bruteforce_features = bruteforce_attack.values.astype('float32')
brute = stage1_DNN.predict(bruteforce_features)

In [None]:
# Print the number of brute-force samples not flagged, then the flagged fraction.
cnt_brute, acc_brute = accuracy_calc(brute)
print(cnt_brute, acc_brute, sep='\n')

In [None]:
# Baseline scores for the 2017 benign samples (features cast to float32).
benign_features = benign_data.values.astype('float32')
ben = stage1_DNN.predict(benign_features)

In [None]:
# For benign traffic the "not flagged" count is the correctly passed samples,
# so the second value printed is 1 - flagged fraction (share left unflagged).
cnt_benign, acc_benign = accuracy_calc(ben)
print(cnt_benign, 1-acc_benign, sep='\n')

In [None]:
### Testing the Baseline DNN against CICIDS 2018 data

In [None]:
# Baseline scores for each CICIDS 2018 subset. The frames are converted to
# float32 arrays, exactly as the 2017 cells do, so the model gets the same
# input type and dtype for both datasets instead of raw DataFrames.
# NOTE(review): portscan_attack_2018 is extracted earlier but not scored here.
inf_2018 = stage1_DNN.predict(infiltration_attack_2018.values.astype('float32'))
dos_2018 = stage1_DNN.predict(dos_attack_2018.values.astype('float32'))
ddos_2018 = stage1_DNN.predict(ddos_attack_2018.values.astype('float32'))
web_2018 = stage1_DNN.predict(web_attack_2018.values.astype('float32'))
brute_2018 = stage1_DNN.predict(bruteforce_attack_2018.values.astype('float32'))
ben_2018 = stage1_DNN.predict(benign_data_2018.values.astype('float32'))

In [None]:
# Per-class baseline results on CICIDS 2018: class name, number of samples
# not flagged, then the flagged fraction (for Benign: 1 - flagged fraction).
print('Infiltration')
cnt_inf, acc_inf = accuracy_calc(inf_2018)
print(cnt_inf, acc_inf, sep='\n')

print('DoS')
cnt_dos, acc_dos = accuracy_calc(dos_2018)
print(cnt_dos, acc_dos, sep='\n')

print('DDoS')
cnt_ddos, acc_ddos = accuracy_calc(ddos_2018)
print(cnt_ddos, acc_ddos, sep='\n')

print('Web')
cnt_web, acc_web = accuracy_calc(web_2018)
print(cnt_web, acc_web, sep='\n')

print('Brute')
cnt_brute, acc_brute = accuracy_calc(brute_2018)
print(cnt_brute, acc_brute, sep='\n')

print('Benign')
cnt_ben, acc_ben = accuracy_calc(ben_2018)
print(cnt_ben, 1-acc_ben, sep='\n')

## Retraining with new data (CICIDS2018 and botnet samples, combined with CICIDS2017)

In [None]:
## Positions (indices into stage1_DNN.layers) of the base-DNN layers that stay
## modifiable; every other layer is frozen when the retrained model is assembled.
mod_layers = [2,3]

In [None]:
## Neurons that are not trainable in the modifiable layers
# Every third index: 0, 3, ..., 63 for layer 2 and 0, 3, ..., 30 for layer 3.
layer2_ind = list(range(0, 64, 3))
layer3_ind = list(range(0, 32, 3))

In [None]:
## Initializing the new model
# Starts as an empty Sequential container; the next cell adds the baseline
# model's layers to it one by one.
retrained_stage1 = tf.keras.Sequential()

In [None]:
## Setting up the architecture of the new retrained model
# Layers outside mod_layers are frozen and reused as-is; layers 2 and 3 stay
# trainable, and a fresh Dense(32, relu) layer is inserted right after layer 3.
# The layer objects are shared with stage1_DNN, not cloned.
#
# NOTE(review): `kernel[ind]` appears to produce a new tensor slice (a kernel
# row) rather than a handle on the underlying variable, so setting `_trainable`
# on it likely freezes nothing during fit() — confirm. Per-neuron freezing
# normally needs gradient masking in the training step.

# layer position -> (number of indices scanned, indices marked non-trainable)
partial_freeze = {2: (64, layer2_ind), 3: (32, layer3_ind)}

for i, layer in enumerate(stage1_DNN.layers):
    modifiable = i in mod_layers
    if not modifiable:
        layer.trainable = False
    retrained_stage1.add(layer)
    if not modifiable:
        continue

    if i in partial_freeze:
        span, frozen_indices = partial_freeze[i]
        for ind in range(0, span):
            if ind in frozen_indices:
                retrained_stage1.layers[-1].kernel[ind]._trainable = False

    if i == 3:
        # Extra trainable capacity placed after the last modifiable layer.
        retrained_stage1.add(Dense(32, activation='relu'))
        


In [None]:
# Print the layer-by-layer summary of the assembled model.
retrained_stage1.summary()

In [None]:
## Compilation and training of the new retrained model
# The timed window (start_time .. end_time) covers both compile() and fit().
# NOTE(review): no random seed is set in the visible notebook, so the new Dense
# layer's initial weights and the fit() shuffling differ from run to run.
start_time = time.time()
retrained_stage1.compile(optimizer='adam', loss= 'binary_crossentropy', metrics=['accuracy'])
retrained_stage1.fit(X_train_ , Y_train_, batch_size=256, epochs=50, shuffle=True)
end_time = time.time()

In [None]:
## Saving the new model
# Written to an .h5 file in the current working directory.
retrained_stage1.save('Retrained_Malicious_Packet_Detector.h5')

## Testing the retrained model against zero-day and CICIDS 2018 data

In [None]:
# Wall-clock seconds spent in the compile + fit cell (time.time() difference).
time_taken = end_time - start_time
print(time_taken)

In [None]:
# Retrained-model scores for the botnet rows of the botnet test file (float32 input).
botnet_test_features = botnet_test.values.astype('float32')
bot_ = retrained_stage1.predict(botnet_test_features)

In [None]:
# Print the number of botnet samples not flagged, then the flagged fraction.
cnt_bot, acc_bot = accuracy_calc(bot_)
print(cnt_bot, acc_bot, sep='\n')

In [None]:
### CICIDS2018 Testing with retrained model

In [None]:
# Retrained-model scores for each CICIDS 2018 subset. The frames are converted
# to float32 arrays, as the botnet and 2017 baseline cells do, so every
# predict() call gets the same input type and dtype instead of raw DataFrames.
# NOTE(review): these subsets are filtered from df_test_2018, which also
# supplied the 2018 retraining rows — confirm the overlap is intended.
inf_2018 = retrained_stage1.predict(infiltration_attack_2018.values.astype('float32'))
dos_2018 = retrained_stage1.predict(dos_attack_2018.values.astype('float32'))
ddos_2018 = retrained_stage1.predict(ddos_attack_2018.values.astype('float32'))
web_2018 = retrained_stage1.predict(web_attack_2018.values.astype('float32'))
brute_2018 = retrained_stage1.predict(bruteforce_attack_2018.values.astype('float32'))
ben_2018 = retrained_stage1.predict(benign_data_2018.values.astype('float32'))

In [None]:
# Per-class results of the retrained model on CICIDS 2018: class name, number
# of samples not flagged, then the flagged fraction (for Benign: 1 - flagged fraction).
print('Infiltration')
cnt_inf, acc_inf = accuracy_calc(inf_2018)
print(cnt_inf, acc_inf, sep='\n')

print('DoS')
cnt_dos, acc_dos = accuracy_calc(dos_2018)
print(cnt_dos, acc_dos, sep='\n')

print('DDoS')
cnt_ddos, acc_ddos = accuracy_calc(ddos_2018)
print(cnt_ddos, acc_ddos, sep='\n')

print('Web')
cnt_web, acc_web = accuracy_calc(web_2018)
print(cnt_web, acc_web, sep='\n')

print('Brute')
cnt_brute, acc_brute = accuracy_calc(brute_2018)
print(cnt_brute, acc_brute, sep='\n')

print('Benign')
cnt_ben, acc_ben = accuracy_calc(ben_2018)
print(cnt_ben, 1-acc_ben, sep='\n')

In [None]:
## CICIDS2017 Testing

In [None]:
# Retrained-model scores for each CICIDS 2017 subset. The frames are converted
# to float32 arrays, as the baseline 2017 cells do, so both models are scored
# on identically typed inputs instead of raw DataFrames.
# NOTE(review): these subsets are filtered from df_test, which also supplied
# the 2017 retraining rows — confirm the overlap is intended.
port = retrained_stage1.predict(portscan_attack.values.astype('float32'))
inf = retrained_stage1.predict(infiltration_attack.values.astype('float32'))
dos = retrained_stage1.predict(dos_attack.values.astype('float32'))
ddos = retrained_stage1.predict(ddos_attack.values.astype('float32'))
web = retrained_stage1.predict(web_attack.values.astype('float32'))
brute = retrained_stage1.predict(bruteforce_attack.values.astype('float32'))
ben = retrained_stage1.predict(benign_data.values.astype('float32'))

In [None]:
# Per-class results of the retrained model on CICIDS 2017: class name, number
# of samples not flagged, then the flagged fraction (for Benign: 1 - flagged fraction).
print('Portscan')
cnt_port, acc_port = accuracy_calc(port)
print(cnt_port, acc_port, sep='\n')

print('Infiltration')
cnt_inf, acc_inf = accuracy_calc(inf)
print(cnt_inf, acc_inf, sep='\n')

print('DoS')
cnt_dos, acc_dos = accuracy_calc(dos)
print(cnt_dos, acc_dos, sep='\n')

print('DDoS')
cnt_ddos, acc_ddos = accuracy_calc(ddos)
print(cnt_ddos, acc_ddos, sep='\n')

print('Web')
cnt_web, acc_web = accuracy_calc(web)
print(cnt_web, acc_web, sep='\n')

print('Brute')
cnt_brute, acc_brute = accuracy_calc(brute)
print(cnt_brute, acc_brute, sep='\n')

print('Benign')
cnt_ben, acc_ben = accuracy_calc(ben)
print(cnt_ben, 1-acc_ben, sep='\n')