In [None]:
!pip3 install pandas
!pip3 install seaborn
!pip3 install --upgrade tensorflow-gpu
!pip3 install import-ipynb
!pip3 install cuda-python

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import glob
import math
import pickle
import time
import sys

import import_ipynb
import AttackClassificationModel

import tensorflow as tf
tf.compat.v1.disable_eager_execution()
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ReduceLROnPlateau

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from bayes_opt import BayesianOptimization

%matplotlib inline

In [None]:
# Limit this process to a 0.333 fraction of the GPU memory and open a TF1-style session with that setting.
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction = 0.333)
session_config = tf.compat.v1.ConfigProto(gpu_options = gpu_options)
sess = tf.compat.v1.Session(config = session_config)

# Data Pre-Processing

In [None]:
# Load the Mirai and Gafgyt captures of the SimpleHome XCS7-1003 security camera.
df_mirai = AttackClassificationModel.get_files_nbaiot_mirai('../../nbaiot/SimpleHome_XCS7_1003_WHT_Security_Camera/mirai')
df_gafgyt = AttackClassificationModel.get_files_nbaiot_gafgyt('../../nbaiot/SimpleHome_XCS7_1003_WHT_Security_Camera/gafgyt')

# Attack ids 0-4 come from the Mirai data: Ack, Scan, Syn, Udp, Udpplain.
(
    (df_mirai_ack, label_mirai_ack),
    (df_mirai_scan, label_mirai_scan),
    (df_mirai_syn, label_mirai_syn),
    (df_mirai_udp, label_mirai_udp),
    (df_mirai_udpplain, label_mirai_udpplain),
) = [AttackClassificationModel.get_attack(attack_id, df_mirai) for attack_id in range(0, 5)]

# Attack ids 5-9 come from the Gafgyt data: Combo, Junk, Scan, Tcp, Udp.
(
    (df_gafgyt_combo, label_gafgyt_combo),
    (df_gafgyt_junk, label_gafgyt_junk),
    (df_gafgyt_scan, label_gafgyt_scan),
    (df_gafgyt_tcp, label_gafgyt_tcp),
    (df_gafgyt_udp, label_gafgyt_udp),
) = [AttackClassificationModel.get_attack(attack_id, df_gafgyt) for attack_id in range(5, 10)]

In [None]:
# Normalize with the min-max scaler
#
# One scaler is fitted on the training rows of ALL attack classes and then applied
# to every class. Fitting a separate scaler per class (as was done before) maps each
# class to [0, 1] with its own min/max, so the same raw feature value ends up at a
# different scaled value depending on the label, and the test/validation rows take
# part in the fit. Rows outside the training range may now fall outside [0, 1].
train_fraction = 0.7  # must match the 70% training split taken in the "Sets" section

attack_frames = [df_mirai_ack, df_mirai_scan, df_mirai_syn, df_mirai_udp, df_mirai_udpplain,
                 df_gafgyt_combo, df_gafgyt_junk, df_gafgyt_scan, df_gafgyt_udp, df_gafgyt_tcp]

# Leading 70% of each class, i.e. exactly the rows that later become X_train.
train_rows = np.concatenate([np.asarray(frame)[:int(train_fraction * len(frame))] for frame in attack_frames])

scaler = MinMaxScaler()
scaler.fit(train_rows)

df_mirai_ack_norm = scaler.transform(np.asarray(df_mirai_ack))
df_mirai_scan_norm = scaler.transform(np.asarray(df_mirai_scan))
df_mirai_syn_norm = scaler.transform(np.asarray(df_mirai_syn))
df_mirai_udp_norm = scaler.transform(np.asarray(df_mirai_udp))
df_mirai_udpplain_norm = scaler.transform(np.asarray(df_mirai_udpplain))
df_gafgyt_combo_norm = scaler.transform(np.asarray(df_gafgyt_combo))
df_gafgyt_junk_norm = scaler.transform(np.asarray(df_gafgyt_junk))
df_gafgyt_scan_norm = scaler.transform(np.asarray(df_gafgyt_scan))
df_gafgyt_udp_norm = scaler.transform(np.asarray(df_gafgyt_udp))
df_gafgyt_tcp_norm = scaler.transform(np.asarray(df_gafgyt_tcp))

# Sets

In [None]:
# Train
# The first 70% of the rows of every attack class form the training set.
len_mirai_ack_train = int(0.7 * len(df_mirai_ack_norm))
X_mirai_ack_train = df_mirai_ack_norm[:len_mirai_ack_train]

len_mirai_scan_train = int(0.7 * len(df_mirai_scan_norm))
X_mirai_scan_train = df_mirai_scan_norm[:len_mirai_scan_train]

len_mirai_syn_train = int(0.7 * len(df_mirai_syn_norm))
X_mirai_syn_train = df_mirai_syn_norm[:len_mirai_syn_train]

len_mirai_udp_train = int(0.7 * len(df_mirai_udp_norm))
X_mirai_udp_train = df_mirai_udp_norm[:len_mirai_udp_train]

len_mirai_udpplain_train = int(0.7 * len(df_mirai_udpplain_norm))
X_mirai_udpplain_train = df_mirai_udpplain_norm[:len_mirai_udpplain_train]

len_gafgyt_combo_train = int(0.7 * len(df_gafgyt_combo_norm))
X_gafgyt_combo_train = df_gafgyt_combo_norm[:len_gafgyt_combo_train]

len_gafgyt_junk_train = int(0.7 * len(df_gafgyt_junk_norm))
X_gafgyt_junk_train = df_gafgyt_junk_norm[:len_gafgyt_junk_train]

len_gafgyt_scan_train = int(0.7 * len(df_gafgyt_scan_norm))
X_gafgyt_scan_train = df_gafgyt_scan_norm[:len_gafgyt_scan_train]

len_gafgyt_tcp_train = int(0.7 * len(df_gafgyt_tcp_norm))
X_gafgyt_tcp_train = df_gafgyt_tcp_norm[:len_gafgyt_tcp_train]

len_gafgyt_udp_train = int(0.7 * len(df_gafgyt_udp_norm))
X_gafgyt_udp_train = df_gafgyt_udp_norm[:len_gafgyt_udp_train]

# Samples and labels must be concatenated in the same class order, otherwise rows are
# paired with the label of a different class. X_train used to list Gafgyt udp before
# tcp while Y_train listed tcp before udp; both now use (..., scan, tcp, udp).
X_train = np.concatenate([X_mirai_ack_train, X_mirai_scan_train, X_mirai_syn_train,
                          X_mirai_udp_train, X_mirai_udpplain_train,
                          X_gafgyt_combo_train, X_gafgyt_junk_train,
                          X_gafgyt_scan_train, X_gafgyt_tcp_train, X_gafgyt_udp_train])
Y_train = np.concatenate([label_mirai_ack[:len_mirai_ack_train], label_mirai_scan[:len_mirai_scan_train], label_mirai_syn[:len_mirai_syn_train],
                          label_mirai_udp[:len_mirai_udp_train], label_mirai_udpplain[:len_mirai_udpplain_train],
                          label_gafgyt_combo[:len_gafgyt_combo_train], label_gafgyt_junk[:len_gafgyt_junk_train],
                          label_gafgyt_scan[:len_gafgyt_scan_train], label_gafgyt_tcp[:len_gafgyt_tcp_train], label_gafgyt_udp[:len_gafgyt_udp_train]])

In [None]:
# Test
# The next 15% of the rows of every attack class (right after its training rows) form the test set.
len_mirai_ack_test = len_mirai_ack_train + int(0.15 * len(df_mirai_ack_norm))
X_mirai_ack_test = df_mirai_ack_norm[len_mirai_ack_train : len_mirai_ack_test]

len_mirai_scan_test = len_mirai_scan_train + int(0.15 * len(df_mirai_scan_norm))
X_mirai_scan_test = df_mirai_scan_norm[len_mirai_scan_train : len_mirai_scan_test]

len_mirai_syn_test = len_mirai_syn_train + int(0.15 * len(df_mirai_syn_norm))
X_mirai_syn_test = df_mirai_syn_norm[len_mirai_syn_train : len_mirai_syn_test]

len_mirai_udp_test = len_mirai_udp_train + int(0.15 * len(df_mirai_udp_norm))
X_mirai_udp_test = df_mirai_udp_norm[len_mirai_udp_train : len_mirai_udp_test]

len_mirai_udpplain_test = len_mirai_udpplain_train + int(0.15 * len(df_mirai_udpplain_norm))
X_mirai_udpplain_test = df_mirai_udpplain_norm[len_mirai_udpplain_train : len_mirai_udpplain_test]

len_gafgyt_combo_test = len_gafgyt_combo_train + int(0.15 * len(df_gafgyt_combo_norm))
X_gafgyt_combo_test = df_gafgyt_combo_norm[len_gafgyt_combo_train : len_gafgyt_combo_test]

len_gafgyt_junk_test = len_gafgyt_junk_train + int(0.15 * len(df_gafgyt_junk_norm))
X_gafgyt_junk_test = df_gafgyt_junk_norm[len_gafgyt_junk_train : len_gafgyt_junk_test]

len_gafgyt_scan_test = len_gafgyt_scan_train + int(0.15 * len(df_gafgyt_scan_norm))
X_gafgyt_scan_test = df_gafgyt_scan_norm[len_gafgyt_scan_train : len_gafgyt_scan_test]

len_gafgyt_tcp_test = len_gafgyt_tcp_train + int(0.15 * len(df_gafgyt_tcp_norm))
X_gafgyt_tcp_test = df_gafgyt_tcp_norm[len_gafgyt_tcp_train : len_gafgyt_tcp_test]

len_gafgyt_udp_test = len_gafgyt_udp_train + int(0.15 * len(df_gafgyt_udp_norm))
X_gafgyt_udp_test = df_gafgyt_udp_norm[len_gafgyt_udp_train : len_gafgyt_udp_test]

# Samples and labels must be concatenated in the same class order, otherwise rows are
# paired with the label of a different class. X_test used to list Gafgyt udp before
# tcp while Y_test listed tcp before udp; both now use (..., scan, tcp, udp).
X_test = np.concatenate([X_mirai_ack_test, X_mirai_scan_test, X_mirai_syn_test,
                         X_mirai_udp_test, X_mirai_udpplain_test,
                         X_gafgyt_combo_test, X_gafgyt_junk_test,
                         X_gafgyt_scan_test, X_gafgyt_tcp_test, X_gafgyt_udp_test])
Y_test = np.concatenate([label_mirai_ack[len_mirai_ack_train : len_mirai_ack_test], label_mirai_scan[len_mirai_scan_train : len_mirai_scan_test], label_mirai_syn[len_mirai_syn_train : len_mirai_syn_test],
                         label_mirai_udp[len_mirai_udp_train : len_mirai_udp_test], label_mirai_udpplain[len_mirai_udpplain_train : len_mirai_udpplain_test],
                         label_gafgyt_combo[len_gafgyt_combo_train : len_gafgyt_combo_test], label_gafgyt_junk[len_gafgyt_junk_train : len_gafgyt_junk_test],
                         label_gafgyt_scan[len_gafgyt_scan_train : len_gafgyt_scan_test], label_gafgyt_tcp[len_gafgyt_tcp_train : len_gafgyt_tcp_test], label_gafgyt_udp[len_gafgyt_udp_train : len_gafgyt_udp_test]])


In [None]:
# Validation
# Whatever is left of each attack class after its train and test rows forms the validation set.
X_mirai_ack_val = df_mirai_ack_norm[len_mirai_ack_test:]
X_mirai_scan_val = df_mirai_scan_norm[len_mirai_scan_test:]
X_mirai_syn_val = df_mirai_syn_norm[len_mirai_syn_test:]
X_mirai_udp_val = df_mirai_udp_norm[len_mirai_udp_test:]
X_mirai_udpplain_val = df_mirai_udpplain_norm[len_mirai_udpplain_test:]
X_gafgyt_combo_val = df_gafgyt_combo_norm[len_gafgyt_combo_test:]
X_gafgyt_junk_val = df_gafgyt_junk_norm[len_gafgyt_junk_test:]
X_gafgyt_scan_val = df_gafgyt_scan_norm[len_gafgyt_scan_test:]
X_gafgyt_tcp_val = df_gafgyt_tcp_norm[len_gafgyt_tcp_test:]
X_gafgyt_udp_val = df_gafgyt_udp_norm[len_gafgyt_udp_test:]

# Samples and labels must be concatenated in the same class order, otherwise rows are
# paired with the label of a different class. X_val used to list Gafgyt udp before
# tcp while Y_val listed tcp before udp; both now use (..., scan, tcp, udp).
X_val = np.concatenate([X_mirai_ack_val, X_mirai_scan_val, X_mirai_syn_val,
                        X_mirai_udp_val, X_mirai_udpplain_val,
                        X_gafgyt_combo_val, X_gafgyt_junk_val,
                        X_gafgyt_scan_val, X_gafgyt_tcp_val, X_gafgyt_udp_val])
Y_val = np.concatenate([label_mirai_ack[len_mirai_ack_test:], label_mirai_scan[len_mirai_scan_test:], label_mirai_syn[len_mirai_syn_test:],
                        label_mirai_udp[len_mirai_udp_test:], label_mirai_udpplain[len_mirai_udpplain_test:],
                        label_gafgyt_combo[len_gafgyt_combo_test:], label_gafgyt_junk[len_gafgyt_junk_test:],
                        label_gafgyt_scan[len_gafgyt_scan_test:], label_gafgyt_tcp[len_gafgyt_tcp_test:], label_gafgyt_udp[len_gafgyt_udp_test:]])

In [None]:
# Give every split a trailing axis of length 1: (rows, columns) -> (rows, columns, 1).
X_train, X_test, X_val = (
    split.reshape(split.shape[0], split.shape[1], 1)
    for split in (X_train, X_test, X_val)
)

# Dimensions of the training tensor; `feature` and `depth` are passed to the CNN builder.
samples, feature, depth = X_train.shape

# Model

In [None]:
# Initial values
learning_rate = 0.008
batch_size = 50
epochs = 10

# Dictionary
# Starting hyperparameters; batch size and epochs are rounded to whole numbers.
dict_params = dict(learning_rate = learning_rate,
                   batch_size = round(batch_size),
                   epochs = round(epochs))
# (lower, upper) search interval for each hyperparameter.
pbounds = dict(learning_rate = (0.000001, 0.001),
               batch_size = (10, 23),
               epochs = (1, 100))

In [None]:
# CNN classifier sized from the (feature, depth) dimensions of the training tensor.
cnn_model = AttackClassificationModel.CNN_nbaiot(feature, depth)
# Multiply the learning rate by 0.1 once val_loss has gone 10 epochs without improving.
# (The keyword was misspelled `moniter`, so the intended setting was never actually passed.)
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.1, patience = 10)

# Hyperparameter Optimization

In [None]:
def maximize_training(X_train = X_train,
                      Y_train = Y_train, 
                      X_val = X_val, 
                      Y_val = Y_val, 
                      X_test = X_test, 
                      Y_test = Y_test, 
                      learning_rate = learning_rate, 
                      epochs = epochs, 
                      batch_size = batch_size,
                      reduce_lr = reduce_lr):
    """Train a fresh CNN with the given hyperparameters and return its accuracy.

    This is the objective function handed to BayesianOptimization, which calls it
    with `learning_rate`, `epochs` and `batch_size` drawn from `pbounds`.

    Parameters:
        X_train, Y_train: samples and labels used for fitting.
        X_val, Y_val: samples and labels passed to `fit` as validation data.
        X_test, Y_test: samples and labels the returned accuracy is measured on.
        learning_rate: learning rate for the Nadam optimizer.
        epochs: number of training epochs; rounded to the nearest integer.
        batch_size: mini-batch size; rounded to the nearest integer.
        reduce_lr: Keras callback applied during fitting.

    Returns:
        The second value returned by `model.evaluate`, i.e. the "accuracy" metric.
    """
    # NOTE(review): the score is measured on X_test, so the hyperparameters are tuned
    # against the same data that is later used for the final evaluation.

    # The optimizer proposes floats for every parameter; Keras needs whole numbers here.
    epochs = int(round(epochs))
    batch_size = int(round(batch_size))

    # Use the proposed learning rate (previously the fixed value in dict_params was read
    # instead, so every optimization step trained with identical settings).
    nadam = optimizers.Nadam(learning_rate = learning_rate, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-08, schedule_decay = 0.004)
    model = AttackClassificationModel.CNN_nbaiot(feature, depth)
    # Pass the optimizer instance; the string "nadam" would create a default-configured
    # optimizer and silently drop the learning rate set above.
    model.compile(loss = "sparse_categorical_crossentropy", optimizer = nadam, metrics = ["accuracy"])
    
    model.fit(X_train, Y_train, 
              epochs = epochs, 
              batch_size = batch_size, 
              validation_data = (X_val, Y_val),
              callbacks = [reduce_lr],
              verbose = 0)

    scores = model.evaluate(X_test, Y_test)
    return scores[1]

In [None]:
# Apply Bayesian optimization to choose the best hyperparameters

opt = BayesianOptimization(
    f = maximize_training,
    pbounds = pbounds,
    verbose = 2,
    random_state = 1,
)

# Wall-clock time of the whole search (init_points = 5, n_iter = 5).
opt_start = time.time()
opt.maximize(init_points = 5, n_iter = 5)
opt_end = time.time()

opt_time = opt_end - opt_start
print("Optimization time:", opt_time)

# Keep the parameters of the best-scoring run; epochs and batch size are rounded to integers.
best_params = opt.max['params']
learning_rate = best_params['learning_rate']
epochs = round(best_params['epochs'])
batch_size = round(best_params['batch_size'])

# Classify Attacks

In [None]:
# Training step with the best hyperparameters
#learning_rate = 0.0006855342808963628
#epochs = 1
#batch_size = 21
#opt_time = 3915.1859345436096

In [None]:
nadam = optimizers.Nadam(learning_rate = learning_rate, beta_1 = 0.9, beta_2 = 0.999, epsilon = 1e-08, schedule_decay = 0.004)
# Pass the optimizer instance: the string "nadam" would build a default-configured
# optimizer and the learning rate selected above would never be used.
cnn_model.compile(loss = "sparse_categorical_crossentropy", optimizer = nadam, metrics = ["accuracy"])

for i in range(1):
    print("Iteration " + str(i))
    
    # Train
    cnn_model, train_time = AttackClassificationModel.train(X_train, Y_train, X_val, Y_val, epochs, batch_size, reduce_lr, cnn_model)
    
    # Test
    Y_pred, test_time = AttackClassificationModel.test(X_test, cnn_model)
    print(Y_pred)
    print(Y_test)
    
    # Multi classification metrics
    # Display names listed by attack id (0-9). Ids 8 and 9 are assigned to Gafgyt Tcp and
    # Gafgyt Udp when the data is loaded, so Tcp must come before Udp here.
    # NOTE(review): assumes get_scores maps names to classes by position - confirm.
    labels = ['Mirai_Ack','Mirai_Scan','Mirai_Syn','Mirai_Udp','Mirai_Udpplain', 'Bashlite_Combo','Bashlite_Junk','Bashlite_Scan','Bashlite_Tcp', 'Bashlite_Udp']
    acc, f1, pre, rec = AttackClassificationModel.get_scores(Y_test, Y_pred, labels)
    
    # Print results
    AttackClassificationModel.print_results(learning_rate,
                                            epochs,
                                            batch_size,
                                            X_train,
                                            X_val,
                                            X_test,
                                            opt_time,
                                            train_time,
                                            test_time,
                                            acc,
                                            f1,
                                            pre,
                                            rec,
                                            Y_test,
                                            Y_pred,
                                            "CNN",
                                            "Results/S1003_camera.txt")
    
    print("")

In [None]:
# Confusion matrix of the CNN predictions on the test set.
conf_matrix = confusion_matrix(Y_test, Y_pred)

fig, ax = plt.subplots(figsize = (4, 4))
ax.matshow(conf_matrix, cmap = plt.cm.Blues, alpha = 0.3)

# Write every count in the middle of its cell.
for row, row_counts in enumerate(conf_matrix):
    for col, count in enumerate(row_counts):
        ax.text(x = col, y = row, s = count, va = 'center', ha = 'center', size = 'large')

ax.set_xlabel('Predictions', fontsize = 12)
ax.set_ylabel('Actuals', fontsize = 12)
ax.set_title('Confusion Matrix', fontsize = 14)
plt.savefig("Results/Confusion Matrix/S3 CNN")
plt.show()

# NB Model

In [None]:
# Model
nb_model = GaussianNB()

for i in range(1):
    print("Iteration " + str(i))
    
    # Train
    nb_model, train_time = AttackClassificationModel.train_nb_knn(X_train, Y_train, X_val, nb_model)
    
    # Test
    Y_pred, test_time = AttackClassificationModel.test(X_test, nb_model)
    
    # Multi classification metrics
    # Display names listed by attack id (0-9). Ids 8 and 9 are assigned to Gafgyt Tcp and
    # Gafgyt Udp when the data is loaded, so Tcp must come before Udp here.
    # NOTE(review): assumes get_scores maps names to classes by position - confirm.
    labels = ['Mirai_Ack','Mirai_Scan','Mirai_Syn','Mirai_Udp','Mirai_Udpplain', 'Bashlite_Combo','Bashlite_Junk','Bashlite_Scan','Bashlite_Tcp', 'Bashlite_Udp']
    acc, f1, pre, rec = AttackClassificationModel.get_scores(Y_test, Y_pred, labels)
    
    # Print results
    # The hyperparameter and optimization-time arguments are the values left over from the CNN run.
    AttackClassificationModel.print_results(learning_rate,
                                            epochs,
                                            batch_size,
                                            X_train,
                                            X_val,
                                            X_test,
                                            opt_time,
                                            train_time,
                                            test_time,
                                            acc,
                                            f1,
                                            pre,
                                            rec,
                                            Y_test,
                                            Y_pred,
                                            "NB",
                                            "Results/S1003_camera.txt")
    
    print("")

In [None]:
# Confusion matrix of the Naive Bayes predictions on the test set.
conf_matrix = confusion_matrix(Y_test, Y_pred)

fig, ax = plt.subplots(figsize = (4, 4))
ax.matshow(conf_matrix, cmap = plt.cm.Blues, alpha = 0.3)

# Write every count in the middle of its cell.
for row, row_counts in enumerate(conf_matrix):
    for col, count in enumerate(row_counts):
        ax.text(x = col, y = row, s = count, va = 'center', ha = 'center', size = 'large')

ax.set_xlabel('Predictions', fontsize = 12)
ax.set_ylabel('Actuals', fontsize = 12)
ax.set_title('Confusion Matrix', fontsize = 14)
plt.savefig("Results/Confusion Matrix/S3 NB")
plt.show()

# KNN Model

In [None]:
# Model
knn_model = KNeighborsClassifier(n_neighbors = 50)

for i in range(1):
    print("Iteration " + str(i))
    
    # Train
    knn_model, train_time = AttackClassificationModel.train_nb_knn(X_train, Y_train, X_val, knn_model)
    
    # Test
    Y_pred, test_time = AttackClassificationModel.test(X_test, knn_model)
    
    # Multi classification metrics
    # Display names listed by attack id (0-9). Ids 8 and 9 are assigned to Gafgyt Tcp and
    # Gafgyt Udp when the data is loaded, so Tcp must come before Udp here.
    # NOTE(review): assumes get_scores maps names to classes by position - confirm.
    labels = ['Mirai_Ack','Mirai_Scan','Mirai_Syn','Mirai_Udp','Mirai_Udpplain', 'Bashlite_Combo','Bashlite_Junk','Bashlite_Scan','Bashlite_Tcp', 'Bashlite_Udp']
    acc, f1, pre, rec = AttackClassificationModel.get_scores(Y_test, Y_pred, labels)
    
    # Print results
    # The hyperparameter and optimization-time arguments are the values left over from the CNN run.
    AttackClassificationModel.print_results(learning_rate,
                                            epochs,
                                            batch_size,
                                            X_train,
                                            X_val,
                                            X_test,
                                            opt_time,
                                            train_time,
                                            test_time,
                                            acc,
                                            f1,
                                            pre,
                                            rec,
                                            Y_test,
                                            Y_pred,
                                            "KNN",
                                            "Results/S1003_camera.txt")
    
    print("")

In [None]:
# Confusion matrix of the KNN predictions on the test set.
conf_matrix = confusion_matrix(Y_test, Y_pred)

fig, ax = plt.subplots(figsize = (4, 4))
ax.matshow(conf_matrix, cmap = plt.cm.Blues, alpha = 0.3)

# Write every count in the middle of its cell.
for row, row_counts in enumerate(conf_matrix):
    for col, count in enumerate(row_counts):
        ax.text(x = col, y = row, s = count, va = 'center', ha = 'center', size = 'large')

ax.set_xlabel('Predictions', fontsize = 12)
ax.set_ylabel('Actuals', fontsize = 12)
ax.set_title('Confusion Matrix', fontsize = 14)
plt.savefig("Results/Confusion Matrix/S3 KNN")
plt.show()