<a href="https://colab.research.google.com/github/am-onebit/DeepLearningCybersecurity/blob/main/am_ids.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# An example intrusion detection application using Dense, Conv1D and LSTM layers.
# Please cite the works below if you find it useful:
#Akgun, Devrim, Selman Hizal, and Unal Cavusoglu. "A new DDoS attacks intrusion detection
#model based on deep learning for cybersecurity." Computers & Security 118 (2022): 102748.
#
#Hizal, Selman, Ünal ÇAVUŞOĞLU, and Devrim AKGÜN. "A New Deep Learning Based Intrusion
#Detection System for Cloud Security." 2021 3rd International Congress on Human-Computer
#Interaction, Optimization and Robotic Applications (HORA). IEEE, 2021.
#
!pip install tensorflow
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
import matplotlib.pyplot as plt
from tensorflow.keras.utils import plot_model
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.callbacks import ModelCheckpoint, EarlyStopping
import os
from tensorflow.keras.utils import to_categorical
from sklearn.utils import class_weight
#from tensorflow.keras.models import models_ddos
#from models import models_ddos

In [None]:
# Run configuration used by the training cell:
#   epochs -> passed to model.fit(epochs=...)
#   nclass -> number of classes passed to to_categorical(...)
epochs, nclass = 100, 12

def loadDataset(path='https://raw.githubusercontent.com/kdemertzis/EKPA/main/Data/network_traffic_data.csv',
                label_col=None, val_size=0.15, test_size=0.15, seed=42):
    """Read the traffic CSV and split it into train / validation / test arrays.

    The previous version only read and printed the CSV and implicitly returned
    ``None``, although the caller unpacks six values from the result.

    Parameters
    ----------
    path : str
        Local path or URL of the CSV file (comma separated).
    label_col : str or None
        Name of the target column.
        NOTE(review): when ``None`` the LAST column of the file is assumed to
        hold the class label -- confirm against the dataset schema.
    val_size, test_size : float
        Fractions of the rows used for the validation and test splits.
    seed : int
        Seed of the random permutation used for the split (reproducible runs).

    Returns
    -------
    tuple
        ``(train_data, train_labelp, val_data, val_labelp, test_data,
        test_labelp)``. Feature arrays are 2-D ``float32``; label arrays are
        1-D ``int32`` codes ``0..k-1`` assigned in sorted order of the distinct
        label values.

    Raises
    ------
    ValueError
        If the split fractions are invalid, the label column is missing, or
        the file has no feature columns / no labelled rows.
    """
    if not (0 <= val_size < 1 and 0 <= test_size < 1 and val_size + test_size < 1):
        raise ValueError('val_size and test_size must be in [0, 1) and sum to less than 1')

    data = pd.read_csv(path, delimiter=',')
    print(data.head(20))

    if len(data.columns) < 2:
        raise ValueError('dataset needs at least one feature column and one label column')
    if label_col is None:
        label_col = data.columns[-1]
    elif label_col not in data.columns:
        raise ValueError('label column %r not found in dataset' % (label_col,))

    # Rows without a label cannot be used for supervised training.
    data = data.dropna(subset=[label_col])
    if data.empty:
        raise ValueError('dataset contains no labelled rows')

    # Integer class codes; sort=True makes the mapping independent of row order.
    label_codes, _ = pd.factorize(data[label_col], sort=True)
    labels = np.asarray(label_codes).astype('int32')

    # Non-numeric feature columns (e.g. addresses, protocol names) are replaced
    # by integer category codes so the whole matrix can be cast to float32.
    # NOTE(review): this is a plain ordinal encoding -- replace it if the model
    # needs a different representation for categorical fields.
    feature_columns = []
    for name in data.columns:
        if name == label_col:
            continue
        column = data[name]
        if not pd.api.types.is_numeric_dtype(column):
            column = pd.factorize(column, sort=True)[0]
        feature_columns.append(np.asarray(column, dtype='float32'))
    features = np.column_stack(feature_columns)

    # Seeded shuffle, then carve out test / validation / train index ranges.
    order = np.random.default_rng(seed).permutation(len(features))
    n_test = int(round(len(order) * test_size))
    n_val = int(round(len(order) * val_size))
    test_idx = order[:n_test]
    val_idx = order[n_test:n_test + n_val]
    train_idx = order[n_test + n_val:]

    return (features[train_idx], labels[train_idx],
            features[val_idx], labels[val_idx],
            features[test_idx], labels[test_idx])


# NOTE: a verbatim copy of the train / evaluate / plot statements from the
# "LOAD DATA" cell used to sit here. It executed before `model`, `train_data`,
# `modelCheckPoint` and `class_weights` were defined, so this cell raised
# NameError on a fresh kernel. This cell now only holds the configuration
# constants and loadDataset(); the pipeline itself runs once, in the next cell.


In [None]:
# -- LOAD DATA -----------------------------------------------------------------
# loadDataset() must return the feature arrays and the integer label vectors
# of the train / validation / test splits, in exactly this order.
train_data, train_labelp, val_data, val_labelp, test_data, test_labelp = loadDataset()

# One-hot encode the integer labels into nclass columns.
train_label = to_categorical(train_labelp, nclass)
val_label   = to_categorical(val_labelp,   nclass)
test_label  = to_categorical(test_labelp,  nclass)

print('train_data.shape=', train_data.shape)
print('test_data.shape=',  test_data.shape)
# The original printed the validation shape under the 'test_data.shape=' label.
print('val_data.shape=',   val_data.shape)

# Number of input features (second axis of the training matrix).
inshape = train_data.shape[1]

# Class balancing weights.
# compute_class_weight returns one weight per entry of `classes`, so the dict
# is keyed by the class value itself rather than by its position. The two only
# coincide when every class 0..k-1 occurs in the training labels.
present_classes = np.unique(train_labelp)
balanced_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                     classes=present_classes,
                                                     y=train_labelp)
class_weights = {int(cls): float(weight)
                 for cls, weight in zip(present_classes, balanced_weights)}


# -- CALLBACKS -----------------------------------------------------------------
# Stops training once val_loss has not improved for 30 epochs.
# NOTE(review): this callback is created but is not part of the `callbacks`
# list given to model.fit in this notebook -- add it there if early stopping
# is actually wanted.
earlyStopping = EarlyStopping(monitor='val_loss',
                              patience=30,
                              verbose=0,
                              mode='min')

# Writes a checkpoint each time val_acc reaches a new maximum. The file name
# embeds the epoch number and the validation accuracy.
# The suffix is '.h5' instead of '.hdf5': Keras 3 (installed by an unpinned
# `pip install tensorflow`) only accepts '.keras' or '.h5' for whole-model
# checkpoints, while '.h5' is also valid on Keras 2.
modelCheckPoint = ModelCheckpoint('./savemodels/model5class.weights.{epoch:03d}-{val_acc:.4f}.h5',
                                  save_best_only=True,
                                  monitor='val_acc',
                                  mode='max')

# reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss',
#                                   factor=0.1,
#                                   patience=7,
#                                   verbose=1,
#                                   epsilon=1e-4,
#                                   mode='min')

# -- Baseline models-----------------------------------------------------------

# Active baseline: 1-D convolutional model.
# NOTE(review): no visible cell imports `models_ddos` (both candidate imports
# in the first cell are commented out) -- confirm the module is available
# before running this cell.
model = models_ddos.model_conv1D(
    lr=1e-4,
    N=64,
    inshape=inshape,
)
# Alternative baselines (swap with the call above to try them):
#   Dense -> model = models_ddos.model_dense(lr=1e-4, N=64, inshape=inshape)
#   LSTM  -> model = models_ddos.model_lstm(lr=1e-4, N=64, inshape=inshape)

# Print the layer-by-layer overview of the selected model.
model.summary()
# -----------------------------------------------------------------------------
# print model to an image file
# dot_img_file = 'model1.png'
# plot_model(model, to_file=dot_img_file, show_shapes=True)

# -- TRAIN MODEL --------------------------------------------------------------
# Fits on the in-memory training arrays, validates on the held-out validation
# split after every epoch, and applies the per-class weights to the loss.
# `workers` was dropped: Model.fit no longer accepts it in Keras 3, and in
# Keras 2 it only affected generator / Sequence inputs, not arrays.
history = model.fit(train_data,
                    train_label,
                    shuffle=True,
                    epochs=epochs,
                    batch_size=256,  # other sizes tried: 128, 64, 32
                    validation_data=(val_data, val_label),
                    callbacks=[modelCheckPoint],
                    class_weight=class_weights)

# -- Load best model ----------------------------------------------------------
# The checkpoint callback only writes a file when val_acc improves, so the most
# recently written file is the best model of the current run. Choosing by
# modification time (file name as tie-breaker) instead of by sorted file name
# avoids picking a stale checkpoint with a higher epoch number left behind by
# an earlier, longer run.
checkpoint_dir = './savemodels'
if not os.path.isdir(checkpoint_dir):
    raise FileNotFoundError('checkpoint directory %r does not exist' % checkpoint_dir)

str_models = np.sort([name for name in os.listdir(checkpoint_dir)
                      if os.path.isfile(os.path.join(checkpoint_dir, name))])
if str_models.size == 0:
    raise FileNotFoundError('no checkpoint files found in %r' % checkpoint_dir)

best_model = max(str_models,
                 key=lambda name: (os.path.getmtime(os.path.join(checkpoint_dir, name)), name))
print('best_model=', best_model)
model.load_weights(os.path.join(checkpoint_dir, best_model))

# --Confusion matrix ----------------------------------------------------------
print('TEST DATA-Confusion matrix:')
pred = model.predict(test_data)
pred_y = pred.argmax(axis=-1)  # index of the highest-scoring class per sample

# Pinning `labels` keeps the matrix nclass x nclass even when some class never
# appears in the test labels or predictions; without it the matrix shrinks and
# the per-class indexing below goes out of bounds.
cm = confusion_matrix(test_labelp.astype('int32'), pred_y,
                      labels=np.arange(nclass))
print(cm)

# Display names by class index.
# NOTE(review): these names are hard-coded; confirm they match the label
# encoding of the dataset that is actually loaded.
class_names = ['WebDDoS', 'BENIGN', 'UDP-lag', 'DrDoS_NTP', 'Syn',
               'DrDoS_SSDP', 'DrDoS_UDP', 'DrDoS_NetBIOS', 'DrDoS_MSSQL',
               'DrDoS_SNMP', 'TFTP', 'DrDoS_DNS']

print('Accuracy ratios for each class')
# Ratio = correctly predicted samples of a class / all test samples of that
# class (diagonal entry over row sum). Classes with no test samples print nan
# instead of dividing by zero.
for idx, row_total in enumerate(cm.sum(axis=1)):
    name = class_names[idx] if idx < len(class_names) else 'class_%d' % idx
    ratio = cm[idx, idx] / row_total if row_total else float('nan')
    print(f'{name:<13}=', ratio)


# -- Confusion matrix plot
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Tick labels handed to the display, one per row / column of `cm`.
label = np.array([
    "WebDDoS", "BENIGN", "UDP-lag", "DrDoS_NTP", "Syn ", "DrDoS_SSDP",
    "DrDoS_UDP", "DrDoS_NetBIOS", "DrDoS_MSSQL", "DrDoS_SNMP", "TFTP",
    "DrDoS_DNS",
])

# Large square canvas so all tick labels stay readable; x labels are rotated.
fig, ax = plt.subplots(figsize=(12, 12))
cmo = ConfusionMatrixDisplay(cm, display_labels=label)
cmo.plot(ax=ax, xticks_rotation=45)


# Plot training and validation accuracy and loss graphs
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Keep a copy of the curves on disk (rows: acc, val_acc, loss, val_loss).
# The immediate reload of the same file was dropped; the in-memory lists are
# plotted directly.
np.save('historydata.npy', [acc, val_acc, loss, val_loss])

# X axis of the curves. Deliberately NOT named `epochs`: that name holds the
# number of training epochs given to model.fit, and overwriting it with a
# range object broke any re-run of the training statement.
epoch_axis = range(len(acc))

plt.figure()
plt.plot(epoch_axis, acc, 'b', label='Training acc')
plt.plot(epoch_axis, val_acc, 'r.', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.figure()
plt.plot(epoch_axis, loss, 'b', label='Training loss')
plt.plot(epoch_axis, val_loss, 'r.', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

             src_ip          dest_ip  src_port  dest_port protocol  \
0      114.208.13.8   122.46.117.227     57422      33910     ICMP   
1     39.193.236.24     38.15.247.21     33946      34501      UDP   
2    102.200.245.21   212.53.148.199     41742      49649      TCP   
3     234.237.3.153    240.15.58.246     49115      53733     ICMP   
4     185.28.108.97    30.169.201.31     18404      29740      UDP   
5      214.36.74.69  186.161.254.253     27894      13625     ICMP   
6     145.127.84.41    8.208.138.223     38418      37042      UDP   
7      99.40.165.33     9.213.248.16     53081       9085      UDP   
8    16.164.152.229    67.47.179.110     15283       9633     ICMP   
9     121.75.86.116  152.122.148.100     15897      36395     ICMP   
10    105.85.212.26   101.240.64.190     14040      45838      UDP   
11    56.30.133.218     216.50.35.80     43161      18509      TCP   
12    167.97.11.236   232.126.87.193      1177      21829     ICMP   
13   248.52.198.137 

IndexError: index 67 is out of bounds for axis 1 with size 11