# RNN LSTM

In [None]:
# Each stage is wrapped in a function so that it can be reused across experiments
#Import libraries
import pandas as pd
import numpy as np
import tensorflow
from tensorflow import keras
from tensorflow.keras import layers
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

    
#Import data set using Pandas and split 
def read_split_data(test_size, dataset_path="KDD_DDoS.xlsx", random_state=1985):
    """Load the dataset from an Excel workbook and split it into train/test sets.

    The first 41 columns are used as model inputs and the column at
    position 41 (the 42nd) as the target.

    Args:
        test_size: Fraction of rows held out for testing. Anything accepted
            by ``float()`` works (e.g. ``0.1`` or ``"0.1"``).
        dataset_path: Path of the Excel file to read. Defaults to the
            workbook the notebook was written against.
        random_state: Seed forwarded to ``train_test_split`` so the split
            is reproducible between runs.

    Returns:
        The tuple ``(X_train, X_test, Y_train, Y_test)`` produced by
        ``train_test_split``.

    Raises:
        ValueError: If ``test_size`` is not strictly between 0 and 1.
    """
    float_test_size = float(test_size)
    # Reject an unusable fraction before paying for the (slow) Excel read.
    if not 0.0 < float_test_size < 1.0:
        raise ValueError(
            f"test_size must be strictly between 0 and 1, got {test_size!r}"
        )

    dataset = pd.read_excel(dataset_path)
    ipt_data = dataset.iloc[:, :41]    # feature columns 0..40
    opt_data = dataset.iloc[:, 41:42]  # target column, kept two-dimensional
    X_train, X_test, Y_train, Y_test = train_test_split(
        ipt_data,
        opt_data,
        test_size=float_test_size,
        random_state=random_state,
    )
    return X_train, X_test, Y_train, Y_test

#Define a function that accepts number of layers, nodes then auto generates the model
def rnn_lstm_model(num_layers, num_nodes, input_shape=(41, 1)):
    """Build a stacked-LSTM Keras model that ends in one sigmoid unit.

    Args:
        num_layers: Number of LSTM layers to stack; must be at least 1.
        num_nodes: Sequence of unit counts. ``num_nodes[0]`` sizes the first
            layer, ``num_nodes[i]`` the i-th intermediate layer, and
            ``num_nodes[-1]`` the last LSTM layer.
        input_shape: Shape of a single sample passed to the first LSTM
            layer. Defaults to ``(41, 1)``.

    Returns:
        An uncompiled ``keras.Sequential`` model.

    Raises:
        ValueError: If ``num_layers`` is less than 1.
    """
    if num_layers < 1:
        raise ValueError(f"num_layers must be at least 1, got {num_layers}")

    RNN_LSTM_model = keras.Sequential()

    if num_layers == 1:
        # A single layer is both the first and the last one: it takes the
        # input shape and must not return sequences, otherwise the Dense
        # head would be applied per timestep.
        RNN_LSTM_model.add(
            layers.LSTM(num_nodes[0], activation='relu', input_shape=input_shape)
        )
    else:
        # First layer declares the input shape and feeds sequences onward.
        RNN_LSTM_model.add(
            layers.LSTM(
                num_nodes[0],
                activation='relu',
                return_sequences=True,
                input_shape=input_shape,
            )
        )

        # Intermediate layers keep returning full sequences for the next LSTM.
        for i in range(1, num_layers - 1):
            RNN_LSTM_model.add(
                layers.LSTM(num_nodes[i], activation='relu', return_sequences=True)
            )

        # Last LSTM layer returns only its final output for the Dense head.
        RNN_LSTM_model.add(layers.LSTM(num_nodes[-1], activation='relu'))

    RNN_LSTM_model.add(layers.Dense(1, activation='sigmoid'))
    return RNN_LSTM_model

def train_and_evaluate_RNN(RNN_model, X_train, Y_train, X_test, Y_test):
    """Compile, train and evaluate the given model.

    The model passed as ``RNN_model`` is the one that is compiled, fitted
    and evaluated; no module-level model variable is consulted.

    Args:
        RNN_model: Model object exposing ``compile``, ``summary``, ``fit``
            and ``evaluate`` (a Keras model in this notebook).
        X_train: Training inputs, convertible with ``np.array``.
        Y_train: Training targets, passed to ``fit`` unchanged.
        X_test: Test inputs, convertible with ``np.array``.
        Y_test: Test targets, passed to ``evaluate`` unchanged.

    Returns:
        The test inputs as a NumPy array reshaped to
        ``(samples, columns, 1)``, ready to be passed to ``predict``.
    """
    # NOTE(review): 'mse' is kept to preserve existing results; for a
    # sigmoid binary classifier 'binary_crossentropy' is the usual loss.
    RNN_model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    RNN_model.summary()

    X_train_RNN_LSTM = _as_lstm_input(X_train)
    X_test_RNN_LSTM = _as_lstm_input(X_test)

    RNN_model.fit(X_train_RNN_LSTM, Y_train, epochs=50, batch_size=16)

    print("RNN LSTM Model Evaluation:")
    RNN_model.evaluate(X_test_RNN_LSTM, Y_test, batch_size=16)
    return X_test_RNN_LSTM


def _as_lstm_input(features):
    """Convert *features* to a NumPy array shaped (samples, columns, 1)."""
    as_array = np.array(features)
    print(as_array.shape)
    # Add a trailing axis of length 1 after the sample and column axes.
    as_array = as_array.reshape(as_array.shape[0], as_array.shape[1], 1)
    print(as_array.shape)
    return as_array

def predict_output(RNN_LSTM_model, X_test_RNN_LSTM):
    """Return 0/1 integer predictions for the given inputs.

    The model's ``predict`` output is thresholded: values strictly greater
    than 0.5 become 1, everything else becomes 0.
    """
    scores = RNN_LSTM_model.predict(X_test_RNN_LSTM)
    is_positive = scores > 0.5
    return is_positive.astype(int)

def calculate_performance_metrics(Y_test, y_pred_RNN_LSTM):
    """Compute binary-classification metrics from true and predicted labels.

    Assumes the two classes are encoded as 0 (negative) and 1 (positive),
    which is what ``predict_output`` produces for the predictions.

    Args:
        Y_test: True labels.
        y_pred_RNN_LSTM: Predicted labels.

    Returns:
        ``(accuracy, class_report, cm, precision, tpr, fpr)`` where ``cm``
        is the 2x2 confusion matrix and ``precision``, ``tpr`` and ``fpr``
        fall back to ``0.0`` when their denominator is zero.
    """
    # Pinning the label order guarantees a 2x2 matrix even when one class
    # is missing from both inputs, so the four-way unpack below cannot fail.
    cm = confusion_matrix(Y_test, y_pred_RNN_LSTM, labels=[0, 1])
    accuracy = accuracy_score(Y_test, y_pred_RNN_LSTM)
    class_report = classification_report(Y_test, y_pred_RNN_LSTM, zero_division=1)

    tn, fp, fn, tp = cm.ravel()

    # All three ratios share the same zero-denominator fallback.
    precision = tp / (tp + fp) if (tp + fp) != 0 else 0.0
    tpr = tp / (tp + fn) if (tp + fn) != 0 else 0.0
    fpr = fp / (tn + fp) if (tn + fp) != 0 else 0.0

    return accuracy, class_report, cm, precision, tpr, fpr


# def calculate_performance_metrics():
#     # Calculate accuracy score
#     RNN_accuracy_score = accuracy_score(Y_test, y_pred_RNN)
    
#     # Calculate classification report
#     RNN_classification_report = classification_report(Y_test, y_pred_RNN, zero_division=1)
    
#     # Calculate confusion matrix
#     cm_RNN = confusion_matrix(Y_test, y_pred_RNN)
    
#     # Calculate precision, true positive rate (TPR), and false positive rate (FPR)
#     rnn_precision = cm_RNN[1, 1] / (cm_RNN[1, 1] + cm_RNN[0, 1])
#     rnn_tpr = cm_RNN[1, 1] / (cm_RNN[1, 1] + cm_RNN[1, 0])
#     rnn_fpr = cm_RNN[0, 1] / (cm_RNN[0, 0] + cm_RNN[0, 1])
    
#     return RNN_accuracy_score, RNN_classification_report, cm_RNN, rnn_precision, rnn_tpr, rnn_fpr


# MS1, DD1: 90/10 split, 4 layers, 64 neurons

In [6]:
# Hold out 10% of the rows for testing.
X_train, X_test, Y_train, Y_test = read_split_data(0.1)

# Squeeze out any singleton axes, then append a trailing axis of length 1
# so each feature column is presented to the LSTM as its own timestep.
X_train = np.expand_dims(np.squeeze(X_train), axis=-1)
X_test = np.expand_dims(np.squeeze(X_test), axis=-1)

# Four stacked LSTM layers with 64, 32, 16 and 8 units.
RNN_LSTM_model = rnn_lstm_model(4, [64, 32, 16, 8])

# Train on the training split and evaluate on the held-out split; the
# reshaped test inputs are returned for reuse when predicting.
X_test_RNN_LSTM = train_and_evaluate_RNN(RNN_LSTM_model, X_train, Y_train, X_test, Y_test)

# Thresholded 0/1 predictions for the test inputs.
y_pred_RNN_LSTM = predict_output(RNN_LSTM_model, X_test_RNN_LSTM)

# Report the metrics for this configuration.
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN_LSTM)
print(f"Accuracy: {accuracy}")
print(f"Classification report:\n{class_report}")
print(f"Confusion matrix:\n{cm}")
print(f"Precision: {precision}")
print(f"TPR: {tpr}")
print(f"FPR: {fpr}")

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 41, 64)            16896     
                                                                 
 lstm_5 (LSTM)               (None, 41, 32)            12416     
                                                                 
 lstm_6 (LSTM)               (None, 41, 16)            3136      
                                                                 
 lstm_7 (LSTM)               (None, 8)                 800       
                                                                 
 dense_1 (Dense)             (None, 1)                 9         
                                                                 
Total params: 33,257
Trainable params: 33,257
Non-trainable params: 0
_________________________________________________________________
(26257, 41, 1)
(26257, 41, 1)
(2918, 41, 1)
(2918,

# MS1, DD2: 70/30 split, 4 layers, 64 neurons

In [7]:
# Hold out 30% of the rows for testing.
X_train, X_test, Y_train, Y_test = read_split_data(0.3)

# Squeeze out any singleton axes, then append a trailing axis of length 1
# so each feature column is presented to the LSTM as its own timestep.
X_train = np.expand_dims(np.squeeze(X_train), axis=-1)
X_test = np.expand_dims(np.squeeze(X_test), axis=-1)

# Four stacked LSTM layers with 64, 32, 16 and 8 units.
RNN_LSTM_model = rnn_lstm_model(4, [64, 32, 16, 8])

# Train on the training split and evaluate on the held-out split; the
# reshaped test inputs are returned for reuse when predicting.
X_test_RNN_LSTM = train_and_evaluate_RNN(RNN_LSTM_model, X_train, Y_train, X_test, Y_test)

# Thresholded 0/1 predictions for the test inputs.
y_pred_RNN_LSTM = predict_output(RNN_LSTM_model, X_test_RNN_LSTM)

# Report the metrics for this configuration.
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN_LSTM)
print(f"Accuracy: {accuracy}")
print(f"Classification report:\n{class_report}")
print(f"Confusion matrix:\n{cm}")
print(f"Precision: {precision}")
print(f"TPR: {tpr}")
print(f"FPR: {fpr}")

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 41, 64)            16896     
                                                                 
 lstm_9 (LSTM)               (None, 41, 32)            12416     
                                                                 
 lstm_10 (LSTM)              (None, 41, 16)            3136      
                                                                 
 lstm_11 (LSTM)              (None, 8)                 800       
                                                                 
 dense_2 (Dense)             (None, 1)                 9         
                                                                 
Total params: 33,257
Trainable params: 33,257
Non-trainable params: 0
_________________________________________________________________
(20422, 41, 1)
(20422, 41, 1)
(8753, 41, 1)
(8753,

# MS2, DD1: 90/10 split, 5 layers, 128 neurons

In [8]:
# Hold out 10% of the rows for testing.
X_train, X_test, Y_train, Y_test = read_split_data(0.1)

# Squeeze out any singleton axes, then append a trailing axis of length 1
# so each feature column is presented to the LSTM as its own timestep.
X_train = np.expand_dims(np.squeeze(X_train), axis=-1)
X_test = np.expand_dims(np.squeeze(X_test), axis=-1)

# Five stacked LSTM layers with 128, 64, 32, 16 and 8 units.
RNN_LSTM_model = rnn_lstm_model(5, [128, 64, 32, 16, 8])

# Train on the training split and evaluate on the held-out split; the
# reshaped test inputs are returned for reuse when predicting.
X_test_RNN_LSTM = train_and_evaluate_RNN(RNN_LSTM_model, X_train, Y_train, X_test, Y_test)

# Thresholded 0/1 predictions for the test inputs.
y_pred_RNN_LSTM = predict_output(RNN_LSTM_model, X_test_RNN_LSTM)

# Report the metrics for this configuration.
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN_LSTM)
print(f"Accuracy: {accuracy}")
print(f"Classification report:\n{class_report}")
print(f"Confusion matrix:\n{cm}")
print(f"Precision: {precision}")
print(f"TPR: {tpr}")
print(f"FPR: {fpr}")

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 41, 128)           66560     
                                                                 
 lstm_13 (LSTM)              (None, 41, 64)            49408     
                                                                 
 lstm_14 (LSTM)              (None, 41, 32)            12416     
                                                                 
 lstm_15 (LSTM)              (None, 41, 16)            3136      
                                                                 
 lstm_16 (LSTM)              (None, 8)                 800       
                                                                 
 dense_3 (Dense)             (None, 1)                 9         
                                                                 
Total params: 132,329
Trainable params: 132,329
Non-tr