# RNN

In [35]:
#Make stages into functions so that they can be reused
#Import libraries
import pandas as pd
import numpy as np
import tensorflow
from tensorflow import keras
from tensorflow.keras import layers
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

    
#Import data set using Pandas and split 
def read_split_data(test_size):
    """Load "KDD_DDoS.xlsx" and split it into train and test partitions.

    The first 41 columns are used as inputs and column index 41 as the
    target (kept as a one-column frame). The split uses a fixed
    random_state of 1985.

    Args:
        test_size: Fraction of rows for the test partition; anything
            accepted by float().

    Returns:
        Tuple (X_train, X_test, Y_train, Y_test).
    """
    # Convert first so a bad test_size fails before the spreadsheet is read.
    split_fraction = float(test_size)
    frame = pd.read_excel("KDD_DDoS.xlsx")
    features, target = frame.iloc[:, :41], frame.iloc[:, 41:42]
    parts = train_test_split(
        features, target, test_size=split_fraction, random_state=1985
    )
    X_train, X_test, Y_train, Y_test = parts
    return X_train, X_test, Y_train, Y_test

#Define a function that accepts number of layers, nodes then auto generates the model
def rnn_model(num_layers, num_nodes, input_shape=(41, 1)):
    """Build a stacked SimpleRNN model ending in a sigmoid output unit.

    Args:
        num_layers: Number of SimpleRNN layers to stack; must be >= 1.
        num_nodes: Sequence of unit counts, one per recurrent layer, in
            stacking order. Its length must equal num_layers.
        input_shape: Shape passed to the first recurrent layer.

    Returns:
        An uncompiled keras.Sequential model made of num_layers SimpleRNN
        layers (relu) followed by Dense(1, activation='sigmoid').

    Raises:
        ValueError: If num_layers < 1 or len(num_nodes) != num_layers.
    """
    # Validate before building anything: previously num_layers <= 1 silently
    # produced two recurrent layers and a short num_nodes raised IndexError.
    if num_layers < 1:
        raise ValueError(f"num_layers must be at least 1, got {num_layers}")
    if len(num_nodes) != num_layers:
        raise ValueError(
            f"num_nodes has {len(num_nodes)} entries but num_layers is {num_layers}"
        )

    RNN_model = keras.Sequential()

    last_index = num_layers - 1
    for index, units in enumerate(num_nodes):
        # Every layer except the last must emit the full sequence so the
        # next recurrent layer receives 3-D input.
        layer_kwargs = {"activation": "relu", "return_sequences": index < last_index}
        if index == 0:
            layer_kwargs["input_shape"] = input_shape
        RNN_model.add(keras.layers.SimpleRNN(units, **layer_kwargs))

    RNN_model.add(keras.layers.Dense(1, activation="sigmoid"))
    return RNN_model

def _as_rnn_input(features):
    """Return features as a (rows, columns, 1) numpy array, printing the shape before and after."""
    array = np.array(features)
    print(array.shape)
    array = array.reshape(array.shape[0], array.shape[1], 1)
    print(array.shape)
    return array


def train_and_evaluate_RNN(RNN_model, X_train, Y_train, X_test, Y_test,
                           epochs=6, batch_size=10):
    """Compile, fit and evaluate RNN_model.

    Args:
        RNN_model: Model exposing compile/summary/fit/evaluate.
        X_train: Training inputs; converted to a numpy array and reshaped
            to (rows, columns, 1).
        Y_train: Training targets, passed to fit() unchanged.
        X_test: Test inputs; converted and reshaped like X_train.
        Y_test: Test targets, passed to evaluate() unchanged.
        epochs: Number of epochs passed to fit().
        batch_size: Batch size used for both fit() and evaluate().

    Returns:
        The reshaped test inputs, so the caller can reuse them for
        prediction. The value returned by evaluate() is not captured.
    """
    # Compile the model
    RNN_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Get Model Summary
    RNN_model.summary()

    # Convert both partitions to 3-D arrays (shapes are printed by the helper)
    X_train_RNN = _as_rnn_input(X_train)
    X_test_RNN = _as_rnn_input(X_test)

    # Fitting the RNN model
    RNN_model.fit(X_train_RNN, Y_train, epochs=epochs, batch_size=batch_size)

    # Evaluate the Model
    print("RNN Model Evaluation:")
    RNN_model.evaluate(X_test_RNN, Y_test, batch_size=batch_size)
    return X_test_RNN

def predict_output(RNN_model, X_test_RNN):
    """Return the model's predictions thresholded into integer class labels.

    Values returned by RNN_model.predict that are strictly greater than 0.5
    become 1; all others become 0.
    """
    scores = RNN_model.predict(X_test_RNN)
    above_threshold = scores > 0.5
    return above_threshold.astype(int)

def calculate_performance_metrics(Y_test, y_pred_RNN):
    """Compute binary-classification metrics for 0/1 labels.

    Args:
        Y_test: True labels.
        y_pred_RNN: Predicted labels.

    Returns:
        Tuple (accuracy, class_report, cm, precision, tpr, fpr) where cm is
        the confusion matrix with rows/columns ordered [0, 1]. Each ratio is
        0.0 when its denominator is zero.
    """
    # Pin the label order so the matrix is always 2x2; without it a split in
    # which only one class occurs yields a 1x1 matrix and the unpack fails.
    cm = confusion_matrix(Y_test, y_pred_RNN, labels=[0, 1])
    accuracy = accuracy_score(Y_test, y_pred_RNN)
    class_report = classification_report(Y_test, y_pred_RNN, zero_division=1)

    tn, fp, fn, tp = cm.ravel()

    # All three ratios share the same zero-denominator guard.
    tpr = tp / (tp + fn) if (tp + fn) != 0 else 0.0
    precision = tp / (tp + fp) if (tp + fp) != 0 else 0.0
    fpr = fp / (tn + fp) if (tn + fp) != 0 else 0.0

    return accuracy, class_report, cm, precision, tpr, fpr


# def calculate_performance_metrics():
#     # Calculate accuracy score
#     RNN_accuracy_score = accuracy_score(Y_test, y_pred_RNN)
    
#     # Calculate classification report
#     RNN_classification_report = classification_report(Y_test, y_pred_RNN, zero_division=1)
    
#     # Calculate confusion matrix
#     cm_RNN = confusion_matrix(Y_test, y_pred_RNN)
    
#     # Calculate precision, true positive rate (TPR), and false positive rate (FPR)
#     rnn_precision = cm_RNN[1, 1] / (cm_RNN[1, 1] + cm_RNN[0, 1])
#     rnn_tpr = cm_RNN[1, 1] / (cm_RNN[1, 1] + cm_RNN[1, 0])
#     rnn_fpr = cm_RNN[0, 1] / (cm_RNN[0, 0] + cm_RNN[0, 1])
    
#     return RNN_accuracy_score, RNN_classification_report, cm_RNN, rnn_precision, rnn_tpr, rnn_fpr


# MS1, DD1 90/10 & 4 layers, 64 neurons

In [36]:
# 90/10 train/test partition
X_train, X_test, Y_train, Y_test = read_split_data(0.1)

# Drop any length-1 axes from the feature frames
X_train = np.squeeze(X_train)
X_test = np.squeeze(X_test)

# Append a trailing axis of size 1 to the input data
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# Define model: four recurrent layers with 64, 32, 16 and 8 units
RNN_model = rnn_model(4, [64, 32, 16, 8])

# Train and evaluate model
X_test_RNN = train_and_evaluate_RNN(RNN_model, X_train, Y_train, X_test, Y_test)

# Predict output
y_pred_RNN = predict_output(RNN_model, X_test_RNN)

# Calculate performance metrics
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN)
print(f"Accuracy: {accuracy}")
print(f"Classification report:\n{class_report}")
print(f"Confusion matrix:\n{cm}")
print(f"Precision: {precision}")
print(f"TPR: {tpr}")
print(f"FPR: {fpr}")

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_44 (SimpleRNN)   (None, 41, 64)            4224      
                                                                 
 simple_rnn_45 (SimpleRNN)   (None, 41, 32)            3104      
                                                                 
 simple_rnn_46 (SimpleRNN)   (None, 41, 16)            784       
                                                                 
 simple_rnn_47 (SimpleRNN)   (None, 8)                 200       
                                                                 
 dense_9 (Dense)             (None, 1)                 9         
                                                                 
Total params: 8,321
Trainable params: 8,321
Non-trainable params: 0
_________________________________________________________________
(26257, 41, 1)
(26257, 41, 1)
(2918, 41, 1)
(2918, 

# MS1, DD2 70/30 & 4 layers, 64 neurons

In [37]:
# 70/30 train/test partition
X_train, X_test, Y_train, Y_test = read_split_data(0.3)

# Drop any length-1 axes from the feature frames
X_train = np.squeeze(X_train)
X_test = np.squeeze(X_test)

# Append a trailing axis of size 1 to the input data
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# Define model: four recurrent layers with 64, 32, 16 and 8 units
RNN_model = rnn_model(4, [64, 32, 16, 8])

# Train and evaluate model
X_test_RNN = train_and_evaluate_RNN(RNN_model, X_train, Y_train, X_test, Y_test)

# Predict output
y_pred_RNN = predict_output(RNN_model, X_test_RNN)

# Calculate performance metrics
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN)
print(f"Accuracy: {accuracy}")
print(f"Classification report:\n{class_report}")
print(f"Confusion matrix:\n{cm}")
print(f"Precision: {precision}")
print(f"TPR: {tpr}")
print(f"FPR: {fpr}")

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_48 (SimpleRNN)   (None, 41, 64)            4224      
                                                                 
 simple_rnn_49 (SimpleRNN)   (None, 41, 32)            3104      
                                                                 
 simple_rnn_50 (SimpleRNN)   (None, 41, 16)            784       
                                                                 
 simple_rnn_51 (SimpleRNN)   (None, 8)                 200       
                                                                 
 dense_10 (Dense)            (None, 1)                 9         
                                                                 
Total params: 8,321
Trainable params: 8,321
Non-trainable params: 0
_________________________________________________________________
(20422, 41, 1)
(20422, 41, 1)
(8753, 41, 1)
(8753, 

# MS2, DD1 5 layers, 128 neurons, 90/10 partition

In [38]:
# 90/10 train/test partition
X_train, X_test, Y_train, Y_test = read_split_data(0.1)

# Drop any length-1 axes from the feature frames
X_train = np.squeeze(X_train)
X_test = np.squeeze(X_test)

# Append a trailing axis of size 1 to the input data
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# Define model: five recurrent layers with 128, 64, 32, 16 and 8 units
RNN_model = rnn_model(5, [128, 64, 32, 16, 8])

# Train and evaluate model
X_test_RNN = train_and_evaluate_RNN(RNN_model, X_train, Y_train, X_test, Y_test)

# Predict output
y_pred_RNN = predict_output(RNN_model, X_test_RNN)

# Calculate performance metrics
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN)
print(f"Accuracy: {accuracy}")
print(f"Classification report:\n{class_report}")
print(f"Confusion matrix:\n{cm}")
print(f"Precision: {precision}")
print(f"TPR: {tpr}")
print(f"FPR: {fpr}")

Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_52 (SimpleRNN)   (None, 41, 128)           16640     
                                                                 
 simple_rnn_53 (SimpleRNN)   (None, 41, 64)            12352     
                                                                 
 simple_rnn_54 (SimpleRNN)   (None, 41, 32)            3104      
                                                                 
 simple_rnn_55 (SimpleRNN)   (None, 41, 16)            784       
                                                                 
 simple_rnn_56 (SimpleRNN)   (None, 8)                 200       
                                                                 
 dense_11 (Dense)            (None, 1)                 9         
                                                                 
Total params: 33,089
Trainable params: 33,089
Non-tra