# RNN GRU

In [10]:
# Wrap each stage in a function so that it can be reused
#Import libraries
import pandas as pd
import numpy as np
import tensorflow
from tensorflow import keras
from keras.models import Sequential
from keras.layers import GRU, Dense
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

    
#Import data set using Pandas and split 
def read_split_data(test_size, path="KDD_DDoS.xlsx", n_features=41, random_state=1985):
    """Load the dataset from an Excel workbook and split it into train/test sets.

    The first ``n_features`` columns are used as model inputs and the column
    immediately after them as the target.

    Args:
        test_size: Fraction of rows to place in the test partition; anything
            accepted by ``float()`` (e.g. ``0.1`` or ``"0.1"``).
        path: Location of the Excel workbook to read. Defaults to the file
            name that was previously hard-coded.
        n_features: Number of leading columns treated as input features.
        random_state: Seed forwarded to ``train_test_split`` so the partition
            is reproducible between runs.

    Returns:
        A tuple ``(X_train, X_test, Y_train, Y_test)`` as produced by
        ``train_test_split``.
    """
    float_test_size = float(test_size)
    dataset = pd.read_excel(path)

    # Positional slicing (iloc): inputs first, then the single target column.
    # The target slice keeps its 2-D (one-column) form, as before.
    ipt_data = dataset.iloc[:, :n_features]
    opt_data = dataset.iloc[:, n_features:n_features + 1]

    X_train, X_test, Y_train, Y_test = train_test_split(
        ipt_data,
        opt_data,
        test_size=float_test_size,
        random_state=random_state,
    )
    return X_train, X_test, Y_train, Y_test

#Define a function that accepts number of layers, nodes then auto generates the model
def rnn_gru_model(num_layers, num_nodes, input_shape=(41, 1)):
    """Build a stacked-GRU binary classifier.

    The network consists of ``num_layers`` GRU layers (ReLU activation)
    followed by a single sigmoid ``Dense`` unit. Every GRU layer except the
    last returns its full sequence so the next GRU layer receives 3-D input.

    Args:
        num_layers: Number of GRU layers to stack; must be at least 1.
        num_nodes: Sequence with the unit count of each GRU layer, in order.
            Its length must equal ``num_layers``.
        input_shape: Shape given to the first GRU layer. Defaults to
            ``(41, 1)``, the value that was previously hard-coded.

    Returns:
        The un-compiled ``Sequential`` model.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1 or does not match
            ``len(num_nodes)``.
    """
    # Validate up front: previously a mismatch either built a different stack
    # than requested or failed with an opaque IndexError, and num_layers == 1
    # produced two GRU layers.
    if num_layers < 1:
        raise ValueError(f"num_layers must be at least 1, got {num_layers}")
    if len(num_nodes) != num_layers:
        raise ValueError(
            f"num_nodes has {len(num_nodes)} entries but num_layers is {num_layers}"
        )

    RNN_GRU_model = Sequential()
    last_index = num_layers - 1

    for index, units in enumerate(num_nodes):
        # Only the first layer declares the input shape.
        layer_kwargs = {"input_shape": input_shape} if index == 0 else {}
        RNN_GRU_model.add(
            GRU(
                units=units,
                activation='relu',
                # The final GRU layer emits only its last output.
                return_sequences=index < last_index,
                **layer_kwargs,
            )
        )

    RNN_GRU_model.add(Dense(units=1, activation='sigmoid'))
    return RNN_GRU_model

def train_and_evaluate_RNN(RNN_GRU_model, X_train, Y_train, X_test, Y_test,
                           epochs=10, batch_size=10, eval_batch_size=16):
    """Compile, fit and evaluate the GRU model.

    Both feature sets are converted to NumPy arrays and reshaped to
    ``(rows, columns, 1)`` before being handed to the model. The array shape
    is printed before and after each reshape.

    Args:
        RNN_GRU_model: Model exposing ``compile``, ``summary``, ``fit`` and
            ``evaluate``.
        X_train: Training features (anything ``np.array`` accepts).
        Y_train: Training targets, passed to ``fit`` unchanged.
        X_test: Test features (anything ``np.array`` accepts).
        Y_test: Test targets, passed to ``evaluate`` unchanged.
        epochs: Number of training epochs (default 10).
        batch_size: Batch size used while fitting (default 10).
        eval_batch_size: Batch size used while evaluating (default 16).

    Returns:
        The reshaped test features, ready to be passed to ``predict``.
    """

    def _as_sequences(features):
        # Convert to an array and reshape it to (rows, columns, 1),
        # printing the shape before and after.
        array = np.array(features)
        print(array.shape)
        array = array.reshape(array.shape[0], array.shape[1], 1)
        print(array.shape)
        return array

    # Compile the model
    RNN_GRU_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Get Model Summary
    RNN_GRU_model.summary()

    X_train_RNN_GRU = _as_sequences(X_train)
    X_test_RNN_GRU = _as_sequences(X_test)

    # Fitting the RNN model
    RNN_GRU_model.fit(X_train_RNN_GRU, Y_train, epochs=epochs, batch_size=batch_size)

    # Evaluate the Model (the banner previously said "LSTM" for this GRU model)
    print("RNN GRU Model Evaluation:")
    RNN_GRU_model.evaluate(X_test_RNN_GRU, Y_test, batch_size=eval_batch_size)
    return X_test_RNN_GRU

def predict_output(RNN_GRU_model, X_test_RNN_GRU):
    """Return integer class predictions from the model's outputs.

    The model's ``predict`` output is compared against 0.5: values strictly
    above the threshold become 1, everything else becomes 0.
    """
    probabilities = RNN_GRU_model.predict(X_test_RNN_GRU)
    above_threshold = probabilities > 0.5
    return above_threshold.astype(int)

def calculate_performance_metrics(Y_test, y_pred_RNN_GRU):
    """Compute accuracy, a classification report and confusion-matrix rates.

    Args:
        Y_test: True binary labels (0/1).
        y_pred_RNN_GRU: Predicted binary labels (0/1).

    Returns:
        A tuple ``(accuracy, class_report, cm, precision, tpr, fpr)`` where
        ``cm`` is the 2x2 confusion matrix and ``precision``, ``tpr`` (true
        positive rate) and ``fpr`` (false positive rate) are derived from it.
        A rate whose denominator is zero is reported as ``0.0``.
    """
    # Pin the label set so the matrix is always 2x2; without it, data that
    # contains a single class yields a 1x1 matrix and the unpacking below fails.
    cm = confusion_matrix(Y_test, y_pred_RNN_GRU, labels=[0, 1])
    accuracy = accuracy_score(Y_test, y_pred_RNN_GRU)
    class_report = classification_report(Y_test, y_pred_RNN_GRU, zero_division=1)

    tn, fp, fn, tp = cm.ravel()

    def _safe_ratio(numerator, denominator):
        # Guard every rate the same way; TPR used to be unguarded.
        if denominator != 0:
            return numerator / denominator
        return 0.0

    tpr = _safe_ratio(tp, tp + fn)
    precision = _safe_ratio(tp, tp + fp)
    fpr = _safe_ratio(fp, tn + fp)

    return accuracy, class_report, cm, precision, tpr, fpr


# MS1, DD1: 90/10 split & 4 layers, 64 neurons

In [11]:
# 90/10 train/test partition
X_train, X_test, Y_train, Y_test = read_split_data(0.1)

# Squeeze out length-1 axes, then append a trailing axis of size 1
X_train = np.expand_dims(np.squeeze(X_train), axis=-1)
X_test = np.expand_dims(np.squeeze(X_test), axis=-1)

# Four GRU layers with 64, 32, 16 and 8 units
RNN_GRU_model = rnn_gru_model(4, [64, 32, 16, 8])

# Fit on the training partition and evaluate on the test partition
X_test_RNN_GRU = train_and_evaluate_RNN(RNN_GRU_model, X_train, Y_train, X_test, Y_test)

# Class predictions for the test partition
y_pred_RNN_GRU = predict_output(RNN_GRU_model, X_test_RNN_GRU)

# Score the predictions and report each metric on its own line
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN_GRU)
print(
    f"Accuracy: {accuracy}",
    f"Classification report:\n{class_report}",
    f"Confusion matrix:\n{cm}",
    f"Precision: {precision}",
    f"TPR: {tpr}",
    f"FPR: {fpr}",
    sep="\n",
)

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_21 (GRU)                (None, 41, 64)            12864     
                                                                 
 gru_22 (GRU)                (None, 41, 32)            9408      
                                                                 
 gru_23 (GRU)                (None, 41, 16)            2400      
                                                                 
 gru_24 (GRU)                (None, 8)                 624       
                                                                 
 dense_4 (Dense)             (None, 1)                 9         
                                                                 
Total params: 25,305
Trainable params: 25,305
Non-trainable params: 0
_________________________________________________________________
(26257, 41, 1)
(26257, 41, 1)
(2918, 41, 1)
(2918,

# MS1, DD2: 70/30 split & 4 layers, 64 neurons

In [12]:
# 70/30 train/test partition
X_train, X_test, Y_train, Y_test = read_split_data(0.3)

# Squeeze out length-1 axes, then append a trailing axis of size 1
X_train = np.expand_dims(np.squeeze(X_train), axis=-1)
X_test = np.expand_dims(np.squeeze(X_test), axis=-1)

# Four GRU layers with 64, 32, 16 and 8 units
RNN_GRU_model = rnn_gru_model(4, [64, 32, 16, 8])

# Fit on the training partition and evaluate on the test partition
X_test_RNN_GRU = train_and_evaluate_RNN(RNN_GRU_model, X_train, Y_train, X_test, Y_test)

# Class predictions for the test partition
y_pred_RNN_GRU = predict_output(RNN_GRU_model, X_test_RNN_GRU)

# Score the predictions and report each metric on its own line
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN_GRU)
print(
    f"Accuracy: {accuracy}",
    f"Classification report:\n{class_report}",
    f"Confusion matrix:\n{cm}",
    f"Precision: {precision}",
    f"TPR: {tpr}",
    f"FPR: {fpr}",
    sep="\n",
)

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_25 (GRU)                (None, 41, 64)            12864     
                                                                 
 gru_26 (GRU)                (None, 41, 32)            9408      
                                                                 
 gru_27 (GRU)                (None, 41, 16)            2400      
                                                                 
 gru_28 (GRU)                (None, 8)                 624       
                                                                 
 dense_5 (Dense)             (None, 1)                 9         
                                                                 
Total params: 25,305
Trainable params: 25,305
Non-trainable params: 0
_________________________________________________________________
(20422, 41, 1)
(20422, 41, 1)
(8753, 41, 1)
(8753,

# MS2, DD1: 5 layers, 128 neurons, 90/10 partition

In [13]:
# 90/10 train/test partition
X_train, X_test, Y_train, Y_test = read_split_data(0.1)

# Squeeze out length-1 axes, then append a trailing axis of size 1
X_train = np.expand_dims(np.squeeze(X_train), axis=-1)
X_test = np.expand_dims(np.squeeze(X_test), axis=-1)

# Five GRU layers with 128, 64, 32, 16 and 8 units
RNN_GRU_model = rnn_gru_model(5, [128, 64, 32, 16, 8])

# Fit on the training partition and evaluate on the test partition
X_test_RNN_GRU = train_and_evaluate_RNN(RNN_GRU_model, X_train, Y_train, X_test, Y_test)

# Class predictions for the test partition
y_pred_RNN_GRU = predict_output(RNN_GRU_model, X_test_RNN_GRU)

# Score the predictions and report each metric on its own line
accuracy, class_report, cm, precision, tpr, fpr = calculate_performance_metrics(Y_test, y_pred_RNN_GRU)
print(
    f"Accuracy: {accuracy}",
    f"Classification report:\n{class_report}",
    f"Confusion matrix:\n{cm}",
    f"Precision: {precision}",
    f"TPR: {tpr}",
    f"FPR: {fpr}",
    sep="\n",
)

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_29 (GRU)                (None, 41, 128)           50304     
                                                                 
 gru_30 (GRU)                (None, 41, 64)            37248     
                                                                 
 gru_31 (GRU)                (None, 41, 32)            9408      
                                                                 
 gru_32 (GRU)                (None, 41, 16)            2400      
                                                                 
 gru_33 (GRU)                (None, 8)                 624       
                                                                 
 dense_6 (Dense)             (None, 1)                 9         
                                                                 
Total params: 99,993
Trainable params: 99,993
Non-trai