# Final Report

The final report will take a similar structure to the previous reports compiled. We will report the accuracy of our models using classification matrices for each method applied to the data set to compare and contrast what was achieved. This will allow us to create a summary graph comparing the neural networks to the Naive Bayes and Random Forest methods employed as comparisons. We will then use bar graphs to directly compare the accuracies and draw conclusions on the advantages and draw backs of each method. Further results, visualisations and analysis of each model can be found in each indivduals folder.

In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

import pickle

### Classifcation Reports

In [3]:
from sklearn.metrics import confusion_matrix, classification_report

def classification_eval(y_true,y_pred):
    
    print("Confusion Matrix")
    C = confusion_matrix(y_true,y_pred)
    
    print('Classification report')
    print(classification_report(y_true, y_pred, target_names = (['Normal','Exploits','Reconnaissance','DoS','Generic',
                                                                 'Shellcode','Fuzzers','Worms','Backdoor','Analysis' ]), digits=4))

In [5]:
# Matt's NN Predictions
Matt_NN_Actual = pickle.load(open('../Matt Corrie/Y_test.p','rb'))
Matt_NN_Pred = pickle.load(open('../Matt Corrie/Y_predictions.p','rb'))

# Matt's RF Predictions
Matt_RF_Actual = pickle.load(open('../Matt Corrie/Y_test.p','rb'))
Matt_RF_Pred = pickle.load(open('../Matt Corrie/rf_predictions.p','rb'))

# Alex's NN Predictions

# Luke's NB Predictions

# Luke's AE Predictions

# Gab's Dropout NN Predictions

# Gab's No Hidden Layer NN Predictions


In [7]:
print('Below is the report for the Neural Network model prepared by Matt.')
print('-------------------------------------------------------')
classification_eval(Matt_NN_Actual,Matt_NN_Pred)

Below is the report for the Neural Network model prepared by Matt.
-------------------------------------------------------
Confusion Matrix
Classification report
                precision    recall  f1-score   support

        Normal     0.9961    0.9963    0.9962    221772
      Exploits     0.5966    0.8844    0.7125      4411
Reconnaissance     0.8656    0.7945    0.8285      1411
           DoS     0.4400    0.0334    0.0621      1647
       Generic     0.9943    0.9857    0.9900     21642
     Shellcode     0.6531    0.8000    0.7191       160
       Fuzzers     0.5853    0.6139    0.5992      2437
         Worms     0.0000    0.0000    0.0000         6
      Backdoor     1.0000    0.0480    0.0916       250
      Analysis     0.3750    0.0112    0.0217       269

      accuracy                         0.9803    254005
     macro avg     0.6506    0.5167    0.5021    254005
  weighted avg     0.9799    0.9803    0.9778    254005



In [8]:
print('Below is the report for the Random Forest model prepared by Matt.')
print('-------------------------------------------------------')
classification_eval(Matt_RF_Actual,Matt_RF_Pred)

Below is the report for the Random Forest model prepared by Matt.
-------------------------------------------------------
Confusion Matrix
Classification report
                precision    recall  f1-score   support

        Normal     0.9977    0.9985    0.9981    221772
      Exploits     0.6309    0.8216    0.7137      4411
Reconnaissance     0.9222    0.7725    0.8407      1411
           DoS     0.3059    0.2325    0.2642      1647
       Generic     0.9977    0.9876    0.9926     21642
     Shellcode     0.7644    0.8313    0.7964       160
       Fuzzers     0.7721    0.7091    0.7393      2437
         Worms     0.0000    0.0000    0.0000         6
      Backdoor     0.7917    0.0760    0.1387       250
      Analysis     0.7742    0.0892    0.1600       269

      accuracy                         0.9835    254005
     macro avg     0.6957    0.5518    0.5644    254005
  weighted avg     0.9837    0.9835    0.9827    254005



In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

def cal_APRF(pre,test):
    result = []
    ACC = round(100*(accuracy_score(pre, test)),3)
    result.append(ACC)
    # Precision
    P = round(100*(precision_score(pre, test, average="weighted")),3)
    result.append(P)
    # Recall 
    R = round(100*(recall_score(pre, test, average="weighted")),3)
    result.append(R)
    # F1-Score
    F = round(100*(f1_score(pre, test, average="weighted")),3)
    result.append(F)
    return result

score1 = cal_APRF(Matt_NN_Actual,Matt_NN_Pred)
score2 = cal_APRF(Matt_RF_Actual,Matt_RF_Pred)

In [None]:
def draw_APRF(score1,score2,score3,score4,score5,score6,score7):
    plt.figure(figsize=(12,7))
    metric = ('Accuracy', 'Precison', 'Recall', 'F1_score', )
    bar_width = 0.2  
    index1 = np.arange(len(metric))
    index2 = index1 + bar_width  
    index3 = index2 + bar_width  

    p1 = plt.bar(index1, height=score1, width=bar_width, color='deepskyblue',label='Keras Feed Forward Neural Network')
    p2 = plt.bar(index2, height=score2, width=bar_width, color='sandybrown',label='Random Forest')
    p3 = plt.bar(index3, height=score3, width=bar_width, color='limegreen',label='R Feed Forward Neural Network')
    p4 = plt.bar(index3, height=score3, width=bar_width, color='maroon',label='Naive Bayes')
    p5 = plt.bar(index3, height=score3, width=bar_width, color='black',label='Auto Encoder')
    p6 = plt.bar(index3, height=score3, width=bar_width, color='purple',label='Dropout Neural Network')
    p7 = plt.bar(index3, height=score3, width=bar_width, color='gold',label='Dropout Neural Network')
    
    
    #Mark the value on the graph
    for p in p1:
        height = p.get_height()
        plt.text(p.get_x() + p.get_width() / 2, height+1, str(height), ha="center", va="bottom")
    for p in p2:
        height = p.get_height()
        plt.text(p.get_x() + p.get_width() / 2, height+1, str(height), ha="center", va="bottom")
    for p in p3:
        height = p.get_height()
        plt.text(p.get_x() + p.get_width() / 2, height+1, str(height), ha="center", va="bottom")
    for p in p4:
        height = p.get_height()
        plt.text(p.get_x() + p.get_width() / 2, height+1, str(height), ha="center", va="bottom")
    for p in p5:
        height = p.get_height()
        plt.text(p.get_x() + p.get_width() / 2, height+1, str(height), ha="center", va="bottom")
    for p in p6:
        height = p.get_height()
        plt.text(p.get_x() + p.get_width() / 2, height+1, str(height), ha="center", va="bottom")
    for p in p7:
        height = p.get_height()
        plt.text(p.get_x() + p.get_width() / 2, height+1, str(height), ha="center", va="bottom")

    plt.legend(bbox_to_anchor=(1, 1))  
    plt.xticks(index1 + bar_width/2, metric, fontsize=16)  
    plt.ylabel('Metric_Value', fontsize=16)  
    plt.title('Evaluation', fontsize=24, fontweight= 'black')  
    plt.show()
    
draw_APRF(score1,score2,score3,score4,score5,score6,score7)