# Model evaluation
Evaluation of local and global models trained on the UNSW-NB15 dataset.

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import models
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

The dataset preprocessing below is taken from https://github.com/polvalls9/Transfer-Learning-Based-Intrusion-Detection-in-5G-and-IoT-Networks.git

In [None]:
# Rows whose 'proto' / 'state' value is not listed here are dropped by preprocessing().
_KEPT_PROTOCOLS = ('tcp', 'udp', 'icmp', 'arp', 'ipv6-icmp', 'igmp', 'rarp')
_KEPT_STATES = ('RST', 'REQ', 'INT', 'FIN', 'CON', 'ECO', 'ACC', 'PAR')

# Raw columns used as model features, in the order they are selected.
_FEATURE_COLUMNS = ['proto', 'srcip', 'sport', 'dstip', 'dsport', 'spkts', 'dpkts',
                    'sbytes', 'dbytes', 'state', 'stime', 'ltime', 'dur']

# One-hot columns that are appended (filled with 0) when the data does not
# produce them, listed in the order in which they are appended.
# NOTE(review): 'proto_ipv6-icmp' and 'proto_rarp' pass the row filter but are
# not guaranteed here, and appended columns end up after the existing ones, so
# the column layout can differ between datasets - confirm this matches what the
# evaluated model expects.
_REQUIRED_DUMMY_COLUMNS = (
    'state_PAR', 'state_ACC', 'state_ECO', 'state_CON',
    'state_FIN', 'state_INT', 'state_REQ', 'state_RST',
    'proto_igmp', 'proto_arp', 'proto_icmp', 'proto_udp', 'proto_tcp',
)


def _ip_tail_to_int(address):
    """Return the last '.'- or ':'-separated group of *address* parsed as base 16."""
    # The group is always read as hexadecimal, so an IPv4 octet "10" becomes 16.
    return int(address.split(".")[-1].split(":")[-1], 16)


def _port_to_int(port):
    """Convert one raw port value to an int, or -1 for purely alphabetic text."""
    text = str(port)
    if "0x" in text:
        text = text.replace('0x', '')
    # Ports read as floats show up as e.g. "80.0"; drop the decimal part.
    if text[-2:] == '.0':
        text = text[:-2]
    # Digits are always read as hexadecimal, so "80" becomes 128.
    return -1 if text.isalpha() else int(text, 16)


def preprocessing(data):
    """Convert a raw flow table into scaled model inputs and label columns.

    Steps, in order:
      1. keep only rows whose 'proto' and 'state' values are in the supported sets;
      2. reduce 'srcip' / 'dstip' to their last address group and parse that
         group, as well as 'sport' / 'dsport', as base-16 integers;
      3. apply log1p to 'dur', 'sbytes', 'dbytes' and 'spkts';
      4. one-hot encode 'proto' and 'state' and append any expected one-hot
         column that is missing, filled with 0;
      5. standardise every feature with a StandardScaler fitted on this data
         (the scaling statistics therefore come from this dataset alone);
      6. add two trailing axes of size 1.

    NOTE(review): the base-16 parsing of decimal-looking octets and ports is
    kept exactly as in the upstream preprocessing; presumably existing models
    were trained on features encoded this way - confirm before changing it.

    Args:
        data: pandas DataFrame containing a 'label' column and the columns
            'proto', 'srcip', 'sport', 'dstip', 'dsport', 'spkts', 'dpkts',
            'sbytes', 'dbytes', 'state', 'stime', 'ltime' and 'dur'.
            The frame itself is not modified.

    Returns:
        A tuple ``(x, y)``:
          * ``x`` - numpy array of shape (rows, features, 1, 1) with the
            standardised features;
          * ``y`` - numpy array with one row per sample. For a numeric 0/1
            'label' column the two columns are ``[label, normal]`` where
            ``normal`` is the label with 0 and 1 swapped.
    """
    # Keep only the supported protocols and connection states.
    data = data.loc[data['proto'].isin(_KEPT_PROTOCOLS)]
    data = data.loc[data['state'].isin(_KEPT_STATES)]

    # Explicit copies: the columns are modified below, and writing into a
    # slice of the filtered frame would raise pandas' SettingWithCopyWarning.
    data_labels = data[['label']].copy()
    data_features = data[_FEATURE_COLUMNS].copy()

    # IP addresses: last address group, parsed as base 16.
    for ip_col in ('srcip', 'dstip'):
        data_features[ip_col] = data_features[ip_col].apply(_ip_tail_to_int).astype(int)

    # Ports: strip '0x' prefixes and '.0' suffixes, then parse as base 16.
    for port_col in ('sport', 'dsport'):
        data_features[port_col] = data_features[port_col].apply(_port_to_int).astype(int)

    # Compress the heavy-tailed volume/duration fields.
    for col in ('dur', 'sbytes', 'dbytes', 'spkts'):
        data_features[col] = data_features[col].apply(np.log1p)

    # Complementary 'normal' column: the label with 0 and 1 swapped
    # (2 is used as a temporary placeholder during the swap).
    normal = data_labels['label'].replace(1, 2).replace(0, 1).replace(2, 0)
    data_labels.insert(1, 'normal', normal)
    data_labels = pd.get_dummies(data_labels)

    # One-hot encode the remaining categorical features ('proto', 'state').
    data_features = pd.get_dummies(data_features)

    # Datasets may not contain every protocol/state, so append the missing
    # one-hot columns filled with 0.
    for column in _REQUIRED_DUMMY_COLUMNS:
        if column not in data_features.columns:
            data_features[column] = 0

    # Normalize all data features.
    data_features = StandardScaler().fit_transform(data_features)

    # Add two trailing singleton axes: (rows, features) -> (rows, features, 1, 1).
    data_features = np.expand_dims(data_features, axis=2)
    data_features = np.expand_dims(data_features, axis=3)

    x = data_features
    y = data_labels.to_numpy()

    return x, y

In [None]:
def evaluate_detection(model, x, y, attack_cat):
    """Evaluate a binary attack detector and break results down per attack category.

    Class index 0 means *attack* and class index 1 means *normal* (the argmax
    of each row of ``y`` / of the model output). scikit-learn's confusion
    matrix names its cells as if class 1 were the positive class, so with
    attacks as the positive class the cells are read as follows:

        cm cell (sklearn name)   meaning here
        tn  (true 0, pred 0)     attack detected
        fp  (true 0, pred 1)     attack missed
        fn  (true 1, pred 0)     normal flagged as attack
        tp  (true 1, pred 1)     normal recognised as normal

    Metrics, written with the sklearn cell names:
        - FNR       = fp / (fp + tn)
        - Precision = tn / (tn + fn)
        - Recall    = tn / (tn + fp)
        - Accuracy  = (tp + tn) / (tp + tn + fp + fn)
        - F1        = 2 * Precision * Recall / (Precision + Recall)

    A metric whose denominator is zero comes out as NaN (NumPy scalar
    division warns instead of raising).

    Args:
        model: object with a ``predict(x)`` method returning per-class scores
            with one row per sample.
        x: model input for the test samples.
        y: array with one row per sample; the argmax of each row is the true class.
        attack_cat: attack category of each sample, aligned with ``x`` / ``y``.

    Returns:
        A 12-tuple: the strings 'F1:...', 'FNR:...', 'Accuracy:...',
        'Recall:...', 'Precision:...'; the 2x2 confusion matrix; a dict with
        the percentage of detected samples per attack category; a DataFrame
        with the attack category, true label and predicted label of every
        sample; and the raw counts tn, tp, fp, fn (sklearn naming).

    Raises:
        ValueError: if ``attack_cat`` does not have one entry per sample.
    """
    y_true = np.argmax(y, axis=1)

    # Fail early (before the potentially slow prediction) with a clear message;
    # zip() below would otherwise silently truncate to the shorter input.
    if len(attack_cat) != len(y_true):
        raise ValueError(
            'attack_cat has {} entries but there are {} samples'.format(len(attack_cat), len(y_true))
        )

    y_pred = np.argmax(model.predict(x), axis=1)

    # labels=[0, 1] keeps the matrix 2x2 even when one class never occurs, so
    # the four-way unpacking below cannot fail on single-class data.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    FNR = fp/(fp+tn)
    Precision = tn/(tn+fn)
    Recall = tn/(tn+fp)
    Acc = (tn+tp)/(tn+tp+fp+fn)
    F1 = 2*Precision*Recall/(Precision+Recall)

    attack_categories = ['generic', 'exploits', 'reconnaissance', 'dos', 'unknown']
    category_counts = {cat: {'total': 0, 'detected': 0} for cat in attack_categories}

    for true_label, pred_label, attack in zip(y_true, y_pred, attack_cat):
        if attack in category_counts:
            bucket = category_counts[attack]
        elif true_label == 0:
            # Attack whose category is not one of the tracked names.
            bucket = category_counts['unknown']
        else:
            # Normal sample without a tracked category: not counted.
            continue
        bucket['total'] += 1
        if true_label == 0 and pred_label == 0:
            bucket['detected'] += 1

    category_percentages = {cat: (counts['detected'] / counts['total'] * 100) if counts['total'] > 0 else 0
                            for cat, counts in category_counts.items()}

    df = pd.DataFrame({
        'Attack Category': attack_cat,
        'True Label': y_true,
        'Predicted Label': y_pred
    })

    return ('F1:'+ str(F1), 'FNR:'+ str(FNR), 'Accuracy:'+str(Acc), 'Recall:'+str(Recall), 'Precision:'+str(Precision), cm,category_percentages, df, tn, tp, fp, fn)

In [None]:
# Load the test split. low_memory=False makes pandas parse the file in a single
# pass, which avoids mixed-type column inference.
det_test = pd.read_csv('/mnt/c/users/UX430/Documents/thesis/datasets/UNSW-NB15/UNSW-NB15-Test-Complete-Un.csv', low_memory = False)

In [None]:
# Build the model inputs (xun) and label columns (yun) from the raw table.
xun, yun = preprocessing(det_test)

In [None]:
# NOTE(review): preprocessing() drops rows with unsupported 'proto'/'state'
# values, while this column is taken from the unfiltered table. The two stay
# aligned only if no row of this file is dropped - confirm for this dataset.
attack_cat = det_test['attack_cat'] # attack categories

In [None]:
# Must be set to a saved model location before running the next cell.
model_dir = "" # define directory of model that needs to be evaluated 

In [None]:
# Load the saved Keras model to evaluate.
model = models.load_model(model_dir)

In [None]:
# Returns the metric strings, confusion matrix, per-category detection
# percentages, per-sample DataFrame and raw counts (shown as the cell output).
evaluate_detection(model, xun, yun, attack_cat)