# Multiclass Classification Baselines for Anomaly-based Network Intrusion Detection Systems 

### Importing libraries

In [8]:
import os
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
import wandb
from wandb.keras import WandbCallback
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
np.random.seed(7)
import tensorflow as tf
tf.random.set_seed(2) 

### Pulling Data

In [2]:
df = pd.read_csv("ASNM-datasets/ASNM-TUN.csv")

### Replacing missing spaces with mean of dataframe

In [3]:
df = df.fillna(df.mean())

### Using attributes from Feature Selection for our input data and make out output data label 3
### Label 3: distinguishes among legitimate traffic (symbol 3), direct and obfuscated network attacks (symbols 1 and 2)

In [4]:
FFSCols = ['PolyInd10ordOut[3]',
 'OutPktLen64s10i[8]',
 'OutPkt4s10i[7]',
 'ConTcpFinCntIn',
 'GaussProds4In[1]',
 'FourGonAngleAllN[2]',
 'MedTCPHdrLen',
 'GaussProds8In[5]',
 'SumTTLOut',
 'PolyTime10ordOut[2]',
 'InPkt64s20iTr2KB[14]',
 'OutPktLen64s10i[5]',
 'PolyInd13ordIn[7]',
 'InPkt1s10i[8]',
 'OutPkt32s20iTr4KB[11]',
 'PolyTime10ordOut[8]',
 'OutPktLen4s10i[3]',
 'PolyInd13ordOut[13]',
 'PolyInd13ordIn[12]',
 'InPkt64s20iTr2KB[7]']
X = df[FFSCols].values
y = df[['label_3']].values

### Normalizing input data

In [5]:
scaler = MinMaxScaler(feature_range=(0.1,0.9))
X = scaler.fit_transform(X)


### Encoding the output for multiclassification

In [6]:
encoder = LabelEncoder()
encoder.fit(y)
encoded_Y = encoder.transform(y)
y = np_utils.to_categorical(encoded_Y)

### KFold Cross Validation

In [7]:
kfold = KFold(n_splits=5, shuffle=True, random_state=7)

### Building Neural Network baseline model with Adam optimizer

In [8]:
cvscores = []
for train, test in kfold.split(X, y):
        wandb.init(project="asnm-tun")
        model = Sequential()
        model.add(Dense(X.shape[1]+1, input_dim=X.shape[1], activation='relu'))
        model.add(Dense(40,activation='relu'))
        model.add(Dense(60,activation='relu'))
        model.add(Dense(30,activation='relu'))
        model.add(Dense(10,activation='relu'))
        model.add(Dense(y.shape[1], activation='softmax'))
        opt = keras.optimizers.Adam(learning_rate=0.009)
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.fit(
                X[train],
                y[train],
                epochs=1200,
                verbose=0,
                batch_size=10,
                callbacks=[WandbCallback()]
        )
        model.save("model.h5")
        open("model.json", "w").write(model.to_json())
        scores = model.evaluate(X[test], y[test], verbose=0)
        cvscores.append(scores[1] * 100)
        wandb.save('model.h5')
        wandb.save('model.json')
        
        print("---------------------------")
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
        print("---------------------------")
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

---------------------------
accuracy: 87.34%
---------------------------


---------------------------
accuracy: 78.48%
---------------------------


---------------------------
accuracy: 70.89%
---------------------------


---------------------------
accuracy: 87.34%
---------------------------


---------------------------
accuracy: 84.62%
---------------------------
81.73% (+/- 6.32%)


### Building Neural Network baseline model with SGD optimizer

In [9]:
cvscores = []
for train, test in kfold.split(X, y):
        wandb.init(project="asnm-tun")
        model = Sequential()
        model.add(Dense(X.shape[1]+1, input_dim=X.shape[1], activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(40,activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(60,activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(30,activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(10,activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(y.shape[1], activation='softmax'))
        opt = SGD(lr=0.01, momentum=0.75)
        model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
        model.fit(
                X[train],
                y[train],
                epochs=1000,
                verbose=0,
                batch_size=10,
                callbacks=[WandbCallback()]
        )
        model.save(os.path.join(wandb.run.dir, "model.h5")) 
        open(wandb.run.dir + "/model.json", "w").write(model.to_json())
        scores = model.evaluate(X[test], y[test], verbose=0)
        cvscores.append(scores[1] * 100)

        print("---------------------------")
        print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
        print("---------------------------")
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

---------------------------
accuracy: 75.95%
---------------------------


---------------------------
accuracy: 65.82%
---------------------------


---------------------------
accuracy: 60.76%
---------------------------


---------------------------
accuracy: 63.29%
---------------------------


---------------------------
accuracy: 76.92%
---------------------------
68.55% (+/- 6.64%)


### Building baseline model with Decision Tree

In [12]:
cvscores = []
for train, test in kfold.split(X, y):
    dtree_model = DecisionTreeClassifier(max_depth = 1000).fit(X[train], y[train]) 
    scores = dtree_model.score(X[test], y[test])
    y_pred = dtree_model.predict(X[test])
    print(classification_report(y[test], y_pred, target_names=['Legitmate', 'Direct', 'Ofuscated']))
    cvscores.append(scores * 100)
    print("---------------------------")
    print("accuracy: %.2f%%" % (scores*100))
    print("---------------------------")
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

precision    recall  f1-score   support

   Legitmate       0.97      1.00      0.98        30
      Direct       0.76      0.93      0.84        14
   Ofuscated       1.00      0.89      0.94        35

   micro avg       0.94      0.94      0.94        79
   macro avg       0.91      0.94      0.92        79
weighted avg       0.95      0.94      0.94        79
 samples avg       0.94      0.94      0.94        79

---------------------------
accuracy: 93.67%
---------------------------
              precision    recall  f1-score   support

   Legitmate       0.91      0.87      0.89        23
      Direct       0.75      0.75      0.75        20
   Ofuscated       0.86      0.89      0.88        36

   micro avg       0.85      0.85      0.85        79
   macro avg       0.84      0.84      0.84        79
weighted avg       0.85      0.85      0.85        79
 samples avg       0.85      0.85      0.85        79

---------------------------
accuracy: 84.81%
--------------------------

### Building baseline model with KNN

In [13]:
cvscores = []
for train, test in kfold.split(X, y):
    knn = KNeighborsClassifier(n_neighbors = 3).fit(X[train], y[train]) 
    scores = knn.score(X[test], y[test])
    y_pred = knn.predict(X[test])
    print(classification_report(y[test], y_pred, target_names=['Legitmate', 'Direct', 'Ofuscated']))
    cvscores.append(scores * 100)
    print("---------------------------")
    print("accuracy: %.2f%%" % (scores*100))
    print("---------------------------")
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

precision    recall  f1-score   support

   Legitmate       0.91      1.00      0.95        30
      Direct       0.92      0.79      0.85        14
   Ofuscated       0.91      0.89      0.90        35

   micro avg       0.91      0.91      0.91        79
   macro avg       0.91      0.89      0.90        79
weighted avg       0.91      0.91      0.91        79
 samples avg       0.91      0.91      0.91        79

---------------------------
accuracy: 91.14%
---------------------------
              precision    recall  f1-score   support

   Legitmate       0.79      0.83      0.81        23
      Direct       0.67      0.60      0.63        20
   Ofuscated       0.91      0.86      0.89        36

   micro avg       0.82      0.78      0.80        79
   macro avg       0.79      0.76      0.78        79
weighted avg       0.81      0.78      0.80        79
 samples avg       0.78      0.78      0.78        79

---------------------------
accuracy: 78.48%
--------------------------

### Model baseline with Random Forest

In [14]:
from sklearn.ensemble import RandomForestClassifier
cvscores = []
for train, test in kfold.split(X, y):
    rfc = RandomForestClassifier(max_depth=1000).fit(X[train], y[train])  
    scores = rfc.score(X[test], y[test])
    y_pred = rfc.predict(X[test])
    print(classification_report(y[test], y_pred, target_names=['Legitmate', 'Direct', 'Ofuscated']))
    cvscores.append(scores * 100)
    print("---------------------------")
    print("accuracy: %.2f%%" % (scores*100))
    print("---------------------------")
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))


precision    recall  f1-score   support

   Legitmate       1.00      1.00      1.00        30
      Direct       0.92      0.79      0.85        14
   Ofuscated       1.00      0.91      0.96        35

   micro avg       0.99      0.92      0.95        79
   macro avg       0.97      0.90      0.93        79
weighted avg       0.99      0.92      0.95        79
 samples avg       0.92      0.92      0.92        79

---------------------------
accuracy: 92.41%
---------------------------
              precision    recall  f1-score   support

   Legitmate       1.00      0.87      0.93        23
      Direct       0.89      0.80      0.84        20
   Ofuscated       0.87      0.92      0.89        36

   micro avg       0.91      0.87      0.89        79
   macro avg       0.92      0.86      0.89        79
weighted avg       0.91      0.87      0.89        79
 samples avg       0.87      0.87      0.87        79

---------------------------
accuracy: 87.34%
--------------------------

### Building baseline model with SVC

In [15]:
cvscores = []
for train, test in kfold.split(X, y):
    svc_model = OneVsRestClassifier(SVC(kernel = 'linear')).fit(X[train], y[train]) 
    scores = svc_model.score(X[test], y[test])
    y_pred = svc_model.predict(X[test])
    print(classification_report(y[test], y_pred, target_names=['Legitmate', 'Direct', 'Ofuscated']))
    cvscores.append(scores * 100)
    print("---------------------------")
    print("accuracy: %.2f%%" % (scores*100))
    print("---------------------------")
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))


precision    recall  f1-score   support

   Legitmate       0.00      0.00      0.00        30
      Direct       1.00      0.07      0.13        14
   Ofuscated       0.81      0.37      0.51        35

   micro avg       0.82      0.18      0.29        79
   macro avg       0.60      0.15      0.21        79
weighted avg       0.54      0.18      0.25        79
 samples avg       0.18      0.18      0.18        79

---------------------------
accuracy: 17.72%
---------------------------
              precision    recall  f1-score   support

   Legitmate       0.00      0.00      0.00        23
      Direct       1.00      0.05      0.10        20
   Ofuscated       0.36      0.11      0.17        36

   micro avg       0.42      0.06      0.11        79
   macro avg       0.45      0.05      0.09        79
weighted avg       0.42      0.06      0.10        79
 samples avg       0.06      0.06      0.06        79

---------------------------
accuracy: 5.06%
---------------------------