In [1]:
import pandas as pd
from numpy import isnan
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

In [2]:
def get_training_dataset(binary=False):
    """Load the cleaned UNSW-NB15 training CSV and split it into features and labels.

    Args:
        binary: When truthy, the labels are taken from the last column of the
            file; otherwise they come from the second-to-last column.
            (Presumably last = binary label, second-to-last = multi-class
            category -- confirm against the cleaning step that wrote the CSV.)

    Returns:
        tuple: ``(features, labels)`` where ``features`` is every column except
        the final two and ``labels`` is the selected label column.
    """
    frame = pd.read_csv('../UNSW-NB15/data/UNSW_NB15_training-set_cleaned.csv')

    # The two trailing columns are both label columns; pick one by position.
    label_position = -1 if binary else -2

    features = frame.iloc[:, :-2]
    target = frame.iloc[:, label_position]
    return features, target

def get_testing_dataset(binary=False):
    """Load the cleaned UNSW-NB15 testing CSV and split it into features and labels.

    Args:
        binary: When truthy, the labels are taken from the last column of the
            file; otherwise they come from the second-to-last column.
            (Presumably last = binary label, second-to-last = multi-class
            category -- confirm against the cleaning step that wrote the CSV.)

    Returns:
        tuple: ``(features, labels)`` where ``features`` is every column except
        the final two and ``labels`` is the selected label column.
    """
    frame = pd.read_csv('../UNSW-NB15/data/UNSW_NB15_testing-set_cleaned.csv')

    # The two trailing columns are both label columns; pick one by position.
    label_position = -1 if binary else -2

    features = frame.iloc[:, :-2]
    target = frame.iloc[:, label_position]
    return features, target

In [3]:
def test_mult_model(model):
    """Fit ``model`` on the training split and score it on the testing split.

    Both splits are loaded with the default (non-binary) label column. The
    classification report and the confusion matrix for the test predictions
    are printed as a side effect.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns:
        tuple: ``(accuracy, weighted_f1)`` computed on the testing split.
    """
    train_features, train_labels = get_training_dataset()
    test_features, test_labels = get_testing_dataset()

    model.fit(train_features, train_labels)
    predictions = model.predict(test_features)

    # Report first, then confusion matrix -- same order as the printed output.
    for render in (classification_report, confusion_matrix):
        print(render(test_labels, predictions))

    accuracy = accuracy_score(test_labels, predictions)
    weighted_f1 = f1_score(test_labels, predictions, average="weighted")
    return accuracy, weighted_f1

In [4]:
def get_models(random_state=None):
    """Yield one unfitted instance of each classifier being compared.

    The classifiers are produced in this order: BaggingClassifier,
    RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier,
    DecisionTreeClassifier, ExtraTreeClassifier.

    Args:
        random_state: Value forwarded to every estimator's ``random_state``
            argument. The default ``None`` leaves the estimators unseeded,
            exactly as when they are constructed with no arguments; pass an
            integer to make repeated runs comparable.

    Yields:
        An estimator instance with otherwise default hyper-parameters.
    """
    model_classes = (
        BaggingClassifier,
        RandomForestClassifier,
        AdaBoostClassifier,
        ExtraTreesClassifier,
        DecisionTreeClassifier,
        ExtraTreeClassifier,
    )

    for model_class in model_classes:
        # All of these estimators involve randomness, so the seed is applied
        # uniformly rather than per model.
        yield model_class(random_state=random_state)

In [5]:
def print_results(accuracy, f1):
    """Print the accuracy and F1 score, skipping any value that is NaN.

    An empty line is always printed first; each metric that is not NaN then
    follows on its own tab-indented line.

    Args:
        accuracy: Accuracy value to display.
        f1: F1 score to display.
    """
    print("")

    show_accuracy = not isnan(accuracy)
    if show_accuracy:
        print(f"\taccuracy: {accuracy}")

    show_f1 = not isnan(f1)
    if show_f1:
        print(f"\tf1-score: {f1}")

def eval(model):
    """Print the model's class name, then train, test and report its scores.

    Args:
        model: Estimator passed on to ``test_mult_model``.
    """
    # NOTE(review): this name shadows Python's builtin ``eval`` for the rest of
    # the notebook; it is kept because the evaluation cells call it by this name.
    print(f"{type(model).__name__}:")

    scores = test_mult_model(model)
    print_results(*scores)

In [6]:
# Create the shared generator of classifiers. Every later `eval(next(model))`
# cell consumes one item from it, so those cells must each be run once, in order.
model = get_models()

In [7]:
# Pull the next classifier from the generator and evaluate it. In a fresh
# top-to-bottom run this is the 1st model from get_models (BaggingClassifier).
# Re-running this cell advances the generator to a different model.
eval(next(model))

BaggingClassifier:


              precision    recall  f1-score   support

           0       0.45      0.17      0.25      4089
           1       0.60      0.90      0.72     11132
           2       0.26      0.53      0.35      6062
           3       1.00      0.97      0.98     18871
           4       0.96      0.75      0.84     37000
           5       0.92      0.79      0.85      3496

    accuracy                           0.78     80650
   macro avg       0.70      0.69      0.67     80650
weighted avg       0.84      0.78      0.79     80650

[[  689  3118   197    37    19    29]
 [  414 10008   380    36   126   168]
 [  173  1616  3208     5  1050    10]
 [   77   354    41 18393     5     1]
 [  125   924  8247     6 27676    22]
 [   42   593    54     1    28  2778]]

	accuracy: 0.7780781153130812
	f1-score: 0.7913534450258591


In [8]:
# Pull the next classifier from the generator and evaluate it. In a fresh
# top-to-bottom run this is the 2nd model from get_models (RandomForestClassifier).
eval(next(model))

RandomForestClassifier:
              precision    recall  f1-score   support

           0       0.61      0.12      0.20      4089
           1       0.61      0.92      0.73     11132
           2       0.29      0.58      0.39      6062
           3       1.00      0.97      0.98     18871
           4       0.96      0.76      0.85     37000
           5       0.92      0.80      0.86      3496

    accuracy                           0.79     80650
   macro avg       0.73      0.69      0.67     80650
weighted avg       0.85      0.79      0.80     80650

[[  475  3339   225     5    14    31]
 [  169 10270   423     8   107   155]
 [   30  1442  3545     2  1025    18]
 [   39   489    53 18279     6     5]
 [   42   792  7881     6 28254    25]
 [   24   606    59     0     9  2798]]

	accuracy: 0.788853068815871
	f1-score: 0.7977245139770475


In [9]:
# Pull the next classifier from the generator and evaluate it. In a fresh
# top-to-bottom run this is the 3rd model from get_models (AdaBoostClassifier).
# NOTE(review): the saved output for this cell shows only the model name and no
# report or scores -- confirm whether this run completed.
eval(next(model))

AdaBoostClassifier:


In [None]:
# Pull the next classifier from the generator and evaluate it. In a fresh
# top-to-bottom run this is the 4th model from get_models (ExtraTreesClassifier).
eval(next(model))

ExtraTreesClassifier:
              precision    recall  f1-score   support

           0       0.60      0.12      0.20      4089
           1       0.61      0.92      0.73     11132
           2       0.29      0.58      0.39      6062
           3       1.00      0.97      0.98     18871
           4       0.96      0.76      0.85     37000
           5       0.82      0.78      0.80      3496

    accuracy                           0.79     80650
   macro avg       0.71      0.69      0.66     80650
weighted avg       0.85      0.79      0.79     80650

[[  498  3336   196     7    17    35]
 [  188 10246   443     6   128   121]
 [   33  1327  3503     3  1080   116]
 [   51   450    56 18290    16     8]
 [   31   743  7717     7 28177   325]
 [   23   648    77     1    12  2735]]

	accuracy: 0.7867203967761934
	f1-score: 0.7949429998882475


In [None]:
# Pull the next classifier from the generator and evaluate it. In a fresh
# top-to-bottom run this is the 5th model from get_models (DecisionTreeClassifier).
eval(next(model))

DecisionTreeClassifier:
              precision    recall  f1-score   support

           0       0.36      0.26      0.30      4089
           1       0.60      0.81      0.69     11132
           2       0.25      0.45      0.32      6062
           3       0.98      0.98      0.98     18871
           4       0.94      0.76      0.84     37000
           5       0.89      0.79      0.84      3496

    accuracy                           0.77     80650
   macro avg       0.67      0.68      0.66     80650
weighted avg       0.82      0.77      0.79     80650

[[ 1071  2682   199    63    51    23]
 [ 1141  9006   412   120   215   238]
 [  385  1367  2752    40  1491    27]
 [   77   279    48 18442    23     2]
 [  185  1029  7584    69 28064    69]
 [   83   542    66     4    35  2766]]

	accuracy: 0.7700061996280223
	f1-score: 0.7858099824218681


In [None]:
# Pull the next classifier from the generator and evaluate it. In a fresh
# top-to-bottom run this is the 6th and last model from get_models
# (ExtraTreeClassifier); a further next(model) would raise StopIteration.
eval(next(model))

ExtraTreeClassifier:
              precision    recall  f1-score   support

           0       0.31      0.27      0.29      4089
           1       0.48      0.75      0.59     11132
           2       0.26      0.46      0.33      6062
           3       0.98      0.80      0.88     18871
           4       0.93      0.75      0.83     37000
           5       0.70      0.74      0.72      3496

    accuracy                           0.72     80650
   macro avg       0.61      0.63      0.61     80650
weighted avg       0.79      0.72      0.74     80650

[[ 1106  2617   220    31    68    47]
 [ 1403  8373   593   124   320   319]
 [  494  1092  2808    25  1500   143]
 [  156  3560    74 15029    38    14]
 [  251  1278  6937    58 27879   597]
 [  114   523   153     3   112  2591]]

	accuracy: 0.7165034097954123
	f1-score: 0.7401169543844206
