In [1]:
import pandas as pd
from numpy import isnan
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier


In [2]:
def get_training_dataset(
    binary=False,
    path='../UNSW-NB15/data/UNSW_NB15_training-set_cleaned.csv',
):
    """Load the cleaned training split and separate features from labels.

    The CSV is read with ``pd.read_csv``. Its last two columns are treated as
    label columns and every column before them as a feature.

    Args:
        binary: When True the last column is returned as the labels,
            otherwise the second-to-last column is returned. (Presumably the
            binary attack flag and the multi-class attack category
            respectively -- confirm against the cleaned CSV.)
        path: Location of the CSV file. Defaults to the training-set path
            this notebook has always used, so existing calls are unaffected.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is a DataFrame of all
        columns except the last two and ``labels`` is the selected Series.
    """
    df = pd.read_csv(path)
    # Negative positions: -1 is the last column, -2 the one before it.
    label_position = -1 if binary else -2
    features = df.iloc[:, :-2]
    labels = df.iloc[:, label_position]
    return features, labels

def get_testing_dataset(
    binary=False,
    path='../UNSW-NB15/data/UNSW_NB15_testing-set_cleaned.csv',
):
    """Load the cleaned testing split and separate features from labels.

    The CSV is read with ``pd.read_csv``. Its last two columns are treated as
    label columns and every column before them as a feature.

    Args:
        binary: When True the last column is returned as the labels,
            otherwise the second-to-last column is returned. (Presumably the
            binary attack flag and the multi-class attack category
            respectively -- confirm against the cleaned CSV.)
        path: Location of the CSV file. Defaults to the testing-set path this
            notebook has always used, so existing calls are unaffected.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is a DataFrame of all
        columns except the last two and ``labels`` is the selected Series.
    """
    df = pd.read_csv(path)
    # Negative positions: -1 is the last column, -2 the one before it.
    label_position = -1 if binary else -2
    features = df.iloc[:, :-2]
    labels = df.iloc[:, label_position]
    return features, labels

In [3]:
def test_mult_model(model):
    """Fit ``model`` on the training split and score it on the testing split.

    Both splits are loaded with their default (non-binary) labels. As a side
    effect, the classification report and the confusion matrix for the test
    predictions are printed, in that order.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns:
        tuple: ``(accuracy, weighted_f1)`` computed on the test predictions.
    """
    train_features, train_labels = get_training_dataset()
    test_features, test_labels = get_testing_dataset()

    model.fit(train_features, train_labels)
    predicted = model.predict(test_features)

    # Per-class report first, then the raw confusion matrix.
    for build_report in (classification_report, confusion_matrix):
        print(build_report(test_labels, predicted))

    accuracy = accuracy_score(test_labels, predicted)
    weighted_f1 = f1_score(test_labels, predicted, average="weighted")
    return accuracy, weighted_f1

In [4]:
def get_models():
    """Yield the classifiers to evaluate, one at a time.

    This is a generator: nothing is constructed until the first ``next()``.
    At present it produces a single ``KNeighborsClassifier(n_jobs=-1)``.
    """
    candidates = (KNeighborsClassifier(n_jobs=-1), )
    yield from candidates

In [5]:
def print_results(accuracy, f1):
    """Print the accuracy and F1 score, skipping any value that is NaN.

    A blank line is always printed first. Each metric that is not NaN follows
    on its own tab-indented line.

    Args:
        accuracy: Accuracy value to report.
        f1: F1 score to report.
    """
    print("")

    accuracy_available = not isnan(accuracy)
    if accuracy_available:
        print(f"\taccuracy: {accuracy}")

    f1_available = not isnan(f1)
    if f1_available:
        print(f"\tf1-score: {f1}")

def eval(model):
    """Print the model's class name, then train, score and report it.

    Args:
        model: Estimator passed on to ``test_mult_model``.
    """
    # NOTE(review): this name shadows Python's built-in eval() for the rest of
    # the notebook; it is kept because later cells call it by this name.
    print(f"{type(model).__name__}:")
    scores = test_mult_model(model)
    print_results(*scores)

In [6]:
# Despite the singular name, `model` is the generator returned by
# get_models(); each next() call on it hands out the next classifier.
model = get_models()

In [7]:
# Evaluate the next classifier from the generator (`eval` here is the function
# defined in this notebook, not the built-in). get_models() yields a single
# model, so re-running this cell without first recreating `model` raises
# StopIteration.
eval(next(model))

KNeighborsClassifier:
              precision    recall  f1-score   support

           0       0.33      0.39      0.36      4089
           1       0.60      0.72      0.66     11132
           2       0.25      0.49      0.33      6062
           3       1.00      0.96      0.98     18871
           4       0.94      0.72      0.82     37000
           5       0.61      0.66      0.63      3496

    accuracy                           0.74     80650
   macro avg       0.62      0.66      0.63     80650
weighted avg       0.81      0.74      0.77     80650

[[ 1614  2174   148    11    46    96]
 [ 1931  8055   509    16   193   428]
 [  711   785  2962     3  1425   176]
 [  112   398   109 18205    13    34]
 [  310  1471  7720     8 26738   753]
 [  172   557   330     4   129  2304]]

	accuracy: 0.7424426534407935
	f1-score: 0.7650004239734276
