In [1]:
import pandas as pd
from numpy import isnan
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

In [2]:
def get_training_dataset(binary=False):
    """Load the cleaned UNSW-NB15 training CSV and split it into features and labels.

    Args:
        binary: When true, the labels are taken from the last column of the
            file; otherwise they are taken from the second-to-last column.
            (Presumably binary attack/normal vs. multi-class attack category;
            confirm against the CSV schema.)

    Returns:
        A ``(features, labels)`` tuple: every column except the final two as a
        DataFrame, and the selected label column as a Series.
    """
    frame = pd.read_csv('../UNSW-NB15/data/UNSW_NB15_training-set_cleaned.csv')

    # The two trailing columns are both label columns and never features.
    label_position = -1 if binary else -2
    features = frame.iloc[:, :-2]
    return features, frame.iloc[:, label_position]

def get_testing_dataset(binary=False):
    """Load the cleaned UNSW-NB15 testing CSV and split it into features and labels.

    Args:
        binary: When true, the labels are taken from the last column of the
            file; otherwise they are taken from the second-to-last column.
            (Presumably binary attack/normal vs. multi-class attack category;
            confirm against the CSV schema.)

    Returns:
        A ``(features, labels)`` tuple: every column except the final two as a
        DataFrame, and the selected label column as a Series.
    """
    frame = pd.read_csv('../UNSW-NB15/data/UNSW_NB15_testing-set_cleaned.csv')

    # The two trailing columns are both label columns and never features.
    label_position = -1 if binary else -2
    features = frame.iloc[:, :-2]
    return features, frame.iloc[:, label_position]

In [3]:
def test_mult_model(model):
    """Fit ``model`` on the training split and score it on the testing split.

    Both splits are loaded with the loaders' default (non-binary) labels. The
    classification report and the confusion matrix for the test predictions
    are printed, in that order.

    Args:
        model: An estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns:
        A ``(accuracy, weighted_f1)`` tuple computed on the testing split.
    """
    train_features, train_labels = get_training_dataset()
    test_features, test_labels = get_testing_dataset()

    model.fit(train_features, train_labels)
    predicted = model.predict(test_features)

    # Per-class report first, then the raw confusion matrix.
    for summarise in (classification_report, confusion_matrix):
        print(summarise(test_labels, predicted))

    accuracy = accuracy_score(test_labels, predicted)
    weighted_f1 = f1_score(test_labels, predicted, average="weighted")
    return accuracy, weighted_f1

In [4]:
def get_models():
    """Yield the classifiers to evaluate, one per ``next()`` call.

    The order is ``LinearSVC(dual="auto")`` followed by ``SVC()``. Both
    estimators are constructed the first time the generator is advanced.
    """
    yield from (LinearSVC(dual="auto"), SVC())

In [5]:
def print_results(accuracy, f1):
    """Print a blank line, then each metric that is not NaN on its own tab-indented line.

    Args:
        accuracy: Accuracy score; skipped when it is NaN.
        f1: F1 score; skipped when it is NaN.
    """
    print("")

    metric_lines = (
        (accuracy, f"\taccuracy: {accuracy}"),
        (f1, f"\tf1-score: {f1}"),
    )
    for value, line in metric_lines:
        # NaN marks a metric that should not be reported.
        if isnan(value):
            continue
        print(line)

def eval(model):
    """Print the model's class name, then train, score and report it.

    NOTE(review): this name shadows the builtin ``eval`` for the rest of the
    notebook; it is kept because later cells call it by this name.

    Args:
        model: Estimator passed through to ``test_mult_model``.
    """
    model_name = type(model).__name__
    print(f"{model_name}:")

    scores = test_mult_model(model)
    print_results(*scores)

In [6]:
# `model` is the generator returned by get_models(), not a single estimator;
# the cells below pull one classifier at a time from it with next().
# Re-run this cell to restart from the first classifier.
model = get_models()

In [7]:
# Advance the generator to its first classifier (LinearSVC, per the output
# below) and evaluate it. Stateful: re-running this cell moves on to the next
# model instead of repeating this one.
eval(next(model))

LinearSVC:
              precision    recall  f1-score   support

           0       0.21      0.06      0.10      4089
           1       0.53      0.78      0.63     11132
           2       0.24      0.65      0.35      6062
           3       0.99      0.96      0.98     18871
           4       0.95      0.62      0.75     37000
           5       0.50      0.58      0.53      3496

    accuracy                           0.69     80650
   macro avg       0.57      0.61      0.56     80650
weighted avg       0.79      0.69      0.72     80650

[[  265  3357   237    35    88   107]
 [  488  8658  1081    37   625   243]
 [  487   972  3962     8   395   238]
 [    0   429   223 18162    15    42]
 [    0  2089 10503    29 22967  1412]
 [    0   791   660    12    19  2014]]

	accuracy: 0.6947055176689398
	f1-score: 0.7151156336074437


In [8]:
# Advance the generator to its second classifier (SVC, per the output below)
# and evaluate it. The generator is exhausted afterwards, so a further
# next(model) raises StopIteration until `model` is recreated.
eval(next(model))

SVC:
              precision    recall  f1-score   support

           0       0.47      0.03      0.05      4089
           1       0.56      0.87      0.68     11132
           2       0.25      0.70      0.37      6062
           3       1.00      0.96      0.98     18871
           4       0.99      0.62      0.76     37000
           5       0.49      0.65      0.56      3496

    accuracy                           0.71     80650
   macro avg       0.63      0.64      0.57     80650
weighted avg       0.83      0.71      0.73     80650

[[  103  3609   230    20    10   117]
 [   56  9729  1003     2    37   305]
 [    0  1414  4258     8   136   246]
 [   51   390   217 18161     2    50]
 [    0  1527 11046     1 22818  1608]
 [    7   799   403     7     2  2278]]

	accuracy: 0.7110601363918165
	f1-score: 0.7264611219985795
