In [16]:
import pandas as pd
from numpy import mean, isnan
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB, BernoulliNB

In [17]:
def get_training_dataset(binary=False, path=None):
    """Load the cleaned training CSV and split it into features and labels.

    The last two columns of the file are treated as label columns and every
    column before them as a feature.
    (Presumably these are the UNSW-NB15 attack category and binary label --
    confirm against the cleaning step that wrote the CSV.)

    Args:
        binary: When True, use the last column as the target; otherwise use
            the second-to-last column.
        path: Optional CSV location (anything ``pd.read_csv`` accepts: a path,
            URL or file-like object). Defaults to the project's training-set
            path so existing calls behave exactly as before.

    Returns:
        A ``(features, labels)`` tuple: a DataFrame of all columns except the
        last two, and a Series with the selected label column.
    """
    if path is None:
        # Original hard-coded location, kept as the default for existing callers.
        path = '/content/drive/MyDrive/DSTTProject/data/UNSW_NB15_training-set_cleaned.csv'

    df = pd.read_csv(path)
    label_position = -1 if binary else -2
    return df.iloc[:, :-2], df.iloc[:, label_position]

def get_testing_dataset(binary=False, path=None):
    """Load the cleaned testing CSV and split it into features and labels.

    The last two columns of the file are treated as label columns and every
    column before them as a feature.
    (Presumably these are the UNSW-NB15 attack category and binary label --
    confirm against the cleaning step that wrote the CSV.)

    Args:
        binary: When True, use the last column as the target; otherwise use
            the second-to-last column.
        path: Optional CSV location (anything ``pd.read_csv`` accepts: a path,
            URL or file-like object). Defaults to the project's testing-set
            path so existing calls behave exactly as before.

    Returns:
        A ``(features, labels)`` tuple: a DataFrame of all columns except the
        last two, and a Series with the selected label column.
    """
    if path is None:
        # Original hard-coded location, kept as the default for existing callers.
        path = '/content/drive/MyDrive/DSTTProject/data/UNSW_NB15_testing-set_cleaned.csv'

    df = pd.read_csv(path)
    label_position = -1 if binary else -2
    return df.iloc[:, :-2], df.iloc[:, label_position]

In [18]:
def get_models():
    """Generator over one fresh instance of each Naive Bayes classifier.

    Yields, in order: GaussianNB, MultinomialNB, ComplementNB, BernoulliNB.
    All four instances are created when the first item is requested; the
    generator is exhausted after the fourth.
    """
    yield from (GaussianNB(), MultinomialNB(), ComplementNB(), BernoulliNB())

def test_bin_model(model, scaler):
    X_train, y_train = get_training_dataset(binary=True)
    X_test, y_test = get_testing_dataset(binary=True)

    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model.fit(X_train, y_train)
    y_preds = model.predict(X_test)

    return accuracy_score(y_test, y_preds), f1_score(y_test, y_preds)

def test_mult_model(model):
    X_train, y_train = get_training_dataset()
    X_test, y_test = get_testing_dataset()

    model.fit(X_train, y_train)
    y_preds = model.predict(X_test)

    print(classification_report(y_test, y_preds))
    print(confusion_matrix(y_test, y_preds))

    return accuracy_score(y_test, y_preds), f1_score(y_test, y_preds, average="weighted")


In [19]:
def print_results(accuracy, f1):
    """Print a blank line, then each score that is not NaN.

    Scores are printed tab-indented, accuracy first and f1-score second;
    a NaN score is silently skipped.
    """
    print("")

    for label, value in (("accuracy", accuracy), ("f1-score", f1)):
        if isnan(value):
            continue
        print(f"\t{label}: {value}")

def eval(model):
    """Print the model's class name, then run and report the multi-class test.

    Note: this name shadows Python's builtin ``eval`` for the rest of the
    notebook session.
    """
    model_name = type(model).__name__
    print(f"{model_name}:")

    scores = test_mult_model(model)
    print_results(*scores)

In [20]:
# Despite the name, `model` is the generator returned by get_models(), not a
# single classifier. Each `next(model)` in the cells below consumes one
# classifier, so re-running one of those cells advances to the next model (and
# raises StopIteration once all four are used) unless this cell is re-run first.
model = get_models()

In [21]:
# First draw from the generator: GaussianNB when the cells are run once, in order.
# NOTE(review): the saved output below (scaler / One-vs-Rest / One-vs-One breakdown)
# does not match what the visible eval() prints (a classification report, a confusion
# matrix and one accuracy/f1 pair) -- presumably left over from an earlier version of
# the notebook; re-run to refresh.
eval(next(model))

GaussianNB:

	Without scaler:

		Simple:
			accuracy: 0.4374333441693614
			f1-score: 0.39560818739190856

		One vs Rest:
			accuracy: 0.4361729430081955
			f1-score: 0.3938754638312814

		One vs One:
			accuracy: 0.43918421818057385
			f1-score: 0.3998829052489188


	Standard scaler:

		Simple:
			accuracy: 0.4886877569992187
			f1-score: 0.5452154781585322

		One vs Rest:
			accuracy: 0.5133881978544664
			f1-score: 0.52316078877573

		One vs One:
			accuracy: 0.49014206603133326
			f1-score: 0.5465834638181118


	Min-Max scaler:

		Simple:
			accuracy: 0.4989249519507702
			f1-score: 0.5536080352536963

		One vs Rest:
			accuracy: 0.5185096469165797
			f1-score: 0.5274651165687967

		One vs One:
			accuracy: 0.4976132222355296
			f1-score: 0.5527997211489889



In [22]:
# Second draw from the generator: MultinomialNB when the cells are run once, in order.
# NOTE(review): the saved output below does not match what the visible eval() prints;
# presumably stale -- re-run to refresh.
eval(next(model))

MultinomialNB:

	Without scaler:

		Simple:
			accuracy: 0.459088290816181
			f1-score: 0.33514234859465064

		One vs Rest:
			accuracy: 0.24924575541373667
			f1-score: 0.18110841881943884

		One vs One:
			accuracy: 0.459088290816181
			f1-score: 0.33514234859465064


	Min-Max scaler:

		Simple:
			accuracy: 0.5673573208776043
			f1-score: 0.4371076157860748

		One vs Rest:
			accuracy: 0.5625552494852887
			f1-score: 0.43107454552508373

		One vs One:
			accuracy: 0.5673573208776043
			f1-score: 0.4371076157860748



In [23]:
# Third draw from the generator: ComplementNB when the cells are run once, in order.
# NOTE(review): the saved output below does not match what the visible eval() prints;
# presumably stale -- re-run to refresh.
eval(next(model))

ComplementNB:

	Without scaler:

		Simple:
			accuracy: 0.5287468418681313
			f1-score: 0.38385688311976895

		One vs Rest:
			accuracy: 0.24924575541373667
			f1-score: 0.18110841881943884

		One vs One:
			accuracy: 0.459088290816181
			f1-score: 0.33514234859465064


	Min-Max scaler:

		Simple:
			accuracy: 0.5291631734734032
			f1-score: 0.385898818097579

		One vs Rest:
			accuracy: 0.5390125526830576
			f1-score: 0.4436284481732666

		One vs One:
			accuracy: 0.5922288569131008
			f1-score: 0.6221775054321922



In [24]:
# Fourth and last draw from the generator: BernoulliNB when the cells are run once,
# in order. A further next(model) would raise StopIteration.
# NOTE(review): the saved output below does not match what the visible eval() prints;
# presumably stale -- re-run to refresh.
eval(next(model))

BernoulliNB:

	Without scaler:

		Simple:
			accuracy: 0.5160458763209974
			f1-score: 0.3888107319390423

		One vs Rest:
			accuracy: 0.506749704860814
			f1-score: 0.3636764103778444

		One vs One:
			accuracy: 0.5160458763209974
			f1-score: 0.3888107319390423


	Standard scaler:

		Simple:
			accuracy: 0.6354246867532408
			f1-score: 0.6318122209066825

		One vs Rest:
			accuracy: 0.5662452022059872
			f1-score: 0.4417984459081934

		One vs One:
			accuracy: 0.6354246867532408
			f1-score: 0.6318122209066825


	Min-Max scaler:

		Simple:
			accuracy: 0.5683952983044468
			f1-score: 0.5089913279084225

		One vs Rest:
			accuracy: 0.5160116572849476
			f1-score: 0.3808212982292138

		One vs One:
			accuracy: 0.5683952983044468
			f1-score: 0.5089913279084225

