In [None]:
# Colab-only setup: mount Google Drive at /content/drive. The dataset CSVs read
# later in this notebook live under /content/drive/MyDrive, so this cell has to
# run first.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import warnings
# Install blanket 'ignore' filters so no warnings are shown by later cells.
# NOTE(review): both calls appear to register the same catch-all 'ignore'
# filter, so one of them is probably redundant - confirm before removing.
# Be aware this also hides convergence/deprecation warnings from scikit-learn.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [None]:
import pandas as pd
from numpy import mean, isnan
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [None]:
# Default locations of the cleaned UNSW-NB15 splits on the mounted Drive.
TRAINING_SET_PATH = '/content/drive/MyDrive/DSTTProject/data/UNSW_NB15_training-set_cleaned.csv'
TESTING_SET_PATH = '/content/drive/MyDrive/DSTTProject/data/UNSW_NB15_testing-set_cleaned.csv'


def _load_features_and_labels(path, binary):
    """Read the CSV at ``path`` and split it into ``(features, labels)``.

    The last two columns of the file are treated as label columns: the final
    column is returned when ``binary`` is true, the second-to-last otherwise
    (presumably the multi-class attack category - verify against the CSV).
    Every column before those two is returned as the feature frame.
    """
    df = pd.read_csv(path)
    label_position = -1 if binary else -2
    return df.iloc[:, :-2], df.iloc[:, label_position]


def get_training_dataset(binary=False, path=TRAINING_SET_PATH):
    """Load the training split.

    Parameters
    ----------
    binary : bool, default False
        If true, use the last column as the label; otherwise use the
        second-to-last column.
    path : str, default TRAINING_SET_PATH
        CSV file to read. Override it to run outside Colab or on a sample.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Feature columns (everything except the last two) and the label column.
    """
    return _load_features_and_labels(path, binary)


def get_testing_dataset(binary=False, path=TESTING_SET_PATH):
    """Load the testing split.

    Parameters
    ----------
    binary : bool, default False
        If true, use the last column as the label; otherwise use the
        second-to-last column.
    path : str, default TESTING_SET_PATH
        CSV file to read. Override it to run outside Colab or on a sample.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Feature columns (everything except the last two) and the label column.
    """
    return _load_features_and_labels(path, binary)

In [None]:
def get_models(random_state=None):
    """Yield the tree classifiers to benchmark, one per ``next()`` call.

    Parameters
    ----------
    random_state : int or None, default None
        Seed forwarded to both classifiers. The default keeps the previous
        unseeded behaviour; pass an integer to make runs repeatable.

    Yields
    ------
    A ``DecisionTreeClassifier`` first, then an ``ExtraTreeClassifier``.
    Both instances are created when the generator is first advanced.
    """
    yield from (
        DecisionTreeClassifier(random_state=random_state),
        ExtraTreeClassifier(random_state=random_state),
    )

def test_bin_model(model, scaler=None):
    """Fit ``model`` on the binary-label training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
        Fitted in place on the training data.
    scaler : transformer exposing ``fit_transform`` / ``transform``, optional
        When given, it is fitted on the training features only and then applied
        to the test features. ``None`` (the default) skips scaling, matching
        ``test_mult_model``.

    Returns
    -------
    (float, float)
        Accuracy and F1 score (``f1_score`` with its default averaging) on the
        test split.
    """
    X_train, y_train = get_training_dataset(binary=True)
    X_test, y_test = get_testing_dataset(binary=True)

    # Explicit None check: a scaler object must never be skipped just because
    # it happens to evaluate as falsy.
    if scaler is not None:
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    model.fit(X_train, y_train)
    y_preds = model.predict(X_test)

    return accuracy_score(y_test, y_preds), f1_score(y_test, y_preds)

def test_mult_model(model, scaler=None):
    """Score ``model`` on the multi-class labels in three configurations.

    The estimator is evaluated as-is, wrapped in ``OneVsRestClassifier`` and
    wrapped in ``OneVsOneClassifier``. All three are trained on the training
    split and scored on the testing split.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
        Also fitted in place by the plain (unwrapped) run.
    scaler : transformer exposing ``fit_transform`` / ``transform``, optional
        When given, it is fitted on the training features only and then applied
        to the test features. ``None`` skips scaling.

    Returns
    -------
    tuple of 6 floats
        ``(accuracy, f1, ovr_accuracy, ovr_f1, ovo_accuracy, ovo_f1)`` where
        every F1 value uses ``average='weighted'``.
    """
    X_train, y_train = get_training_dataset()
    X_test, y_test = get_testing_dataset()

    # Explicit None check: truthiness would consult __len__/__bool__ on the
    # scaler object and could silently skip a perfectly valid transformer.
    if scaler is not None:
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    # Same fitting order as before: the two wrappers first, the bare model last.
    ovr_clf = OneVsRestClassifier(model, n_jobs=-1).fit(X_train, y_train)
    ovo_clf = OneVsOneClassifier(model, n_jobs=-1).fit(X_train, y_train)
    clf = model.fit(X_train, y_train)

    # Result order is part of the contract: plain, one-vs-rest, one-vs-one.
    scores = []
    for fitted in (clf, ovr_clf, ovo_clf):
        y_preds = fitted.predict(X_test)
        scores.append(accuracy_score(y_test, y_preds))
        scores.append(f1_score(y_test, y_preds, average='weighted'))
    return tuple(scores)

In [1]:
def print_results(scaler, accuracy, f1, ovr_accuracy, ovr_f1, ovo_accuracy, ovo_f1):
    """Print a tab-indented report of the six scores for one scaler setting.

    ``scaler`` is the heading text for the report. The scores are grouped as
    "Simple", "One vs Rest" and "One vs One". A score that is NaN is left out,
    but its section heading is still printed.
    """
    report_sections = (
        ("\t\tSimple:", accuracy, f1),
        ("\t\tOne vs Rest:", ovr_accuracy, ovr_f1),
        ("\t\tOne vs One:", ovo_accuracy, ovo_f1),
    )

    print("")
    print(f"\t{scaler}:")

    for heading, acc_value, f1_value in report_sections:
        # Each section is preceded by a blank line.
        print("")
        print(heading)
        if not isnan(acc_value):
            print(f"\t\t\taccuracy: {acc_value}")
        if not isnan(f1_value):
            print(f"\t\t\tf1-score: {f1_value}")

    # Trailing blank line closes the report.
    print("")

def eval(model):
    """Benchmark ``model`` without scaling, with StandardScaler and with MinMaxScaler.

    Prints the estimator's class name, then one ``print_results`` report per
    scaler setting, each computed by ``test_mult_model``.

    Note: this function's name shadows the builtin ``eval`` for the rest of
    the notebook.
    """
    print(f"{type(model).__name__}:")

    scaler_settings = (
        ("Without scaler", None),
        ("Standard scaler", StandardScaler()),
        ("Min-Max scaler", MinMaxScaler()),
    )
    for heading, scaler in scaler_settings:
        print_results(heading, *test_mult_model(model, scaler))


In [None]:
# Despite the name, `model` is the generator returned by get_models(), not a
# single estimator. Each next(model) hands out the next classifier, and the
# generator is exhausted after two calls, so re-run this cell to start over.
model = get_models()

In [None]:
# Pull the next classifier from the generator and benchmark it. On a fresh
# top-to-bottom run this is the DecisionTreeClassifier. Re-running the cell
# advances the generator again, so it will evaluate a different model or
# raise StopIteration.
eval(next(model))

DecisionTreeClassifier:

	Without scaler:

		Simple:
			accuracy: 0.7384182820903269
			f1-score: 0.7100651860919803

		One vs Rest:
			accuracy: 0.7245652756628512
			f1-score: 0.6954679620477363

		One vs One:
			accuracy: 0.7484159437895301
			f1-score: 0.7232772807621207


	Standard scaler:

		Simple:
			accuracy: 0.7375342903257082
			f1-score: 0.7082833644868759

		One vs Rest:
			accuracy: 0.7241432408849042
			f1-score: 0.6954773691335155

		One vs One:
			accuracy: 0.7473950758807124
			f1-score: 0.7225655681952734


	Min-Max scaler:

		Simple:
			accuracy: 0.739359305581695
			f1-score: 0.7121273134944026

		One vs Rest:
			accuracy: 0.7218790813329455
			f1-score: 0.6938472124377744

		One vs One:
			accuracy: 0.748301880336031
			f1-score: 0.7237050649257363



In [None]:
# Pull the following classifier from the generator and benchmark it. On a
# fresh top-to-bottom run this is the ExtraTreeClassifier. The generator is
# exhausted afterwards, so re-running the cell raises StopIteration.
eval(next(model))

ExtraTreeClassifier:

	Without scaler:

		Simple:
			accuracy: 0.7275879571805796
			f1-score: 0.7010622465207512

		One vs Rest:
			accuracy: 0.7106210184725763
			f1-score: 0.6858342898945474

		One vs One:
			accuracy: 0.7417774507958778
			f1-score: 0.7226811966708774


	Standard scaler:

		Simple:
			accuracy: 0.7278217872602529
			f1-score: 0.7052932829583275

		One vs Rest:
			accuracy: 0.7083739684386424
			f1-score: 0.6810366843568724

		One vs One:
			accuracy: 0.7428097250500454
			f1-score: 0.7232115933663279


	Min-Max scaler:

		Simple:
			accuracy: 0.7299319611499877
			f1-score: 0.7058682537841603

		One vs Rest:
			accuracy: 0.7113111023662464
			f1-score: 0.6857539198833003

		One vs One:
			accuracy: 0.7370666301663615
			f1-score: 0.7186563700968419

