In [1]:
import warnings
# Suppress all warnings for the rest of the session; each call below installs a
# catch-all "ignore" filter.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [2]:
import pandas as pd
from numpy import mean, isnan
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier, PassiveAggressiveClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [3]:
_TRAINING_CSV = '../UNSW-NB15/data/UNSW_NB15_training-set_cleaned.csv'
_TESTING_CSV = '../UNSW-NB15/data/UNSW_NB15_testing-set_cleaned.csv'


def _load_split(path, binary):
    """Read one cleaned CSV and split it into a feature frame and a label column."""
    df = pd.read_csv(path)
    # The two trailing columns are label columns: the last one is returned when
    # ``binary`` is true, otherwise the second-to-last one.
    label_position = -1 if binary else -2
    return df.iloc[:, :-2], df.iloc[:, label_position]


def get_training_dataset(binary=False, path=_TRAINING_CSV):
    """Load the training split.

    Args:
        binary: When true, return the last column as labels; otherwise return
            the second-to-last column.
        path: CSV file to read. Defaults to the cleaned training set.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is every column except
        the last two and ``labels`` is the selected label column.
    """
    return _load_split(path, binary)


def get_testing_dataset(binary=False, path=_TESTING_CSV):
    """Load the testing split.

    Args:
        binary: When true, return the last column as labels; otherwise return
            the second-to-last column.
        path: CSV file to read. Defaults to the cleaned testing set.

    Returns:
        tuple: ``(features, labels)`` where ``features`` is every column except
        the last two and ``labels`` is the selected label column.
    """
    return _load_split(path, binary)

In [4]:
def get_models(random_state=None):
    """Yield a fresh, unfitted instance of each linear classifier being compared.

    Args:
        random_state: Seed forwarded to every estimator's ``random_state``
            argument. The default ``None`` leaves the estimators unseeded, as
            before; pass an int to make repeated runs comparable.

    Yields:
        ``LogisticRegression``, ``RidgeClassifier``, ``SGDClassifier`` and
        ``PassiveAggressiveClassifier`` instances, in that order.
    """
    model_classes = (LogisticRegression, RidgeClassifier, SGDClassifier, PassiveAggressiveClassifier)
    for model_class in model_classes:
        yield model_class(random_state=random_state)

def test_bin_model(model, scaler):
    """Fit ``model`` on the binary-label training split and score it on the test split.

    Args:
        model: Estimator exposing ``fit`` and ``predict``; it is fitted in place.
        scaler: Transformer exposing ``fit_transform`` and ``transform``.

    Returns:
        tuple: ``(accuracy, f1)`` of the predictions on the test split.
    """
    train_features, train_labels = get_training_dataset(binary=True)
    test_features, test_labels = get_testing_dataset(binary=True)

    # The scaler is fitted on the training features only and then reused
    # unchanged for the test features.
    train_scaled = scaler.fit_transform(train_features)
    test_scaled = scaler.transform(test_features)

    model.fit(train_scaled, train_labels)
    predicted = model.predict(test_scaled)

    accuracy = accuracy_score(test_labels, predicted)
    f1 = f1_score(test_labels, predicted)
    return accuracy, f1

def test_mult_model(model, scaler=None):
    """Score ``model`` on the multiclass labels three ways: as-is, One-vs-Rest, One-vs-One.

    Args:
        model: Estimator exposing ``fit`` and ``predict``; it is fitted in place
            and also passed to the One-vs-Rest / One-vs-One wrappers.
        scaler: Optional transformer exposing ``fit_transform`` and
            ``transform``. When falsy, the features are used unscaled.

    Returns:
        tuple: six values, ``(accuracy, weighted f1)`` for the plain model, then
        for the One-vs-Rest wrapper, then for the One-vs-One wrapper.
    """
    train_features, train_labels = get_training_dataset()
    test_features, test_labels = get_testing_dataset()

    if scaler:
        # Fit on the training features only, then apply the same transform to the test features.
        train_features = scaler.fit_transform(train_features)
        test_features = scaler.transform(test_features)

    # Fit order is kept: plain model first, then the two multiclass wrappers.
    model.fit(train_features, train_labels)
    one_vs_rest = OneVsRestClassifier(model, n_jobs=-1).fit(train_features, train_labels)
    one_vs_one = OneVsOneClassifier(model, n_jobs=-1).fit(train_features, train_labels)

    all_predictions = [
        classifier.predict(test_features)
        for classifier in (model, one_vs_rest, one_vs_one)
    ]

    scores = []
    for predicted in all_predictions:
        scores.append(accuracy_score(test_labels, predicted))
        scores.append(f1_score(test_labels, predicted, average='weighted'))
    return tuple(scores)

In [5]:
def print_results(scaler, accuracy, f1, ovr_accuracy, ovr_f1, ovo_accuracy, ovo_f1):
    """Print a tab-indented report of accuracy / f1 for the three evaluation strategies.

    Args:
        scaler: Heading printed above the report.
        accuracy, f1: Scores for the plain model ("Simple").
        ovr_accuracy, ovr_f1: Scores for the "One vs Rest" section.
        ovo_accuracy, ovo_f1: Scores for the "One vs One" section.

    Any score that is NaN is left out of the report; its section title is still
    printed.
    """
    sections = (
        ("Simple", accuracy, f1),
        ("One vs Rest", ovr_accuracy, ovr_f1),
        ("One vs One", ovo_accuracy, ovo_f1),
    )

    print("")
    print(f"\t{scaler}:")
    print("")

    for title, accuracy_value, f1_value in sections:
        print(f"\t\t{title}:")
        for label, value in (("accuracy", accuracy_value), ("f1-score", f1_value)):
            if isnan(value):
                continue
            print(f"\t\t\t{label}: {value}")
        # A blank line closes every section.
        print("")

def eval(model):
    """Print the model's class name, then its multiclass scores without any scaler.

    Note: this name shadows Python's builtin ``eval`` for the rest of the session.

    Args:
        model: Estimator handed to ``test_mult_model``.
    """
    model_name = type(model).__name__
    print(f"{model_name}:")
    scores = test_mult_model(model)
    print_results("Without scaler", *scores)


In [6]:
# get_models() is a generator function, so `model` is an iterator over
# estimators rather than a single estimator; each next(model) advances it by one.
model = get_models()

In [7]:
# Evaluate the next estimator from the generator (LogisticRegression in the recorded output).
eval(next(model))

LogisticRegression:



	Without scaler:

		Simple:
			accuracy: 0.5890176359131322
			f1-score: 0.6388127991137064

		One vs Rest:
			accuracy: 0.6131880678229606
			f1-score: 0.6576057322397556

		One vs One:
			accuracy: 0.5864791332653161
			f1-score: 0.6400702315290787



In [8]:
# Evaluate the next estimator from the generator (RidgeClassifier in the recorded output).
eval(next(model))

RidgeClassifier:

	Without scaler:

		Simple:
			accuracy: 0.5646285769810038
			f1-score: 0.6153353650157474

		One vs Rest:
			accuracy: 0.5646285769810038
			f1-score: 0.6153353650157474

		One vs One:
			accuracy: 0.5535879123548559
			f1-score: 0.616497092376662



In [9]:
# Evaluate the next estimator from the generator (SGDClassifier in the recorded output).
eval(next(model))

SGDClassifier:

	Without scaler:

		Simple:
			accuracy: 0.6509498129524365
			f1-score: 0.681437079659142

		One vs Rest:
			accuracy: 0.6526988291308361
			f1-score: 0.6846572018778484

		One vs One:
			accuracy: 0.5777826361560511
			f1-score: 0.6329730627527644



In [10]:
# Evaluate the next estimator from the generator (PassiveAggressiveClassifier in the recorded output).
eval(next(model))

PassiveAggressiveClassifier:

	Without scaler:

		Simple:
			accuracy: 0.5776368848078511
			f1-score: 0.6369104892828891

		One vs Rest:
			accuracy: 0.544964290919691
			f1-score: 0.6104074654213013

		One vs One:
			accuracy: 0.6015886896953797
			f1-score: 0.6415362847097074

