In [1]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# loaders in this notebook read their CSV files from paths under that mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides anything the estimators warn
# about while fitting (e.g. non-convergence) — consider narrowing the filter.
# NOTE(review): the two calls look redundant (both install a catch-all
# 'ignore' filter) — confirm before dropping one.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [3]:
import pandas as pd
from numpy import mean, isnan
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier, PassiveAggressiveClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [4]:
# Default locations of the cleaned UNSW-NB15 splits on the mounted Drive.
_TRAINING_SET_PATH = '/content/drive/MyDrive/DSTTProject/data/UNSW_NB15_training-set_cleaned.csv'
_TESTING_SET_PATH = '/content/drive/MyDrive/DSTTProject/data/UNSW_NB15_testing-set_cleaned.csv'


def _load_dataset(path, binary):
    """Read a CSV and split it into a feature frame and a label series.

    The last two columns are treated as label columns: the final column is
    returned when `binary` is true, the second-to-last otherwise. Every other
    column is returned as a feature.
    (Presumably these are the binary label and the attack category of the
    cleaned UNSW-NB15 export — confirm against the CSV header.)
    """
    df = pd.read_csv(path)
    label_position = -1 if binary else -2
    return df.iloc[:, :-2], df.iloc[:, label_position]


def get_training_dataset(binary=False, path=_TRAINING_SET_PATH):
    """Load the training split.

    Args:
        binary: When true, return the last column as labels; otherwise return
            the second-to-last column.
        path: CSV file to read. Defaults to the training set on the mounted
            Drive, so existing calls behave as before.

    Returns:
        A `(features, labels)` tuple: a DataFrame holding every column except
        the last two, and a Series holding the selected label column.
    """
    return _load_dataset(path, binary)


def get_testing_dataset(binary=False, path=_TESTING_SET_PATH):
    """Load the testing split.

    Args:
        binary: When true, return the last column as labels; otherwise return
            the second-to-last column.
        path: CSV file to read. Defaults to the testing set on the mounted
            Drive, so existing calls behave as before.

    Returns:
        A `(features, labels)` tuple: a DataFrame holding every column except
        the last two, and a Series holding the selected label column.
    """
    return _load_dataset(path, binary)

In [5]:
def get_models(random_state=42):
    """Yield one instance of each linear classifier under comparison.

    Args:
        random_state: Seed passed to the estimators that shuffle the training
            data while fitting (SGDClassifier, PassiveAggressiveClassifier),
            so that repeated runs of the notebook report the same scores.
            Pass None to get unseeded estimators.

    Yields:
        LogisticRegression, RidgeClassifier, SGDClassifier and
        PassiveAggressiveClassifier instances, in that order.
    """
    yield LogisticRegression()
    yield RidgeClassifier()
    yield SGDClassifier(random_state=random_state)
    yield PassiveAggressiveClassifier(random_state=random_state)

def test_bin_model(model, scaler=None):
    """Fit `model` on the binary-label training set and score it on the test set.

    Args:
        model: Estimator exposing `fit` and `predict`. It is fitted in place.
        scaler: Optional transformer exposing `fit_transform` and `transform`.
            It is fitted on the training features only and then applied to the
            test features. When None, the raw features are used, matching the
            optional `scaler` argument of `test_mult_model`.

    Returns:
        An `(accuracy, f1)` tuple computed on the test set, with `f1_score`
        called using its default averaging.
    """
    X_train, y_train = get_training_dataset(binary=True)
    X_test, y_test = get_testing_dataset(binary=True)

    # Explicit None check: a missing scaler means "no scaling" rather than an
    # AttributeError on None.fit_transform.
    if scaler is not None:
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    model.fit(X_train, y_train)
    y_preds = model.predict(X_test)

    return accuracy_score(y_test, y_preds), f1_score(y_test, y_preds)

def test_mult_model(model, scaler=None):
    """Score `model` on the multi-class labels, alone and inside two wrappers.

    The estimator is fitted directly, then again wrapped in
    OneVsRestClassifier and in OneVsOneClassifier; all three are evaluated on
    the test set.

    Args:
        model: Estimator exposing `fit` and `predict`. It is fitted in place.
        scaler: Optional transformer; when given it is fitted on the training
            features and then applied to the test features.

    Returns:
        A 6-tuple `(accuracy, f1, ovr_accuracy, ovr_f1, ovo_accuracy, ovo_f1)`
        where every F1 value uses `average='weighted'`.
    """
    features_train, target_train = get_training_dataset()
    features_test, target_test = get_testing_dataset()

    if scaler:
        features_train = scaler.fit_transform(features_train)
        features_test = scaler.transform(features_test)

    # Fit order is kept: plain estimator, then one-vs-rest, then one-vs-one.
    model.fit(features_train, target_train)
    fitted_classifiers = [
        model,
        OneVsRestClassifier(model, n_jobs=-1).fit(features_train, target_train),
        OneVsOneClassifier(model, n_jobs=-1).fit(features_train, target_train),
    ]

    all_predictions = [clf.predict(features_test) for clf in fitted_classifiers]

    scores = []
    for predictions in all_predictions:
        scores.append(accuracy_score(target_test, predictions))
        scores.append(f1_score(target_test, predictions, average='weighted'))
    return tuple(scores)

In [6]:
def print_results(scaler, accuracy, f1, ovr_accuracy, ovr_f1, ovo_accuracy, ovo_f1):
    """Print a tab-indented report of the scores obtained with one scaler.

    The report has a heading built from `scaler`, followed by three sections
    ("Simple", "One vs Rest", "One vs One"), each listing an accuracy and an
    f1-score line. A metric whose value is NaN is left out.

    Args:
        scaler: Label printed as the report heading.
        accuracy, f1: Scores of the plain estimator.
        ovr_accuracy, ovr_f1: Scores of the one-vs-rest wrapper.
        ovo_accuracy, ovo_f1: Scores of the one-vs-one wrapper.
    """
    print("")
    print(f"\t{scaler}:")

    sections = (
        ("Simple", accuracy, f1),
        ("One vs Rest", ovr_accuracy, ovr_f1),
        ("One vs One", ovo_accuracy, ovo_f1),
    )
    for heading, accuracy_value, f1_value in sections:
        print("")
        print(f"\t\t{heading}:")
        for metric_name, value in (("accuracy", accuracy_value), ("f1-score", f1_value)):
            if isnan(value):
                continue
            print(f"\t\t\t{metric_name}: {value}")

    print("")

def eval(model):
    """Print the multi-class scores of `model` under three scaling set-ups.

    Prints the estimator's class name, then one report each for no scaling,
    StandardScaler and MinMaxScaler, in that order.

    NOTE(review): this name shadows Python's built-in `eval` for the rest of
    the notebook; it is kept because later cells call it by this name.
    """
    print(f"{type(model).__name__}:")

    scaler_runs = (
        ("Without scaler", None),
        ("Standard scaler", StandardScaler()),
        ("Min-Max scaler", MinMaxScaler()),
    )
    for title, scaler in scaler_runs:
        print_results(title, *test_mult_model(model, scaler))


In [7]:
# get_models() is a generator function, so `model` is an iterator over
# estimators rather than a single estimator; each next(model) consumes one.
model = get_models()

In [8]:
# First draw from the generator (LogisticRegression in the recorded output).
# Re-running this cell advances the generator instead of repeating the model.
eval(next(model))

LogisticRegression:

	Without scaler:

		Simple:
			accuracy: 0.5232774992728455
			f1-score: 0.37330610559065647

		One vs Rest:
			accuracy: 0.5405352997872717
			f1-score: 0.40465694973590627

		One vs One:
			accuracy: 0.5370677708008965
			f1-score: 0.41001046610282216


	Standard scaler:

		Simple:
			accuracy: 0.7027221243177579
			f1-score: 0.6567714116739231

		One vs Rest:
			accuracy: 0.6941958811686941
			f1-score: 0.6462323574156875

		One vs One:
			accuracy: 0.7129650224419845
			f1-score: 0.6683457212775444


	Min-Max scaler:

		Simple:
			accuracy: 0.6902835047136722
			f1-score: 0.6418273424229081

		One vs Rest:
			accuracy: 0.6829093024449501
			f1-score: 0.6296102787253843

		One vs One:
			accuracy: 0.6891827923874051
			f1-score: 0.6383842569234061



In [9]:
# Second draw from the generator (RidgeClassifier in the recorded output).
# Which estimator is evaluated depends on how many draws have already run.
eval(next(model))

RidgeClassifier:

	Without scaler:

		Simple:
			accuracy: 0.6596232484130922
			f1-score: 0.5713338003761708

		One vs Rest:
			accuracy: 0.6596232484130922
			f1-score: 0.5713338003761708

		One vs One:
			accuracy: 0.656167125772067
			f1-score: 0.5823840304834185


	Standard scaler:

		Simple:
			accuracy: 0.6596289515857672
			f1-score: 0.5713023912399733

		One vs Rest:
			accuracy: 0.6596289515857672
			f1-score: 0.5713023912399733

		One vs One:
			accuracy: 0.6558306385842444
			f1-score: 0.5815029649506283


	Min-Max scaler:

		Simple:
			accuracy: 0.6595434039956428
			f1-score: 0.5709313116526349

		One vs Rest:
			accuracy: 0.6595434039956428
			f1-score: 0.5709313116526349

		One vs One:
			accuracy: 0.6540284360189573
			f1-score: 0.5795655988110371



In [10]:
# Third draw from the generator (SGDClassifier in the recorded output).
# Which estimator is evaluated depends on how many draws have already run.
eval(next(model))

SGDClassifier:

	Without scaler:

		Simple:
			accuracy: 0.21474156073023423
			f1-score: 0.23340147154798377

		One vs Rest:
			accuracy: 0.5273780804261411
			f1-score: 0.4556160206545639

		One vs One:
			accuracy: 0.5943105149394609
			f1-score: 0.5100778939167135


	Standard scaler:

		Simple:
			accuracy: 0.6897759223456008
			f1-score: 0.6394050677758928

		One vs Rest:
			accuracy: 0.6851392429608592
			f1-score: 0.63546691327503

		One vs One:
			accuracy: 0.7108548485522496
			f1-score: 0.648869382164752


	Min-Max scaler:

		Simple:
			accuracy: 0.6632561694070411
			f1-score: 0.593006860247984

		One vs Rest:
			accuracy: 0.6590130089368715
			f1-score: 0.589716709835899

		One vs One:
			accuracy: 0.679572946430099
			f1-score: 0.629570842138672



In [11]:
# Fourth and last draw (PassiveAggressiveClassifier in the recorded output).
# The generator is exhausted afterwards: another next(model) raises StopIteration.
eval(next(model))

PassiveAggressiveClassifier:

	Without scaler:

		Simple:
			accuracy: 0.45734311997764354
			f1-score: 0.37158653972038796

		One vs Rest:
			accuracy: 0.287525450408062
			f1-score: 0.21542645796629628

		One vs One:
			accuracy: 0.5449723681283898
			f1-score: 0.4531404936962981


	Standard scaler:

		Simple:
			accuracy: 0.6924221944667819
			f1-score: 0.6602920650274705

		One vs Rest:
			accuracy: 0.7090982713683622
			f1-score: 0.6998525378940295

		One vs One:
			accuracy: 0.6618303762383013
			f1-score: 0.6138613122501565


	Min-Max scaler:

		Simple:
			accuracy: 0.6943213509675432
			f1-score: 0.6225487394935151

		One vs Rest:
			accuracy: 0.6888291956815576
			f1-score: 0.6348074877222407

		One vs One:
			accuracy: 0.7048322982074928
			f1-score: 0.67770991729936

