In [None]:
# Mount Google Drive at /content/drive (Colab-only) so files stored on Drive,
# such as the dataset CSVs read later in this notebook, are reachable by path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
from numpy import mean, isnan
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier, PassiveAggressiveClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB, BernoulliNB
from sklearn.ensemble import (AdaBoostClassifier, BaggingClassifier,
                              RandomForestClassifier, ExtraTreesClassifier,
                              GradientBoostingClassifier)
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler


# Estimators with inherent (native) multiclass support
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.semi_supervised import LabelPropagation, LabelSpreading
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier


In [None]:
def get_training_dataset(binary=False,
                         path='/content/drive/MyDrive/DSTTProject/data/UNSW_NB15_training-set_cleaned.csv'):
    """Load the training CSV and split it into a feature frame and a label column.

    Args:
        binary: When True the last column is used as the label; otherwise the
            second-to-last column is used (presumably the binary flag and the
            multiclass category respectively -- confirm against the CSV).
        path: Location of the CSV file. Defaults to the project's copy on the
            mounted Drive, so existing calls behave exactly as before.

    Returns:
        A ``(features, labels)`` pair: a DataFrame holding every column except
        the final two, and a Series holding the selected label column.
    """
    df = pd.read_csv(path)
    # Both label columns sit at the end of the frame; pick one by position.
    label_column = -1 if binary else -2
    return df.iloc[:, :-2], df.iloc[:, label_column]

def get_testing_dataset(binary=False,
                        path='/content/drive/MyDrive/DSTTProject/data/UNSW_NB15_testing-set_cleaned.csv'):
    """Load the testing CSV and split it into a feature frame and a label column.

    Args:
        binary: When True the last column is used as the label; otherwise the
            second-to-last column is used (presumably the binary flag and the
            multiclass category respectively -- confirm against the CSV).
        path: Location of the CSV file. Defaults to the project's copy on the
            mounted Drive, so existing calls behave exactly as before.

    Returns:
        A ``(features, labels)`` pair: a DataFrame holding every column except
        the final two, and a Series holding the selected label column.
    """
    df = pd.read_csv(path)
    # Both label columns sit at the end of the frame; pick one by position.
    label_column = -1 if binary else -2
    return df.iloc[:, :-2], df.iloc[:, label_column]

In [None]:
def get_models():
    """Yield one default-configured instance of each classifier to benchmark.

    All estimators are constructed together when the generator is first
    advanced, then handed out one at a time in the order listed.
    Estimators that accept ``n_jobs`` are given ``n_jobs=-1``.
    """
    candidates = (
        RidgeClassifier(),
        PassiveAggressiveClassifier(n_jobs=-1),
        KNeighborsClassifier(n_jobs=-1),
        DecisionTreeClassifier(),
        ExtraTreeClassifier(),
        GaussianNB(),
        MultinomialNB(),
        ComplementNB(),
        BernoulliNB(),
        AdaBoostClassifier(),
        BaggingClassifier(n_jobs=-1),
        RandomForestClassifier(n_jobs=-1),
        ExtraTreesClassifier(n_jobs=-1),
        GradientBoostingClassifier(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis(),
        SGDClassifier(n_jobs=-1),
        SVC(),
        # Currently disabled: GaussianProcessClassifier(n_jobs=-1)
    )
    yield from candidates


def test_bin_model(model):
    """Train ``model`` with the binary labels and score it on the test split.

    The feature frames are passed to the estimator as loaded (no scaling).

    Args:
        model: An estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns:
        ``(accuracy, f1)`` computed on the testing dataset.
    """
    train_features, train_labels = get_training_dataset(binary=True)
    test_features, test_labels = get_testing_dataset(binary=True)

    model.fit(train_features, train_labels)
    predicted = model.predict(test_features)

    accuracy = accuracy_score(test_labels, predicted)
    f1 = f1_score(test_labels, predicted)
    return accuracy, f1




In [None]:
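# NOTE: disabled evaluation loop. It calls evaluate_model() and test_model(),
# neither of which is defined in the cells shown in this notebook.
# cv = KFold(n_splits=10, random_state=1, shuffle=True)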
# for model in get_models():
#     print(f"{type(model).__name__}:")

#     mean_score, _, _ = evaluate_model(model, cv, binary=True)
#     accuracy, f1 = test_model(model, binary=True)
#     print("\tBinary scores:")
#     if not isnan(mean_score):
#       print(f"\t\tcross_val_score: {mean_score}")
#     if not isnan(accuracy):
#       print(f"\t\taccuracy: {accuracy}")
#     if not isnan(f1):
#       print(f"\t\tf1-score: {f1}")

#     mean_score, _, _ = evaluate_model(model, cv, binary=False)
#     accuracy, f1 = test_model(model, binary=False)
#     print("\tMulticlass scores:")
#     if not isnan(mean_score):
#       print(f"\t\tcross_val_score: {mean_score}")
#     if not isnan(accuracy):
#       print(f"\t\taccuracy: {accuracy}")
#     if not isnan(f1):
#       print(f"\t\tf1-score: {f1}")

In [None]:
def get_multiclass_models():
    """Yield the nine classifiers used for the multiclass comparison, in order.

    All estimators are constructed with default arguments when the generator
    is first advanced, then handed out one at a time.
    """
    candidates = (
        DecisionTreeClassifier(),
        ExtraTreeClassifier(),
        LabelPropagation(),
        LabelSpreading(),
        KNeighborsClassifier(),
        LinearDiscriminantAnalysis(),
        QuadraticDiscriminantAnalysis(),
        MLPClassifier(),
        RandomForestClassifier(),
    )
    yield from candidates

def test_mult_model(model):
    """Score ``model`` on the multiclass labels: plain, One-vs-Rest and One-vs-One.

    Features are standardised with a ``StandardScaler`` fitted on the training
    split only. The estimator is first fitted directly, then wrapped in
    ``OneVsRestClassifier`` and ``OneVsOneClassifier`` (both with
    ``n_jobs=-1``) and fitted again for each wrapper.

    Args:
        model: An estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns:
        A 6-tuple ``(accuracy, f1, ovr_accuracy, ovr_f1, ovo_accuracy, ovo_f1)``
        where every F1 value uses ``average='weighted'``.
    """
    train_features, train_labels = get_training_dataset()
    test_features, test_labels = get_testing_dataset()

    standardizer = StandardScaler()
    train_features = standardizer.fit_transform(train_features)
    test_features = standardizer.transform(test_features)

    # Plain estimator first, then the two multiclass meta-strategies.
    model.fit(train_features, train_labels)
    all_predictions = [model.predict(test_features)]

    for strategy in (OneVsRestClassifier, OneVsOneClassifier):
        wrapped = strategy(model, n_jobs=-1).fit(train_features, train_labels)
        all_predictions.append(wrapped.predict(test_features))

    # Metrics are computed only after every fit has finished, in the order
    # plain -> OvR -> OvO, accuracy before F1 for each.
    scores = []
    for predicted in all_predictions:
        scores.append(accuracy_score(test_labels, predicted))
        scores.append(f1_score(test_labels, predicted, average='weighted'))
    return tuple(scores)


In [None]:
def eval(model):
    """Run ``test_mult_model`` on ``model`` and print a tab-indented score report.

    The report starts with the estimator's class name, followed by three
    sections (Simple, One vs Rest, One vs One), each listing accuracy and
    F1. A score that is NaN is omitted from the output.

    Note: this function shadows the builtin ``eval`` within the notebook.
    """
    print(f"{type(model).__name__}:")

    (plain_acc, plain_f1,
     ovr_acc, ovr_f1,
     ovo_acc, ovo_f1) = test_mult_model(model)

    report = (
        ("Simple", plain_acc, plain_f1),
        ("One vs Rest", ovr_acc, ovr_f1),
        ("One vs One", ovo_acc, ovo_f1),
    )

    for heading, acc_value, f1_value in report:
        # A blank line separates each section from what precedes it.
        print("")
        print(f"\t{heading}:")
        for label, value in (("accuracy", acc_value), ("f1-score", f1_value)):
            if isnan(value):
                continue
            print(f"\t\t{label}: {value}")

    print("")

# Shared generator consumed by the cells below: each `next(model_gen)` hands
# out the next estimator from get_multiclass_models(). Re-run this line to
# start again from the first model.
model_gen = get_multiclass_models()

In [None]:
# Evaluate the next model from model_gen (DecisionTreeClassifier in the recorded run).
eval(next(model_gen))

DecisionTreeClassifier:

	Simple:
		accuracy: 0.737420226872209
		f1-score: 0.70780279438551

	One vs Rest:
		accuracy: 0.7217022829800218
		f1-score: 0.6943042190439725

	One vs One:
		accuracy: 0.7483132866813809
		f1-score: 0.7235791215132502



In [None]:
# Evaluate the next model from model_gen (ExtraTreeClassifier in the recorded run).
eval(next(model_gen))

ExtraTreeClassifier:

	Simple:
		accuracy: 0.733935588367809
		f1-score: 0.7096248099753794

	One vs Rest:
		accuracy: 0.7084252969927171
		f1-score: 0.6786842734795713

	One vs One:
		accuracy: 0.7451081036380538
		f1-score: 0.7249583637777306



In [None]:
# Evaluate the next model from model_gen (LabelPropagation in the recorded run,
# which printed the model name but no scores).
eval(next(model_gen))

LabelPropagation:


In [None]:
# Evaluate the next model from model_gen (LabelSpreading on a top-to-bottom run).
eval(next(model_gen))

In [None]:
# Evaluate the next model from model_gen (KNeighborsClassifier on a top-to-bottom run).
eval(next(model_gen))

In [None]:
# Evaluate the next model from model_gen (LinearDiscriminantAnalysis on a top-to-bottom run).
eval(next(model_gen))

In [None]:
# Evaluate the next model from model_gen (QuadraticDiscriminantAnalysis on a top-to-bottom run).
eval(next(model_gen))

In [None]:
# Evaluate the next model from model_gen (MLPClassifier on a top-to-bottom run).
eval(next(model_gen))

In [None]:
# Evaluate the next model from model_gen (RandomForestClassifier on a
# top-to-bottom run; this is the last model the generator yields).
eval(next(model_gen))

In [None]:
# get_multiclass_models() yields nine estimators, so on a top-to-bottom run the
# generator is already exhausted here. Use next()'s default to avoid an
# uncaught StopIteration that would break "Restart & Run All".
_next_model = next(model_gen, None)
if _next_model is None:
    print("No more models to evaluate.")
else:
    eval(_next_model)

In [None]:
# get_multiclass_models() yields nine estimators, so on a top-to-bottom run the
# generator is already exhausted here. Use next()'s default to avoid an
# uncaught StopIteration that would break "Restart & Run All".
_next_model = next(model_gen, None)
if _next_model is None:
    print("No more models to evaluate.")
else:
    eval(_next_model)

In [None]:
# get_multiclass_models() yields nine estimators, so on a top-to-bottom run the
# generator is already exhausted here. Use next()'s default to avoid an
# uncaught StopIteration that would break "Restart & Run All".
_next_model = next(model_gen, None)
if _next_model is None:
    print("No more models to evaluate.")
else:
    eval(_next_model)