### Preprocess logs

In [6]:
from sklearn import preprocessing
import numpy
import pandas as pd

def log_csv_to_dataset(csv_file_path, memory_reduction):
    data_df = pd.read_csv(csv_file_path, quotechar="'")

    slice_start_col = 0
    slice_end_col = len(data_df.columns)-1

    x = data_df.iloc[:, slice_start_col:slice_end_col]
    y = data_df.iloc[:, slice_end_col:]

    if memory_reduction:
        enc = preprocessing.OrdinalEncoder(dtype=numpy.int8)
        x = enc.fit_transform(x)

    data = (x, y)
    return data


bpic17_logs_with_interventions_path = './data_bpic17_readyToUse_preprocessed_for_adaptation_classification.csv'
bpic17_logs_with_interventions = log_csv_to_dataset(bpic17_logs_with_interventions_path, memory_reduction=True)

In [4]:
%matplotlib inline


### Classifier comparison

A comparison of a several classifiers in scikit-learn 


In [5]:
# Code source: Gaël Varoquaux
#              Andreas Müller
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.inspection import DecisionBoundaryDisplay

from sklearn import tree
from sklearn.tree import export_text
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score

names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    # "Gaussian Process",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    # GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=6),
    RandomForestClassifier(max_depth=6, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

datasets = [
    bpic17_logs_with_interventions,
]

# iterate over datasets
for ds_cnt, ds in enumerate(datasets):
    # preprocess dataset, normalize features
    X, y = ds
    X = StandardScaler().fit_transform(X)

    # iterate over classifiers
    for name, clf in zip(names, classifiers):

        print('-----------------------------------------------------------')
        print('Classifier:', name)

        scores = cross_val_score(clf, X, y.values.ravel(), cv=10, scoring='f1_macro')
        print('10-fold cross-validation mean F1:', numpy.mean(scores))


-----------------------------------------------------------
Classifier: Nearest Neighbors
10-fold cross-validation mean F1: 0.6143051147026661
-----------------------------------------------------------
Classifier: Linear SVM
10-fold cross-validation mean F1: 0.5823711777082041
-----------------------------------------------------------
Classifier: RBF SVM
10-fold cross-validation mean F1: 0.36084627707076866
-----------------------------------------------------------
Classifier: Decision Tree
10-fold cross-validation mean F1: 0.7655936068453801
-----------------------------------------------------------
Classifier: Random Forest
10-fold cross-validation mean F1: 0.6716211525453585
-----------------------------------------------------------
Classifier: Neural Net
10-fold cross-validation mean F1: 0.722852116498167
-----------------------------------------------------------
Classifier: AdaBoost
10-fold cross-validation mean F1: 0.7090081514073595
----------------------------------------