In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os
import pickle

In [None]:
# CSV files processed by this notebook, in processing order.
# A few entries carry a `_fix` suffix in their file name.
file_list = [
    'S006.csv',
    'S008.csv',
    'S009.csv',
    'S010.csv',
    'S012.csv',
    'S013.csv',
    'S014.csv',
    'S015_fix.csv',
    'S016.csv',
    'S017.csv',
    'S018.csv',
    'S019.csv',
    'S020.csv',
    'S021_fix.csv',
    'S022.csv',
    'S023_fix.csv',
    'S024.csv',
    'S025.csv',
    'S026.csv',
    'S027.csv',
    'S028.csv',
    'S029.csv',
]

In [None]:
# Position of each file in file_list (file name, then its [index]):
# S006.csv        [00]
# S008.csv        [01]
# S009.csv        [02]
# S010.csv        [03]
# S012.csv        [04]
# S013.csv        [05]
# S014.csv        [06]
# S015_fix.csv    [07]
# S016.csv        [08]
# S017.csv        [09]
# S018.csv        [10]
# S019.csv        [11]
# S020.csv        [12]
# S021_fix.csv    [13]
# S022.csv        [14]
# S023_fix.csv    [15]
# S024.csv        [16]
# S025.csv        [17]
# S026.csv        [18]
# S027.csv        [19]
# S028.csv        [20]
# S029.csv        [21]

In [None]:
def get_classifier(option, random_state=42):
    """Build a new, unfitted scikit-learn classifier for an option code.

    Parameters
    ----------
    option : int
        Which model to create:
        1 -> ``MLPClassifier(max_iter=500)``,
        2 -> ``RandomForestClassifier`` (100 trees, entropy criterion),
        3 -> ``GaussianNB``.
    random_state : int or None, default 42
        Seed handed to the stochastic estimators (the MLP and the random
        forest) so repeated runs produce the same model. ``GaussianNB``
        takes no seed. Pass ``None`` for unseeded behaviour.

    Returns
    -------
    estimator
        A freshly constructed classifier; it has not been fitted.

    Raises
    ------
    ValueError
        If ``option`` is not 1, 2 or 3.
    """
    if option == 1:
        # Seeded like the forest below: without it the MLP's weight
        # initialisation differs on every run, so its scores are not repeatable.
        return MLPClassifier(max_iter=500, random_state=random_state)
    if option == 2:
        return RandomForestClassifier(n_estimators=100, criterion='entropy', random_state=random_state)
    if option == 3:
        return GaussianNB()
    raise ValueError('Invalid option')

In [None]:
# Classifier option codes accepted by get_classifier:
# 1: Neural Network (MLPClassifier)
# 2: Random Forest
# 3: Gaussian Naive Bayes (GaussianNB)

In [None]:
# Fixed seed for the train/test split: a fresh "Restart & Run All" reproduces
# the same partitions, and every classifier is evaluated on the same split of
# each file, which makes the per-model averages comparable.
SPLIT_SEED = 42

# One entry per classifier option (1, 2, 3 in that order); each entry is the
# metric averaged over all files in file_list.
models_train_acc = []
models_test_acc = []
models_precisions = []
models_recalls = []
models_f1s = []

for option in range(1, 4):
    # Per-file scores for the classifier currently being evaluated.
    train_acc = []
    test_acc = []
    precisions = []
    recalls = []
    f1s = []
    for file in file_list:
        df = pd.read_csv(os.path.join('harth/', file))
        df = df.drop('timestamp', axis=1)

        # Every remaining column except 'label' is used as a feature.
        X = df.drop(['label'], axis=1)
        Y = df['label']

        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=0.3, random_state=SPLIT_SEED
        )

        clf = get_classifier(option)
        clf = clf.fit(X_train, Y_train)
        predictions = clf.predict(X_test)

        # Score each set once and reuse the values for both the log line and
        # the accumulators (scoring the training set is a full predict pass).
        train_accuracy = clf.score(X_train, Y_train)
        test_accuracy = accuracy_score(Y_test, predictions)

        print(f"Classifier {option} yields training accuracy for file {file} of {train_accuracy} with a testing accuracy of {test_accuracy}")

        train_acc.append(train_accuracy)
        test_acc.append(test_accuracy)
        # Macro averaging: every class contributes equally to the score.
        precisions.append(precision_score(Y_test, predictions, average='macro'))
        recalls.append(recall_score(Y_test, predictions, average='macro'))
        f1s.append(f1_score(Y_test, predictions, average='macro'))

    # Collapse the per-file scores into a single mean per classifier.
    models_train_acc.append(np.mean(train_acc))
    models_test_acc.append(np.mean(test_acc))
    models_precisions.append(np.mean(precisions))
    models_recalls.append(np.mean(recalls))
    models_f1s.append(np.mean(f1s))

In [None]:
# Persist the per-model averages (train accuracy, test accuracy, precision,
# recall, F1 - in that order) to 'metrics.pickle'.
metrics_to_save = [
    models_train_acc,
    models_test_acc,
    models_precisions,
    models_recalls,
    models_f1s,
]
with open('metrics.pickle', 'wb') as metrics_file:
    pickle.dump(metrics_to_save, metrics_file)

# To restore previously saved metrics instead of retraining, run:
# with open('metrics.pickle', 'rb') as metrics_file:
#     models_train_acc, models_test_acc, models_precisions, models_recalls, models_f1s = pickle.load(metrics_file)

In [None]:
# Grouped bar chart: mean train vs. test accuracy for each classifier.
# Names follow the order in which the models were trained (options 1, 2, 3).
model_names = ["Neural Network", "Random Forest", "Gaussian Naive Bayes"]

fig = plt.figure("Classification Results")
x_axis = np.arange(len(models_train_acc))
bar_width = 0.4

# Shift each series by half a bar so every pair is centred on its tick.
plt.bar(x_axis - bar_width / 2, models_train_acc, bar_width, label="Train set")
plt.bar(x_axis + bar_width / 2, models_test_acc, bar_width, label="Test set")

# Label ticks with model names; bare indices 0-2 do not match option codes 1-3.
# Fall back to 1-based option numbers if the number of models is unexpected.
if len(model_names) == len(x_axis):
    tick_labels = model_names
else:
    tick_labels = [str(position + 1) for position in x_axis]
plt.xticks(x_axis, tick_labels)

plt.title("Mean train vs. test accuracy per model")
plt.xlabel("Models")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

In [None]:
# Grouped bar chart: mean test-set accuracy, precision, recall and F1 for each
# classifier. Names follow the training order (options 1, 2, 3).
model_names = ["Neural Network", "Random Forest", "Gaussian Naive Bayes"]

# Uses its own figure label: plt.figure() with an already-open label re-activates
# that figure, so sharing "Classification Results" with the accuracy chart could
# draw these bars on top of it on backends that keep figures open.
fig = plt.figure("Classification Metrics")
x_axis = np.arange(len(models_train_acc))

metric_series = [
    ("Accuracy", models_test_acc),
    ("Precision", models_precisions),
    ("Recall", models_recalls),
    ("F1", models_f1s),
]
bar_width = 0.2

# Offsets of -0.3, -0.1, +0.1, +0.3 centre each group of four bars on its tick.
for position, (metric_name, values) in enumerate(metric_series):
    offset = (position - (len(metric_series) - 1) / 2) * bar_width
    plt.bar(x_axis + offset, values, bar_width, label=metric_name)

# Label ticks with model names; bare indices 0-2 do not match option codes 1-3.
# Fall back to 1-based option numbers if the number of models is unexpected.
if len(model_names) == len(x_axis):
    tick_labels = model_names
else:
    tick_labels = [str(index + 1) for index in x_axis]
plt.xticks(x_axis, tick_labels)

plt.title("Mean test-set metrics per model")
plt.xlabel("Models")
# The bars show four different metrics, so the axis is a generic score.
plt.ylabel("Score")
plt.legend()
plt.show()