## Classifiers

This notebook holds a Dummy Classifier, kNN Classifier, Naive Bayes Classifier, and Decision Tree Classifier using k-fold cross validation with (k = 10) with the Stress Detection Dataset

In [11]:
import importlib
from tabulate import tabulate

import mysklearn.myutils
importlib.reload(mysklearn.myutils)
import mysklearn.myutils as myutils

import mysklearn.mypytable
importlib.reload(mysklearn.mypytable)
from mysklearn.mypytable import MyPyTable 

import mysklearn.myclassifiers
importlib.reload(mysklearn.myclassifiers)
from mysklearn.myclassifiers import MyKNeighborsClassifier, MyDummyClassifier, MyNaiveBayesClassifier, MyDecisionTreeClassifier

import mysklearn.myevaluation
importlib.reload(mysklearn.myevaluation)
import mysklearn.myevaluation as myevaluation

stress_data = MyPyTable()
stress_data.load_from_file("cleaned_data.csv")

<mysklearn.mypytable.MyPyTable at 0x7f5db8f35850>

#### Test Classifier Accuracy Using:
1. Accuracy and error rate
2. Precision, recall, and F1 measure

In [12]:
X = [
    [
        row[15], # screen_on_time
        row[10], # sleep_duration
        row[9],  # wake_time
        row[8]   # sleep_time
    ]
    for row in stress_data.data
]
y = stress_data.get_column("PSS_score")
header = ["screen_on_time", "sleep_duration", "wake_time", "sleep_time"]
header_map = ["att0", "att1", "att2", "att3"]

knn_avg_acc, knn_error_rate, nb_avg_acc, nb_error_rate, knn_y_actual, knn_y_pred, nb_y_actual, nb_y_pred, knn_binary_ps, nb_binary_ps, knn_recall, nb_recall, knn_f1, nb_f1, dummy_avg_acc, dummy_error_rate, dummy_binary_ps, dummy_recall, dummy_f1, dummy_y_actual, dummy_y_pred = myutils.knn_nb_classifiers(X, y)
tree_avg_acc, tree_error_rate, tree_binary_ps, tree_recall, tree_f1, tree_y_actual, tree_y_pred = myutils.tree_classifier(X, y, header_map)

print(f"10-Fold Cross Validation")
print("________________________")
print()
print(f"Naive Bayes Classifier: accuracy = {nb_avg_acc:.2f}, error rate = {nb_error_rate:.2f}, precision = {nb_binary_ps:.2f}, recall = {nb_recall:.2f}, F1 = {nb_f1:.2f},")
print()
print(f"k Nearest Neighbors Classifier: accuracy = {knn_avg_acc:.2f}, error rate = {knn_error_rate:.2f}, precision = {knn_binary_ps:.2f}, recall = {knn_recall:.2f}, F1 = {knn_f1:.2f},")
print()
print(f"Dummy Classifier: accuracy = {dummy_avg_acc:.2f}, error rate = {dummy_error_rate:.2f}, precision = {dummy_binary_ps:.2f}, recall = {dummy_recall:.2f}, F1 = {dummy_f1:.2f},")
print()
print(f"Decision Tree Classifier: accuracy = {tree_avg_acc:.2f}, error rate = {tree_error_rate:.2f}, precision = {tree_binary_ps:.2f}, recall = {tree_recall:.2f}, F1 = {tree_f1:.2f},")

10-Fold Cross Validation
________________________

Naive Bayes Classifier: accuracy = 0.00, error rate = 0.00, precision = 0.00, recall = 0.00, F1 = 0.00,

k Nearest Neighbors Classifier: accuracy = 0.45, error rate = 0.55, precision = 0.00, recall = 0.00, F1 = 0.00,

Dummy Classifier: accuracy = 0.45, error rate = 0.55, precision = 0.00, recall = 0.00, F1 = 0.00,

Decision Tree Classifier: accuracy = 0.00, error rate = 1.00, precision = 0.00, recall = 0.00, F1 = 0.00,


## Confusion Matrices

In [13]:
print("============================================================")
print("STEP 4: Confusion Matrices")
print("============================================================")
print()
labels = sorted(set(knn_y_actual) | set(knn_y_pred))
labels_strings = list(map(str, labels))
kNN_matrix = myevaluation.confusion_matrix(knn_y_actual, knn_y_pred, labels)
print("kNN Classifier (10-fold Cross Validation Confusion Matrix)")
print()
print("PSS_score")
print(tabulate(kNN_matrix, headers = labels_strings, showindex = labels_strings))
print()
print("------------------------------------------------------------")
print()
print("Dummy Classifier (10-fold Cross Validation Confusion Matrix)")
print()
print("PSS_score")
dummy_matrix = myevaluation.confusion_matrix(dummy_y_actual, dummy_y_pred, labels)
print(tabulate(dummy_matrix, headers = labels_strings, showindex = labels_strings))
print("------------------------------------------------------------")
print()
print("Naive Bayes Classifier (10-fold Cross Validation Confusion Matrix)")
print()
print("PSS_score")
labels = sorted(set(nb_y_actual) | set(nb_y_pred))
labels_strings = list(map(str, labels))
nb_matrix = myevaluation.confusion_matrix(nb_y_actual, nb_y_pred, labels)
print(tabulate(nb_matrix, headers = labels_strings, showindex = labels_strings))
print("------------------------------------------------------------")
print("Decision Tree Classifier (10-fold Cross Validation Confusion Matrix)")
print()
print("PSS_score")
labels = sorted(set(tree_y_actual) | set(tree_y_pred))
labels_strings = list(map(str, labels))
tree_matrix = myevaluation.confusion_matrix(tree_y_actual, tree_y_pred, labels)
print(tabulate(tree_matrix, headers = labels_strings, showindex = labels_strings))
print("------------------------------------------------------------")

STEP 4: Confusion Matrices

kNN Classifier (10-fold Cross Validation Confusion Matrix)

PSS_score
            high    low    moderate
--------  ------  -----  ----------
high           0      0        1280
low            0      0         378
moderate       0      0        1342

------------------------------------------------------------

Dummy Classifier (10-fold Cross Validation Confusion Matrix)

PSS_score
            high    low    moderate
--------  ------  -----  ----------
high           0      0        1280
low            0      0         378
moderate       0      0        1342
------------------------------------------------------------

Naive Bayes Classifier (10-fold Cross Validation Confusion Matrix)

PSS_score

------------------------------------------------------------
Decision Tree Classifier (10-fold Cross Validation Confusion Matrix)

PSS_score


TypeError: '<' not supported between instances of 'NoneType' and 'str'