In [9]:
import pandas as pd

from sklearn.metrics import classification_report
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

In [2]:
df = pd.read_csv('dataset.csv')
X = df.drop(columns=['Activity'])
X = StandardScaler().fit_transform(X.copy())
y = df['Activity']


# balanced_accuracy = (recall + specificity) / 2
# roc_auc_ovr = ROC AUC OneVsRest (sensitive to class imbalance)
# roc_auc_ovo = ROC AUC OneVsOne (insensitive to class imbalance)
metrics = ['f1_weighted', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'accuracy']

## KNN

In [3]:
knn_results = []

# KNN classifiers with odd K in range of (1,19)
# search for best K
for i in range(10):
    k = i*2+1
    knnClass = KNeighborsClassifier(n_neighbors=k, n_jobs=-1)
    result = cross_validate(knnClass, X, y, cv=10, scoring=metrics,
                            return_train_score=True, n_jobs=-1)
    knn_results.append(result)
    print("k = " + str(k) + ":")

    print("\tf1:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_f1_weighted').mean()*100,
                std = result.get('test_f1_weighted').std()))

    print("\tROC AUC OneVsRest:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_roc_auc_ovr_weighted').mean()*100,
                std = result.get('test_roc_auc_ovr_weighted').std()))

    print("\tROC AUC OneVsOne:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_roc_auc_ovo_weighted').mean()*100,
                std = result.get('test_roc_auc_ovo_weighted').std()))

    print("\tbalanced accuracy:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_balanced_accuracy').mean()*100,
                std = result.get('test_balanced_accuracy').std()))

    print("\taccuracy:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
          .format(mean = result.get('test_accuracy').mean()*100,
                  std = result.get('test_accuracy').std()))

# spoiler it's: 15 for F1

k = 1:
	f1:
		mean = 86.02%;
		std = 0.02;
	ROC AUC OneVsRest:
		mean = 91.85%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 90.20%;
		std = 0.01;
	balanced accuracy:
		mean = 78.38%;
		std = 0.03;
	accuracy:
		mean = 86.10%;
		std = 0.02;
k = 3:
	f1:
		mean = 87.77%;
		std = 0.02;
	ROC AUC OneVsRest:
		mean = 96.99%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 96.22%;
		std = 0.01;
	balanced accuracy:
		mean = 80.22%;
		std = 0.02;
	accuracy:
		mean = 87.88%;
		std = 0.02;
k = 5:
	f1:
		mean = 88.62%;
		std = 0.02;
	ROC AUC OneVsRest:
		mean = 98.20%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 97.82%;
		std = 0.01;
	balanced accuracy:
		mean = 80.35%;
		std = 0.02;
	accuracy:
		mean = 88.73%;
		std = 0.02;
k = 7:
	f1:
		mean = 89.13%;
		std = 0.02;
	ROC AUC OneVsRest:
		mean = 98.59%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 98.34%;
		std = 0.01;
	balanced accuracy:
		mean = 80.50%;
		std = 0.03;
	accuracy:
		mean = 89.24%;
		std = 0.02;
k = 9:
	f1:
		mean = 88.99%;
		std = 0.02;
	ROC AUC 

## KNN report

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, train_size=0.7, random_state=42)
knn = KNeighborsClassifier(n_neighbors=15, n_jobs=-1)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)
# skf = StratifiedKFold(n_splits=10)
# for train_index, test_index in skf.split(X, y):
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]
#     knn = KNeighborsClassifier(n_neighbors=7, n_jobs=-1)
#     knn.fit(X_train, y_train)
#     y_pred = knn.predict(X_test)
#     report = classification_report(y_test, y_pred)
#     print(report)

                    precision    recall  f1-score   support

            LAYING       0.99      0.99      0.99       587
        LIE_TO_SIT       0.66      0.81      0.72        26
      LIE_TO_STAND       0.79      0.44      0.56        25
           SITTING       0.91      0.87      0.89       540
        SIT_TO_LIE       0.60      0.78      0.68        32
      SIT_TO_STAND       0.71      1.00      0.83        10
          STANDING       0.90      0.92      0.91       594
      STAND_TO_LIE       0.88      0.55      0.68        42
      STAND_TO_SIT       0.71      0.24      0.36        21
           WALKING       0.98      1.00      0.99       517
WALKING_DOWNSTAIRS       0.99      0.96      0.98       422
  WALKING_UPSTAIRS       0.93      0.99      0.96       463

          accuracy                           0.94      3279
         macro avg       0.84      0.80      0.80      3279
      weighted avg       0.94      0.94      0.94      3279



## Decision Tree

In [6]:
tree_results = []

# Search for the best tree depth
for i in range(1, 17):
    curr = DecisionTreeClassifier(random_state=42, max_depth=i)
    result = cross_validate(curr, X, y, cv=10, scoring=metrics,
                            return_train_score=True, n_jobs=-1)
    tree_results.append(result)
    print("Depth = " + str(i) + ":")

    print("\tf1:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_f1_weighted').mean()*100,
                std = result.get('test_f1_weighted').std()))

    print("\tROC AUC OneVsRest:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_roc_auc_ovr_weighted').mean()*100,
                std = result.get('test_roc_auc_ovr_weighted').std()))

    print("\tROC AUC OneVsOne:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_roc_auc_ovo_weighted').mean()*100,
                std = result.get('test_roc_auc_ovo_weighted').std()))

    print("\tbalanced accuracy:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_balanced_accuracy').mean()*100,
                std = result.get('test_balanced_accuracy').std()))

    print("\taccuracy:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
          .format(mean = result.get('test_accuracy').mean()*100,
                  std = result.get('test_accuracy').std()))

# spoiler it's 10

Depth = 1:
	f1:
		mean = 24.16%;
		std = 0.00;
	ROC AUC OneVsRest:
		mean = 67.26%;
		std = 0.00;
	ROC AUC OneVsOne:
		mean = 62.27%;
		std = 0.00;
	balanced accuracy:
		mean = 16.53%;
		std = 0.00;
	accuracy:
		mean = 35.72%;
		std = 0.00;
Depth = 2:
	f1:
		mean = 38.03%;
		std = 0.00;
	ROC AUC OneVsRest:
		mean = 86.65%;
		std = 0.00;
	ROC AUC OneVsOne:
		mean = 80.71%;
		std = 0.01;
	balanced accuracy:
		mean = 25.44%;
		std = 0.01;
	accuracy:
		mean = 51.50%;
		std = 0.00;
Depth = 3:
	f1:
		mean = 66.63%;
		std = 0.04;
	ROC AUC OneVsRest:
		mean = 92.91%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 86.31%;
		std = 0.01;
	balanced accuracy:
		mean = 37.48%;
		std = 0.02;
	accuracy:
		mean = 72.28%;
		std = 0.03;
Depth = 4:
	f1:
		mean = 78.11%;
		std = 0.03;
	ROC AUC OneVsRest:
		mean = 95.26%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 91.26%;
		std = 0.01;
	balanced accuracy:
		mean = 42.10%;
		std = 0.02;
	accuracy:
		mean = 79.82%;
		std = 0.03;
Depth = 5:
	f1:
		mean = 79.21%;
		s

## Decision Tree Report

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, train_size=0.7, random_state=42)
dtree = DecisionTreeClassifier(random_state=42, max_depth=10)
dtree.fit(X_train, y_train)
y_pred = dtree.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)
# skf = StratifiedKFold(n_splits=10)
# for train_index, test_index in skf.split(X, y):
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]
#     dtree = DecisionTreeClassifier(random_state=42, max_depth=10)
#     dtree.fit(X_train, y_train)
#     y_pred = dtree.predict(X_test)
#     report = classification_report(y_test, y_pred)
#     print(report)

                    precision    recall  f1-score   support

            LAYING       0.98      0.99      0.98       587
        LIE_TO_SIT       0.62      0.58      0.60        26
      LIE_TO_STAND       0.61      0.56      0.58        25
           SITTING       0.89      0.90      0.90       540
        SIT_TO_LIE       0.57      0.75      0.65        32
      SIT_TO_STAND       0.30      0.30      0.30        10
          STANDING       0.91      0.91      0.91       594
      STAND_TO_LIE       0.71      0.48      0.57        42
      STAND_TO_SIT       0.64      0.33      0.44        21
           WALKING       0.93      0.92      0.93       517
WALKING_DOWNSTAIRS       0.91      0.93      0.92       422
  WALKING_UPSTAIRS       0.90      0.91      0.91       463

          accuracy                           0.91      3279
         macro avg       0.75      0.71      0.72      3279
      weighted avg       0.91      0.91      0.91      3279

