In [1]:
import pandas as pd

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline

In [2]:
# Preparation

df = pd.read_csv('dataset.csv')
X = df.drop(columns=['Activity'])
y = df['Activity']

cv = StratifiedKFold(n_splits=10)

metrics = ['f1_weighted', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'accuracy']

## KNN

In [3]:
# KNN classifiers with odd K in range of (1,19)
# search for best K
for i in range(10):
    k = i*2+1

    steps = [StandardScaler(), SMOTE(random_state=42), KNeighborsClassifier(n_neighbors=k, n_jobs=-1)]
    imba_pipeline = make_pipeline(*steps)

    result = cross_validate(imba_pipeline, X, y, cv=10, scoring=metrics, n_jobs=-1)

    print("k = " + str(k) + ":")

    print("\tf1:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_f1_weighted').mean()*100,
                std = result.get('test_f1_weighted').std()))

    print("\tROC AUC OneVsRest:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_roc_auc_ovr_weighted').mean()*100,
                std = result.get('test_roc_auc_ovr_weighted').std()))

    print("\tROC AUC OneVsOne:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_roc_auc_ovo_weighted').mean()*100,
                std = result.get('test_roc_auc_ovo_weighted').std()))

    print("\tbalanced accuracy:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_balanced_accuracy').mean()*100,
                std = result.get('test_balanced_accuracy').std()))

    print("\taccuracy:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
          .format(mean = result.get('test_accuracy').mean()*100,
                  std = result.get('test_accuracy').std()))

# spoiler it's: 15

k = 1:
	f1:
		mean = 86.31%;
		std = 0.02;
	ROC AUC OneVsRest:
		mean = 91.98%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 91.10%;
		std = 0.01;
	balanced accuracy:
		mean = 81.19%;
		std = 0.04;
	accuracy:
		mean = 86.27%;
		std = 0.02;
k = 3:
	f1:
		mean = 87.45%;
		std = 0.02;
	ROC AUC OneVsRest:
		mean = 96.32%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 94.89%;
		std = 0.01;
	balanced accuracy:
		mean = 81.55%;
		std = 0.04;
	accuracy:
		mean = 87.41%;
		std = 0.02;
k = 5:
	f1:
		mean = 88.34%;
		std = 0.02;
	ROC AUC OneVsRest:
		mean = 97.54%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 96.04%;
		std = 0.01;
	balanced accuracy:
		mean = 82.30%;
		std = 0.03;
	accuracy:
		mean = 88.28%;
		std = 0.02;
k = 7:
	f1:
		mean = 88.71%;
		std = 0.02;
	ROC AUC OneVsRest:
		mean = 98.02%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 96.60%;
		std = 0.01;
	balanced accuracy:
		mean = 82.43%;
		std = 0.04;
	accuracy:
		mean = 88.62%;
		std = 0.02;
k = 9:
	f1:
		mean = 88.98%;
		std = 0.02;
	ROC AUC 

## Report KNN

In [7]:
X_norm = StandardScaler().fit_transform(X.copy())

X_train, X_test, y_train, y_test = train_test_split(X_norm, y, stratify=y, train_size=0.7, random_state=42)

X_train, y_train = SMOTE(n_jobs=-1, random_state=42).fit_sample(X_train, y_train)

knn = KNeighborsClassifier(n_neighbors=15, n_jobs=-1)

knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

                    precision    recall  f1-score   support

            LAYING       0.99      0.98      0.99       587
        LIE_TO_SIT       0.56      0.58      0.57        26
      LIE_TO_STAND       0.48      0.44      0.46        25
           SITTING       0.84      0.92      0.88       540
        SIT_TO_LIE       0.68      0.81      0.74        32
      SIT_TO_STAND       0.48      1.00      0.65        10
          STANDING       0.94      0.84      0.89       594
      STAND_TO_LIE       0.71      0.71      0.71        42
      STAND_TO_SIT       0.82      0.86      0.84        21
           WALKING       0.99      0.99      0.99       517
WALKING_DOWNSTAIRS       0.99      0.99      0.99       422
  WALKING_UPSTAIRS       0.99      0.99      0.99       463

          accuracy                           0.94      3279
         macro avg       0.79      0.84      0.81      3279
      weighted avg       0.94      0.94      0.94      3279



## Decision Tree

In [4]:
for i in range(1, 17):
    steps = [StandardScaler(), SMOTE(random_state=42), DecisionTreeClassifier(random_state=42, max_depth=i)]

    imba_pipeline = make_pipeline(*steps)

    result = cross_validate(imba_pipeline, X, y, cv=10, scoring=metrics, n_jobs=-1)

    print("Depth = " + str(i) + ":")

    print("\tf1:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_f1_weighted').mean()*100,
                std = result.get('test_f1_weighted').std()))

    print("\tROC AUC OneVsRest:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_roc_auc_ovr_weighted').mean()*100,
                std = result.get('test_roc_auc_ovr_weighted').std()))

    print("\tROC AUC OneVsOne:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_roc_auc_ovo_weighted').mean()*100,
                std = result.get('test_roc_auc_ovo_weighted').std()))

    print("\tbalanced accuracy:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
        .format(mean = result.get('test_balanced_accuracy').mean()*100,
                std = result.get('test_balanced_accuracy').std()))

    print("\taccuracy:")
    print("\t\tmean = {mean:.2f}%;\n\t\tstd = {std:.2f};"
          .format(mean = result.get('test_accuracy').mean()*100,
                  std = result.get('test_accuracy').std()))

# spoiler it's 11

Depth = 1:
	f1:
		mean = 9.29%;
		std = 0.00;
	ROC AUC OneVsRest:
		mean = 78.66%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 77.13%;
		std = 0.00;
	balanced accuracy:
		mean = 16.34%;
		std = 0.01;
	accuracy:
		mean = 18.62%;
		std = 0.00;
Depth = 2:
	f1:
		mean = 30.52%;
		std = 0.00;
	ROC AUC OneVsRest:
		mean = 86.81%;
		std = 0.00;
	ROC AUC OneVsOne:
		mean = 84.59%;
		std = 0.01;
	balanced accuracy:
		mean = 32.48%;
		std = 0.01;
	accuracy:
		mean = 36.98%;
		std = 0.01;
Depth = 3:
	f1:
		mean = 55.05%;
		std = 0.04;
	ROC AUC OneVsRest:
		mean = 91.12%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 93.50%;
		std = 0.01;
	balanced accuracy:
		mean = 45.93%;
		std = 0.03;
	accuracy:
		mean = 62.12%;
		std = 0.04;
Depth = 4:
	f1:
		mean = 68.80%;
		std = 0.03;
	ROC AUC OneVsRest:
		mean = 94.31%;
		std = 0.01;
	ROC AUC OneVsOne:
		mean = 95.43%;
		std = 0.01;
	balanced accuracy:
		mean = 59.25%;
		std = 0.04;
	accuracy:
		mean = 73.40%;
		std = 0.03;
Depth = 5:
	f1:
		mean = 81.77%;
		st

## Report Decision Tree

In [6]:
X_norm = StandardScaler().fit_transform(X.copy())

X_train, X_test, y_train, y_test = train_test_split(X_norm, y, stratify=y, train_size=0.7, random_state=42)

X_train, y_train = SMOTE(n_jobs=-1, random_state=42).fit_sample(X_train, y_train)

dtree = DecisionTreeClassifier(random_state=42, max_depth=11)

dtree.fit(X_train, y_train)
y_pred = dtree.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)



                    precision    recall  f1-score   support

            LAYING       0.99      0.99      0.99       587
        LIE_TO_SIT       0.67      0.62      0.64        26
      LIE_TO_STAND       0.56      0.56      0.56        25
           SITTING       0.89      0.88      0.89       540
        SIT_TO_LIE       0.52      0.78      0.62        32
      SIT_TO_STAND       0.43      0.60      0.50        10
          STANDING       0.91      0.90      0.91       594
      STAND_TO_LIE       0.60      0.62      0.61        42
      STAND_TO_SIT       0.79      0.52      0.63        21
           WALKING       0.96      0.92      0.94       517
WALKING_DOWNSTAIRS       0.92      0.91      0.92       422
  WALKING_UPSTAIRS       0.87      0.92      0.90       463

          accuracy                           0.91      3279
         macro avg       0.76      0.77      0.76      3279
      weighted avg       0.91      0.91      0.91      3279

