In [1]:
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as tts

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report, roc_auc_score

In [2]:
rand = 0xC0FFEE
data = pd.read_csv('combined_data.csv')

# first column is time
X = data[data.columns[1:-1]]
y = data['activity']

In [3]:
X.head()

Unnamed: 0,acc_Acceleration x (m/s^2),acc_Acceleration y (m/s^2),acc_Acceleration z (m/s^2),gyr_Gyroscope x (rad/s),gyr_Gyroscope y (rad/s),gyr_Gyroscope z (rad/s),lin_acc_Linear Acceleration x (m/s^2),lin_acc_Linear Acceleration y (m/s^2),lin_acc_Linear Acceleration z (m/s^2)
0,-1.457379,3.879065,8.795268,-0.13011,-0.904436,0.261214,-0.185778,0.186615,0.108793
1,-0.298654,4.020241,9.806834,-0.29733,-0.67804,0.269078,0.074469,0.23948,0.572785
2,-0.338285,3.731756,9.367452,-0.056045,-0.13599,0.028786,-0.010581,-0.000861,0.067117
3,-0.450748,3.181258,8.893374,-0.074142,-0.201961,0.201121,-0.176372,-0.652862,0.020059
4,1.228714,1.318445,8.843274,-0.291527,-1.689527,0.561903,-0.12887,-1.563116,-0.194413


In [4]:
X_train, X_test, y_train, y_test = tts(X, y, random_state=rand, stratify=y)

In [5]:
len(X_train)/len(X)

0.7499884542557613

In [6]:
bayes = GaussianNB()
bayes.fit(X_train, y_train)
pred_p = bayes.predict_proba(X_test)
pred = bayes.predict(X_test)

print('naive bayes')
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

naive bayes
              precision    recall  f1-score   support

     cycling       0.95      0.95      0.95      2592
     running       0.96      0.74      0.84       916
     walking       0.83      0.93      0.88      1906

    accuracy                           0.91      5414
   macro avg       0.92      0.87      0.89      5414
weighted avg       0.91      0.91      0.91      5414

ROC AUC Score: 0.9808982593193275


In [7]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
pred_p = knn.predict_proba(X_test)
pred = knn.predict(X_test)

print('KNN')
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

KNN
              precision    recall  f1-score   support

     cycling       0.99      1.00      0.99      2592
     running       1.00      0.96      0.98       916
     walking       0.99      1.00      0.99      1906

    accuracy                           0.99      5414
   macro avg       0.99      0.98      0.99      5414
weighted avg       0.99      0.99      0.99      5414

ROC AUC Score: 0.9949590992150408


In [8]:
knn_gs = GridSearchCV(KNeighborsClassifier(), 
                      {
                          'n_neighbors':[1, 3, 5, 11],
                          'leaf_size':[3, 5, 10, 15]
                        }, cv=5, scoring='roc_auc_ovr').fit(X_train, y_train)

pred_p = knn_gs.predict_proba(X_test)
pred = knn_gs.predict(X_test)

print('KNN gridsearch')
print(knn_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

KNN gridsearch
{'leaf_size': 3, 'n_neighbors': 11}
              precision    recall  f1-score   support

     cycling       0.99      1.00      0.99      2592
     running       1.00      0.95      0.97       916
     walking       0.98      0.99      0.99      1906

    accuracy                           0.99      5414
   macro avg       0.99      0.98      0.98      5414
weighted avg       0.99      0.99      0.99      5414

ROC AUC Score: 0.9958874489020966


In [9]:
tree = DecisionTreeClassifier()
tree.fit(X_train, y_train)
pred_p = tree.predict_proba(X_test)
pred = tree.predict(X_test)

print('decision tree')
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

decision tree
              precision    recall  f1-score   support

     cycling       0.99      0.98      0.98      2592
     running       0.96      0.94      0.95       916
     walking       0.97      0.98      0.97      1906

    accuracy                           0.97      5414
   macro avg       0.97      0.97      0.97      5414
weighted avg       0.97      0.97      0.97      5414

ROC AUC Score: 0.9777257967808796


In [10]:
tree_gs = GridSearchCV(DecisionTreeClassifier(), 
                      {
                          'max_depth':[5, 10, 15, 50],
                        }, cv=5, scoring='roc_auc_ovr').fit(X_train, y_train)

pred_p = tree_gs.predict_proba(X_test)
pred = tree_gs.predict(X_test)

print('tree gridsearch')
print(tree_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

tree gridsearch
{'max_depth': 15}
              precision    recall  f1-score   support

     cycling       0.99      0.98      0.98      2592
     running       0.97      0.95      0.96       916
     walking       0.97      0.98      0.97      1906

    accuracy                           0.98      5414
   macro avg       0.97      0.97      0.97      5414
weighted avg       0.98      0.98      0.98      5414

ROC AUC Score: 0.999645936226132


In [11]:
forest = RandomForestClassifier()
forest.fit(X_train, y_train)
pred_p = forest.predict_proba(X_test)
pred = forest.predict(X_test)

print('Random forest')
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

Random forest
              precision    recall  f1-score   support

     cycling       0.99      1.00      0.99      2592
     running       0.99      0.97      0.98       916
     walking       0.99      1.00      0.99      1906

    accuracy                           0.99      5414
   macro avg       0.99      0.99      0.99      5414
weighted avg       0.99      0.99      0.99      5414

ROC AUC Score: 0.9989607772054662


In [12]:
forest_gs = GridSearchCV(RandomForestClassifier(), 
                      {
                          'max_depth':[3, 5, 10, 20],
                          'n_estimators':[10, 50, 100]
                        }, cv=5, scoring='roc_auc_ovr').fit(X_train, y_train)

pred_p = forest_gs.predict_proba(X_test)
pred = forest_gs.predict(X_test)

print('random forest gridsearch')
print(tree_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

random forest gridsearch
{'max_depth': 15}
              precision    recall  f1-score   support

     cycling       0.99      1.00      0.99      2592
     running       0.99      0.97      0.98       916
     walking       0.99      1.00      0.99      1906

    accuracy                           0.99      5414
   macro avg       0.99      0.99      0.99      5414
weighted avg       0.99      0.99      0.99      5414

ROC AUC Score: 0.9999997571588951
