In [2]:
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as tts

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report, roc_auc_score

In [3]:
rand = 0xC0FFEE
data = pd.read_csv('combined_data.csv')

# Temporarily excluding the first column with time info since it seems to make knn unhappy
X = data[data.columns[1:-1]]
y = data['activity']

In [12]:
X.head()

Unnamed: 0,acc_Acceleration x (m/s^2),acc_Acceleration y (m/s^2),acc_Acceleration z (m/s^2),gyr_Gyroscope x (rad/s),gyr_Gyroscope y (rad/s),gyr_Gyroscope z (rad/s),lin_acc_Linear Acceleration x (m/s^2),lin_acc_Linear Acceleration y (m/s^2),lin_acc_Linear Acceleration z (m/s^2),acc_magnitude,...,average_gyr_Gyroscope x (rad/s),average_gyr_Gyroscope y (rad/s),average_gyr_Gyroscope z (rad/s),average_lin_acc_Linear Acceleration x (m/s^2),average_lin_acc_Linear Acceleration y (m/s^2),average_lin_acc_Linear Acceleration z (m/s^2),average_acc_magnitude,average_gyr_magnitude,average_lin_acc_magnitude,average_filtered_acc_magnitude
0,-1.457379,3.879065,8.795268,-0.13011,-0.904436,0.261214,-0.185778,0.186615,0.108793,9.722543,...,-0.066888,-0.749777,0.184285,-0.185778,0.186615,0.108793,9.604793,0.777542,0.284912,9.519643
1,-0.298654,4.020241,9.806834,-0.29733,-0.67804,0.269078,0.074469,0.23948,0.572785,10.603091,...,-0.143702,-0.725865,0.212549,-0.055654,0.213048,0.340789,9.937559,0.780944,0.455098,9.549299
2,-0.338285,3.731756,9.367452,-0.056045,-0.13599,0.028786,-0.010581,-0.000861,0.067117,10.089083,...,-0.121788,-0.578396,0.166609,-0.04063,0.141745,0.249565,9.97544,0.623177,0.326049,9.582442
3,-0.450748,3.181258,8.893374,-0.074142,-0.201961,0.201121,-0.176372,-0.652862,0.020059,9.455986,...,-0.112258,-0.503109,0.173511,-0.074565,-0.056907,0.192189,9.871549,0.557444,0.413678,9.620854
4,1.228714,1.318445,8.843274,-0.291527,-1.689527,0.561903,-0.12887,-1.563116,-0.194413,9.02505,...,-0.142137,-0.700845,0.238243,-0.085426,-0.358149,0.114868,9.730466,0.76524,0.647027,9.665326


In [4]:
X_train, X_test, y_train, y_test = tts(X, y, random_state=rand, stratify=y)

In [9]:
bayes = GaussianNB()
bayes.fit(X_train, y_train)
pred_p = bayes.predict_proba(X_test)
pred = bayes.predict(X_test)

print('naive bayes')
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

naive bayes
              precision    recall  f1-score   support

     cycling       0.99      1.00      1.00      5184
     running       0.98      1.00      0.99      1831
     walking       1.00      0.98      0.99      3812

    accuracy                           0.99     10827
   macro avg       0.99      0.99      0.99     10827
weighted avg       0.99      0.99      0.99     10827

ROC AUC Score: 0.9983572034775149


In [11]:
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
pred_p = knn.predict_proba(X_test)
pred = knn.predict(X_test)

print('KNN')
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

KNN
              precision    recall  f1-score   support

     cycling       1.00      1.00      1.00      5184
     running       1.00      0.99      0.99      1831
     walking       1.00      1.00      1.00      3812

    accuracy                           1.00     10827
   macro avg       1.00      1.00      1.00     10827
weighted avg       1.00      1.00      1.00     10827

ROC AUC Score: 0.9996465613480368


In [14]:
knn_gs = GridSearchCV(KNeighborsClassifier(), 
                      {
                          'n_neighbors':[1, 3, 5, 11],
                          'leaf_size':[3, 5, 10, 15]
                        }, cv=5, scoring='roc_auc_ovr').fit(X_train, y_train)

pred_p = knn_gs.predict_proba(X_test)
pred = knn_gs.predict(X_test)

print('KNN gridsearch')
print(knn_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

KNN gridsearch
{'leaf_size': 3, 'n_neighbors': 11}
              precision    recall  f1-score   support

     cycling       1.00      1.00      1.00      5184
     running       1.00      0.99      0.99      1831
     walking       1.00      1.00      1.00      3812

    accuracy                           1.00     10827
   macro avg       1.00      0.99      0.99     10827
weighted avg       1.00      1.00      1.00     10827

ROC AUC Score: 0.9999752116535041


In [15]:
tree = DecisionTreeClassifier()
tree.fit(X_train, y_train)
pred_p = tree.predict_proba(X_test)
pred = tree.predict(X_test)

print('decision tree')
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

decision tree
              precision    recall  f1-score   support

     cycling       1.00      1.00      1.00      5184
     running       1.00      1.00      1.00      1831
     walking       1.00      1.00      1.00      3812

    accuracy                           1.00     10827
   macro avg       1.00      1.00      1.00     10827
weighted avg       1.00      1.00      1.00     10827

ROC AUC Score: 0.9994163691194703


In [17]:
tree_gs = GridSearchCV(DecisionTreeClassifier(), 
                      {
                          'max_depth':[5, 10, 15, 50],
                        }, cv=5, scoring='roc_auc_ovr').fit(X_train, y_train)

pred_p = tree_gs.predict_proba(X_test)
pred = tree_gs.predict(X_test)

print('tree gridsearch')
print(tree_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

tree gridsearch
{'max_depth': 5}
              precision    recall  f1-score   support

     cycling       1.00      1.00      1.00      5184
     running       1.00      1.00      1.00      1831
     walking       1.00      1.00      1.00      3812

    accuracy                           1.00     10827
   macro avg       1.00      1.00      1.00     10827
weighted avg       1.00      1.00      1.00     10827

ROC AUC Score: 0.9996910205417633


In [18]:
forest = RandomForestClassifier()
forest.fit(X_train, y_train)
pred_p = forest.predict_proba(X_test)
pred = forest.predict(X_test)

print('Random forest')
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

Random forest
              precision    recall  f1-score   support

     cycling       1.00      1.00      1.00      5184
     running       1.00      1.00      1.00      1831
     walking       1.00      1.00      1.00      3812

    accuracy                           1.00     10827
   macro avg       1.00      1.00      1.00     10827
weighted avg       1.00      1.00      1.00     10827

ROC AUC Score: 1.0


In [20]:
forest_gs = GridSearchCV(RandomForestClassifier(), 
                      {
                          'max_depth':[3, 5, 10, 20],
                          'n_estimators':[10, 50, 100]
                        }, cv=5, scoring='roc_auc_ovr').fit(X_train, y_train)

pred_p = forest_gs.predict_proba(X_test)
pred = forest_gs.predict(X_test)

print('random forest gridsearch')
print(tree_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))

roc_auc = roc_auc_score(y_test, pred_p, multi_class='ovr')
print(f'ROC AUC Score: {roc_auc}')

random forest gridsearch
{'max_depth': 5}
              precision    recall  f1-score   support

     cycling       1.00      1.00      1.00      5184
     running       1.00      1.00      1.00      1831
     walking       1.00      1.00      1.00      3812

    accuracy                           1.00     10827
   macro avg       1.00      1.00      1.00     10827
weighted avg       1.00      1.00      1.00     10827

ROC AUC Score: 0.99999983810593
