In [8]:
# knn classification model for fitness data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_curve


# read in data
fitness_df = pd.DataFrame()
for name in ['Lunge', 'Pullup']:
    df = pd.read_csv(f'./data/{name}.csv')
    df['Exercise'] = name
    fitness_df = pd.concat([fitness_df, df], axis=0, ignore_index=True)

In [9]:
fitness_df

Unnamed: 0,left_elbow_angle_period,right_elbow_angle_period,left_wrist_angle_period,right_wrist_angle_period,left_shoulder_angle_period,right_shoulder_angle_period,hip_angle_period,waist_angle_period,left_knee_angle_period,right_knee_angle_period,...,right_wrist_angle_power,left_shoulder_angle_power,right_shoulder_angle_power,hip_angle_power,waist_angle_power,left_knee_angle_power,right_knee_angle_power,left_ankle_angle_power,right_ankle_angle_power,Exercise
0,354.0,354.0,354.0,118.0,118.0,177.0,118.0,354.0,118.0,118.0,...,32648.446063,1170.339045,1035.793438,7184.252633,1567.870451,50389.808037,109061.188403,2373.861999,6260.094119,Lunge
1,118.0,118.0,118.0,118.0,118.0,59.0,118.0,35.4,118.0,118.0,...,7289.262835,656.262866,278.464232,12199.997929,161.127965,112831.632565,213051.694440,575.263239,4394.967307,Lunge
2,354.0,118.0,118.0,118.0,354.0,354.0,118.0,354.0,118.0,118.0,...,6437.395068,24154.563862,10003.045417,10438.403142,58.031892,103394.526283,191951.850049,3103.764694,3651.291837,Lunge
3,118.0,118.0,118.0,354.0,118.0,354.0,118.0,118.0,118.0,118.0,...,1200.202032,1881.984979,581.114539,11324.047599,767.355930,136100.646578,272289.019067,2355.199953,4659.102401,Lunge
4,118.0,118.0,59.0,118.0,118.0,59.0,118.0,118.0,118.0,118.0,...,1864.637903,1050.592678,128.148998,12389.265165,355.479708,108453.038985,254294.777490,2543.739282,6071.651672,Lunge
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,180.0,180.0,180.0,90.0,180.0,180.0,180.0,45.0,180.0,180.0,...,3112.979149,138860.001451,107153.772941,1002.662873,30.703757,3194.279858,6011.882564,13425.548823,4848.659436,Pullup
64,90.0,90.0,90.0,90.0,90.0,90.0,90.0,90.0,180.0,180.0,...,7649.975658,169661.136473,117918.850427,997.343841,90.241013,751.848219,9547.374023,13667.763373,3529.529337,Pullup
65,90.0,90.0,90.0,90.0,90.0,90.0,90.0,180.0,45.0,90.0,...,5349.156930,137499.986378,114394.661052,624.726500,35.437197,2228.428778,6917.887349,13971.537760,8718.913251,Pullup
66,90.0,90.0,90.0,90.0,90.0,90.0,36.0,90.0,45.0,90.0,...,4890.550119,100617.023121,96001.372271,78.606035,28.202200,1921.145080,3923.622241,12317.677005,4980.790359,Pullup


In [10]:
seed = 42
x = fitness_df.drop(['Exercise'], axis=1)
y = fitness_df['Exercise']

# split data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=seed, stratify=y)

In [15]:
# scale data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# create model
knn = KNeighborsClassifier()

# fit model
knn.fit(x_train, y_train)

# predict on test set
y_pred_test = knn.predict(x_test)
y_pred_train = knn.predict(x_train)

# evaluate model
print("Accuracy on test set: ", accuracy_score(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))
print("Accuracy on train set: ", accuracy_score(y_train, y_pred_train))
print(classification_report(y_train, y_pred_train))

print(confusion_matrix(y_test, y_pred_test))
print(confusion_matrix(y_train, y_pred_train))

# cross validation
cv_scores = cross_val_score(knn, x_train, y_train, cv=5)
print(cv_scores)
print(np.mean(cv_scores))


Accuracy on test set:  0.9285714285714286
              precision    recall  f1-score   support

       Lunge       1.00      0.83      0.91         6
      Pullup       0.89      1.00      0.94         8

    accuracy                           0.93        14
   macro avg       0.94      0.92      0.93        14
weighted avg       0.94      0.93      0.93        14

Accuracy on train set:  0.9814814814814815
              precision    recall  f1-score   support

       Lunge       0.95      1.00      0.98        21
      Pullup       1.00      0.97      0.98        33

    accuracy                           0.98        54
   macro avg       0.98      0.98      0.98        54
weighted avg       0.98      0.98      0.98        54

[[5 1]
 [0 8]]
[[21  0]
 [ 1 32]]
[0.90909091 1.         0.90909091 1.         1.        ]
0.9636363636363636


In [16]:
# grid search
param_grid = {'n_neighbors': np.arange(1, 25)}
knn_cv = GridSearchCV(knn, param_grid, cv=5)
knn_cv.fit(x_train, y_train)
print(knn_cv.best_params_)
print(knn_cv.best_score_)
print(knn_cv.best_estimator_)
print(knn_cv.best_index_)
print(knn_cv.scorer_)
print(knn_cv.n_splits_)
print(knn_cv.refit_time_)
print(knn_cv.cv_results_)
print(knn_cv.predict(x_test))
print(knn_cv.predict_proba(x_test))
print(knn_cv.score(x_test, y_test))


{'n_neighbors': 15}
1.0
KNeighborsClassifier(n_neighbors=15)
14
<sklearn.metrics._scorer._PassthroughScorer object at 0x2a228a110>
5
0.00015091896057128906
{'mean_fit_time': array([0.00037251, 0.00026202, 0.00026979, 0.00020251, 0.00028443,
       0.00018854, 0.00025983, 0.00031443, 0.00021687, 0.00029473,
       0.00017424, 0.00016661, 0.00019422, 0.00019665, 0.00017767,
       0.00018802, 0.00028729, 0.00020742, 0.00019555, 0.00017385,
       0.0002183 , 0.00017519, 0.0001812 , 0.00019422]), 'std_fit_time': array([1.24464966e-04, 1.12608303e-05, 6.56908910e-05, 1.14317669e-05,
       1.37839429e-04, 1.19669978e-05, 1.46825850e-04, 1.57894270e-04,
       6.69808728e-05, 1.32209618e-04, 7.54458255e-06, 1.05982355e-06,
       1.28742501e-05, 2.28242179e-05, 7.07328096e-06, 6.55993459e-06,
       1.41220539e-04, 7.37634873e-06, 2.08773009e-05, 9.68742540e-06,
       8.29961888e-05, 1.17793256e-05, 4.10190833e-06, 1.26011380e-05]), 'mean_score_time': array([0.00129347, 0.00103168, 0.00090

In [17]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, random_state=seed)
rf.fit(x_train, y_train)
y_pred_test = rf.predict(x_test)
y_pred_train = rf.predict(x_train)

print("Accuracy on test set: ", accuracy_score(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))
print("Accuracy on train set: ", accuracy_score(y_train, y_pred_train))
print(classification_report(y_train, y_pred_train))

Accuracy on test set:  1.0
              precision    recall  f1-score   support

       Lunge       1.00      1.00      1.00         6
      Pullup       1.00      1.00      1.00         8

    accuracy                           1.00        14
   macro avg       1.00      1.00      1.00        14
weighted avg       1.00      1.00      1.00        14

Accuracy on train set:  1.0
              precision    recall  f1-score   support

       Lunge       1.00      1.00      1.00        21
      Pullup       1.00      1.00      1.00        33

    accuracy                           1.00        54
   macro avg       1.00      1.00      1.00        54
weighted avg       1.00      1.00      1.00        54

