In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

import warnings
warnings.filterwarnings("ignore")

# Read data

In [20]:
actions = pd.read_csv("../data/user16_vs_the_world.csv")
actions.head()

Unnamed: 0,traveled_distance_pixel,elapsed_time,straightness,num_points,sum_of_angles,mean_curv,sd_curv,max_curv,min_curv,mean_omega,...,action_3,action_4,direction_1,direction_2,direction_3,direction_4,direction_5,direction_6,direction_7,user_16
0,7.0,0.125,1.0,4,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0,1
1,89.008474,0.437,0.999968,5,0.016948,-0.000634,0.001366,0.000287,-0.002825,0.009086,...,0,0,0,0,0,0,0,0,0,1
2,3.0,0.125,1.0,4,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,0,1
3,1051.510748,3.369,0.310032,29,12.039747,0.06057,0.315803,1.570796,-0.39734,0.563722,...,1,0,0,0,0,0,0,0,0,1
4,42.0,0.39,1.0,5,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,1


In [21]:
actions.columns

Index(['traveled_distance_pixel', 'elapsed_time', 'straightness', 'num_points',
       'sum_of_angles', 'mean_curv', 'sd_curv', 'max_curv', 'min_curv',
       'mean_omega', 'sd_omega', 'max_omega', 'min_omega', 'largest_deviation',
       'dist_end_to_end_line', 'num_critical_points', 'mean_vx', 'sd_vx',
       'max_vx', 'min_vx', 'mean_vy', 'sd_vy', 'max_vy', 'min_vy', 'mean_v',
       'sd_v', 'max_v', 'min_v', 'mean_a', 'sd_a', 'max_a', 'min_a',
       'mean_jerk', 'sd_jerk', 'max_jerk', 'min_jerk', 'a_beg_time',
       'action_3', 'action_4', 'direction_1', 'direction_2', 'direction_3',
       'direction_4', 'direction_5', 'direction_6', 'direction_7', 'user_16'],
      dtype='object')

In [22]:
# split into features and target variable
X = actions.drop("user_16", axis=1)
y = actions["user_16"]

norm_X = normalize(X)

X_train, X_test, y_train, y_test = train_test_split(norm_X, y, test_size=0.3,
                                                    random_state=42)

# Hypertuning model parameters using GridSearchCV

In [33]:
param_grid = {"n_neighbors": np.arange(1, 50, 2)}

In [34]:
knn_gscv = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)

In [35]:
knn_gscv.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
       35, 37, 39, 41, 43, 45, 47, 49])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [36]:
knn_gscv.best_params_

{'n_neighbors': 47}

In [37]:
knn_gscv.best_score_

0.6292708817090161

In [38]:
pred_gscv = knn_gscv.predict(X_test)

In [39]:
print(classification_report(y_test, pred_gscv))
print(confusion_matrix(y_test, pred_gscv))

              precision    recall  f1-score   support

           0       0.67      0.48      0.56      3214
           1       0.60      0.77      0.67      3247

   micro avg       0.62      0.62      0.62      6461
   macro avg       0.64      0.62      0.62      6461
weighted avg       0.64      0.62      0.62      6461

[[1544 1670]
 [ 754 2493]]
