In [50]:
%run data_getter_and_processor.ipynb

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
# Obtain Training and testing data.
train_x, test_x, train_y, test_y = get_split_train_data(random_state=10)


In [93]:
from sklearn.model_selection import GridSearchCV

# Apply gridSearch to search best KNN model for Best Stress levels.
worst_stress_levels = train_y.loc[:,"worst_stress_level"]
balanced_train_x, worst_stress_levels = balance_data(train_x, worst_stress_levels)

param_grid = [
  {'n_neighbors': [3, 5, 7, 10, 20]},
 ]

clf = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring="accuracy")
clf.fit(balanced_train_x, worst_stress_levels)
print(clf.best_params_, clf.best_score_, clf.best_estimator_)

# predicting only worst stress levels.
neigh = clf.estimator
neigh.fit(balanced_train_x, worst_stress_levels)
pred_worst_stress_levels = neigh.predict(test_x)

score = accuracy_score(test_y.loc[:,"worst_stress_level"], pred_worst_stress_levels, normalize=True)
f1 = f1_score(test_y.iloc[:,0], pred_worst_stress_levels, average=None)

print("Worst stress levels accuracy is "+ str(score * 100) + " %")
print("Worst stress levels f_1 score ", f1)

{'n_neighbors': 3} 0.6925 KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')
Worst stress levels accuracy is 25.0 %
Worst stress levels f_1 score  [0.21428571 0.23952096 0.35245902 0.1958042  0.07407407]


In [94]:
# Fixing Balancing of the train splits.
best_stress_level = train_y.loc[:,"best_stress_level"]
balanced_train_x, best_stress_level = balance_data(train_x, best_stress_levels)

param_grid = [
  {'n_neighbors': [3, 5, 7, 10, 20]},
 ]

clf = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring="accuracy")
clf.fit(balanced_train_x, best_stress_level)
print(clf.best_params_, clf.best_score_, clf.best_estimator_)

# predicting only only best stress levels.
neigh = clf.estimator
neigh.fit(balanced_train_x, best_stress_level)
pred_best_stress_level = neigh.predict(test_x)

score = accuracy_score(test_y.loc[:,"best_stress_level"], pred_best_stress_level, normalize=True)
f1 = f1_score(test_y.iloc[:,0], pred_best_stress_level, average=None)

print("Best stress levels accuracy is "+ str(score * 100) + " %")
print("Worst stress levels f_1 score ", f1)

{'n_neighbors': 3} 0.6922680412371134 KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')
Best stress levels accuracy is 22.938144329896907 %
Worst stress levels f_1 score  [0.21333333 0.2        0.296      0.12598425 0.12658228]
