In [1]:
%run data_getter_and_processor.ipynb

import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import GridSearchCV

# Obtain Training and testing data.
train_x, test_x, train_y, test_y = get_split_train_data(random_state=10)

In [10]:
from sklearn.preprocessing import scale

# Apply gridSearch to search best Random Forest model for imbalanced data for worst
worst_stress_levels = train_y.loc[:, "worst_stress_level"]
balanced_train_x, worst_stress_levels = get_balanced_dataset_by_hybrid_approach(train_x, worst_stress_levels)
balanced_train_x = scale(balanced_train_x)

param_grid = [
    {'kernel' : ['rbf'],
     'C' : [1,3,5],
     'gamma' : [0.1, 0.01 ,0.001, 0.0001, 0.00001]},
    
    {'kernel' : ['poly'],
     'C' : [1,3,5],
     'gamma' : [0.01 ,0.001, 0.0001],
     'degree': [2, 3, 5],
     'cache_size' :[1000]
    }
  ]

# param_grid = [
#     {'kernel' : ['poly'],
#      'C' : [1],
#      'gamma' : [0.001],
#      'degree': [2, 3, 5],
#      'cache_size' :[1000]   }
#   ]

clf = GridSearchCV(SVC(), param_grid, cv=5, scoring="accuracy", n_jobs=-1)
clf.fit(balanced_train_x, worst_stress_levels)
print(clf.best_params_, clf.best_score_, clf.best_estimator_)

# predicting only worst stress levels.
neigh = clf.estimator
neigh.fit(balanced_train_x, worst_stress_levels)
pred_worst_stress_levels = neigh.predict(test_x)

score = accuracy_score(test_y.loc[:,"worst_stress_level"], pred_worst_stress_levels, normalize=True)
f1 = f1_score(test_y.iloc[:,0], pred_worst_stress_levels, average=None)
p_score = precision_score(test_y.iloc[:,0], pred_worst_stress_levels, average=None)
r_score = recall_score(test_y.iloc[:,0], pred_worst_stress_levels, average=None)

print("Worst stress levels accuracy is "+ str(score * 100) + " %")
print("Worst stress levels f_1 score ", f1)
print("Worst stress levels precision score ", p_score)
print("Worst stress levels recall score ", r_score)
print("predicted values", pred_worst_stress_levels)

{'C': 5, 'gamma': 0.1, 'kernel': 'rbf'} 0.5922222222222222 SVC(C=5, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
Worst stress levels accuracy is 42.52577319587629 %
Worst stress levels f_1 score  [0.         0.         0.59674503 0.         0.        ]
Worst stress levels precision score  [0.         0.         0.42525773 0.         0.        ]
Worst stress levels recall score  [0. 0. 1. 0. 0.]
predicted values [3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
