In [3]:
%run data_getter_and_processor.ipynb

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
# Obtain Training and testing data.
train_x, test_x, train_y, test_y = get_split_train_data(random_state=10)

In [12]:
from sklearn.model_selection import GridSearchCV

# Apply gridSearch to search best Random Forest model for imbalanced data for worst
worst_stress_levels = train_y.loc[:,"worst_stress_level"]

param_grid = [
    {'n_estimators': [5, 7, 10, 15, 20, 30, 40, 50]},
    {'max_features' : [3 ,5 ,8]},
    {'criterion' : ["gini", "entropy"]}
 ]

clf = GridSearchCV(RandomForestClassifier(), param_grid, cv=5, scoring="accuracy", n_jobs=-1)
clf.fit(train_x, worst_stress_levels)
print(clf.best_params_, clf.best_score_, clf.best_estimator_)

# predicting only worst stress levels.
neigh = clf.estimator
neigh.fit(train_x, worst_stress_levels)
pred_worst_stress_levels = neigh.predict(test_x)

score = accuracy_score(test_y.loc[:,"worst_stress_level"], pred_worst_stress_levels, normalize=True)
f1 = f1_score(test_y.iloc[:,0], pred_worst_stress_levels, average=None)

print("Worst stress levels accuracy is "+ str(score * 100) + " %")
print("Worst stress levels f_1 score ", f1)
print("predicted values", pred_worst_stress_levels)

{'n_estimators': 40} 0.4303097345132743 RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=40, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
Worst stress levels accuracy is 39.43298969072165 %
Worst stress levels f_1 score  [0.3537415  0.18918919 0.54830287 0.21333333 0.        ]
predicted values [3 2 3 2 1 3 3 3 3 3 3 3 3 1 3 2 3 3 1 3 3 1 2 3 3 3 3 2 3 1 2 3 2 1 1 3 1
 3 3 1 2 2 2 4 2 1 2 1 3 1 1 4 1 3 3 3 3 1 3 3 2 3 3 3 3 3 3 2 1 2 3 3 1 3
 2 3 2 1 1 3 3 4 3 3 3 2 3 1 2 4 1 3 3 3 1 3 1 4 1 3 1 3 2 3 2 3 3 3 2 3 3
 1 2 3 1 2 3 3 4 1 3 2 3 3 3 3 3 1 3 2 3 1 2 3 3 3 3 4 3 1 3 3 3 2 2 2 1 1
 2 3 3 1 2 3 2 3 2 1 1 3 3 2 3 3 1 2 3 2 2 3 3 3 3 3 3 2 1 1 3 1 1 3 3 3 3
 3 3 3 

In [13]:
from sklearn.model_selection import GridSearchCV

# Apply gridSearch to search best Random Forest model for Best Stress levels.
worst_stress_levels = train_y.loc[:,"worst_stress_level"]
balanced_train_x, worst_stress_levels = balance_data(train_x, worst_stress_levels)

param_grid = [
    {'n_estimators': [5, 7, 10, 15, 20, 30, 40, 50]},
    {'max_features' : [3 ,5 ,8]},
    {'criterion' : ["gini", "entropy"]}
 ]

clf = GridSearchCV(RandomForestClassifier(), param_grid, cv=5, scoring="accuracy", n_jobs=-1)
clf.fit(balanced_train_x, worst_stress_levels)
print(clf.best_params_, clf.best_score_, clf.best_estimator_)

# predicting only worst stress levels.
neigh = clf.estimator
neigh.fit(balanced_train_x, worst_stress_levels)
pred_worst_stress_levels = neigh.predict(test_x)

score = accuracy_score(test_y.loc[:,"worst_stress_level"], pred_worst_stress_levels, normalize=True)
f1 = f1_score(test_y.iloc[:,0], pred_worst_stress_levels, average=None)

print("Worst stress levels accuracy is "+ str(score * 100) + " %")
print("Worst stress levels f_1 score ", f1)
print("predicted values", pred_worst_stress_levels)

{'n_estimators': 20} 0.843 RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
Worst stress levels accuracy is 33.24742268041237 %
Worst stress levels f_1 score  [0.35761589 0.11347518 0.4756447  0.2020202  0.05555556]
predicted values [4 3 5 2 2 5 3 2 3 3 3 3 3 2 3 4 3 3 1 3 1 1 1 2 1 1 3 4 1 3 2 4 2 4 1 3 1
 2 3 1 4 1 3 2 2 2 3 1 3 1 1 4 1 4 3 2 2 1 2 1 3 3 3 3 4 3 5 2 5 2 2 5 1 3
 3 3 3 3 3 1 3 3 1 3 5 2 1 1 2 3 2 1 2 3 4 1 1 3 1 3 1 3 3 1 2 3 3 3 3 3 5
 1 2 4 3 2 2 3 3 3 4 2 1 1 3 5 3 1 3 3 3 3 2 3 4 2 3 4 3 3 3 1 3 3 3 3 3 3
 2 3 3 3 3 4 2 3 3 4 2 3 3 4 3 3 3 2 3 5 2 3 3 3 1 1 2 3 1 3 3 4 1 2 1 3 4
 3 3 3 3 2 3 1 2 3 3