## Random Forest Classifier

In [41]:
from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
print(random_grid)

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt'], 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4], 'bootstrap': [True, False]}


In [42]:
from sklearn.ensemble import RandomForestClassifier
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestClassifier()
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 5, verbose=10, random_state=42, n_jobs = -1)
# Fit the random search model
rf_random.fit(X_train, y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


RandomizedSearchCV(cv=5, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'bootstrap': [True, False],
                                        'max_depth': [10, 20, 30, 40, 50, 60,
                                                      70, 80, 90, 100, 110,
                                                      None],
                                        'max_features': ['auto', 'sqrt'],
                                        'min_samples_leaf': [1, 2, 4],
                                        'min_samples_split': [2, 5, 10],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=42, verbose=10)

In [43]:
pred = rf_random.predict(X_test)

In [45]:
# evaluate accuracy
acc = accuracy_score(y_test, pred) * 100
print('\nThe accuracy of the random forest classifier is %f%%' % (acc))


The accuracy of the random forest classifier is 70.971429%


In [46]:
print(classification_report(y_test,rf_random.predict(X_test)))

              precision    recall  f1-score   support

           1       0.79      0.77      0.78       273
           2       0.70      0.73      0.71       266
           3       0.66      0.65      0.65       336

    accuracy                           0.71       875
   macro avg       0.71      0.71      0.71       875
weighted avg       0.71      0.71      0.71       875



In [47]:
rf_random.best_params_

{'n_estimators': 400,
 'min_samples_split': 5,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'max_depth': 100,
 'bootstrap': True}

In [48]:
rf_random.best_estimator_

RandomForestClassifier(max_depth=100, max_features='sqrt', min_samples_split=5,
                       n_estimators=400)

In [49]:
best_model = rf_random.best_estimator_

In [51]:
best_model.fit(X_train, y_train)
pred = best_model.predict(X_test)

# evaluate accuracy
acc = accuracy_score(y_test, pred) * 100
print('\nThe accuracy of the random forest classifier is %f%%' % (acc))


The accuracy of the random forest classifier is 70.742857%


In [52]:
print(classification_report(y_test,best_model.predict(X_test)))

              precision    recall  f1-score   support

           1       0.78      0.77      0.77       273
           2       0.70      0.73      0.71       266
           3       0.66      0.64      0.65       336

    accuracy                           0.71       875
   macro avg       0.71      0.71      0.71       875
weighted avg       0.71      0.71      0.71       875

