In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import datasets


In [7]:
digit_data = datasets.load_digits()

image_features = digit_data.images.reshape((len(digit_data.images), -1))
image_targets = digit_data.target

feature_train, feature_test, target_train, target_test = train_test_split(image_features, image_targets, test_size=.2)


In [16]:
random_forest_model = RandomForestClassifier(n_jobs=-1, max_features='sqrt')

param_grid = {
    "n_estimators": [10, 100, 500, 1000],
    "max_depth": [1, 5, 10],
    "min_samples_leaf": [1, 2, 4, 10, 15, 30, 50]
}

grid_search = GridSearchCV(estimator=random_forest_model, param_grid=param_grid, cv=10)
grid_search.fit(feature_train, target_train)
print(grid_search.best_params_)


{'max_depth': 10, 'min_samples_leaf': 1, 'n_estimators': 500}


In [17]:
optimal_estimators = grid_search.best_params_.get("n_estimators")
optimal_depth = grid_search.best_params_.get("max_depth")
optimal_leaf = grid_search.best_params_.get("min_samples_leaf")

print("Optimal n_estimators: %s" % optimal_estimators)
print("Optimal optimal_depth: %s" % optimal_depth)
print("Optimal optimal_leaf: %s" % optimal_leaf)


Optimal n_estimators: 500
Optimal optimal_depth: 10
Optimal optimal_leaf: 1


In [18]:
grid_predictions = grid_search.predict(feature_test)
print(confusion_matrix(target_test, grid_predictions))
print(accuracy_score(target_test, grid_predictions))


[[28  0  0  0  1  0  0  0  0  0]
 [ 0 31  0  0  0  0  0  0  0  0]
 [ 1  1 36  0  0  0  0  0  0  0]
 [ 0  0  0 25  0  0  0  0  0  0]
 [ 0  0  0  0 40  0  0  0  0  0]
 [ 0  0  0  0  0 45  0  0  0  0]
 [ 0  0  0  0  0  0 33  0  0  0]
 [ 0  0  0  0  1  0  0 34  0  0]
 [ 0  2  0  0  0  0  0  0 32  0]
 [ 0  0  0  1  0  1  0  0  0 48]]
0.9777777777777777
