In [0]:
%matplotlib inline

In [1]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

In [2]:
# get data
digits = load_digits()  # Source : http://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits
X, y = digits.data, digits.target

In [3]:
# build a classifier
clf = RandomForestClassifier(n_estimators=50)

In [4]:
# specify parameters and distributions to sample from
param_dist = {"max_depth": [3, None],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "min_samples_leaf": sp_randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [5]:
# run randomized search
samples = 10  # number of random samples 
randomCV = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=samples) #default cv = 3

In [6]:
randomCV.fit(X, y)

 
print(randomCV.best_params_)


{'bootstrap': False, 'criterion': 'gini', 'max_depth': None, 'max_features': 3, 'min_samples_leaf': 1, 'min_samples_split': 10}


In [7]:
# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [8]:
# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)

GridSearchCV(estimator=RandomForestClassifier(n_estimators=50),
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'max_depth': [3, None], 'max_features': [1, 3, 10],
                         'min_samples_leaf': [1, 3, 10],
                         'min_samples_split': [2, 3, 10]})

In [9]:
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 10,
 'min_samples_leaf': 1,
 'min_samples_split': 2}

In [10]:
grid_search.cv_results_['mean_test_score']

array([0.81246828, 0.83697307, 0.81916435, 0.7991628 , 0.80971526,
       0.82303312, 0.81640359, 0.81581244, 0.81136955, 0.83193903,
       0.8330455 , 0.83918137, 0.84752708, 0.81914113, 0.82918137,
       0.84196379, 0.83137728, 0.82526153, 0.82305478, 0.80301145,
       0.82137574, 0.83194212, 0.82750696, 0.81747601, 0.84586041,
       0.81692974, 0.82581863, 0.92601207, 0.92489632, 0.91041473,
       0.90819406, 0.91765552, 0.90042402, 0.88483751, 0.87647168,
       0.88871402, 0.93546116, 0.93157536, 0.92878985, 0.92878521,
       0.93156763, 0.92655989, 0.90430053, 0.91264469, 0.89985299,
       0.93379604, 0.93324048, 0.92378675, 0.92878675, 0.92378366,
       0.92377747, 0.90819406, 0.90708604, 0.90763695, 0.81971835,
       0.79691736, 0.80635871, 0.82531724, 0.81915661, 0.7957784 ,
       0.81023522, 0.80469359, 0.79412102, 0.83419375, 0.83028165,
       0.84030486, 0.83809037, 0.83696688, 0.84031105, 0.84921387,
       0.84698391, 0.84029248, 0.83083256, 0.83251006, 0.83192

In [11]:
grid_search.best_estimator_

RandomForestClassifier(bootstrap=False, max_features=10, n_estimators=50)