In [33]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [2]:
# Load scikit-learn's bundled handwritten-digits dataset.
# Source: https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
digits = load_digits()
X = digits.data    # feature matrix
y = digits.target  # class labels

In [3]:
# Peek at the feature matrix taken from `digits.data`.
X

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [4]:
# Peek at the target vector taken from `digits.target`.
y

array([0, 1, 2, ..., 8, 9, 8])

In [5]:
# Build the base classifier: a random forest with 50 trees.
# This is the estimator handed to the randomized search below.
clf = RandomForestClassifier(n_estimators=50)

In [10]:
# Search space for the randomized search over the random forest.
# Lists are sampled uniformly from their entries; the sp_randint objects are
# frozen scipy integer distributions (upper bound exclusive).
param_dist = dict(
    max_depth=[3, None],
    max_features=sp_randint(1, 11),
    min_samples_split=sp_randint(2, 11),
    min_samples_leaf=sp_randint(1, 11),
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

In [28]:
# run randomized search
samples = 5  # number of random parameter settings to draw (passed as n_iter)
# cv is pinned to 3 folds explicitly; scikit-learn's own default has been
# 5-fold since v0.22, so 3 is a choice made here, not the library default.
# random_state fixes which candidates get sampled, in line with the seeded
# XGBoost search in this notebook. The forest itself is still unseeded, so
# scores can vary slightly from run to run.
randomCV = RandomizedSearchCV(clf, param_distributions=param_dist,
                              n_iter=samples, cv=3, random_state=1)

In [29]:
# Run the randomized search on the full data set.
randomCV.fit(X, y)


# Show the best-scoring parameter setting found by the search.
randomCV.best_params_


{'bootstrap': False,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 7,
 'min_samples_leaf': 4,
 'min_samples_split': 10}

In [35]:
# Candidate values for the XGBoost hyper-parameters.
param_grid = dict(
    n_estimators=[50, 100, 150],
    learning_rate=[0.01, 0.1, 0.2],
    gamma=[0, 1, 3],
)

xgb = XGBClassifier(random_state=1)

# Randomized search over the candidates above: 10 sampled settings,
# 5-fold cross-validation, seeded for repeatable sampling.
# NOTE(review): this rebinds `clf`, which earlier held the
# RandomForestClassifier; later cells that use `clf` see this search object.
clf = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    random_state=1,
)

clf.fit(X, y)













































































































































































































RandomizedSearchCV(cv=5,
                   estimator=XGBClassifier(base_score=None, booster=None,
                                           colsample_bylevel=None,
                                           colsample_bynode=None,
                                           colsample_bytree=None, gamma=None,
                                           gpu_id=None, importance_type='gain',
                                           interaction_constraints=None,
                                           learning_rate=None,
                                           max_delta_step=None, max_depth=None,
                                           min_child_weight=None, missing=nan,
                                           monotone_constraints=None,
                                           n_estimators=100, n_jobs=None,
                                           num_parallel_tree=None,
                                           random_state=1, reg_alpha=None,
                             

In [37]:
# Mean cross-validated test score of each sampled setting
# (`clf` is the XGBoost randomized search at this point).
clf.cv_results_['mean_test_score']

array([0.90930672, 0.90931291, 0.85366605, 0.9131987 , 0.89651037,
       0.87814918, 0.86813525, 0.89483906, 0.91931755, 0.88649644])

In [30]:
# Mean cross-validated test score of each setting sampled by the
# random-forest randomized search.
randomCV.cv_results_['mean_test_score']

array([0.8130217 , 0.93043962, 0.82359488, 0.80634391, 0.81135225])

In [13]:
# Exhaustive grid for the random forest: every combination of the values
# listed below is evaluated by the grid search.
param_grid = dict(
    max_depth=[3, None],
    max_features=[1, 3, 10],
    min_samples_split=[2, 3, 10],
    min_samples_leaf=[1, 3, 10],
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

In [14]:
# run grid search
# Build a fresh forest here instead of reusing `clf`: the XGBoost cell rebinds
# `clf` to a RandomizedSearchCV, so in a top-to-bottom run GridSearchCV would
# otherwise receive the wrong estimator together with a random-forest grid.
grid_search = GridSearchCV(RandomForestClassifier(n_estimators=50),
                           param_grid=param_grid)
start = time()  # wall-clock start; no elapsed time is computed in this cell
grid_search.fit(X, y)

GridSearchCV(estimator=RandomForestClassifier(n_estimators=50),
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'max_depth': [3, None], 'max_features': [1, 3, 10],
                         'min_samples_leaf': [1, 3, 10],
                         'min_samples_split': [2, 3, 10]})

In [15]:
# Best parameter combination found by the exhaustive grid search.
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 3}

In [18]:
# Highest mean cross-validated test score across all grid points.
grid_search.cv_results_['mean_test_score'].max()

0.9465939337666356

In [20]:
# Estimator selected by the grid search.
grid_search.best_estimator_

RandomForestClassifier(bootstrap=False, criterion='entropy', max_features=3,
                       min_samples_split=3, n_estimators=50)

In [21]:
# Estimator selected by the randomized search, for comparison with the grid search.
randomCV.best_estimator_

RandomForestClassifier(bootstrap=False, max_features=8, min_samples_leaf=6,
                       min_samples_split=9, n_estimators=50)

In [22]:
# Full configuration of the randomized search object, including the nested
# `estimator__*` settings of the wrapped forest.
randomCV.get_params()

{'cv': None,
 'error_score': nan,
 'estimator__bootstrap': True,
 'estimator__ccp_alpha': 0.0,
 'estimator__class_weight': None,
 'estimator__criterion': 'gini',
 'estimator__max_depth': None,
 'estimator__max_features': 'auto',
 'estimator__max_leaf_nodes': None,
 'estimator__max_samples': None,
 'estimator__min_impurity_decrease': 0.0,
 'estimator__min_impurity_split': None,
 'estimator__min_samples_leaf': 1,
 'estimator__min_samples_split': 2,
 'estimator__min_weight_fraction_leaf': 0.0,
 'estimator__n_estimators': 50,
 'estimator__n_jobs': None,
 'estimator__oob_score': False,
 'estimator__random_state': None,
 'estimator__verbose': 0,
 'estimator__warm_start': False,
 'estimator': RandomForestClassifier(n_estimators=50),
 'n_iter': 10,
 'n_jobs': None,
 'param_distributions': {'max_depth': [3, None],
  'max_features': <scipy.stats._distn_infrastructure.rv_frozen at 0x7fcf3264cd00>,
  'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen at 0x7fcf3264c940>,
  'min_sampl

In [23]:
# Full configuration of the grid search object, including the nested
# `estimator__*` settings of the wrapped forest.
grid_search.get_params()

{'cv': None,
 'error_score': nan,
 'estimator__bootstrap': True,
 'estimator__ccp_alpha': 0.0,
 'estimator__class_weight': None,
 'estimator__criterion': 'gini',
 'estimator__max_depth': None,
 'estimator__max_features': 'auto',
 'estimator__max_leaf_nodes': None,
 'estimator__max_samples': None,
 'estimator__min_impurity_decrease': 0.0,
 'estimator__min_impurity_split': None,
 'estimator__min_samples_leaf': 1,
 'estimator__min_samples_split': 2,
 'estimator__min_weight_fraction_leaf': 0.0,
 'estimator__n_estimators': 50,
 'estimator__n_jobs': None,
 'estimator__oob_score': False,
 'estimator__random_state': None,
 'estimator__verbose': 0,
 'estimator__warm_start': False,
 'estimator': RandomForestClassifier(n_estimators=50),
 'n_jobs': None,
 'param_grid': {'max_depth': [3, None],
  'max_features': [1, 3, 10],
  'min_samples_split': [2, 3, 10],
  'min_samples_leaf': [1, 3, 10],
  'bootstrap': [True, False],
  'criterion': ['gini', 'entropy']},
 'pre_dispatch': '2*n_jobs',
 'refit': Tr