## GridSearch vs RandomSearch

In [1]:
from time import perf_counter, time

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

In [2]:
# Generate reproducible sample data.
# A seeded generator is used so that Restart & Run All yields the same X and y.
# NOTE: y is drawn independently of X, so the labels carry no learnable signal;
# any "best" hyperparameters found on this data reflect noise.
rng = np.random.default_rng(42)
X = rng.random((200, 10))         # 200 samples x 10 features, uniform in [0, 1)
y = rng.integers(0, 2, size=200)  # 200 binary labels in {0, 1}

In [3]:
# Define the model and the hyperparameter search space.
# random_state is fixed on the estimator so that repeated runs (and both
# search strategies) build identically seeded forests.
model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 5, 10, 15],
    # 11 evenly spaced values from 0.1 to 1.0 (step 0.09). scikit-learn
    # interprets float values here as a fraction of the number of samples.
    'min_samples_split': np.linspace(0.1, 1, 11),
}
# The full grid therefore holds 4 * 4 * 11 = 176 parameter combinations.

In [4]:
# Randomized search: instead of trying every combination, evaluate only
# n_iter candidates sampled from the search space, each scored with
# 5-fold cross-validation. random_state fixes which candidates are sampled.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    random_state=42,
)

In [5]:
# Time the randomized search.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; unlike time(), it is unaffected by system clock adjustments.
start = perf_counter()
random_search.fit(X, y)
end = perf_counter()
rtime = end - start  # elapsed time in seconds

In [6]:
# Exhaustive search: every combination in param_grid is evaluated,
# each scored with 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
)

In [14]:
# Time the exhaustive grid search.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; unlike time(), it is unaffected by system clock adjustments.
start = perf_counter()
grid_search.fit(X, y)
end = perf_counter()
gtime = end - start  # elapsed time in seconds

In [11]:
# Report the winning parameter combination from each search, one per line
print(
    f"Best hyperparameters found by RandomizedSearchCV: {random_search.best_params_}",
    f"Best hyperparameters found by GridSearchCV: {grid_search.best_params_}",
    sep="\n",
)

Best hyperparameters found by RandomizedSearchCV: {'n_estimators': 10, 'min_samples_split': 0.45999999999999996, 'max_depth': None}
Best hyperparameters found by GridSearchCV: {'max_depth': 5, 'min_samples_split': 0.64, 'n_estimators': 10}


In [12]:
# Report how long each search took to fit (values are in seconds), one per line
print(
    f"Duration of RandomizedSearchCV: {rtime}",
    f"Duration of GridSearchCV: {gtime}",
    sep="\n",
)

Duration of RandomizedSearchCV: 12.340409994125366
Duration of GridSearchCV: 189.2243127822876
