<h1>Hyperparameter Tuning</h1>

<h2>1. RandomizedSearchCV</h2>

In [1]:
from sklearn.model_selection import RandomizedSearchCV 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.datasets import load_iris
import numpy as np

# Fetch the iris dataset and unpack it into the feature matrix (X)
# and the target labels (y) used by the search below.
iris = load_iris()
X, y = iris.data, iris.target

In [2]:
# Create a Random Forest classifier.
# random_state is pinned so the forest's internal randomness (bootstrap draws,
# per-split feature sampling) is repeatable when the notebook is re-run.
rfc = RandomForestClassifier(random_state=42)

# Define the hyperparameter distribution to sample from.
# Each key is a RandomForestClassifier constructor argument and each list holds
# the candidate values the randomized search may pick for it.
param_dist = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [3, 5, 7, 9, 11, 13, 15, None],  # None = no depth limit
    'min_samples_split': [2, 3, 4, 5, 6, 7, 8, 9, 10],
    'min_samples_leaf': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'bootstrap': [True, False],
    'criterion': ['gini', 'entropy'],
}

In [5]:
# Perform randomized search with 5-fold cross-validation and the accuracy score
# as the scoring metric.
# n_iter, cv and scoring are passed explicitly instead of relying on library
# defaults, so the code matches the description above regardless of the
# installed scikit-learn version. random_state fixes which parameter
# combinations are sampled, making the search repeatable.
random_search = RandomizedSearchCV(
    rfc,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
random_search.fit(X, y)

# Print the best hyperparameters and the corresponding mean cross-validated accuracy
print("Best hyperparameters:", random_search.best_params_)
print("Best accuracy score:", random_search.best_score_)

Best hyperparameters: {'n_estimators': 100, 'min_samples_split': 8, 'min_samples_leaf': 8, 'max_depth': 11, 'criterion': 'gini', 'bootstrap': True}
Best accuracy score: 0.96


<h2>2. GridSearchCV</h2>

In [6]:
from sklearn.model_selection import GridSearchCV 
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris

# Load the iris dataset for the grid-search section.
# This rebinds iris, X and y to the features and labels of a newly loaded copy.
iris = load_iris()
X, y = iris.data, iris.target

In [7]:
# Create a Decision Tree classifier.
# random_state is pinned because the tree permutes features at each split, so
# ties between equally good splits could otherwise be broken differently on
# each run and change which parameter combination wins.
dtc = DecisionTreeClassifier(random_state=42)

# Define the hyperparameter grid to search over.
# Every combination of these values is evaluated (2 * 4 * 4 * 4 = 128 candidates).
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [3, 4, 5, 6],
    'min_samples_split': [2, 3, 4, 5],
    'min_samples_leaf': [1, 2, 3, 4],
}

# Perform grid search with 5-fold cross-validation and the accuracy score as the scoring metric
grid_search = GridSearchCV(dtc, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)

# Print the best hyperparameters and the corresponding mean cross-validated accuracy
print("Best hyperparameters:", grid_search.best_params_)
print("Best accuracy score:", grid_search.best_score_)

Best hyperparameters: {'criterion': 'gini', 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 3}
Best accuracy score: 0.9733333333333334
