# Hyperparameter Tuning
Hyperparameter tuning is a crucial step in optimizing machine learning models. It involves selecting the set of hyperparameters that gives the best model performance. Here are some common hyperparameter tuning techniques:

# Grid Search:
Grid Search exhaustively searches over a specified parameter grid to find the best combination of hyperparameters.
It evaluates all possible combinations of hyperparameters to determine the best one based on cross-validation performance.
# Random Search:
Random Search randomly samples a specified number of hyperparameter combinations from the given lists or distributions of candidate values.
It is usually cheaper than Grid Search because it does not evaluate every possible combination, and it often finds good hyperparameters in fewer iterations.
# Bayesian Optimization:
Bayesian Optimization builds a probabilistic model of the function mapping hyperparameters to the objective score (e.g., accuracy).
It iteratively updates the model with the scores obtained from evaluating new hyperparameter sets, balancing exploration and exploitation to find the optimal set efficiently.

In [1]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [2]:
# Load the Iris dataset and pull out the feature matrix and the labels.
data = load_iris()
X, y = data.data, data.target

# Hold out 30% of the samples for the final accuracy check; the fixed seed
# keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [3]:
# Base estimator shared by the grid and random searches below. The forest is
# seeded so that every candidate is scored reproducibly, consistent with the
# seeded train/test split and the seeded RandomizedSearchCV; without a seed
# the "best" parameters can change from one run to the next.
model = RandomForestClassifier(random_state=42)

# Candidate values tried exhaustively by Grid Search
# (3 * 4 * 3 = 36 combinations).
param_grid = dict(
    n_estimators=[10, 50, 100],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
)

# Wider candidate lists that Random Search samples from
# (4 * 6 * 5 = 120 possible combinations).
param_dist = dict(
    n_estimators=[10, 50, 100, 200],
    max_depth=[None, 10, 20, 30, 40, 50],
    min_samples_split=[2, 5, 10, 15, 20],
)


In [4]:
# Exhaustive search: every combination in param_grid is scored with
# cv=5 cross-validation on the training split, using n_jobs=-1 for the fits.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Score the winning configuration on the held-out test split.
best_grid = grid_search.best_estimator_
y_pred = best_grid.predict(X_test)

print(f'Grid Search Best Parameters: {grid_search.best_params_}')
print(f'Grid Search Accuracy: {accuracy_score(y_test, y_pred)}')


Grid Search Best Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 10}
Grid Search Accuracy: 1.0


In [5]:
# Randomised search: n_iter=50 combinations are drawn from param_dist and each
# is scored with cv=5 cross-validation; random_state fixes which ones are drawn.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)

# Score the winning configuration on the held-out test split.
best_random = random_search.best_estimator_
y_pred = best_random.predict(X_test)

print(f'Random Search Best Parameters: {random_search.best_params_}')
print(f'Random Search Accuracy: {accuracy_score(y_test, y_pred)}')


Random Search Best Parameters: {'n_estimators': 50, 'min_samples_split': 10, 'max_depth': 40}
Random Search Accuracy: 1.0


In [6]:
# For Bayesian Optimization, we can use libraries like hyperopt:


from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from sklearn.model_selection import cross_val_score

def objective(params):
    """Score one hyperparameter set for hyperopt.

    Builds a RandomForestClassifier from ``params`` and measures its mean
    cross-validation score (cv=5) on the training split. The score is
    negated because it is passed to ``fmin``, which minimises the loss.

    Returns:
        A dict with the negated mean score under ``'loss'`` and
        ``STATUS_OK`` under ``'status'``.
    """
    candidate = RandomForestClassifier(**params)
    fold_scores = cross_val_score(candidate, X_train, y_train, cv=5)
    return {'loss': -fold_scores.mean(), 'status': STATUS_OK}

# space_eval turns the result of fmin back into concrete parameter values.
from hyperopt import space_eval

# Search space for hyperopt. Each hp.choice picks one entry from its list;
# fmin reports the pick as an index into that list, not as the value itself.
param_space = {
    'n_estimators': hp.choice('n_estimators', [10, 50, 100, 200]),
    'max_depth': hp.choice('max_depth', [None, 10, 20, 30, 40, 50]),
    'min_samples_split': hp.choice('min_samples_split', [2, 5, 10, 15, 20])
}

# Run 50 evaluations of the objective with the TPE algorithm; the Trials
# object is handed to fmin to hold the history of the run.
trials = Trials()
best_params = fmin(fn=objective, space=param_space, algo=tpe.suggest, max_evals=50, trials=trials)

# Convert the hp.choice indices returned by fmin into the actual values.
# space_eval reads them from param_space itself, so the candidate lists are
# not repeated here and cannot drift out of sync with the search space.
best_params = space_eval(param_space, best_params)

# Refit a forest with the best parameters and score it on the test split.
best_bayes = RandomForestClassifier(**best_params)
best_bayes.fit(X_train, y_train)
y_pred = best_bayes.predict(X_test)
print(f'Bayesian Optimization Best Parameters: {best_params}')
print(f'Bayesian Optimization Accuracy: {accuracy_score(y_test, y_pred)}')

100%|██████████| 50/50 [00:37<00:00,  1.34trial/s, best loss: -0.9523809523809523]
Bayesian Optimization Best Parameters: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 100}
Bayesian Optimization Accuracy: 1.0
