# Bayesian Optimization for Hyperparameter Tuning

In [2]:
pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
   395.9/395.9 kB 3.8 MB/s eta 0:00:00
Downloading alembic-1.16.2-py3-none-any.whl (242 kB)
   242.7/242.7 kB 13.9 MB/s eta 0:00:00
Downloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.2 colorlog-6.9.0 optuna-4.4.0


**Step 1: Import and Prepare Data**

In [4]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import optuna

In [5]:
# Load the iris features (x) and class labels (y).
x, y = load_iris(return_X_y=True)

# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

**Step 2: Define the Objective Function**

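This function:

*  Takes a `trial`, which Optuna uses to suggest a value for each hyperparameter
*  Builds a random forest model with the suggested values
*  Runs 5-fold cross-validation on the training data
*  Returns the mean accuracy, which the study tries to maximize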

In [8]:
def objective(trial):
    """Score one hyperparameter configuration proposed by ``trial``.

    Asks the trial for ``n_estimators``, ``max_depth`` and ``criterion``,
    builds a random forest with those settings, and evaluates it with
    5-fold cross-validation on ``x_train`` / ``y_train``.

    Returns:
        The mean accuracy over the 5 cross-validation folds.
    """
    # Search space: values are requested in the same order as the keys below.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
    }
    forest = RandomForestClassifier(random_state=42, **params)

    # One accuracy value per fold; the study optimizes their mean.
    fold_scores = cross_val_score(forest, x_train, y_train, cv=5, scoring='accuracy')
    return fold_scores.mean()


**Step 3: Run the Optimization**

In [9]:
# Seed the sampler so the sequence of suggested hyperparameters -- and therefore
# the best trial reported below -- is the same on every Restart & Run All.
# TPESampler is Optuna's default sampler for single-objective studies, so the
# search algorithm is unchanged; only its randomness is pinned.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20)  # Evaluate 20 sampler-suggested configurations


[I 2025-06-19 07:03:49,927] A new study created in memory with name: no-name-ad636e8d-b936-4dbb-a600-c71529977521
[I 2025-06-19 07:03:52,273] Trial 0 finished with value: 0.95 and parameters: {'n_estimators': 224, 'max_depth': 14, 'criterion': 'entropy'}. Best is trial 0 with value: 0.95.
[I 2025-06-19 07:03:54,613] Trial 1 finished with value: 0.95 and parameters: {'n_estimators': 264, 'max_depth': 8, 'criterion': 'entropy'}. Best is trial 0 with value: 0.95.
[I 2025-06-19 07:03:55,182] Trial 2 finished with value: 0.95 and parameters: {'n_estimators': 58, 'max_depth': 8, 'criterion': 'entropy'}. Best is trial 0 with value: 0.95.
[I 2025-06-19 07:03:58,051] Trial 3 finished with value: 0.95 and parameters: {'n_estimators': 241, 'max_depth': 14, 'criterion': 'entropy'}. Best is trial 0 with value: 0.95.
[I 2025-06-19 07:03:58,996] Trial 4 finished with value: 0.95 and parameters: {'n_estimators': 87, 'max_depth': 3, 'criterion': 'gini'}. Best is trial 0 with value: 0.95.
[I 2025-06-19 

**Step 4: Get Best Hyperparameters**

In [10]:
# Show the hyperparameters of the highest-scoring trial in the study.
print(f"Best Parameters: {study.best_params}")

Best Parameters: {'n_estimators': 295, 'max_depth': 7, 'criterion': 'entropy'}


**Step 5: Evaluate Best Model**

In [12]:
# Refit a forest on the whole training split using the study's best
# hyperparameters (fit() returns the estimator itself, so it can be chained).
best_params = study.best_params
best_model = RandomForestClassifier(random_state=42, **best_params).fit(x_train, y_train)

# Score the refit model on the held-out test split.
y_pred = best_model.predict(x_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))


Test Accuracy: 1.0
