In [1]:
import numpy as np
import pandas as pd


from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, RandomizedSearchCV
from sklearn.linear_model import LinearRegression , Ridge , Lasso, LogisticRegression
from sklearn.preprocessing import StandardScaler, RobustScaler, QuantileTransformer, OneHotEncoder, FunctionTransformer, PolynomialFeatures, MaxAbsScaler, MinMaxScaler
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score,accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_iris
import warnings
warnings.filterwarnings('ignore')

# **Why Optuna is usually better than Grid / Random search**

* *Smarter sampling (Bayesian/TPE) — Optuna’s default TPESampler models which regions of the search space work well and focuses future trials there. Grid/Random are blind.*

* *Fewer trials → less compute — because it learns from past trials, Optuna often finds good hyperparams with far fewer evaluations.*

* *Early stopping / pruning — Optuna can stop unpromising trials early (via pruners), saving huge time for expensive trainings (e.g., deep nets or large XGBoost runs).*

* *Flexible search spaces — handles continuous ranges, log scales, categorical choices, conditional hyperparameters (e.g., only suggest subsample if booster == 'gbtree').*

* *Good reproducibility & logging — persistent storage (RDB), visualization tools (plot_optimization_history, plot_param_importances) and trial history make analysis easy.*

* *Parallel & distributed-friendly — can run trials in parallel with a shared study DB (and integrates with Ray Tune, Dask, etc.).*

* *Multi-objective — supports optimizing multiple objectives (e.g., accuracy and inference time).*
---
# **When to use each method**
## **Grid Search**
Use when:

* *The search space is very small and you want exhaustive coverage (e.g., 2–3 hyperparams with a few values each).*

*  *You need deterministic, exhaustive evaluation for reproducibility or debugging.*

Don't use when:
* You have many hyperparams or large ranges — grid explodes combinatorially.*

## **Randomized Search**
Use when:

* *You have many hyperparameters or large ranges and want a cheap baseline.*

* *Evaluation is cheap and you can try many random combos.*

* *You want a simple, robust baseline before applying smarter methods.*

Don't use when:
* *Each trial is expensive it wastes budget on poor regions because it doesn't learn.*

## **Optuna (Bayesian/TPE + pruning)**
Use when:

* *Trials are expensive (training large models, long CV). Optuna will find good settings faster.*

* *You have continuous hyperparams, conditional parameters, or need pruning.*

* *You want visualizations, persistent results, and easy integration.*


Don’t use when:

* *Search dimension is extremely high (>>20–30) and individual parameters are independent — random search may be competitive.*

* *You want a tiny, guaranteed exhaustive search (then Grid is fine).*
---
## **Other hyperparameter optimization tools / frameworks (short list & when to pick them)**

| **Tool**          | **Algo(s)**                        | **Best for**                                                   |
| ------------- | ------------------------------ | ---------------------------------------------------------- |
| Optuna        | TPE + pruners, multi-objective | Expensive trials, conditional spaces, parallel runs        |
| Hyperopt      | TPE                            | Lightweight TPE usage                                      |
| skopt         | GP                             | Low-dim continuous problems                                |
| Ax/BoTorch    | GP, Botorch                    | Research/complex Bayesian optimization                     |
| SMAC3         | RF surrogate                   | Categorical/conditional heavy spaces                       |
| BOHB          | BO + Hyperband                 | Multi-fidelity, many cheap proxies                         |
| Ray Tune      | Orchestration                  | Distributed large-scale tuning (works with many searchers) |
| Grid / Random | Exhaustive/Random              | Simple baselines or tiny spaces                            |


#GridSearchCV

In [46]:
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [7]:
model = RandomForestClassifier()
model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [3]:
# Hyperparameter grid
param_grid = {
    'n_estimators': [10,50,100,500],
    'max_depth': [2, 4, 8, 16, 32, 64, None],
    'min_samples_split':[2,10, 30, 50, 100, 200, 300, 700]
}

# GridSearchCV object
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')

# Fit
grid_search.fit(X_train, y_train)

# Best parameters
print("Best parameters:", grid_search.best_params_)

Best parameters: {'max_depth': None, 'min_samples_split': 10, 'n_estimators': 50}
Best CV accuracy: 0.9636363636363636
Test accuracy: 1.0


In [4]:
# Best score
print("Best CV accuracy:", grid_search.best_score_)
# Evaluate on test set
print("Test accuracy:", grid_search.score(X_test, y_test))
print("Train accuracy:", grid_search.score(X_train,y_train))

Train accuracy: 0.9553571428571429


# RandomizedSearchCv

In [30]:
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [33]:
pipe=Pipeline([
    ('scaler',StandardScaler()),
    ('model',XGBClassifier(
        objective='multi:softprob',num_class=7,eval_metric='mlogloss',use_label_encoder=False,verbosity=0
        ))
])

#pipe.get_params()

In [34]:
import scipy.stats as stats
pram_grid={
    'model__n_estimators':[50,100,200],
    'model__max_depth':stats.randint(4,10),
    'model__learning_rate':[0.01,0.1,0.3],
    'model__reg_lambda':[0.01,0.1,1,10,100],
    'model__reg_alpha':[0.01,0.1,1,10,100],
    'scaler':[StandardScaler(),RobustScaler(),MaxAbsScaler(),MinMaxScaler()]
}

grid=RandomizedSearchCV(pipe,pram_grid,cv=5,n_jobs=-1,verbose=1,scoring='accuracy')
grid.fit(X_train,y_train)


best_params=grid.best_params_

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [35]:
pipe.set_params(**best_params)

pipe.fit(X_train,y_train,model__eval_set=[(X_test,y_test)])

[0]	validation_0-mlogloss:1.10090
[1]	validation_0-mlogloss:1.10340
[2]	validation_0-mlogloss:1.10610
[3]	validation_0-mlogloss:1.10898
[4]	validation_0-mlogloss:1.11205
[5]	validation_0-mlogloss:1.11529
[6]	validation_0-mlogloss:1.11869
[7]	validation_0-mlogloss:1.12226
[8]	validation_0-mlogloss:1.12597
[9]	validation_0-mlogloss:1.12983
[10]	validation_0-mlogloss:1.13382
[11]	validation_0-mlogloss:1.13794
[12]	validation_0-mlogloss:1.14219
[13]	validation_0-mlogloss:1.14655
[14]	validation_0-mlogloss:1.15102
[15]	validation_0-mlogloss:1.15560
[16]	validation_0-mlogloss:1.16028
[17]	validation_0-mlogloss:1.16505
[18]	validation_0-mlogloss:1.16990
[19]	validation_0-mlogloss:1.17484
[20]	validation_0-mlogloss:1.17985
[21]	validation_0-mlogloss:1.18494
[22]	validation_0-mlogloss:1.19009
[23]	validation_0-mlogloss:1.19531
[24]	validation_0-mlogloss:1.20058
[25]	validation_0-mlogloss:1.20591
[26]	validation_0-mlogloss:1.21129
[27]	validation_0-mlogloss:1.21671
[28]	validation_0-mlogloss:1.2

# Optuna

In [36]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.5.0


In [37]:
import optuna

In [38]:
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [39]:
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 10, 200)                       #write the range of the parameter
    max_depth = trial.suggest_int('max_depth', 2, 32)
    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    return accuracy_score(y_test, y_pred)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

study.best_params

[I 2025-10-08 17:39:44,988] A new study created in memory with name: no-name-629af135-c8fd-407e-a843-3abd4d3350cf
[I 2025-10-08 17:39:45,147] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 58, 'max_depth': 25}. Best is trial 0 with value: 1.0.
[I 2025-10-08 17:39:45,247] Trial 1 finished with value: 1.0 and parameters: {'n_estimators': 35, 'max_depth': 24}. Best is trial 0 with value: 1.0.
[I 2025-10-08 17:39:45,357] Trial 2 finished with value: 1.0 and parameters: {'n_estimators': 35, 'max_depth': 27}. Best is trial 0 with value: 1.0.
[I 2025-10-08 17:39:45,660] Trial 3 finished with value: 1.0 and parameters: {'n_estimators': 89, 'max_depth': 22}. Best is trial 0 with value: 1.0.
[I 2025-10-08 17:39:46,267] Trial 4 finished with value: 1.0 and parameters: {'n_estimators': 200, 'max_depth': 22}. Best is trial 0 with value: 1.0.
[I 2025-10-08 17:39:46,935] Trial 5 finished with value: 1.0 and parameters: {'n_estimators': 170, 'max_depth': 30}. Best is trial 0 with va

{'n_estimators': 58, 'max_depth': 25}

In [41]:
import optuna
from optuna.samplers import TPESampler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, RobustScaler, MaxAbsScaler, MinMaxScaler
from xgboost import XGBClassifier
import numpy as np

def objective(trial):

    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 4, 10)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3, log=True)
    reg_lambda = trial.suggest_float('reg_lambda', 0.01, 100, log=True)
    reg_alpha = trial.suggest_float('reg_alpha', 0.01, 100, log=True)
    scaler_name = trial.suggest_categorical('scaler',
                    ['standard', 'robust', 'maxabs', 'minmax'])


    scalers = {
        'standard': StandardScaler(),
        'robust': RobustScaler(),
        'maxabs': MaxAbsScaler(),
        'minmax': MinMaxScaler()
    }
    scaler = scalers[scaler_name]


    pipe = Pipeline([
        ('scaler', scaler),
        ('model', XGBClassifier(
            objective='multi:softprob',
            num_class=7,
            eval_metric='mlogloss',
            use_label_encoder=False,
            verbosity=0,
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            reg_lambda=reg_lambda,
            reg_alpha=reg_alpha
        ))
    ])

    scores = cross_val_score(pipe, X_train, y_train, cv=5, scoring='accuracy', n_jobs=-1)
    return np.mean(scores)


In [42]:
study = optuna.create_study(
    direction='maximize',
    sampler=TPESampler(seed=42)
)

study.optimize(objective, n_trials=50, show_progress_bar=True)

[I 2025-10-08 17:48:33,814] A new study created in memory with name: no-name-99aadb69-119c-437b-a719-1d7e15b6076f


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-10-08 17:48:36,091] Trial 0 finished with value: 0.9371541501976285 and parameters: {'n_estimators': 106, 'max_depth': 10, 'learning_rate': 0.1205712628744377, 'reg_lambda': 2.481040974867813, 'reg_alpha': 0.04207988669606638, 'scaler': 'maxabs'}. Best is trial 0 with value: 0.9371541501976285.
[I 2025-10-08 17:48:36,338] Trial 1 finished with value: 0.9371541501976285 and parameters: {'n_estimators': 156, 'max_depth': 4, 'learning_rate': 0.2708160864249968, 'reg_lambda': 21.368329072358772, 'reg_alpha': 0.07068974950624607, 'scaler': 'minmax'}. Best is trial 0 with value: 0.9371541501976285.
[I 2025-10-08 17:48:36,515] Trial 2 finished with value: 0.9371541501976285 and parameters: {'n_estimators': 115, 'max_depth': 6, 'learning_rate': 0.08012737503998542, 'reg_lambda': 0.03613894271216528, 'reg_alpha': 0.14742753159914673, 'scaler': 'maxabs'}. Best is trial 0 with value: 0.9371541501976285.
[I 2025-10-08 17:48:36,721] Trial 3 finished with value: 0.9454545454545455 and parame

In [45]:
optuna.visualization.plot_optimization_history(study)
optuna.visualization.plot_param_importances(study)

In [43]:
print("Best Accuracy:", study.best_value)
print("Best Params:", study.best_params)

Best Accuracy: 0.9545454545454547
Best Params: {'n_estimators': 166, 'max_depth': 6, 'learning_rate': 0.026477567045245964, 'reg_lambda': 0.014264892990890433, 'reg_alpha': 4.018492967981209, 'scaler': 'maxabs'}


In [44]:
best_params = study.best_params
best_scaler = {
    'standard': StandardScaler(),
    'robust': RobustScaler(),
    'maxabs': MaxAbsScaler(),
    'minmax': MinMaxScaler()
}[best_params['scaler']]

best_model = Pipeline([
    ('scaler', best_scaler),
    ('model', XGBClassifier(
        objective='multi:softprob',
        num_class=7,
        eval_metric='mlogloss',
        use_label_encoder=False,
        verbosity=0,
        **{k: v for k, v in best_params.items() if k != 'scaler'}
    ))
])
#best_model.fit(X_train, y_train)