## Hyperparameter Tuning using Optuna

In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns 

In [2]:
healthexp = sns.load_dataset('healthexp')

In [11]:
healthexp

X = healthexp[['Year', 'Country', 'Spending_USD']]

y = healthexp[['Life_Expectancy']]

In [15]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(drop='first')

enc.fit(X_train[['Country']])

encoded_columns = list(enc.get_feature_names_out(['Country']))

X_train[encoded_columns] = enc.transform(X_train[['Country']]).toarray()

X_val[encoded_columns] = enc.transform(X_val[['Country']]).toarray()

In [24]:
X_train.drop(['Country'], inplace=True, axis=1)
X_val.drop(['Country'], inplace=True, axis=1)


In [32]:
from sklearn.metrics import r2_score

def metrics(p, X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val): 

    p.fit(X_train, y_train)

    y_preds = p.predict(X_val)

    print(f'R^2 Score is {r2_score(y_true=y_val, y_pred=y_preds)}')


In [34]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression(n_jobs=-1)

metrics(lr)

R^2 Score is 0.9887186991451886


In [35]:
from sklearn.ensemble import RandomForestRegressor

rfr = RandomForestRegressor()

metrics(rfr)

R^2 Score is 0.9869062790406754


  return fit_method(estimator, *args, **kwargs)


In [None]:
# Stacking

from sklearn.ensemble import StackingRegressor

sr = StackingRegressor(estimators=[
    ('rfr', RandomForestRegressor()),
    ('lr', LinearRegression())
])

metrics(sr)

  y = column_or_1d(y, warn=True)


R^2 Score is 0.9938283657031628


In [None]:
# Hyperparameter Fine Tuning with Random Search CV and Grid Search CV 

In [41]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from scipy.stats import randint

param_grid = {
    'n_estimators': randint(low=40, high=600),
    'max_depth': randint(low=1, high = 70)
}

rnd_search = RandomizedSearchCV(rfr, param_distributions=param_grid, n_iter=50, cv=4, scoring='r2', random_state=42)

rnd_search.fit(X_train, y_train)

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **

In [42]:
rnd_search.best_params_

{'max_depth': 14, 'n_estimators': 281}

In [52]:
# Hyperparameter Fine Tuning Using Optuna 

import optuna

from sklearn.model_selection import cross_val_score

from optuna import Trial

def objective(trial: Trial): 

    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    max_depth = trial.suggest_int('max_depth', 10, 50)
    min_sample_leaf = trial.suggest_int('min_sample_leaf', 1, 32)

    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_leaf=min_sample_leaf
    )

    score = cross_val_score(model, X_train, y_train, cv=4, scoring='r2')

    return score.mean()


In [53]:
study = optuna.create_study(direction='maximize')

[I 2025-09-04 16:34:52,997] A new study created in memory with name: no-name-5877de94-3824-4cdb-b923-b258aff53439


In [54]:
study.optimize(objective, n_trials=30)

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
[I 2025-09-04 16:35:28,611] Trial 0 finished with value: 0.7862446013469162 and parameters: {'n_estimators': 121, 'max_depth': 46, 'min_sample_leaf': 15}. Best is trial 0 with value: 0.7862446013469162.
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
[I 2025-09-04 16:35:29,129] Trial 1 finished with value: 0.8675513354239175 and parameters: {'n_estimators': 282, 'max_depth': 10, 'min_sample_leaf': 8}. Best is trial 1 with value: 0.8675513354239175.
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
[I 2025-09-04 16:35