# AutoML and Hyperparameter Optimization

This notebook demonstrates:
- Optuna for hyperparameter optimization
- Pruning (early stopping of bad trials)
- Multi-objective optimization
- Comparing HPO strategies

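**Requirements**: `pip install optuna plotly` (Plotly is needed for the `optuna.visualization` plots; scikit-learn, pandas, and matplotlib are assumed to be installed)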

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import optuna
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Keep cell output readable: only show Optuna messages at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load the California housing data as pandas objects (features and target).
housing = fetch_california_housing(as_frame=True)
X = housing.data
y = housing.target

# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f'Train: {X_train.shape}, Test: {X_test.shape}')

## 1. Basic Optuna

In [None]:
# Why: Optuna uses Bayesian optimization (Tree-structured Parzen Estimator) to sample
# promising hyperparameter regions, converging much faster than grid search. suggest_float
# with log=True samples learning_rate on a log scale because its effect on performance
# is multiplicative, not additive.
def objective(trial):
    """Score one sampled gradient-boosting configuration for the single-objective study.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial handle used to sample the five hyperparameters below.

    Returns
    -------
    float
        Mean squared error averaged over 5 cross-validation folds of the
        training data (positive; lower is better).
    """
    # Keyword arguments are evaluated left to right, so the hyperparameters are
    # suggested in the order they are written here.
    model = GradientBoostingRegressor(
        n_estimators=trial.suggest_int('n_estimators', 50, 500),
        max_depth=trial.suggest_int('max_depth', 2, 10),
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        subsample=trial.suggest_float('subsample', 0.5, 1.0),
        min_samples_split=trial.suggest_int('min_samples_split', 2, 20),
        random_state=42,
    )
    # The scorer returns negated MSE (higher is better), so flip the sign back.
    neg_mse_per_fold = cross_val_score(
        model,
        X_train,
        y_train,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    return -neg_mse_per_fold.mean()

# Seed the sampler so that a fresh "Restart & Run All" revisits the same trials and
# reports the same best parameters; with the default unseeded sampler every run
# explores a different sequence. TPESampler is already the default for
# single-objective studies, so only the seed changes here.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50, show_progress_bar=True)

print(f'Best MSE: {study.best_value:.4f}')
print(f'Best params: {study.best_params}')

In [None]:
# Visualization: show the optimization history first, then the hyperparameter
# importances, both computed from the finished study.
for make_plot in (
    optuna.visualization.plot_optimization_history,
    optuna.visualization.plot_param_importances,
):
    fig = make_plot(study)
    fig.show()

## 2. Evaluate Best Model

In [None]:
# Why: Comparing the optimized model against the default model quantifies the actual
# benefit of HPO — if the improvement is small, the default hyperparameters may be
# sufficient and the optimization effort is not worthwhile for this dataset.
# Refit the best configuration found by the study on the full training split.
best = GradientBoostingRegressor(**study.best_params, random_state=42)
best.fit(X_train, y_train)
y_pred = best.predict(X_test)

# Compare with default: same estimator and seed, untouched hyperparameters.
default = GradientBoostingRegressor(random_state=42)
default.fit(X_train, y_train)
y_pred_def = default.predict(X_test)

# Score both models on the held-out test split.
r2_def = r2_score(y_test, y_pred_def)
rmse_def = np.sqrt(mean_squared_error(y_test, y_pred_def))
r2_opt = r2_score(y_test, y_pred)
rmse_opt = np.sqrt(mean_squared_error(y_test, y_pred))

print(f'Default   - R²: {r2_def:.4f}, RMSE: {rmse_def:.4f}')
print(f'Optimized - R²: {r2_opt:.4f}, RMSE: {rmse_opt:.4f}')

## 3. Multi-Objective Optimization

In [None]:
# Why: Multi-objective optimization finds Pareto-optimal trade-offs between accuracy
# and model complexity — in production, a slightly less accurate but 10x simpler model
# may be preferred for faster inference and easier maintenance.
def multi_obj(trial):
    """Score one gradient-boosting configuration on accuracy and model size.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial handle used to sample ``n_estimators``, ``max_depth`` and
        ``learning_rate``.

    Returns
    -------
    tuple
        ``(mse, complexity)``, both to be minimized: the mean squared error
        averaged over 3 cross-validation folds of the training data, and the
        integer size proxy ``n_estimators * 2 ** max_depth``.
    """
    n_est = trial.suggest_int('n_estimators', 10, 500)
    depth = trial.suggest_int('max_depth', 2, 10)
    lr = trial.suggest_float('learning_rate', 0.01, 0.3, log=True)

    model = GradientBoostingRegressor(n_estimators=n_est, max_depth=depth, learning_rate=lr, random_state=42)
    # n_jobs=-1 fits the folds in parallel, matching `objective`; it only affects
    # wall-clock time, not the scores.
    scores = cross_val_score(
        model, X_train, y_train, cv=3, scoring='neg_mean_squared_error', n_jobs=-1
    )
    # The scorer returns negated MSE, so flip the sign back.
    mse = -scores.mean()
    # Why: a tree limited to max_depth has at most 2^max_depth leaves, so
    # n_estimators * 2^max_depth is an upper bound on the total leaf count. It serves
    # as a proxy for model complexity (memory footprint and inference latency).
    complexity = n_est * (2 ** depth)
    return mse, complexity

# Seed the sampler so the Pareto front is the same on every "Restart & Run All";
# the default sampler is unseeded and explores different trials on each run.
# NSGAIISampler is what Optuna picks by default for multi-objective studies, so
# only the seed changes here.
mo_study = optuna.create_study(
    directions=['minimize', 'minimize'],
    sampler=optuna.samplers.NSGAIISampler(seed=42),
)
mo_study.optimize(multi_obj, n_trials=50, show_progress_bar=True)

# Pareto front: trials that no other trial beats on both objectives at once.
pareto = mo_study.best_trials
print(f'Pareto solutions: {len(pareto)}')
# Print only the first five to keep the output short.
for t in pareto[:5]:
    print(f'  MSE={t.values[0]:.4f}, Complexity={t.values[1]:.0f}')

In [None]:
# Plot Pareto front on top of every completed trial.
# Only completed trials carry objective values, so filter once and reuse the list.
completed_trials = [
    trial for trial in mo_study.trials
    if trial.state == optuna.trial.TrialState.COMPLETE
]
all_mse = [trial.values[0] for trial in completed_trials]
all_cplx = [trial.values[1] for trial in completed_trials]
pareto_mse = [trial.values[0] for trial in pareto]
pareto_cplx = [trial.values[1] for trial in pareto]

pareto_fig, pareto_ax = plt.subplots(figsize=(10, 6))
pareto_ax.scatter(all_mse, all_cplx, alpha=0.4, label='All trials')
pareto_ax.scatter(pareto_mse, pareto_cplx, c='red', s=80, marker='*', label='Pareto front')
pareto_ax.set_xlabel('MSE (lower=better)')
pareto_ax.set_ylabel('Complexity (lower=simpler)')
pareto_ax.set_title('Multi-Objective: Accuracy vs Complexity')
pareto_ax.legend()
pareto_fig.tight_layout()
plt.show()