# Lab 1.6.2: Hyperparameter Optimization - SOLUTIONS

This notebook contains complete solutions to all exercises from Lab 1.6.2.

In [None]:
# Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import time
import warnings
warnings.filterwarnings('ignore')

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error
import xgboost as xgb
import optuna

np.random.seed(42)

# Plotting style: try the current seaborn style name first, then the name used
# by older matplotlib versions; if neither is known, keep the default style.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    try:
        plt.style.use(_style_name)
    except OSError:
        continue  # Unknown to this matplotlib version; try the next candidate
    break  # First style that loads wins

# Load data
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Hold out 20% of the rows as the final test set, then split 20% of the
# remaining training rows off as a validation set for tuning.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_sub, X_val, y_train_sub, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

print("✅ Data loaded successfully!")

## Exercise 1 Solution: XGBoost Pruning Callback

In [None]:
# Exercise 1: Using Optuna's pruning to speed up optimization

def objective_with_pruning(trial):
    """
    Optuna objective: train XGBoost with early stopping and trial pruning.

    Uses XGBoost's native API (``xgb.train``) so a per-round callback can
    report the validation RMSE to Optuna while the model is still training.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The booster's ``best_score``, i.e. the 'validation-rmse' value kept by
        early stopping.

    Raises:
        optuna.TrialPruned: Propagated unchanged from ``xgb.train`` when the
            pruning callback stops the trial; ``study.optimize`` handles it.
    """
    # Hyperparameter suggestions
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'device': 'cuda',
        'verbosity': 0
    }

    # Create DMatrix for XGBoost native API
    dtrain = xgb.DMatrix(X_train_sub, label=y_train_sub)
    dval = xgb.DMatrix(X_val, label=y_val)

    # The observation key is '<evals name>-<eval_metric>', so it must stay in
    # sync with the 'validation' label and the 'rmse' metric used below.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, 'validation-rmse')

    # Train with early stopping and pruning. No try/except is needed here:
    # a TrialPruned raised by the callback simply propagates to the study.
    bst = xgb.train(
        params,
        dtrain,
        num_boost_round=500,
        evals=[(dval, 'validation')],
        early_stopping_rounds=20,
        callbacks=[pruning_callback],
        verbose_eval=False
    )

    # Get best score
    return bst.best_score

# Run optimization with pruning
print("Running Optuna with pruning...")
pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=50)

study_pruning = optuna.create_study(
    direction='minimize',
    # Optuna's samplers keep their own RNG, so the global np.random.seed()
    # call does not make the sampled trials repeatable; seed the sampler itself.
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=pruner,
    study_name='xgboost_pruning'
)

start = time()
study_pruning.optimize(objective_with_pruning, n_trials=50, show_progress_bar=True)
pruning_time = time() - start

# Results: collect the trial states once, then count each outcome
trial_states = [t.state for t in study_pruning.trials]
n_pruned = trial_states.count(optuna.trial.TrialState.PRUNED)
n_complete = trial_states.count(optuna.trial.TrialState.COMPLETE)

print("\nOptimization with Pruning Complete!")
print(f"Time: {pruning_time:.1f} seconds")
print(f"Best RMSE: {study_pruning.best_value:.4f}")
print(f"Pruned trials: {n_pruned}")
print(f"Completed trials: {n_complete}")

## Exercise 2 Solution: LightGBM Tuning

In [None]:
# Exercise 2: Tuning LightGBM with Optuna

# LightGBM is an optional dependency: record whether it can be imported so the
# rest of this exercise is skipped cleanly when it is missing.
try:
    import lightgbm as lgb
except ImportError:
    LGBM_AVAILABLE = False
    print("LightGBM not installed. Install with: pip install lightgbm")
else:
    LGBM_AVAILABLE = True

if LGBM_AVAILABLE:
    def lgb_objective(trial):
        """
        Optuna objective for LightGBM.
        LightGBM uses different parameter names!

        Args:
            trial: The Optuna trial used to sample hyperparameters.

        Returns:
            Mean RMSE over 5-fold cross-validation on the training set
            (the sign of sklearn's negated score is flipped back).
        """
        params = {
            'objective': 'regression',
            'metric': 'rmse',
            # LightGBM-specific parameters
            'num_leaves': trial.suggest_int('num_leaves', 20, 300),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'n_estimators': trial.suggest_int('n_estimators', 100, 500),
            'max_depth': trial.suggest_int('max_depth', 3, 12),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            # LightGBM only applies row subsampling (bagging) when the bagging
            # frequency is non-zero; without this, tuning 'subsample' is a no-op.
            'subsample_freq': 1,
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
            'device': 'gpu',
            'verbose': -1,
            'random_state': 42
        }

        model = lgb.LGBMRegressor(**params)
        # NOTE(review): folds are fitted in parallel (n_jobs=-1) while every
        # model targets the GPU - confirm the device handles concurrent fits.
        cv_scores = cross_val_score(
            model, X_train, y_train,
            cv=5,
            scoring='neg_root_mean_squared_error',
            n_jobs=-1
        )

        # The scorer returns negative RMSE (higher is better); negate to minimize
        return -cv_scores.mean()
    
    # Run LightGBM optimization
    print("Running Optuna for LightGBM...")
    study_lgb = optuna.create_study(
        direction='minimize',
        # Seed the sampler itself: the global np.random.seed() call does not
        # make Optuna's sampled trials repeatable.
        sampler=optuna.samplers.TPESampler(seed=42),
        study_name='lightgbm_housing'
    )
    study_lgb.optimize(lgb_objective, n_trials=50, show_progress_bar=True)

    print("\nLightGBM Results:")
    print(f"Best RMSE: {study_lgb.best_value:.4f}")
    print("\nBest Parameters:")
    for key, value in study_lgb.best_params.items():
        print(f"  {key}: {value}")

    # Compare with XGBoost.
    # Caveat: the XGBoost figure is the RMSE on the single validation split,
    # while the LightGBM figure is a 5-fold CV mean on the full training set,
    # so the two numbers are indicative rather than strictly comparable.
    print("\n Comparison:")
    print(f"  XGBoost best RMSE: {study_pruning.best_value:.4f}")
    print(f"  LightGBM best RMSE: {study_lgb.best_value:.4f}")

## Exercise 3 Solution: Multi-Objective Optimization

In [None]:
# Exercise 3: Multi-objective optimization (accuracy AND speed)

def multi_objective(trial):
    """
    Optimize for both RMSE and training time.

    The model is fitted on the training subset and scored on the validation
    split, so the held-out test set is never used to select hyperparameters.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        tuple: (rmse, training_time) - validation RMSE and the wall-clock
        seconds spent inside ``model.fit``.
    """
    params = {
        'objective': 'reg:squarederror',
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'device': 'cuda',
        'verbosity': 0,
        'random_state': 42
    }

    model = xgb.XGBRegressor(**params)

    # Time training only; prediction is deliberately left out of the timing
    start = time()
    model.fit(X_train_sub, y_train_sub)
    train_time = time() - start

    # Evaluate on the validation split. Scoring on X_test here would leak the
    # test set into model selection and bias any later test-set estimate.
    pred = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, pred))

    return rmse, train_time

# Create multi-objective study
print("Running multi-objective optimization...")
study_multi = optuna.create_study(
    directions=['minimize', 'minimize'],  # Minimize both RMSE and time
    # NSGA-II is the sampler Optuna selects by default for multi-objective
    # studies; naming it explicitly lets us fix its seed so the sampled
    # hyperparameters repeat (measured training times will still vary).
    sampler=optuna.samplers.NSGAIISampler(seed=42),
    study_name='xgboost_multi'
)

study_multi.optimize(multi_objective, n_trials=50, show_progress_bar=True)

# Get Pareto front
pareto_trials = study_multi.best_trials

print("\nMulti-objective Results:")
print(f"Number of Pareto-optimal solutions: {len(pareto_trials)}")
print("\nPareto Front:")
print(f"{'RMSE':>10} | {'Time (s)':>10}")
print("-" * 25)
# List the front from most to least accurate so the trade-off reads top-down
for pareto_trial in sorted(pareto_trials, key=lambda t: t.values[0]):
    print(f"{pareto_trial.values[0]:>10.4f} | {pareto_trial.values[1]:>10.3f}")

# Visualize Pareto front (matplotlib.pyplot is already imported as plt in the setup cell)
fig, ax = plt.subplots(figsize=(10, 6))

# All trials: filter to completed ones once, since only those carry objective values
completed_trials = [
    t for t in study_multi.trials
    if t.state == optuna.trial.TrialState.COMPLETE
]
all_rmse = [t.values[0] for t in completed_trials]
all_time = [t.values[1] for t in completed_trials]

ax.scatter(all_rmse, all_time, alpha=0.5, label='All Trials')

# Pareto front, drawn on top of the full cloud of trials
pareto_rmse = [t.values[0] for t in pareto_trials]
pareto_time = [t.values[1] for t in pareto_trials]
ax.scatter(pareto_rmse, pareto_time, c='red', s=100, marker='*', label='Pareto Front')

ax.set_xlabel('RMSE (lower is better)')
ax.set_ylabel('Training Time (s) (lower is better)')
ax.set_title('Multi-Objective Optimization: RMSE vs Training Time')
ax.legend()

plt.tight_layout()
plt.show()

print("\nInterpretation:")
print("  The Pareto front shows the trade-off between accuracy and speed.")
print("  Points on the front are 'optimal' - improving one metric worsens the other.")

## Key Takeaways

1. **Pruning dramatically speeds up optimization** by stopping unpromising trials early
2. **LightGBM often matches XGBoost** with faster training (different parameter names!)
3. **Multi-objective optimization** helps find the best trade-off between competing goals
4. **The Pareto front** shows all optimal solutions - no single "best" when goals conflict