# Optuna and NNs

In [None]:
# Optuna + MLPRegressor on California Housing
# - 80/10/10 split (same as before)
# - StandardScaler fit on train; applied to val/test
# - Optuna tunes MLP hyperparams by minimizing validation MAE
# - Retrain best model on train+val; evaluate on test (R², MAE, MAPE)
# - Scatter plot (pred vs actual) for test

%pip install optuna

import warnings
warnings.filterwarnings("ignore")  # MLP may emit ConvergenceWarning

import optuna
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

RANDOM_STATE = 42  # single seed reused by the splits, the MLP and the Optuna sampler

# 1) Load data: California Housing as a DataFrame; "MedHouseVal" is the target column
data = fetch_california_housing(as_frame=True)
y = data.frame["MedHouseVal"]
X = data.frame.drop(columns=["MedHouseVal"])

# 2) 80/10/10 split: hold out 20% first, then cut that hold-out in half
#    into validation and test (same fixed seed for both calls)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, random_state=RANDOM_STATE, test_size=0.2
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, random_state=RANDOM_STATE, test_size=0.5
)

# 3) Scale features: the scaler is fit on the training split only and then
#    applied unchanged to every split
scaler = StandardScaler().fit(X_train)
X_train_s, X_val_s, X_test_s = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# 4) Define Optuna objective: minimize validation MAE
def objective(trial):
    """Optuna objective: train one MLP configuration and score it on validation.

    Samples an architecture (1-3 hidden layers, 32-512 units each in steps of
    32) plus activation, L2 penalty, initial learning rate and batch size from
    ``trial``, fits an ``MLPRegressor`` on the module-level ``X_train_s`` /
    ``y_train`` and returns its mean absolute error on ``X_val_s`` / ``y_val``.

    Args:
        trial: The Optuna trial used to draw hyperparameter values.

    Returns:
        Validation MAE for the sampled configuration (lower is better).
    """
    # Parameters are suggested in a fixed order: n_layers, the per-layer
    # widths, activation, alpha, learning_rate_init, batch_size.
    depth = trial.suggest_int("n_layers", 1, 3)
    layer_widths = tuple(
        trial.suggest_int(f"n_units_l{layer}", 32, 512, step=32)
        for layer in range(1, depth + 1)
    )
    activation = trial.suggest_categorical("activation", ["relu", "tanh"])
    l2_penalty = trial.suggest_float("alpha", 1e-6, 1e-1, log=True)
    initial_lr = trial.suggest_float("learning_rate_init", 1e-4, 1e-1, log=True)
    minibatch = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])

    regressor = MLPRegressor(
        hidden_layer_sizes=layer_widths,
        activation=activation,
        solver="adam",  # stable choice for MLP
        alpha=l2_penalty,
        learning_rate_init=initial_lr,
        batch_size=minibatch,
        max_iter=1000,  # give it room to converge
        early_stopping=False,  # validation is external; avoid an internal split
        random_state=RANDOM_STATE,
        shuffle=True,
    )
    regressor.fit(X_train_s, y_train)

    return mean_absolute_error(y_val, regressor.predict(X_val_s))

# Run the search: a TPE sampler seeded with RANDOM_STATE drives 40 trials
# that minimize the value returned by `objective`.
sampler = optuna.samplers.TPESampler(seed=RANDOM_STATE)
study = optuna.create_study(sampler=sampler, direction="minimize")
study.optimize(objective, show_progress_bar=True, n_trials=40)

# Report the best trial: its validation MAE followed by each sampled parameter
print(
    "\nBest trial:",
    f"  Validation MAE: {study.best_value:.4f}",
    "  Params:",
    sep="\n",
)
for name, value in study.best_params.items():
    print(f"    {name}: {value}")

# 5) Retrain best model on TRAIN+VAL, evaluate on TEST
best = study.best_params
# The study stores one "n_units_l<k>" entry per hidden layer; collect them
# back into the tuple that MLPRegressor expects.
n_layers = best["n_layers"]
hidden_sizes = tuple(best[f"n_units_l{layer}"] for layer in range(1, n_layers + 1))

# Tuned values come from the best trial; the remaining settings are fixed.
final_params = dict(
    hidden_layer_sizes=hidden_sizes,
    activation=best["activation"],
    solver="adam",
    alpha=best["alpha"],
    learning_rate_init=best["learning_rate_init"],
    batch_size=best["batch_size"],
    max_iter=2000,  # a bit more room to fully fit
    early_stopping=False,
    random_state=RANDOM_STATE,
    shuffle=True,
)

# Combine train+val: the already-scaled feature matrices are stacked row-wise
# and the targets are joined in the same order.
X_trval = np.concatenate([X_train_s, X_val_s], axis=0)
y_trval = np.concatenate([y_train.to_numpy(), y_val.to_numpy()])

final_model = MLPRegressor(**final_params).fit(X_trval, y_trval)

# Metrics helper
def metrics(y_true, y_pred):
    """Score predictions against targets with the three reported metrics.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        A dict with keys ``"R2"``, ``"MAE"`` and ``"MAPE"`` (in that order),
        each holding the value returned by the matching scikit-learn scorer.
        Note that scikit-learn reports MAPE as a ratio, not multiplied by 100.
    """
    scorers = (
        ("R2", r2_score),
        ("MAE", mean_absolute_error),
        ("MAPE", mean_absolute_percentage_error),
    )
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}

# Evaluate on all splits for completeness. final_model was fit on train+val,
# so only the "test" row reflects data the model was not trained on.
y_pred_train, y_pred_val, y_pred_test = (
    final_model.predict(features) for features in (X_train_s, X_val_s, X_test_s)
)

evaluated = (
    ("train", y_train, y_pred_train),
    ("val", y_val, y_pred_val),
    ("test", y_test, y_pred_test),
)
rows = [
    {"split": label, **metrics(truth, predicted)}
    for label, truth, predicted in evaluated
]
metrics_df = pd.DataFrame(rows)

# One row per split, rounded to four decimals, without the index column
print("\n=== Final Metrics (best params retrained on train+val) ===")
print(metrics_df.round(4).to_string(index=False))

# 6) Scatter plot (Test)
def scatter_with_reference(y_true, y_pred, title):
    """Show a predicted-vs-actual scatter plot with a diagonal reference line.

    Args:
        y_true: Observed target values, plotted on the x-axis.
        y_pred: Predicted values, plotted on the y-axis.
        title: Text used as the plot title.
    """
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.scatter(y_true, y_pred, alpha=0.3, s=10)

    # The reference line runs corner to corner over the combined range of both
    # series; points lying on it are exact predictions.
    lower = min(np.min(y_true), np.min(y_pred))
    upper = max(np.max(y_true), np.max(y_pred))
    ax.plot([lower, upper], [lower, upper], linewidth=1)

    ax.set_xlabel("Actual MedHouseVal")
    ax.set_ylabel("Predicted MedHouseVal")
    ax.set_title(title)
    fig.tight_layout()
    plt.show()

# Plot the final model's test-split predictions against the true targets
scatter_with_reference(y_test, y_pred_test, "Predicted vs Actual — Test (Optuna-tuned MLP)")



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\dww05002\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.5-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.43-cp311-cp311-win_amd64.whl.metadata (9.8 kB)
Collecting PyYAML (from optuna)
  Downloading pyyaml-6.0.3-cp311-cp311-win_amd64.whl.metadata (2.4 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.4-cp311-cp311-win_amd64.whl.metadata (4.2 kB)
Collecting MarkupSafe>=0.9.2 (from Mako->alembic>=1.5.0->optuna)
  Downloading markupsafe-3.0.3-cp311-cp311-win_amd64.whl.metadata (2.8 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
   ---------------------------------------- 0.0/400.9 kB ? eta -:--:--


[I 2025-10-08 11:52:42,885] A new study created in memory with name: no-name-5df68409-07c8-4623-aee7-0597f43f0e2b
Best trial: 0. Best value: 0.354051:   2%|▎         | 1/40 [01:38<1:04:08, 98.69s/it]

[I 2025-10-08 11:54:21,560] Trial 0 finished with value: 0.35405134313968545 and parameters: {'n_layers': 2, 'n_units_l1': 512, 'n_units_l2': 384, 'activation': 'relu', 'alpha': 6.025215736203862e-06, 'learning_rate_init': 0.00014936568554617635, 'batch_size': 512}. Best is trial 0 with value: 0.35405134313968545.
