# Optuna and NNs

In [None]:
# Optuna + MLPRegressor (calmer search, ≤20 units/layer, early stopping)
# - 80/10/10 split
# - StandardScaler fit on train; applied to val/test
# - Optuna minimizes VALIDATION MAE
# - Retrain best on TRAIN+VAL; evaluate on TEST (R², MAE, MAPE)

import warnings
warnings.filterwarnings("ignore")

import optuna
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_absolute_percentage_error

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Single seed shared by both train_test_split calls below and reused later
# in this script for the Optuna sampler and the MLPRegressor instances.
RANDOM_STATE = 42

# 1) Load data
# as_frame=True exposes data.frame, a DataFrame holding the features plus the
# "MedHouseVal" target column; split it into feature matrix X and target y.
data = fetch_california_housing(as_frame=True)
X = data.frame.drop(columns=["MedHouseVal"])
y = data.frame["MedHouseVal"]

# 2) 80/10/10 split
# Step 1: keep 80% for training and hold out 20% (X_temp / y_temp).
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)
# Step 2: cut the 20% hold-out in half -> 10% validation, 10% test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=RANDOM_STATE
)

# 3) Scale features (train-only stats)
# The scaler is fitted on the training split only; validation and test are
# transformed with those same statistics so no hold-out information leaks
# into the hyperparameter search.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s   = scaler.transform(X_val)
X_test_s  = scaler.transform(X_test)

# 4) Optuna objective — calmer search
def objective(trial):
    """Optuna objective: fit a small MLP and return its validation MAE.

    The trial proposes 1-3 hidden layers of 4-20 units each (step 4), the
    activation, the L2 penalty ``alpha``, the initial learning rate and the
    batch size. The network is trained on the scaled TRAIN split with early
    stopping and scored on the separate scaled VAL split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Mean absolute error on the validation split (lower is better).
    """
    # Keep the suggest_* calls in this exact order: the seeded sampler draws
    # values sequentially, so reordering would change the search trajectory.
    depth = trial.suggest_int("n_layers", 1, 3)
    layer_widths = tuple(
        trial.suggest_int(f"n_units_l{layer}", 4, 20, step=4)  # <= 20 units
        for layer in range(1, depth + 1)
    )
    activation = trial.suggest_categorical("activation", ["relu", "tanh"])
    l2_penalty = trial.suggest_float("alpha", 1e-6, 1e-2, log=True)
    step_size = trial.suggest_float("learning_rate_init", 1e-4, 1e-2, log=True)
    minibatch = trial.suggest_categorical("batch_size", [64, 128, 256])

    regressor = MLPRegressor(
        hidden_layer_sizes=layer_widths,
        activation=activation,
        solver="adam",
        alpha=l2_penalty,
        learning_rate_init=step_size,
        batch_size=minibatch,
        max_iter=1000,
        early_stopping=True,        # stop when the internal hold-out stalls
        validation_fraction=0.1,    # internal hold-out: 10% of TRAIN
        n_iter_no_change=20,        # patience, in epochs
        random_state=RANDOM_STATE,
        shuffle=True,
    )

    # Early stopping only ever sees TRAIN; the external VAL split is used
    # purely for scoring the trial.
    regressor.fit(X_train_s, y_train)
    return mean_absolute_error(y_val, regressor.predict(X_val_s))

# Seed the TPE sampler so repeated runs propose the same hyperparameters,
# and minimize because the objective returns a validation MAE.
sampler = optuna.samplers.TPESampler(seed=RANDOM_STATE)
study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective, n_trials=25, show_progress_bar=True)  # fewer trials

# Report the winning trial: its validation MAE and each sampled parameter.
print("\nBest trial:")
print(f"  Validation MAE: {study.best_value:.4f}")
for k, v in study.best_params.items():
    print(f"    {k}: {v}")

# 5) Retrain best on TRAIN+VAL, then evaluate on TEST
best = study.best_params
n_layers = best["n_layers"]
# Only the first n_layers "n_units_l*" entries exist for the best trial.
hidden_sizes = tuple(best[f"n_units_l{i+1}"] for i in range(n_layers))

# Same architecture and optimizer settings as the best trial, with a larger
# iteration budget and slightly more patience for the final fit.
final_params = {
    "hidden_layer_sizes": hidden_sizes,
    "activation": best["activation"],
    "solver": "adam",
    "alpha": best["alpha"],
    "learning_rate_init": best["learning_rate_init"],
    "batch_size": best["batch_size"],
    "max_iter": 2000,
    "early_stopping": True,          # keep early stopping
    "validation_fraction": 0.1,      # internal hold-out: 10% of TRAIN+VAL
    "n_iter_no_change": 25,
    "random_state": RANDOM_STATE,
    "shuffle": True,
}

# Refit scaler on TRAIN+VAL for final model.
# pd.concat (rather than np.vstack) keeps the DataFrame column names, so the
# scaler is fitted WITH feature names. Later transform() calls on the
# X_train / X_val / X_test DataFrames are then validated against those names
# instead of raising scikit-learn's "fitted without feature names" warning.
# The scaled values are numerically identical to the stacked-array version.
X_trval = pd.concat([X_train, X_val], axis=0)
scaler_final = StandardScaler().fit(X_trval)
X_trval_s = scaler_final.transform(X_trval)
y_trval   = np.concatenate([y_train.values, y_val.values])  # same row order as X_trval
X_test_sf = scaler_final.transform(X_test)

final_model = MLPRegressor(**final_params)
final_model.fit(X_trval_s, y_trval)

def metrics(y_true, y_pred):
    """Score predictions against targets with R2, MAE and MAPE.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    dict
        Mapping with keys ``"R2"``, ``"MAE"`` and ``"MAPE"`` (in that order).
    """
    scorers = (
        ("R2", r2_score),
        ("MAE", mean_absolute_error),
        ("MAPE", mean_absolute_percentage_error),
    )
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}

# Evaluate the final model on every split.
# NOTE: final_model was fitted on TRAIN+VAL, so the "train" and "val" rows are
# in-sample scores; only the "test" row is a held-out estimate.
y_pred_train = final_model.predict(scaler_final.transform(X_train))
y_pred_val   = final_model.predict(scaler_final.transform(X_val))
y_pred_test  = final_model.predict(X_test_sf)

# One row per split: the split label followed by its R2 / MAE / MAPE.
rows = [
    {"split": split_name, **metrics(truth, predicted)}
    for split_name, truth, predicted in (
        ("train", y_train, y_pred_train),
        ("val", y_val, y_pred_val),
        ("test", y_test, y_pred_test),
    )
]
metrics_df = pd.DataFrame(rows)
print("\n=== Final Metrics (train+val retrain, early stopping) ===")
print(metrics_df.round(4).to_string(index=False))

# 6) Test scatter
def scatter_with_reference(y_true, y_pred, title):
    """Show a predicted-vs-actual scatter plot with a y = x reference line.

    Parameters
    ----------
    y_true : array-like
        Actual target values (x axis).
    y_pred : array-like
        Predicted target values (y axis).
    title : str
        Title displayed above the plot.
    """
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.scatter(y_true, y_pred, alpha=0.3, s=10)

    # The diagonal spans the combined range of both series, so every point
    # can be compared against the perfect-prediction line.
    lower = min(np.min(y_true), np.min(y_pred))
    upper = max(np.max(y_true), np.max(y_pred))
    diagonal = [lower, upper]
    ax.plot(diagonal, diagonal, linewidth=1)

    ax.set_xlabel("Actual MedHouseVal")
    ax.set_ylabel("Predicted MedHouseVal")
    ax.set_title(title)
    fig.tight_layout()
    plt.show()

# Plot the final model's predictions on the held-out TEST split against the
# actual values.
scatter_with_reference(y_test, y_pred_test, "Predicted vs Actual — Test (Optuna-tuned, small MLP)")



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\dww05002\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.5-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Downloading sqlalchemy-2.0.43-cp311-cp311-win_amd64.whl.metadata (9.8 kB)
Collecting PyYAML (from optuna)
  Downloading pyyaml-6.0.3-cp311-cp311-win_amd64.whl.metadata (2.4 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)
  Downloading greenlet-3.2.4-cp311-cp311-win_amd64.whl.metadata (4.2 kB)
Collecting MarkupSafe>=0.9.2 (from Mako->alembic>=1.5.0->optuna)
  Downloading markupsafe-3.0.3-cp311-cp311-win_amd64.whl.metadata (2.8 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
   ---------------------------------------- 0.0/400.9 kB ? eta -:--:--


[I 2025-10-08 11:52:42,885] A new study created in memory with name: no-name-5df68409-07c8-4623-aee7-0597f43f0e2b
Best trial: 0. Best value: 0.354051:   2%|▎         | 1/40 [01:38<1:04:08, 98.69s/it]

[I 2025-10-08 11:54:21,560] Trial 0 finished with value: 0.35405134313968545 and parameters: {'n_layers': 2, 'n_units_l1': 512, 'n_units_l2': 384, 'activation': 'relu', 'alpha': 6.025215736203862e-06, 'learning_rate_init': 0.00014936568554617635, 'batch_size': 512}. Best is trial 0 with value: 0.35405134313968545.


Best trial: 0. Best value: 0.354051:   5%|▌         | 2/40 [08:44<3:04:32, 291.38s/it]

[I 2025-10-08 12:01:27,820] Trial 1 finished with value: 0.39762959352902016 and parameters: {'n_layers': 3, 'n_units_l1': 128, 'n_units_l2': 96, 'n_units_l3': 96, 'activation': 'tanh', 'alpha': 0.00014445251022763054, 'learning_rate_init': 0.0007476312062252305, 'batch_size': 32}. Best is trial 0 with value: 0.35405134313968545.


Best trial: 0. Best value: 0.354051:   8%|▊         | 3/40 [08:56<1:41:00, 163.79s/it]

[I 2025-10-08 12:01:39,799] Trial 2 finished with value: 0.47543476778713895 and parameters: {'n_layers': 3, 'n_units_l1': 128, 'n_units_l2': 288, 'n_units_l3': 320, 'activation': 'tanh', 'alpha': 7.122305833333869e-06, 'learning_rate_init': 0.00015673095467235422, 'batch_size': 64}. Best is trial 0 with value: 0.35405134313968545.
