In [1]:
# imports
import os
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from datetime import datetime
from models.mlp import build_mlp
from utils.data_loader import load_for_mlp
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
from optuna.integration import TFKerasPruningCallback

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# load data
features, targets = load_for_mlp(start_year=1958, end_year=2018, datasets=["exp1","exp3","exp5","exp7"])
    
mask = features[:, 10] == 1
features = np.delete(features, [10], axis=1)
features = features[mask]
targets = targets[mask]

N = 5_000_000  # number of samples
rng = np.random.default_rng(seed=42)   # seed for reproducibility
idx = rng.choice(len(features), size=N, replace=False)

filtered_features = features[idx]
filtered_targets = targets[idx]

X_train = filtered_features[:int(0.8 * len(filtered_features))]
Y_train = filtered_targets[:int(0.8 * len(filtered_targets))]
X_val = filtered_features[int(0.8 * len(filtered_features)):int(0.9 * len(filtered_features))]
Y_val = filtered_targets[int(0.8 * len(filtered_targets)):int(0.9 * len(filtered_targets))]
X_test = filtered_features[int(0.9 * len(filtered_features)):]
Y_test = filtered_targets[int(0.9 * len(filtered_targets)):]

feature_scaler = StandardScaler()

X_train = feature_scaler.fit_transform(X_train)
X_val = feature_scaler.transform(X_val)
X_test = feature_scaler.transform(X_test)

In [3]:
tf.get_logger().setLevel('ERROR')  # quieter logs

# Optional: determinism
tf.random.set_seed(42)
np.random.seed(42)

# ---------- define the objective ----------
def build_model(base_layer_size, dropout_rate, l2_reg, lr, optimizer_name):
    model = build_mlp(base_layer_size=base_layer_size, dropout_rate=dropout_rate, l2_reg=l2_reg, lr=lr)

        # ---- choose optimizer ----
    if optimizer_name == "adam":
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    elif optimizer_name == "adamw":
        optimizer = tf.keras.optimizers.AdamW(learning_rate=lr)
    elif optimizer_name == "nadam":
        optimizer = tf.keras.optimizers.Nadam(learning_rate=lr)
    elif optimizer_name == "rmsprop":
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr)
    elif optimizer_name == "sgd":
        optimizer = tf.keras.optimizers.SGD(learning_rate=lr, momentum=0.9)
    else:
        raise ValueError(f"Unknown optimizer: {optimizer_name}")

    model.compile(
        optimizer=optimizer,
        loss='mse',
        metrics=['mae', 'mse']
    )
    return model

def objective(trial: optuna.Trial):
    # ----- search space -----
    batch_size = trial.suggest_categorical("batch_size", [512, 1024, 2048, 4096])
    lr = trial.suggest_categorical("lr", [0.00001, 0.0001, 0.0002, 0.0005,0.001])
    base_layer_size = trial.suggest_categorical("base_layer_size", [128, 256, 512])
    dropout_rate = trial.suggest_categorical("dropout_rate", [0,0.05,0.1,0.15])
    l2_reg = trial.suggest_categorical("l2_reg", [0, 0.000001, 0.00001])
    optimizer_name = trial.suggest_categorical("optimizer", ["adam", "adamw", "nadam", "rmsprop"])

    model = build_model(base_layer_size, dropout_rate, l2_reg, lr, optimizer_name)

    # ----- callbacks (with pruning) -----
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=5, restore_best_weights=True
    )
    lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7
    )
    pruning_cb = TFKerasPruningCallback(trial, monitor='val_loss')

    # If your data are NumPy arrays X_train/Y_train, X_val/Y_val already exist in scope
    history = model.fit(
        X_train, Y_train,
        validation_data=(X_val, Y_val),
        epochs=30,
        batch_size=batch_size,
        verbose=0,
        callbacks=[early_stopping, lr_scheduler, pruning_cb],
        shuffle=True
    )

    # Return the best validation loss from training
    val_losses = history.history['val_loss']
    return float(np.min(val_losses))

# ---------- run the study ----------
study = optuna.create_study(
    direction="minimize",
    sampler=TPESampler(seed=42),
    pruner=MedianPruner(n_warmup_steps=10)
)
# Adjust n_trials to your budget
study.optimize(objective, n_trials=100, gc_after_trial=True)

print("Best trial:")
print("  value (val_loss):", study.best_trial.value)
print("  params:", study.best_trial.params)


[I 2025-09-25 08:04:23,790] A new study created in memory with name: no-name-fc3df68d-5fdc-4c95-b76c-7cb05e7326fc
[I 2025-09-25 09:01:09,411] Trial 0 finished with value: 0.07854608446359634 and parameters: {'batch_size': 1024, 'lr': 0.0005, 'base_layer_size': 512, 'dropout_rate': 0, 'l2_reg': 1e-06, 'optimizer': 'adamw'}. Best is trial 0 with value: 0.07854608446359634.
[I 2025-09-25 09:49:07,047] Trial 1 finished with value: 0.0864909365773201 and parameters: {'batch_size': 2048, 'lr': 0.0005, 'base_layer_size': 512, 'dropout_rate': 0, 'l2_reg': 1e-05, 'optimizer': 'adamw'}. Best is trial 0 with value: 0.07854608446359634.
[I 2025-09-25 10:04:55,856] Trial 2 finished with value: 0.20004016160964966 and parameters: {'batch_size': 2048, 'lr': 1e-05, 'base_layer_size': 128, 'dropout_rate': 0.1, 'l2_reg': 0, 'optimizer': 'nadam'}. Best is trial 0 with value: 0.07854608446359634.
[I 2025-09-25 11:24:01,694] Trial 3 finished with value: 0.10020259022712708 and parameters: {'batch_size': 51

InvalidArgumentError: Graph execution error:

Detected at node gradient_tape/sequential_70_1/batch_normalization_211_1/moments/mul_1 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 211, in start

  File "/opt/homebrew/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/opt/homebrew/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/opt/homebrew/Cellar/python@3.11/3.11.10/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 519, in dispatch_queue

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 508, in process_one

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 400, in dispatch_shell

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 368, in execute_request

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 767, in execute_request

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 455, in do_execute

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 577, in run_cell

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3116, in run_cell

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3171, in _run_cell

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3394, in run_cell_async

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3639, in run_ast_nodes

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3699, in run_code

  File "/var/folders/ns/4sxfm4t95836d2h_1wgqvzdh0000gn/T/ipykernel_1652/735363468.py", line 74, in <module>

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/optuna/study/study.py", line 490, in optimize

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 63, in _optimize

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 160, in _optimize_sequential

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/optuna/study/_optimize.py", line 201, in _run_trial

  File "/var/folders/ns/4sxfm4t95836d2h_1wgqvzdh0000gn/T/ipykernel_1652/735363468.py", line 53, in objective

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 377, in fit

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 220, in function

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 133, in multi_step_on_iterator

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 114, in one_step_on_data

  File "/Users/jakobmeggendorfer/Documents/CAU/Masterarbeit/master-thesis/venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 78, in train_step

Incompatible shapes: [0] vs. [1024,1024]
	 [[{{node gradient_tape/sequential_70_1/batch_normalization_211_1/moments/mul_1}}]] [Op:__inference_multi_step_on_iterator_32156599]

In [4]:
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M')
folder_path = "../../outputs/optuna/mlp"

df = study.trials_dataframe()

# sort descending by 'value'
df = df.sort_values(by="value", ascending=True)

# round the 'value' column to 4 decimals
df["value"] = df["value"].round(4)

# save to CSV
df.to_csv(folder_path + "/trials_" + timestamp + ".csv", index=False)