In [6]:
import os, sys, random, json, copy
from datetime import datetime, timezone

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import (
    MinMaxScaler, StandardScaler, RobustScaler, MaxAbsScaler
)
from sklearn.metrics import mean_absolute_error, mean_squared_error

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner

# set deterministic seeds
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# pick device
device = (
    torch.device("cuda")
    if torch.cuda.is_available() else
    torch.device("mps")
    if torch.backends.mps.is_available() else
    torch.device("cpu")
)
print(f"> Using device: {device}")

> Using device: cuda


## 1. Data Loading and Feature Engineering

In [7]:
# Load the data
df = pd.read_csv("df_all.csv", index_col="startTime", parse_dates=True)

# Crop the DataFrame to the specified date range
df = df.loc["2021-07-01":"2025-06-30"]

# Drop unnecessary columns
df = df.drop(columns=[
    'Forecast Wind',
    'Forecast Solar',
    'Actual Wind',
    'Actual Solar',
    'Settlement Period',
])

# Calculate time of day features
minutes = df.index.hour * 60 + df.index.minute
frac_day = minutes / (24 * 60)
df['tod_sin'] = np.sin(2 * np.pi * frac_day)
df['tod_cos'] = np.cos(2 * np.pi * frac_day)

# Calculate day of week features
day_of_week = df.index.dayofweek
frac_week = day_of_week / 7
df['dow_sin'] = np.sin(2 * np.pi * frac_week)
df['dow_cos'] = np.cos(2 * np.pi * frac_week)

# Calculate month of year features
month = df.index.month
frac_year = (month - 1) / 12
df['moy_sin'] = np.sin(2 * np.pi * frac_year)
df['moy_cos'] = np.cos(2 * np.pi * frac_year)


# — splits
train_end = '2025-03-01'  # start of validation
val_end   = '2025-05-01'  # start of test

# slice once …
train_df = df.loc[:train_end]
val_df   = df.loc[train_end:val_end]
test_df  = df.loc[val_end:]

# … then unpack X & y in one go without repeating .drop
X_train, y_train = train_df.drop(columns=['Imbalance Price']), train_df['Imbalance Price']
X_val,      y_val      = val_df.drop(columns=['Imbalance Price']),    val_df['Imbalance Price']
X_test,     y_test     = test_df.drop(columns=['Imbalance Price']),   test_df['Imbalance Price']


# lists of feature‐columns
time_feats  = [c for c in ['tod_sin','tod_cos','dow_sin','dow_cos','moy_sin','moy_cos'] if c in df]
other_feats = [c for c in X_train.columns if c not in time_feats]

## 2. Models, Dataset & Factories

In [8]:
# ──────────── a. Dataset Definition ─────────────────────────────────

class LSTMDataset(Dataset):
    def __init__(self, X, y, seq_len, horizon=1):
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.float32)
        self.seq_len = seq_len
        self.horizon = horizon

    def __len__(self):
        return self.X.shape[0] - self.seq_len - self.horizon + 1

    def __getitem__(self, idx):
        x_seq    = self.X[idx : idx + self.seq_len]
        target_i = idx + self.seq_len - 1 + self.horizon
        y_target = self.y[target_i]
        return x_seq, y_target


# ──────────── b. Layer Definitions ──────────────────────────────────

class SeasonalAttn(nn.Module):
    def __init__(self, seq_len=48):
        super().__init__()
        self.attn = nn.Parameter(torch.empty(seq_len))
        nn.init.uniform_(self.attn, -0.01, 0.01)

    def forward(self, x):
        return x * self.attn.view(1, -1, 1) # (B,N,F)


class BiLSTM(nn.Module):
    def __init__(self, num_feats, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=num_feats,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=True,
            batch_first=True,
        )

    def forward(self, x):
        out, (_ , _) = self.lstm(x)
        return out


class AttentionPool(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.W_h = nn.Linear(input_dim, hidden_dim)
        self.v = nn.Linear(hidden_dim, 1, bias=False)

    def forward(self, x):
        e = self.v(torch.tanh(self.W_h(x)))   # (B, N, 1)
        alpha = torch.softmax(e, dim=1)       # (B, N, 1)
        context = torch.sum(alpha * x, dim=1) # (B, 2H)
        return context


# ──────────── c. Model Definitions ──────────────────────────────────

class SA_BiLSTM(nn.Module):
    def __init__(self, num_feats, seq_len=48, hidden_size=64, num_layers=2):
        super().__init__()
        self.seasonal = SeasonalAttn(seq_len)
        self.lstm = BiLSTM(num_feats, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x):
        x = self.seasonal(x)       # (B, N, F)
        out = self.lstm(x)         # (B, N, 2H)
        last = out[:, -1, :]       # (B, 2H)
        return self.fc(last).squeeze(-1)  # (B,)


class SA_BiLSTM_AttnPool(nn.Module):
    def __init__(self, num_feats, seq_len=48, hidden_size=64, num_layers=2):
        super().__init__()
        self.seasonal = SeasonalAttn(seq_len)
        self.lstm = BiLSTM(num_feats, hidden_size, num_layers)
        self.pool = AttentionPool(input_dim=hidden_size * 2, hidden_dim=hidden_size)
        self.fc = nn.Linear(hidden_size * 2, 1)

    def forward(self, x):
        x = self.seasonal(x)       # (B, N, F)
        out = self.lstm(x)         # (B, N, 2H)
        context = self.pool(out)   # (B, 2H)
        return self.fc(context).squeeze(-1)  # (B,)

TRANSFORMER_FACTORY = {
    "MinMax":   MinMaxScaler,
    "Standard": StandardScaler,
    "Robust":   RobustScaler,
    "MaxAbs":   MaxAbsScaler
}
MODEL_FACTORY = {
    "SA_BiLSTM":             SA_BiLSTM,
    "SA_BiLSTM_AttnPool":    SA_BiLSTM_AttnPool
}
LOSS_FACTORY = {
    "MSE":    nn.MSELoss,
    "MAE":    nn.L1Loss,
    "Huber":  nn.SmoothL1Loss
}


## 3. Search Stage 1 -- Big 3

In [9]:
def objective_stage1(trial):
    # ── 1) Sample hyper‐params ────────────────────────────────────────
    lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)
    hidden_size = trial.suggest_categorical("hidden_size", [32, 64, 128, 256])
    batch_size  = trial.suggest_categorical("batch_size", [64, 128, 256])

    # ── Fixed settings ────────────────────────────────────────────────
    seq_len, horizon, num_layers = 48, 1, 2
    model_name, scaler_used, loss_used = "SA_BiLSTM", "MaxAbs", "Huber"
    beta, max_epochs, patience = 0.01, 20, 10  # shorter for speed

    # report start
    print(f"\n→ Trial {trial.number}: lr={lr:.2e}, hidden={hidden_size}, bs={batch_size}")

    # ── 2) Data prep ──────────────────────────────────────────────────
    transformer = ColumnTransformer(
        [("scale", TRANSFORMER_FACTORY[scaler_used](), other_feats)],
        remainder="passthrough", verbose_feature_names_out=False
    )
    X_tr = transformer.fit_transform(X_train)
    X_va = transformer.transform(X_val)

    scaler_y = TRANSFORMER_FACTORY[scaler_used]()\
                   .fit(y_train.values.reshape(-1,1))
    y_tr = scaler_y.transform(y_train.values.reshape(-1,1)).ravel()
    y_va = scaler_y.transform(y_val.values.reshape(-1,1)).ravel()

    tr_lo = DataLoader(
        LSTMDataset(X_tr, y_tr, seq_len, horizon),
        batch_size=batch_size, shuffle=True,  pin_memory=True
    )
    va_lo = DataLoader(
        LSTMDataset(X_va, y_va, seq_len, horizon),
        batch_size=batch_size, shuffle=False, pin_memory=True
    )

    # ── 3) Model / Optimizer / Loss / Scheduler ───────────────────────
    model     = MODEL_FACTORY[model_name](
                    num_feats=X_tr.shape[1],
                    seq_len=seq_len,
                    hidden_size=hidden_size,
                    num_layers=num_layers
                ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = (LOSS_FACTORY[loss_used](beta=beta)
                 if loss_used=="Huber"
                 else LOSS_FACTORY[loss_used]())
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #                 optimizer, mode="min", factor=0.5,
    #                 patience=5, min_lr=1e-7
    #             )

    best_val = float("inf")
    epochs_no_improve = 0

    # ── 4) Training w/ pruning & prints ───────────────────────────────
    for epoch in range(1, max_epochs+1):
        # — train —
        model.train()
        tr_loss = 0.0
        for xb, yb in tr_lo:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            loss = criterion(model(xb).squeeze(-1), yb)
            loss.backward()
            optimizer.step()
            tr_loss += loss.item()
        tr_loss /= len(tr_lo)

        # — validate —
        model.eval()
        va_loss = 0.0
        with torch.no_grad():
            for xb, yb in va_lo:
                xb, yb = xb.to(device), yb.to(device)
                va_loss += criterion(model(xb).squeeze(-1), yb).item()
        va_loss /= len(va_lo)

        # — scheduler & LR print —
        # scheduler.step(va_loss)
        # current_lr = scheduler.get_last_lr()[0]
        print(
            f"[Trial {trial.number}] "
            f"Epoch {epoch:02d}/{max_epochs}  "
            # f"lr={current_lr:.2e}  "
            f"train={tr_loss:.4f}  val={va_loss:.4f}"
        )

        # — report & prune —
        trial.report(va_loss, epoch)
        if trial.should_prune():
            print(f"→ Trial {trial.number} pruned at epoch {epoch}")
            raise optuna.TrialPruned()

        # — manual early stopping —
        if va_loss < best_val:
            best_val = va_loss
            epochs_no_improve = 0
            best_weights = copy.deepcopy(model.state_dict())
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"→ Trial {trial.number} early-stopped at epoch {epoch}")
                break

    # ── 5) Load best & compute final metrics on validation set ─────────
    model.load_state_dict(best_weights)
    model.eval()
    scaled_preds, scaled_trues = [], []
    with torch.no_grad():
        for xb, yb in va_lo:
            xb = xb.to(device)
            p  = model(xb).squeeze(-1).cpu().numpy()
            scaled_preds.append(p)
            scaled_trues.append(yb.numpy())
    scaled_preds = np.concatenate(scaled_preds)
    scaled_trues = np.concatenate(scaled_trues)

    # invert scaling
    preds = scaler_y.inverse_transform(scaled_preds.reshape(-1,1)).flatten()
    trues = scaler_y.inverse_transform(scaled_trues.reshape(-1,1)).flatten()
    err   = trues - preds

    mae   = mean_absolute_error(trues, preds)
    rmse  = np.sqrt(mean_squared_error(trues, preds))
    smape = np.mean(2*np.abs(err)/(np.abs(trues)+np.abs(preds)+1e-8))*100

    print(
        f"→ Trial {trial.number} finished: "
        f"best_val={best_val:.4f}  "
        f"MAE={mae:.4f}  RMSE={rmse:.4f}  sMAPE={smape:.2f}%"
    )

    # store metrics
    trial.set_user_attr("val_mae", mae)
    trial.set_user_attr("val_rmse", rmse)
    trial.set_user_attr("val_smape", smape)

    return best_val


# ── Run Stage 1 ───────────────────────────────────────────────────────
study1 = optuna.create_study(
    direction="minimize",
    sampler=TPESampler(),
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=5)
)
study1.optimize(objective_stage1, n_trials=30)
print("✔ Stage 1 done:", study1.best_params, "val_loss=", study1.best_value)


[I 2025-07-29 15:18:17,995] A new study created in memory with name: no-name-47312671-c8ac-4f4b-b4a0-e1725b68b9e1
  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)



→ Trial 0: lr=1.44e-04, hidden=128, bs=128
[Trial 0] Epoch 01/20  train=0.0127  val=0.0033
[Trial 0] Epoch 02/20  train=0.0086  val=0.0034
[Trial 0] Epoch 03/20  train=0.0073  val=0.0025
[Trial 0] Epoch 04/20  train=0.0068  val=0.0027
[Trial 0] Epoch 05/20  train=0.0063  val=0.0024
[Trial 0] Epoch 06/20  train=0.0061  val=0.0023
[Trial 0] Epoch 07/20  train=0.0059  val=0.0023
[Trial 0] Epoch 08/20  train=0.0059  val=0.0020
[Trial 0] Epoch 09/20  train=0.0060  val=0.0020
[Trial 0] Epoch 10/20  train=0.0058  val=0.0024
[Trial 0] Epoch 11/20  train=0.0059  val=0.0023
[Trial 0] Epoch 12/20  train=0.0057  val=0.0022
[Trial 0] Epoch 13/20  train=0.0057  val=0.0019
[Trial 0] Epoch 14/20  train=0.0057  val=0.0023
[Trial 0] Epoch 15/20  train=0.0058  val=0.0019
[Trial 0] Epoch 16/20  train=0.0057  val=0.0022
[Trial 0] Epoch 17/20  train=0.0057  val=0.0023
[Trial 0] Epoch 18/20  train=0.0057  val=0.0018
[Trial 0] Epoch 19/20  train=0.0056  val=0.0018


[I 2025-07-29 15:20:32,848] Trial 0 finished with value: 0.0018062540279377413 and parameters: {'lr': 0.00014391731750860453, 'hidden_size': 128, 'batch_size': 128}. Best is trial 0 with value: 0.0018062540279377413.


[Trial 0] Epoch 20/20  train=0.0057  val=0.0018
→ Trial 0 finished: best_val=0.0018  MAE=19.3820  RMSE=24.9520  sMAPE=32.10%

→ Trial 1: lr=1.04e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 1] Epoch 01/20  train=0.0124  val=0.0030
[Trial 1] Epoch 02/20  train=0.0096  val=0.0044
[Trial 1] Epoch 03/20  train=0.0082  val=0.0040
[Trial 1] Epoch 04/20  train=0.0073  val=0.0044
[Trial 1] Epoch 05/20  train=0.0069  val=0.0033
[Trial 1] Epoch 06/20  train=0.0066  val=0.0047
[Trial 1] Epoch 07/20  train=0.0065  val=0.0027
[Trial 1] Epoch 08/20  train=0.0064  val=0.0025
[Trial 1] Epoch 09/20  train=0.0063  val=0.0040
[Trial 1] Epoch 10/20  train=0.0062  val=0.0024
[Trial 1] Epoch 11/20  train=0.0061  val=0.0023
[Trial 1] Epoch 12/20  train=0.0061  val=0.0022
[Trial 1] Epoch 13/20  train=0.0061  val=0.0022
[Trial 1] Epoch 14/20  train=0.0060  val=0.0021
[Trial 1] Epoch 15/20  train=0.0059  val=0.0022
[Trial 1] Epoch 16/20  train=0.0059  val=0.0020
[Trial 1] Epoch 17/20  train=0.0058  val=0.0024
[Trial 1] Epoch 18/20  train=0.0058  val=0.0021
[Trial 1] Epoch 19/20  train=0.0058  val=0.0021


[I 2025-07-29 15:21:21,464] Trial 1 finished with value: 0.001961229662613376 and parameters: {'lr': 0.00010417357283862782, 'hidden_size': 64, 'batch_size': 128}. Best is trial 0 with value: 0.0018062540279377413.


[Trial 1] Epoch 20/20  train=0.0057  val=0.0020
→ Trial 1 finished: best_val=0.0020  MAE=20.4642  RMSE=26.0129  sMAPE=34.21%

→ Trial 2: lr=2.20e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 2] Epoch 01/20  train=0.0122  val=0.0036
[Trial 2] Epoch 02/20  train=0.0083  val=0.0030
[Trial 2] Epoch 03/20  train=0.0069  val=0.0039
[Trial 2] Epoch 04/20  train=0.0064  val=0.0024
[Trial 2] Epoch 05/20  train=0.0061  val=0.0023
[Trial 2] Epoch 06/20  train=0.0059  val=0.0020
[Trial 2] Epoch 07/20  train=0.0059  val=0.0019
[Trial 2] Epoch 08/20  train=0.0058  val=0.0019
[Trial 2] Epoch 09/20  train=0.0058  val=0.0022
[Trial 2] Epoch 10/20  train=0.0057  val=0.0020
[Trial 2] Epoch 11/20  train=0.0057  val=0.0020
[Trial 2] Epoch 12/20  train=0.0057  val=0.0019
[Trial 2] Epoch 13/20  train=0.0057  val=0.0019
[Trial 2] Epoch 14/20  train=0.0057  val=0.0021
[Trial 2] Epoch 15/20  train=0.0057  val=0.0018
[Trial 2] Epoch 16/20  train=0.0057  val=0.0019
[Trial 2] Epoch 17/20  train=0.0057  val=0.0020
[Trial 2] Epoch 18/20  train=0.0056  val=0.0020
[Trial 2] Epoch 19/20  train=0.0056  val=0.0018


[I 2025-07-29 15:22:10,406] Trial 2 finished with value: 0.0017913100157823899 and parameters: {'lr': 0.00022008850294786365, 'hidden_size': 64, 'batch_size': 128}. Best is trial 2 with value: 0.0017913100157823899.


[Trial 2] Epoch 20/20  train=0.0056  val=0.0018
→ Trial 2 finished: best_val=0.0018  MAE=19.3175  RMSE=24.8319  sMAPE=32.17%

→ Trial 3: lr=5.34e-05, hidden=128, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 3] Epoch 01/20  train=0.0128  val=0.0045
[Trial 3] Epoch 02/20  train=0.0112  val=0.0038
[Trial 3] Epoch 03/20  train=0.0093  val=0.0038
[Trial 3] Epoch 04/20  train=0.0083  val=0.0050
[Trial 3] Epoch 05/20  train=0.0075  val=0.0043
[Trial 3] Epoch 06/20  train=0.0070  val=0.0038
[Trial 3] Epoch 07/20  train=0.0068  val=0.0035
[Trial 3] Epoch 08/20  train=0.0066  val=0.0033
[Trial 3] Epoch 09/20  train=0.0064  val=0.0035
[Trial 3] Epoch 10/20  train=0.0063  val=0.0039
[Trial 3] Epoch 11/20  train=0.0063  val=0.0024
[Trial 3] Epoch 12/20  train=0.0062  val=0.0028
[Trial 3] Epoch 13/20  train=0.0061  val=0.0023
[Trial 3] Epoch 14/20  train=0.0060  val=0.0023
[Trial 3] Epoch 15/20  train=0.0060  val=0.0022
[Trial 3] Epoch 16/20  train=0.0059  val=0.0024
[Trial 3] Epoch 17/20  train=0.0059  val=0.0028
[Trial 3] Epoch 18/20  train=0.0059  val=0.0021
[Trial 3] Epoch 19/20  train=0.0058  val=0.0028


[I 2025-07-29 15:24:20,943] Trial 3 finished with value: 0.002020339547332538 and parameters: {'lr': 5.343266341574893e-05, 'hidden_size': 128, 'batch_size': 128}. Best is trial 2 with value: 0.0017913100157823899.


[Trial 3] Epoch 20/20  train=0.0058  val=0.0020
→ Trial 3 finished: best_val=0.0020  MAE=20.7261  RMSE=26.4287  sMAPE=34.52%

→ Trial 4: lr=6.41e-05, hidden=256, bs=64


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 4] Epoch 01/20  train=0.0113  val=0.0040
[Trial 4] Epoch 02/20  train=0.0083  val=0.0043
[Trial 4] Epoch 03/20  train=0.0071  val=0.0038
[Trial 4] Epoch 04/20  train=0.0066  val=0.0034
[Trial 4] Epoch 05/20  train=0.0064  val=0.0034
[Trial 4] Epoch 06/20  train=0.0061  val=0.0021
[Trial 4] Epoch 07/20  train=0.0060  val=0.0021
[Trial 4] Epoch 08/20  train=0.0059  val=0.0021
[Trial 4] Epoch 09/20  train=0.0058  val=0.0019
[Trial 4] Epoch 10/20  train=0.0058  val=0.0022
[Trial 4] Epoch 11/20  train=0.0058  val=0.0018
[Trial 4] Epoch 12/20  train=0.0057  val=0.0022
[Trial 4] Epoch 13/20  train=0.0057  val=0.0019
[Trial 4] Epoch 14/20  train=0.0057  val=0.0019
[Trial 4] Epoch 15/20  train=0.0057  val=0.0022
[Trial 4] Epoch 16/20  train=0.0057  val=0.0018
[Trial 4] Epoch 17/20  train=0.0056  val=0.0021
[Trial 4] Epoch 18/20  train=0.0056  val=0.0021
[Trial 4] Epoch 19/20  train=0.0056  val=0.0019
[Trial 4] Epoch 20/20  train=0.0056  val=0.0019


[I 2025-07-29 15:30:27,075] Trial 4 finished with value: 0.0018214798094603516 and parameters: {'lr': 6.408584457072414e-05, 'hidden_size': 256, 'batch_size': 64}. Best is trial 2 with value: 0.0017913100157823899.


→ Trial 4 finished: best_val=0.0018  MAE=19.5465  RMSE=25.0125  sMAPE=32.80%

→ Trial 5: lr=7.31e-05, hidden=64, bs=256


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 5] Epoch 01/20  train=0.0135  val=0.0048
[Trial 5] Epoch 02/20  train=0.0125  val=0.0044
[Trial 5] Epoch 03/20  train=0.0117  val=0.0033
[Trial 5] Epoch 04/20  train=0.0102  val=0.0038


[I 2025-07-29 15:30:37,148] Trial 5 pruned. 


[Trial 5] Epoch 05/20  train=0.0093  val=0.0040
→ Trial 5 pruned at epoch 5

→ Trial 6: lr=1.45e-04, hidden=32, bs=256


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 6] Epoch 01/20  train=0.0129  val=0.0045
[Trial 6] Epoch 02/20  train=0.0119  val=0.0034
[Trial 6] Epoch 03/20  train=0.0099  val=0.0038
[Trial 6] Epoch 04/20  train=0.0088  val=0.0045


[I 2025-07-29 15:30:44,477] Trial 6 pruned. 


[Trial 6] Epoch 05/20  train=0.0078  val=0.0033
→ Trial 6 pruned at epoch 5

→ Trial 7: lr=1.50e-04, hidden=128, bs=256


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 7] Epoch 01/20  train=0.0128  val=0.0051
[Trial 7] Epoch 02/20  train=0.0105  val=0.0046
[Trial 7] Epoch 03/20  train=0.0080  val=0.0028
[Trial 7] Epoch 04/20  train=0.0071  val=0.0029
[Trial 7] Epoch 05/20  train=0.0068  val=0.0028


[I 2025-07-29 15:31:21,301] Trial 7 pruned. 


[Trial 7] Epoch 06/20  train=0.0066  val=0.0031
→ Trial 7 pruned at epoch 6

→ Trial 8: lr=1.88e-05, hidden=128, bs=256


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 8] Epoch 01/20  train=0.0129  val=0.0046
[Trial 8] Epoch 02/20  train=0.0128  val=0.0047
[Trial 8] Epoch 03/20  train=0.0128  val=0.0048
[Trial 8] Epoch 04/20  train=0.0126  val=0.0046


[I 2025-07-29 15:31:51,778] Trial 8 pruned. 


[Trial 8] Epoch 05/20  train=0.0123  val=0.0046
→ Trial 8 pruned at epoch 5

→ Trial 9: lr=1.97e-04, hidden=32, bs=64


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 9] Epoch 01/20  train=0.0117  val=0.0036
[Trial 9] Epoch 02/20  train=0.0078  val=0.0027
[Trial 9] Epoch 03/20  train=0.0065  val=0.0025
[Trial 9] Epoch 04/20  train=0.0061  val=0.0023
[Trial 9] Epoch 05/20  train=0.0060  val=0.0024
[Trial 9] Epoch 06/20  train=0.0058  val=0.0025
[Trial 9] Epoch 07/20  train=0.0058  val=0.0021


[I 2025-07-29 15:32:26,216] Trial 9 pruned. 


[Trial 9] Epoch 08/20  train=0.0057  val=0.0024
→ Trial 9 pruned at epoch 8

→ Trial 10: lr=9.82e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 10] Epoch 01/20  train=0.0098  val=0.0028
[Trial 10] Epoch 02/20  train=0.0062  val=0.0020
[Trial 10] Epoch 03/20  train=0.0058  val=0.0020
[Trial 10] Epoch 04/20  train=0.0058  val=0.0028
[Trial 10] Epoch 05/20  train=0.0057  val=0.0028
[Trial 10] Epoch 06/20  train=0.0057  val=0.0026
[Trial 10] Epoch 07/20  train=0.0056  val=0.0019
[Trial 10] Epoch 08/20  train=0.0056  val=0.0019
[Trial 10] Epoch 09/20  train=0.0056  val=0.0019
[Trial 10] Epoch 10/20  train=0.0056  val=0.0019
[Trial 10] Epoch 11/20  train=0.0055  val=0.0019
[Trial 10] Epoch 12/20  train=0.0054  val=0.0018
[Trial 10] Epoch 13/20  train=0.0054  val=0.0018
[Trial 10] Epoch 14/20  train=0.0053  val=0.0020
[Trial 10] Epoch 15/20  train=0.0053  val=0.0022
[Trial 10] Epoch 16/20  train=0.0052  val=0.0019
[Trial 10] Epoch 17/20  train=0.0052  val=0.0018
[Trial 10] Epoch 18/20  train=0.0051  val=0.0021
[Trial 10] Epoch 19/20  train=0.0051  val=0.0020


[I 2025-07-29 15:33:15,326] Trial 10 finished with value: 0.0017780077886646209 and parameters: {'lr': 0.0009815469737622783, 'hidden_size': 64, 'batch_size': 128}. Best is trial 10 with value: 0.0017780077886646209.


[Trial 10] Epoch 20/20  train=0.0051  val=0.0022
→ Trial 10 finished: best_val=0.0018  MAE=19.0208  RMSE=24.8311  sMAPE=32.01%

→ Trial 11: lr=9.53e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 11] Epoch 01/20  train=0.0091  val=0.0024
[Trial 11] Epoch 02/20  train=0.0061  val=0.0022
[Trial 11] Epoch 03/20  train=0.0058  val=0.0027
[Trial 11] Epoch 04/20  train=0.0057  val=0.0019
[Trial 11] Epoch 05/20  train=0.0057  val=0.0027
[Trial 11] Epoch 06/20  train=0.0057  val=0.0018
[Trial 11] Epoch 07/20  train=0.0057  val=0.0026
[Trial 11] Epoch 08/20  train=0.0056  val=0.0020
[Trial 11] Epoch 09/20  train=0.0056  val=0.0018
[Trial 11] Epoch 10/20  train=0.0055  val=0.0019
[Trial 11] Epoch 11/20  train=0.0055  val=0.0021
[Trial 11] Epoch 12/20  train=0.0054  val=0.0021
[Trial 11] Epoch 13/20  train=0.0054  val=0.0019
[Trial 11] Epoch 14/20  train=0.0053  val=0.0018
[Trial 11] Epoch 15/20  train=0.0053  val=0.0019
[Trial 11] Epoch 16/20  train=0.0052  val=0.0018
[Trial 11] Epoch 17/20  train=0.0052  val=0.0025
[Trial 11] Epoch 18/20  train=0.0052  val=0.0021
[Trial 11] Epoch 19/20  train=0.0051  val=0.0018


[I 2025-07-29 15:34:04,228] Trial 11 finished with value: 0.0017882894633499825 and parameters: {'lr': 0.0009534358943189696, 'hidden_size': 64, 'batch_size': 128}. Best is trial 10 with value: 0.0017780077886646209.


[Trial 11] Epoch 20/20  train=0.0051  val=0.0018
→ Trial 11 finished: best_val=0.0018  MAE=19.0962  RMSE=24.8846  sMAPE=31.83%

→ Trial 12: lr=9.64e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 12] Epoch 01/20  train=0.0088  val=0.0024
[Trial 12] Epoch 02/20  train=0.0062  val=0.0019
[Trial 12] Epoch 03/20  train=0.0058  val=0.0020
[Trial 12] Epoch 04/20  train=0.0057  val=0.0022
[Trial 12] Epoch 05/20  train=0.0057  val=0.0021
[Trial 12] Epoch 06/20  train=0.0056  val=0.0025
[Trial 12] Epoch 07/20  train=0.0056  val=0.0021
[Trial 12] Epoch 08/20  train=0.0056  val=0.0018
[Trial 12] Epoch 09/20  train=0.0056  val=0.0019
[Trial 12] Epoch 10/20  train=0.0055  val=0.0018
[Trial 12] Epoch 11/20  train=0.0055  val=0.0022
[Trial 12] Epoch 12/20  train=0.0054  val=0.0020
[Trial 12] Epoch 13/20  train=0.0054  val=0.0018
[Trial 12] Epoch 14/20  train=0.0053  val=0.0019
[Trial 12] Epoch 15/20  train=0.0053  val=0.0018
[Trial 12] Epoch 16/20  train=0.0053  val=0.0018
[Trial 12] Epoch 17/20  train=0.0052  val=0.0017
[Trial 12] Epoch 18/20  train=0.0052  val=0.0018
[Trial 12] Epoch 19/20  train=0.0052  val=0.0018


[I 2025-07-29 15:34:54,223] Trial 12 finished with value: 0.0017215024892483716 and parameters: {'lr': 0.0009637770438198408, 'hidden_size': 64, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 12] Epoch 20/20  train=0.0051  val=0.0019
→ Trial 12 finished: best_val=0.0017  MAE=18.7041  RMSE=24.4160  sMAPE=31.62%

→ Trial 13: lr=8.78e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 13] Epoch 01/20  train=0.0094  val=0.0025
[Trial 13] Epoch 02/20  train=0.0062  val=0.0038
[Trial 13] Epoch 03/20  train=0.0058  val=0.0020
[Trial 13] Epoch 04/20  train=0.0058  val=0.0019
[Trial 13] Epoch 05/20  train=0.0057  val=0.0024
[Trial 13] Epoch 06/20  train=0.0057  val=0.0020
[Trial 13] Epoch 07/20  train=0.0057  val=0.0022
[Trial 13] Epoch 08/20  train=0.0056  val=0.0020
[Trial 13] Epoch 09/20  train=0.0056  val=0.0018
[Trial 13] Epoch 10/20  train=0.0055  val=0.0033
[Trial 13] Epoch 11/20  train=0.0055  val=0.0018
[Trial 13] Epoch 12/20  train=0.0055  val=0.0022
[Trial 13] Epoch 13/20  train=0.0054  val=0.0018
[Trial 13] Epoch 14/20  train=0.0054  val=0.0021
[Trial 13] Epoch 15/20  train=0.0054  val=0.0018
[Trial 13] Epoch 16/20  train=0.0053  val=0.0020
[Trial 13] Epoch 17/20  train=0.0053  val=0.0018
[Trial 13] Epoch 18/20  train=0.0053  val=0.0018
[Trial 13] Epoch 19/20  train=0.0052  val=0.0019


[I 2025-07-29 15:35:43,345] Trial 13 finished with value: 0.0017804408283985179 and parameters: {'lr': 0.0008777812942503073, 'hidden_size': 64, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 13] Epoch 20/20  train=0.0053  val=0.0018
→ Trial 13 finished: best_val=0.0018  MAE=19.2657  RMSE=24.7913  sMAPE=32.70%

→ Trial 14: lr=4.62e-04, hidden=256, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 14] Epoch 01/20  train=0.0098  val=0.0046
[Trial 14] Epoch 02/20  train=0.0063  val=0.0026
[Trial 14] Epoch 03/20  train=0.0059  val=0.0023
[Trial 14] Epoch 04/20  train=0.0058  val=0.0021
[Trial 14] Epoch 05/20  train=0.0058  val=0.0019
[Trial 14] Epoch 06/20  train=0.0057  val=0.0019
[Trial 14] Epoch 07/20  train=0.0058  val=0.0020
[Trial 14] Epoch 08/20  train=0.0057  val=0.0018
[Trial 14] Epoch 09/20  train=0.0057  val=0.0020
[Trial 14] Epoch 10/20  train=0.0057  val=0.0019
[Trial 14] Epoch 11/20  train=0.0056  val=0.0018
[Trial 14] Epoch 12/20  train=0.0056  val=0.0019
[Trial 14] Epoch 13/20  train=0.0056  val=0.0023
[Trial 14] Epoch 14/20  train=0.0056  val=0.0019
[Trial 14] Epoch 15/20  train=0.0055  val=0.0018
[Trial 14] Epoch 16/20  train=0.0054  val=0.0025
[Trial 14] Epoch 17/20  train=0.0055  val=0.0022
[Trial 14] Epoch 18/20  train=0.0054  val=0.0021
→ Trial 14 early-stopped at epoch 18


[I 2025-07-29 15:40:22,092] Trial 14 finished with value: 0.0017962256476849966 and parameters: {'lr': 0.00046183097408688753, 'hidden_size': 256, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


→ Trial 14 finished: best_val=0.0018  MAE=19.3068  RMSE=24.8773  sMAPE=32.34%

→ Trial 15: lr=4.37e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 15] Epoch 01/20  train=0.0104  val=0.0036
[Trial 15] Epoch 02/20  train=0.0070  val=0.0030
[Trial 15] Epoch 03/20  train=0.0061  val=0.0026
[Trial 15] Epoch 04/20  train=0.0059  val=0.0022
[Trial 15] Epoch 05/20  train=0.0059  val=0.0019
[Trial 15] Epoch 06/20  train=0.0058  val=0.0018
[Trial 15] Epoch 07/20  train=0.0057  val=0.0020
[Trial 15] Epoch 08/20  train=0.0058  val=0.0018
[Trial 15] Epoch 09/20  train=0.0057  val=0.0024
[Trial 15] Epoch 10/20  train=0.0057  val=0.0020
[Trial 15] Epoch 11/20  train=0.0057  val=0.0022
[Trial 15] Epoch 12/20  train=0.0057  val=0.0019
[Trial 15] Epoch 13/20  train=0.0056  val=0.0021
[Trial 15] Epoch 14/20  train=0.0056  val=0.0019
[Trial 15] Epoch 15/20  train=0.0056  val=0.0019


[I 2025-07-29 15:41:01,181] Trial 15 finished with value: 0.0018416283216894321 and parameters: {'lr': 0.00043744333642124284, 'hidden_size': 64, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 15] Epoch 16/20  train=0.0056  val=0.0019
→ Trial 15 early-stopped at epoch 16
→ Trial 15 finished: best_val=0.0018  MAE=19.6357  RMSE=25.1390  sMAPE=33.06%

→ Trial 16: lr=4.16e-04, hidden=64, bs=64


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 16] Epoch 01/20  train=0.0091  val=0.0028
[Trial 16] Epoch 02/20  train=0.0062  val=0.0019
[Trial 16] Epoch 03/20  train=0.0059  val=0.0026
[Trial 16] Epoch 04/20  train=0.0058  val=0.0020
[Trial 16] Epoch 05/20  train=0.0058  val=0.0019
[Trial 16] Epoch 06/20  train=0.0057  val=0.0023
[Trial 16] Epoch 07/20  train=0.0057  val=0.0018
[Trial 16] Epoch 08/20  train=0.0057  val=0.0022
[Trial 16] Epoch 09/20  train=0.0056  val=0.0018
[Trial 16] Epoch 10/20  train=0.0056  val=0.0019
[Trial 16] Epoch 11/20  train=0.0056  val=0.0020
[Trial 16] Epoch 12/20  train=0.0056  val=0.0021
[Trial 16] Epoch 13/20  train=0.0056  val=0.0018
[Trial 16] Epoch 14/20  train=0.0055  val=0.0019
[Trial 16] Epoch 15/20  train=0.0055  val=0.0019
[Trial 16] Epoch 16/20  train=0.0055  val=0.0019
[Trial 16] Epoch 17/20  train=0.0054  val=0.0020
[Trial 16] Epoch 18/20  train=0.0054  val=0.0020
[Trial 16] Epoch 19/20  train=0.0053  val=0.0018


[I 2025-07-29 15:42:26,991] Trial 16 finished with value: 0.0018039141217266897 and parameters: {'lr': 0.00041559169968939614, 'hidden_size': 64, 'batch_size': 64}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 16] Epoch 20/20  train=0.0053  val=0.0018
→ Trial 16 finished: best_val=0.0018  MAE=19.4524  RMSE=24.8986  sMAPE=32.86%

→ Trial 17: lr=6.35e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 17] Epoch 01/20  train=0.0098  val=0.0057
[Trial 17] Epoch 02/20  train=0.0064  val=0.0022
[Trial 17] Epoch 03/20  train=0.0059  val=0.0032
[Trial 17] Epoch 04/20  train=0.0058  val=0.0019
[Trial 17] Epoch 05/20  train=0.0057  val=0.0019
[Trial 17] Epoch 06/20  train=0.0058  val=0.0020
[Trial 17] Epoch 07/20  train=0.0056  val=0.0018
[Trial 17] Epoch 08/20  train=0.0056  val=0.0018
[Trial 17] Epoch 09/20  train=0.0056  val=0.0019
[Trial 17] Epoch 10/20  train=0.0056  val=0.0022
[Trial 17] Epoch 11/20  train=0.0055  val=0.0026
[Trial 17] Epoch 12/20  train=0.0055  val=0.0018
[Trial 17] Epoch 13/20  train=0.0055  val=0.0020
[Trial 17] Epoch 14/20  train=0.0054  val=0.0018
[Trial 17] Epoch 15/20  train=0.0054  val=0.0017
[Trial 17] Epoch 16/20  train=0.0053  val=0.0019
[Trial 17] Epoch 17/20  train=0.0052  val=0.0018
[Trial 17] Epoch 18/20  train=0.0053  val=0.0018
[Trial 17] Epoch 19/20  train=0.0052  val=0.0018


[I 2025-07-29 15:43:15,693] Trial 17 finished with value: 0.0017267076916101835 and parameters: {'lr': 0.0006354380760055624, 'hidden_size': 64, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 17] Epoch 20/20  train=0.0052  val=0.0018
→ Trial 17 finished: best_val=0.0017  MAE=18.6755  RMSE=24.3994  sMAPE=31.34%

→ Trial 18: lr=1.07e-05, hidden=32, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 18] Epoch 01/20  train=0.0451  val=0.0042
[Trial 18] Epoch 02/20  train=0.0128  val=0.0049
[Trial 18] Epoch 03/20  train=0.0128  val=0.0049
[Trial 18] Epoch 04/20  train=0.0128  val=0.0049


[I 2025-07-29 15:43:27,674] Trial 18 pruned. 


[Trial 18] Epoch 05/20  train=0.0128  val=0.0047
→ Trial 18 pruned at epoch 5

→ Trial 19: lr=5.85e-04, hidden=256, bs=64


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 19] Epoch 01/20  train=0.0083  val=0.0025
[Trial 19] Epoch 02/20  train=0.0060  val=0.0019
[Trial 19] Epoch 03/20  train=0.0059  val=0.0019
[Trial 19] Epoch 04/20  train=0.0058  val=0.0018
[Trial 19] Epoch 05/20  train=0.0057  val=0.0020
[Trial 19] Epoch 06/20  train=0.0057  val=0.0019
[Trial 19] Epoch 07/20  train=0.0057  val=0.0018
[Trial 19] Epoch 08/20  train=0.0056  val=0.0020
[Trial 19] Epoch 09/20  train=0.0056  val=0.0019
[Trial 19] Epoch 10/20  train=0.0055  val=0.0020
[Trial 19] Epoch 11/20  train=0.0055  val=0.0022
[Trial 19] Epoch 12/20  train=0.0054  val=0.0021
[Trial 19] Epoch 13/20  train=0.0053  val=0.0018
[Trial 19] Epoch 14/20  train=0.0053  val=0.0021
[Trial 19] Epoch 15/20  train=0.0053  val=0.0018
[Trial 19] Epoch 16/20  train=0.0052  val=0.0019
[Trial 19] Epoch 17/20  train=0.0052  val=0.0019
[Trial 19] Epoch 18/20  train=0.0052  val=0.0018
[Trial 19] Epoch 19/20  train=0.0051  val=0.0019
[Trial 19] Epoch 20/20  train=0.0052  val=0.0020


[I 2025-07-29 15:49:34,026] Trial 19 finished with value: 0.0017892735182692338 and parameters: {'lr': 0.0005854030126951021, 'hidden_size': 256, 'batch_size': 64}. Best is trial 12 with value: 0.0017215024892483716.


→ Trial 19 finished: best_val=0.0018  MAE=19.3059  RMSE=24.8716  sMAPE=32.43%

→ Trial 20: lr=2.87e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 20] Epoch 01/20  train=0.0120  val=0.0031
[Trial 20] Epoch 02/20  train=0.0075  val=0.0028
[Trial 20] Epoch 03/20  train=0.0066  val=0.0028
[Trial 20] Epoch 04/20  train=0.0063  val=0.0024
[Trial 20] Epoch 05/20  train=0.0060  val=0.0020
[Trial 20] Epoch 06/20  train=0.0058  val=0.0019
[Trial 20] Epoch 07/20  train=0.0057  val=0.0022
[Trial 20] Epoch 08/20  train=0.0057  val=0.0020


[I 2025-07-29 15:49:55,385] Trial 20 pruned. 


[Trial 20] Epoch 09/20  train=0.0057  val=0.0021
→ Trial 20 pruned at epoch 9

→ Trial 21: lr=7.21e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 21] Epoch 01/20  train=0.0096  val=0.0030
[Trial 21] Epoch 02/20  train=0.0064  val=0.0029
[Trial 21] Epoch 03/20  train=0.0060  val=0.0022
[Trial 21] Epoch 04/20  train=0.0058  val=0.0025
[Trial 21] Epoch 05/20  train=0.0058  val=0.0020
[Trial 21] Epoch 06/20  train=0.0057  val=0.0018
[Trial 21] Epoch 07/20  train=0.0057  val=0.0019
[Trial 21] Epoch 08/20  train=0.0056  val=0.0021
[Trial 21] Epoch 09/20  train=0.0056  val=0.0018
[Trial 21] Epoch 10/20  train=0.0056  val=0.0022
[Trial 21] Epoch 11/20  train=0.0056  val=0.0021
[Trial 21] Epoch 12/20  train=0.0056  val=0.0019
[Trial 21] Epoch 13/20  train=0.0056  val=0.0020
[Trial 21] Epoch 14/20  train=0.0055  val=0.0018
[Trial 21] Epoch 15/20  train=0.0055  val=0.0019
[Trial 21] Epoch 16/20  train=0.0054  val=0.0018
[Trial 21] Epoch 17/20  train=0.0054  val=0.0020
[Trial 21] Epoch 18/20  train=0.0053  val=0.0018


[I 2025-07-29 15:50:41,469] Trial 21 finished with value: 0.0017771274180394475 and parameters: {'lr': 0.0007207907536327888, 'hidden_size': 64, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 21] Epoch 19/20  train=0.0053  val=0.0018
→ Trial 21 early-stopped at epoch 19
→ Trial 21 finished: best_val=0.0018  MAE=19.3261  RMSE=24.7465  sMAPE=32.04%

→ Trial 22: lr=6.46e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 22] Epoch 01/20  train=0.0102  val=0.0055
[Trial 22] Epoch 02/20  train=0.0065  val=0.0038
[Trial 22] Epoch 03/20  train=0.0060  val=0.0019
[Trial 22] Epoch 04/20  train=0.0058  val=0.0021
[Trial 22] Epoch 05/20  train=0.0057  val=0.0023
[Trial 22] Epoch 06/20  train=0.0057  val=0.0020
[Trial 22] Epoch 07/20  train=0.0057  val=0.0018
[Trial 22] Epoch 08/20  train=0.0056  val=0.0018
[Trial 22] Epoch 09/20  train=0.0056  val=0.0019
[Trial 22] Epoch 10/20  train=0.0056  val=0.0019
[Trial 22] Epoch 11/20  train=0.0056  val=0.0021
[Trial 22] Epoch 12/20  train=0.0056  val=0.0020
[Trial 22] Epoch 13/20  train=0.0055  val=0.0018
[Trial 22] Epoch 14/20  train=0.0054  val=0.0020
[Trial 22] Epoch 15/20  train=0.0054  val=0.0018
[Trial 22] Epoch 16/20  train=0.0053  val=0.0018
[Trial 22] Epoch 17/20  train=0.0053  val=0.0024
[Trial 22] Epoch 18/20  train=0.0052  val=0.0018
[Trial 22] Epoch 19/20  train=0.0052  val=0.0019


[I 2025-07-29 15:51:29,874] Trial 22 finished with value: 0.0017786002819142911 and parameters: {'lr': 0.0006462022837485772, 'hidden_size': 64, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 22] Epoch 20/20  train=0.0052  val=0.0026
→ Trial 22 finished: best_val=0.0018  MAE=19.0924  RMSE=24.8250  sMAPE=31.57%

→ Trial 23: lr=3.36e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 23] Epoch 01/20  train=0.0114  val=0.0037
[Trial 23] Epoch 02/20  train=0.0074  val=0.0036
[Trial 23] Epoch 03/20  train=0.0064  val=0.0026
[Trial 23] Epoch 04/20  train=0.0060  val=0.0020
[Trial 23] Epoch 05/20  train=0.0059  val=0.0030


[I 2025-07-29 15:51:44,352] Trial 23 pruned. 


[Trial 23] Epoch 06/20  train=0.0059  val=0.0022
→ Trial 23 pruned at epoch 6

→ Trial 24: lr=6.44e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 24] Epoch 01/20  train=0.0108  val=0.0030
[Trial 24] Epoch 02/20  train=0.0066  val=0.0024
[Trial 24] Epoch 03/20  train=0.0059  val=0.0022
[Trial 24] Epoch 04/20  train=0.0058  val=0.0027
[Trial 24] Epoch 05/20  train=0.0057  val=0.0019
[Trial 24] Epoch 06/20  train=0.0057  val=0.0020
[Trial 24] Epoch 07/20  train=0.0057  val=0.0019
[Trial 24] Epoch 08/20  train=0.0057  val=0.0018
[Trial 24] Epoch 09/20  train=0.0056  val=0.0021
[Trial 24] Epoch 10/20  train=0.0056  val=0.0019
[Trial 24] Epoch 11/20  train=0.0056  val=0.0018
[Trial 24] Epoch 12/20  train=0.0056  val=0.0018
[Trial 24] Epoch 13/20  train=0.0055  val=0.0019
[Trial 24] Epoch 14/20  train=0.0055  val=0.0018
[Trial 24] Epoch 15/20  train=0.0055  val=0.0018
[Trial 24] Epoch 16/20  train=0.0055  val=0.0018
[Trial 24] Epoch 17/20  train=0.0053  val=0.0019
[Trial 24] Epoch 18/20  train=0.0053  val=0.0018
[Trial 24] Epoch 19/20  train=0.0053  val=0.0018


[I 2025-07-29 15:52:32,685] Trial 24 finished with value: 0.0017520580187682872 and parameters: {'lr': 0.0006441682951273782, 'hidden_size': 64, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 24] Epoch 20/20  train=0.0052  val=0.0018
→ Trial 24 finished: best_val=0.0018  MAE=18.8158  RMSE=24.6063  sMAPE=31.42%

→ Trial 25: lr=5.33e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 25] Epoch 01/20  train=0.0105  val=0.0061
[Trial 25] Epoch 02/20  train=0.0067  val=0.0031
[Trial 25] Epoch 03/20  train=0.0060  val=0.0029
[Trial 25] Epoch 04/20  train=0.0058  val=0.0024
[Trial 25] Epoch 05/20  train=0.0058  val=0.0021


[I 2025-07-29 15:52:47,278] Trial 25 pruned. 


[Trial 25] Epoch 06/20  train=0.0057  val=0.0021
→ Trial 25 pruned at epoch 6

→ Trial 26: lr=2.79e-04, hidden=64, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 26] Epoch 01/20  train=0.0127  val=0.0037
[Trial 26] Epoch 02/20  train=0.0076  val=0.0033
[Trial 26] Epoch 03/20  train=0.0067  val=0.0029
[Trial 26] Epoch 04/20  train=0.0064  val=0.0025
[Trial 26] Epoch 05/20  train=0.0061  val=0.0022
[Trial 26] Epoch 06/20  train=0.0058  val=0.0019
[Trial 26] Epoch 07/20  train=0.0057  val=0.0019
[Trial 26] Epoch 08/20  train=0.0057  val=0.0026
[Trial 26] Epoch 09/20  train=0.0057  val=0.0020
[Trial 26] Epoch 10/20  train=0.0056  val=0.0020
[Trial 26] Epoch 11/20  train=0.0057  val=0.0018
[Trial 26] Epoch 12/20  train=0.0056  val=0.0019
[Trial 26] Epoch 13/20  train=0.0056  val=0.0018
[Trial 26] Epoch 14/20  train=0.0056  val=0.0018
[Trial 26] Epoch 15/20  train=0.0056  val=0.0026
[Trial 26] Epoch 16/20  train=0.0056  val=0.0019
[Trial 26] Epoch 17/20  train=0.0056  val=0.0018
[Trial 26] Epoch 18/20  train=0.0056  val=0.0018
[Trial 26] Epoch 19/20  train=0.0056  val=0.0021


[I 2025-07-29 15:53:35,567] Trial 26 finished with value: 0.0018004604271086662 and parameters: {'lr': 0.00027902431337864547, 'hidden_size': 64, 'batch_size': 128}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 26] Epoch 20/20  train=0.0056  val=0.0018
→ Trial 26 finished: best_val=0.0018  MAE=19.3892  RMSE=24.9545  sMAPE=31.84%

→ Trial 27: lr=3.27e-05, hidden=256, bs=128


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 27] Epoch 01/20  train=0.0134  val=0.0049
[Trial 27] Epoch 02/20  train=0.0120  val=0.0039
[Trial 27] Epoch 03/20  train=0.0098  val=0.0038
[Trial 27] Epoch 04/20  train=0.0086  val=0.0033


[I 2025-07-29 15:54:52,267] Trial 27 pruned. 


[Trial 27] Epoch 05/20  train=0.0078  val=0.0036
→ Trial 27 pruned at epoch 5

→ Trial 28: lr=7.59e-04, hidden=32, bs=64


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 28] Epoch 01/20  train=0.0087  val=0.0023
[Trial 28] Epoch 02/20  train=0.0060  val=0.0019
[Trial 28] Epoch 03/20  train=0.0058  val=0.0019
[Trial 28] Epoch 04/20  train=0.0057  val=0.0020
[Trial 28] Epoch 05/20  train=0.0057  val=0.0019
[Trial 28] Epoch 06/20  train=0.0057  val=0.0026
[Trial 28] Epoch 07/20  train=0.0056  val=0.0019
[Trial 28] Epoch 08/20  train=0.0056  val=0.0018
[Trial 28] Epoch 09/20  train=0.0056  val=0.0019
[Trial 28] Epoch 10/20  train=0.0055  val=0.0018
[Trial 28] Epoch 11/20  train=0.0055  val=0.0021
[Trial 28] Epoch 12/20  train=0.0054  val=0.0017
[Trial 28] Epoch 13/20  train=0.0053  val=0.0018
[Trial 28] Epoch 14/20  train=0.0053  val=0.0019
[Trial 28] Epoch 15/20  train=0.0052  val=0.0019
[Trial 28] Epoch 16/20  train=0.0052  val=0.0018
[Trial 28] Epoch 17/20  train=0.0052  val=0.0018
[Trial 28] Epoch 18/20  train=0.0051  val=0.0018
[Trial 28] Epoch 19/20  train=0.0051  val=0.0018


[I 2025-07-29 15:56:16,779] Trial 28 finished with value: 0.0017309052168654846 and parameters: {'lr': 0.000758594447348659, 'hidden_size': 32, 'batch_size': 64}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 28] Epoch 20/20  train=0.0051  val=0.0018
→ Trial 28 finished: best_val=0.0017  MAE=18.8516  RMSE=24.4400  sMAPE=31.40%

→ Trial 29: lr=3.44e-04, hidden=32, bs=64


  lr          = trial.suggest_loguniform("lr", 1e-5, 1e-3)


[Trial 29] Epoch 01/20  train=0.0112  val=0.0041
[Trial 29] Epoch 02/20  train=0.0068  val=0.0025
[Trial 29] Epoch 03/20  train=0.0060  val=0.0023
[Trial 29] Epoch 04/20  train=0.0058  val=0.0019
[Trial 29] Epoch 05/20  train=0.0058  val=0.0022
[Trial 29] Epoch 06/20  train=0.0057  val=0.0019
[Trial 29] Epoch 07/20  train=0.0057  val=0.0021
[Trial 29] Epoch 08/20  train=0.0057  val=0.0018
[Trial 29] Epoch 09/20  train=0.0057  val=0.0018
[Trial 29] Epoch 10/20  train=0.0056  val=0.0019
[Trial 29] Epoch 11/20  train=0.0056  val=0.0019
[Trial 29] Epoch 12/20  train=0.0056  val=0.0021
[Trial 29] Epoch 13/20  train=0.0056  val=0.0022
[Trial 29] Epoch 14/20  train=0.0056  val=0.0019
[Trial 29] Epoch 15/20  train=0.0056  val=0.0019
[Trial 29] Epoch 16/20  train=0.0056  val=0.0018
[Trial 29] Epoch 17/20  train=0.0056  val=0.0019
[Trial 29] Epoch 18/20  train=0.0055  val=0.0020
[Trial 29] Epoch 19/20  train=0.0055  val=0.0019


[I 2025-07-29 15:57:41,658] Trial 29 finished with value: 0.0017878655850401392 and parameters: {'lr': 0.00034437161892647236, 'hidden_size': 32, 'batch_size': 64}. Best is trial 12 with value: 0.0017215024892483716.


[Trial 29] Epoch 20/20  train=0.0055  val=0.0018
→ Trial 29 finished: best_val=0.0018  MAE=19.3614  RMSE=24.7975  sMAPE=32.09%
✔ Stage 1 done: {'lr': 0.0009637770438198408, 'hidden_size': 64, 'batch_size': 128} val_loss= 0.0017215024892483716
