# LSTM Forecasting

In [1]:
# --- Repo path bootstrap so imports like `from price_forecast...` work ---
# A notebook kernel has no `__file__`, so the repo root is resolved explicitly
# from the kernel's working directory: the notebook is expected to run from a
# first-level sub-folder of the repository (its parent is the repo root).
import os, sys

REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
SRC_ROOT = os.path.join(REPO_ROOT, "src")
if SRC_ROOT not in sys.path:
    # Prepend (rather than append) so this entry is searched first.
    sys.path.insert(0, SRC_ROOT)

# Optional, for iterative dev: run `%load_ext autoreload` and `%autoreload 2`
# in this cell to pick up edits under `src/` without restarting the kernel.

data_dir = os.path.join(REPO_ROOT, "data")  # where data is stored

In [2]:
# Sanity check: display the source root that the bootstrap cell put on sys.path.
SRC_ROOT

'c:\\Users\\HarrisDeralas\\Downloads\\panagiotis_files\\price_forecast\\src'

In [3]:
# Sanity check: display the resolved data directory (REPO_ROOT/data).
data_dir

'c:\\Users\\HarrisDeralas\\Downloads\\panagiotis_files\\price_forecast\\data'

## Build the dataset

In [4]:
import pandas as pd
import numpy as np

from price_forecast.config import (
    TimezoneConfig, Naming, DAMConfig, WeatherConfig, WeatherSite
)
from price_forecast.pipeline.dataset_builder import DatasetBuilder
from price_forecast.utils.merge import TimeSeriesMerger


In [5]:
import os

from entsoe import EntsoePandasClient

# The ENTSO-E security token is a credential and must not live in the notebook.
# Export it before starting Jupyter, e.g. `export ENTSOE_API_KEY=<your token>`.
# NOTE: the token that used to be hard-coded in this cell should be treated as
# leaked and rotated.
ENTSOE_API_KEY = os.environ.get("ENTSOE_API_KEY")
if not ENTSOE_API_KEY:
    raise RuntimeError(
        "ENTSOE_API_KEY is not set; export your ENTSO-E API token before running this cell."
    )

client = EntsoePandasClient(api_key=ENTSOE_API_KEY)

In [6]:
# Configuration objects consumed by the dataset builder.
tz_cfg = TimezoneConfig()
naming = Naming(dt_local="datetime_local", dam_price="dam_price_eur_mwh")

# Reuse the data directory resolved in the bootstrap cell instead of a second,
# cwd-relative "../data" literal, so both always point at the same folder even
# if the working directory changes later in the session.
dam_cfg = DAMConfig(base_dir=data_dir)

# Weather sites: name followed by two coordinates
# (presumably latitude, longitude in decimal degrees; confirm against WeatherSite).
sites = [
    WeatherSite("athens", 37.98, 23.72),
    WeatherSite("thessaloniki", 40.64, 22.94),
    WeatherSite("heraklion", 35.34, 25.13),
]
wx_cfg = WeatherConfig()

In [7]:
# Wire every configuration object and the ENTSO-E client into one builder.
builder = DatasetBuilder(
    # naming / timezone conventions
    naming=naming,
    tz_cfg=tz_cfg,
    # day-ahead market source
    dam_cfg=dam_cfg,
    # weather source
    weather_cfg=wx_cfg,
    weather_sites=sites,
    # ENTSO-E source; the prefix is passed through to the builder as-is
    entsoe_prefix="entsoe_",
    entsoe_client=client,
)

In [8]:
# Build the merged dataset for the study window. With `add_prev_day=True` the
# builder's log reports adding the previous-day DAM price feature.
df = builder.create_dataset(
    start_date="2020-11-01",
    end_date="2023-12-31",
    add_prev_day=True,
)

Finished building DAM data.
Finished building ENTSO-E data.
Finished building Weather data.
Finished merging all data on UTC axis.
Converted merged data back to local timezone.
Added previous-day DAM price feature.


## Visualize the DAM series

In [13]:
import plotly.graph_objects as go

# Overlay the DAM price and its previous-day counterpart on a shared time axis.
fig = go.Figure()
for column, label in (
    ("dam_price_eur_mwh", "DAM Price"),
    ("previous_day_dam", "Previous Day DAM Price"),
):
    fig.add_trace(
        go.Scatter(x=df["datetime_local"], y=df[column], name=label, mode="lines")
    )

# Range slider for zooming; horizontal legend anchored above the plot area.
fig.update_layout(
    title="Day-Ahead Market Prices",
    xaxis_title="Date",
    yaxis_title="Price (EUR/MWh)",
    hovermode="x unified",
    xaxis={"rangeslider": {"visible": True}, "type": "date"},
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
)

fig.show()


## Generate dataset for LSTM ingestion

In [18]:
from price_forecast.config import DatasetCfg

# Minimal configuration: only the lookback, test fraction and feature-ordering
# flag are set here; every other DatasetCfg field keeps its default.
cfgd = DatasetCfg(
    n_lookback_days=30,
    test_size=0.2,
    put_main_first=True,
)

In [19]:
from price_forecast.datasets.daily_sequence import DailySequenceDataset

# NOTE: this rebinds `builder`, which until now referred to the DatasetBuilder.
builder = DailySequenceDataset(df, cfgd)
data = builder.build(return_dfs=True)

# Scaled arrays, used for training.
X_train = data["X_train"]
y_train = data["y_train"]

# Unscaled counterparts, kept for metrics, plots and inverse-transform checks.
X_train_raw = data["X_train_raw"]
y_train_raw = data["y_train_raw"]

# Tidy DataFrame views for inspection (raw vs scaled).
X_train_df_raw = data["X_train_df_raw"]
X_train_df_scaled = data["X_train_df"]
y_train_df_raw = data["y_train_df_raw"]
X_test_df_raw = data["X_test_df_raw"]
y_test_df_raw = data["y_test_df_raw"]

  if not np.all(diffs.view("i8") == pd.Timedelta(hours=1).value):


In [17]:
# Explicit configuration: 60-day lookback, with `previous_day_dam` as the main
# series placed first among the features.
cfgd = DatasetCfg(
    datetime_col="datetime_local",
    target_col="dam_price_eur_mwh",
    main_series="previous_day_dam",
    put_main_first=True,
    n_lookback_days=60,
    test_size=0.2,
)

builder = DailySequenceDataset(df, cfgd)
data = builder.build(return_dfs=True)

# Scaled arrays for training and evaluation.
X_train, y_train = data["X_train"], data["y_train"]
X_test, y_test = data["X_test"], data["y_test"]

# Unscaled training arrays for metrics, plots and inverse-transform checks.
X_train_raw, y_train_raw = data["X_train_raw"], data["y_train_raw"]

# Tidy DataFrame views for inspection (raw vs scaled).
X_train_df_raw = data["X_train_df_raw"]
X_train_df_scaled = data["X_train_df"]
y_train_df_raw = data["y_train_df_raw"]
X_test_df_raw = data["X_test_df_raw"]
y_test_df_raw = data["y_test_df_raw"]




Series.view is deprecated and will be removed in a future version. Use ``astype`` as an alternative to change the dtype.



## Train the LSTM(s)

In [18]:
# X_train: (batch, input_steps, n_features)
# y_train: (batch, output_steps, output_dim)
from price_forecast.models.lstm_seq2seq import LSTMSeq2SeqModel

# Baseline seq2seq LSTM built from the training arrays. The seed is fixed so
# that repeated runs of this cell are reproducible and comparable with the
# other (seeded) experiments in this notebook.
model = LSTMSeq2SeqModel.from_training_data(
    X_train, y_train,
    lstm_units=15 * 8,
    dropout=0.2,
    seed=42,
)

# Explicit tail validation split: the first 90% of the training windows are
# used for fitting, the last 10% for validation.
split = int(0.9 * X_train.shape[0])
history = model.fit(
    X_train=X_train[:split], y_train=y_train[:split],
    X_val=X_train[split:],   y_val=y_train[split:],
    epochs=60, batch_size=64, patience=10, reduce_lr_patience=5,
    checkpoint_path="best_lstm.keras",
    verbose=1,
    shuffle=False,  # keep temporal ordering, as in the other training cells
)

# Predict on the scaled test set, map predictions and targets back to price
# units with the dataset builder, then evaluate. Nothing is saved here beyond
# the checkpoint written during fit().
y_pred_scaled = model.predict(X_test)
y_pred = builder.inverse_transform_target(y_pred_scaled)
y_true = builder.inverse_transform_target(y_test)
print(LSTMSeq2SeqModel.evaluate(y_true, y_pred))






Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



Epoch 1/60
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1.2069 - mae: 0.9508
Epoch 1: val_loss improved from None to 0.18644, saving model to best_lstm.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step - loss: 1.5690 - mae: 1.0026 - val_loss: 0.1864 - val_mae: 0.3560 - learning_rate: 0.0010
Epoch 2/60
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 0.4189 - mae: 0.5557
Epoch 2: val_loss did not improve from 0.18644
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - loss: 0.4424 - mae: 0.5240 - val_loss: 0.1941 - val_mae: 0.3651 - learning_rate: 0.0010
Epoch 3/60
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 0.3110 - mae: 0.4616
Epoch 3: val_loss improved from 0.18644 to 0.14745, saving model to best_lstm.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 2s/step - loss: 0.3796 - mae: 0.4818 - val_loss: 0.1475 - val

In [19]:
# Model classes: the extended architecture, plus the baseline class whose
# static `evaluate` helper is reused for metrics.
from price_forecast.models.lstm_seq2seq_plus import LSTMSeq2SeqPlus
from price_forecast.models.lstm_seq2seq import LSTMSeq2SeqModel

# Expected array layouts:
#   X_train: (batch, input_steps, n_features)
#   y_train: (batch, output_steps, output_dim)

# --- Step 1: build -----------------------------------------------------------
# Shapes are inferred from the training arrays; every hyperparameter is spelled
# out so it can be tuned in one place.
model_plus = LSTMSeq2SeqPlus.from_training_data(
    X_train=X_train,
    y_train=y_train,
    # Encoder: stacked LSTMs; alternatives to try: (256, 128) or (64, 32)
    enc_units=(128, 64),
    bidirectional=True,       # BiLSTM encoder
    # Decoder LSTM width; alternatives to try: 64 or 256
    dec_units=128,
    attention=True,           # AdditiveAttention over the encoder sequence
    layernorm=True,           # LayerNormalization after encoder/decoder
    residual_last24=True,     # concat projected last-24 inputs into the head
    # Regularisation: `dropout` is the Keras LSTM `dropout` argument (per the
    # original note); `recurrent_dropout` slows training when > 0.
    dropout=0.2,
    recurrent_dropout=0.0,
    # Compile settings; more metrics can be added, e.g. ("mae", "mape")
    optimizer="adam",
    loss="mse",
    metrics=("mae",),
    # Reproducibility
    seed=42,
)

# --- Step 2: train -----------------------------------------------------------
# Tail split (first 90% fit / last 10% validation) preserves temporal order.
split = int(0.9 * X_train.shape[0])
history_plus = model_plus.fit(
    X_train=X_train[:split],
    y_train=y_train[:split],
    X_val=X_train[split:],
    y_val=y_train[split:],
    epochs=80,                # higher than the baseline run, for the larger model
    batch_size=64,
    shuffle=False,            # keep temporal ordering
    patience=10,
    min_delta=1e-4,
    reduce_lr_patience=5,
    checkpoint_path="best_lstm_plus.keras",  # saves the full model
    verbose=1,
)

# --- Step 3: predict and evaluate in price units -------------------------------
y_pred_scaled_plus = model_plus.predict(X_test, batch_size=256, verbose=0)
y_pred_plus = builder.inverse_transform_target(y_pred_scaled_plus)
y_true = builder.inverse_transform_target(y_test)

# Same evaluate helper as the baseline model, so the numbers are comparable.
print("PLUS metrics:", LSTMSeq2SeqModel.evaluate(y_true, y_pred_plus))

# --- Step 4: persist ---------------------------------------------------------
# The model as it stands after fit() is saved under "final_...", while the
# object restored below comes from the "best_..." checkpoint written during
# training. These are two different files.
model_plus.save("final_lstm_plus.keras")
restored_plus = LSTMSeq2SeqPlus.load("best_lstm_plus.keras")


Epoch 1/80
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29s/step - loss: 2.8850 - mae: 1.4195 
Epoch 1: val_loss improved from None to 2.15707, saving model to best_lstm_plus.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m401s[0m 30s/step - loss: 3.3598 - mae: 1.5274 - val_loss: 2.1571 - val_mae: 1.4005 - learning_rate: 0.0010
Epoch 2/80
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41s/step - loss: 3.3565 - mae: 1.6086 
Epoch 2: val_loss improved from 2.15707 to 0.68624, saving model to best_lstm_plus.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m531s[0m 42s/step - loss: 3.6861 - mae: 1.6106 - val_loss: 0.6862 - val_mae: 0.7584 - learning_rate: 0.0010
Epoch 3/80
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90s/step - loss: 1.3203 - mae: 0.9470  
Epoch 3: val_loss improved from 0.68624 to 0.39886, saving model to best_lstm_plus.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1141s

## Naive predictions (benchmark) [same as previous day]

In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Naive benchmark: the previous day's price is used as the forecast for every
# timestamp from the first test-target timestamp onwards.
# NOTE: needs `y_test_df_raw` and `df` from the dataset cells; the recorded
# NameError comes from running this cell before they were defined.
test_start_date = y_test_df_raw["datetime"].min()
test_df = df.loc[df["datetime_local"] >= test_start_date].copy()

actual_prices = test_df["dam_price_eur_mwh"]
naive_forecast = test_df["previous_day_dam"]
naive_mae = mean_absolute_error(actual_prices, naive_forecast)
naive_mse = mean_squared_error(actual_prices, naive_forecast)

print("Naive (previous day) baseline:")
print("MSE:", naive_mse, "MAE:", naive_mae)

NameError: name 'y_test_df_raw' is not defined

## Test with relative features

In [9]:
# Column names used by the relative-feature experiment in the cells below.
datetime_col = "datetime_local"  # timestamp column
target_col = "dam_price_eur_mwh"  # forecast target
main_series = "previous_day_dam"  # series passed as `main_series` to DatasetCfg

In [10]:
from sklearn.preprocessing import StandardScaler
from price_forecast.config import DatasetCfg
from price_forecast.config import SafeStandardScaler

cfg = DatasetCfg(
    # --- columns and feature ordering ---
    datetime_col=datetime_col,
    target_col=target_col,
    main_series=main_series,   # used for feature ordering
    put_main_first=True,       # keep previous_day_dam as first feature
    # --- windowing and split ---
    # 30 lookback days of hourly data => input_steps = 30*24 = 720, which
    # matches the X_train shape printed after the build.
    n_lookback_days=30,
    test_size=0.2,             # tail split by whole days
    # --- scaling ---
    scale_features=True,
    feature_scaler=SafeStandardScaler(),
    scale_target=True,
    target_scaler=StandardScaler(),
)

In [11]:
from price_forecast.datasets.daily_sequence import DailySequenceDataset

# Same frame and config as before, now with the relative-value options on:
# the target is modelled as relative, and `main_series` (previous_day_dam) is
# turned into relative differences as well.
builder = DailySequenceDataset(
    df,
    cfg,
    target_as_relative=True,
    relative_feature_cols=[main_series],
    epsilon=1e-6,
    fill_value=0.0,
)

  if not np.all(diffs.view("i8") == pd.Timedelta(hours=1).value):


In [12]:
out = builder.build(return_dfs=True)

# Scaled arrays. Per the original notes: X is (S, N*24, F) and y is (S, 24, 1).
X_train, y_train = out["X_train"], out["y_train"]
X_test, y_test = out["X_test"], out["y_test"]

# Tidy DataFrame views for inspection (raw vs scaled).
X_train_df_raw = out["X_train_df_raw"]
X_train_df_scaled = out["X_train_df"]
y_train_df_raw = out["y_train_df_raw"]
X_test_df_raw = out["X_test_df_raw"]
y_test_df_raw = out["y_test_df_raw"]

# Per-sample bases, needed to turn relative predictions back into absolute
# values; these are None when the build output does not contain them.
y_train_bases = out.get("y_train_bases")
y_test_bases = out.get("y_test_bases")

print(
    f"X_train: {X_train.shape}, y_train: {y_train.shape} | "
    f"X_test: {X_test.shape}, y_test: {y_test.shape}"
)


X_train: (901, 720, 28), y_train: (901, 24, 1) | X_test: (225, 720, 28), y_test: (225, 24, 1)


In [13]:
from price_forecast.models.lstm_seq2seq import LSTMSeq2SeqModel

# Baseline seq2seq LSTM on the relative-feature dataset, fully specified.
model = LSTMSeq2SeqModel.from_training_data(
    X_train,
    y_train,
    # architecture (same unit count as the earlier baseline run)
    lstm_units=15 * 8,
    dropout=0.2,
    recurrent_dropout=0.0,
    # compile settings
    optimizer="adam",
    loss="mse",
    metrics=("mae",),
    # reproducibility
    seed=42,
)

# No X_val/y_val is passed here. Per the original note, fit() then performs the
# tail validation split itself (not verifiable from this notebook).
history = model.fit(
    X_train=X_train,
    y_train=y_train,
    epochs=60,
    batch_size=64,
    shuffle=False,                       # keep temporal ordering
    patience=10,
    reduce_lr_patience=5,
    checkpoint_path="best_lstm.keras",   # saves the full model
    verbose=1,
)



  super().__init__(**kwargs)


Epoch 1/60
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 902ms/step - loss: 0.4594 - mae: 0.3917
Epoch 1: val_loss improved from None to 0.51244, saving model to best_lstm.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - loss: 1.0847 - mae: 0.6096 - val_loss: 0.5124 - val_mae: 0.4709 - learning_rate: 0.0010
Epoch 2/60
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 991ms/step - loss: 0.4720 - mae: 0.4131
Epoch 2: val_loss improved from 0.51244 to 0.50472, saving model to best_lstm.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - loss: 1.0916 - mae: 0.6274 - val_loss: 0.5047 - val_mae: 0.4588 - learning_rate: 0.0010
Epoch 3/60
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 889ms/step - loss: 0.4340 - mae: 0.3610
Epoch 3: val_loss improved from 0.50472 to 0.50100, saving model to best_lstm.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 933ms/step - lo

In [14]:
# Predictions are still in the model's (scaled) target space at this point.
y_pred_test_scaled = model.predict(
    X_test,
    batch_size=256,
    verbose=0,
)

In [15]:
# Map predictions and targets back to absolute units with the dataset helper.
# Relative mode needs the per-sample bases; without bases only the scaling is
# undone (or nothing, if scaling is disabled).
inverse_kwargs = {} if y_test_bases is None else {"bases": y_test_bases}
y_pred_test_abs = builder.inverse_transform_target(y_pred_test_scaled, **inverse_kwargs)
y_true_test_abs = builder.inverse_transform_target(y_test, **inverse_kwargs)

# Simple evaluation (MAE/MSE) in absolute units.
# NOTE(review): the recorded run reports MSE ~7e65 / MAE ~1e31, which is not a
# plausible price error. The relative inverse transform looks numerically
# unstable (possibly bases near zero with epsilon=1e-6); investigate before
# trusting these metrics.
metrics = LSTMSeq2SeqModel.evaluate(y_true_test_abs, y_pred_test_abs)
print("Test metrics (abs units):", metrics)

Test metrics (abs units): {'mse': 6.961839310183534e+65, 'mae': 1.217391687341922e+31}
