In [1]:
!bash scripts/LSTM.sh

Traceback (most recent call last):
  File "/home/RDC/inceemir/power/run.py", line 4, in <module>
    from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast
  File "/home/RDC/inceemir/power/exp/exp_long_term_forecasting.py", line 1, in <module>
    from data_provider.data_factory import data_provider
  File "/home/RDC/inceemir/power/data_provider/data_factory.py", line 3, in <module>
    from data_provider.data_loader import Dataset_Custom
  File "/home/RDC/inceemir/power/data_provider/data_loader.py", line 3, in <module>
    import pandas as pd
  File "/home/RDC/inceemir/power/.venv/lib64/python3.9/site-packages/pandas/__init__.py", line 22, in <module>
    from pandas.compat import is_numpy_dev as _is_numpy_dev  # pyright: ignore # noqa:F401
  File "/home/RDC/inceemir/power/.venv/lib64/python3.9/site-packages/pandas/compat/__init__.py", line 18, in <module>
    from pandas.compat.numpy import (
  File "/home/RDC/inceemir/power/.venv/lib64/python3.9/site-packages/pandas/compa

In [None]:
# ---------------------------- imports ----------------------------
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# ------------------------ reproducibility ------------------------
torch.manual_seed(2021)
np.random.seed(2021)

# ---------------------- 1. load the data -------------------------
CSV_PATH = "./data/causal_data.csv"        # adjust if needed
df = pd.read_csv(CSV_PATH, parse_dates=["date"])

# keep just the columns we need
cols_x = ["solar_forecast", "wind_forecast", "total_load"]
col_y  = "electricity_price"
df = df[["date"] + cols_x + [col_y]].dropna()

# ------------------ 2. chronological splits ----------------------
train_end = "2023-12-31 23:00:00"
val_end   = "2024-06-30 23:00:00"

train_df = df[df["date"] <= train_end]
val_df   = df[(df["date"] > train_end) & (df["date"] <= val_end)]
test_df  = df[df["date"] > val_end]

# ------------------ 3. standardise each split --------------------
scaler_x = StandardScaler()
scaler_y = StandardScaler()

def scale_xy(split_df, fit=False):
    x = split_df[cols_x].values
    y = split_df[[col_y]].values            # 2-D for sklearn
    if fit:
        x = scaler_x.fit_transform(x)
        y = scaler_y.fit_transform(y)
    else:
        x = scaler_x.transform(x)
        y = scaler_y.transform(y)
    return x.astype(np.float32), y.astype(np.float32)

x_train, y_train = scale_xy(train_df, fit=True)
x_val,   y_val   = scale_xy(val_df)
x_test,  y_test  = scale_xy(test_df)

# --------------- 4. make (X,Y) sequences for LSTM ---------------
SEQ_LEN  = 168     # look-back (hours)
HORIZON  = 24      # predict next 24 hours

def make_sequences(x, y):
    X_seq, Y_seq = [], []
    for i in range(len(x) - SEQ_LEN - HORIZON + 1):
        X_seq.append(x[i : i + SEQ_LEN])
        Y_seq.append(y[i + SEQ_LEN : i + SEQ_LEN + HORIZON].T)  # -> shape (1,24)
    X_seq = np.stack(X_seq)                    # [N, 168, 3]
    Y_seq = np.concatenate(Y_seq, axis=0)      # [N, 24]
    return torch.tensor(X_seq), torch.tensor(Y_seq)

Xtr, Ytr = make_sequences(x_train, y_train)
Xvl, Yvl = make_sequences(x_val,   y_val)
Xts, Yts = make_sequences(x_test,  y_test)

train_loader = DataLoader(TensorDataset(Xtr, Ytr), batch_size=32, shuffle=True)
val_loader   = DataLoader(TensorDataset(Xvl, Yvl), batch_size=32)

# --------------------- 5. LSTM baseline --------------------------
class PriceLSTM(nn.Module):
    def __init__(self, n_in: int, hidden: int = 64, n_layers: int = 2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=n_in,
            hidden_size=hidden,
            num_layers=n_layers,
            batch_first=True,
        )
        self.proj = nn.Linear(hidden, HORIZON)   # 24-step head

    def forward(self, x):             # x : [B, 168, 3]
        _, (h_n, _) = self.lstm(x)    # use last hidden state
        out = self.proj(h_n[-1])      # [B, 24]
        return out

device = "cuda" if torch.cuda.is_available() else "cpu"
model  = PriceLSTM(n_in=len(cols_x)).to(device)

# optimise MSE, early-stop on val MSE
criterion = nn.MSELoss()
opt       = torch.optim.Adam(model.parameters(), lr=1e-3)
PATIENCE  = 3
best_val  = np.inf
pat_cnt   = 0

# -------------------- 6. training loop ---------------------------
for epoch in range(1, 51):   # hard cap 50 epochs
    model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        opt.step()

    # ----- validation -----
    model.eval()
    val_losses = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            val_losses.append(criterion(model(xb), yb).item())
    val_mse = np.mean(val_losses)
    print(f"Epoch {epoch:02d}  |  val MSE = {val_mse:.4f}")

    # early stopping
    if val_mse < best_val - 1e-4:
        best_val = val_mse
        pat_cnt  = 0
        best_state = model.state_dict()
    else:
        pat_cnt += 1
        if pat_cnt >= PATIENCE:
            print("Early stop triggered.")
            break

# reload best weights
model.load_state_dict(best_state)

# ---------------- 7. evaluate on *test* set ----------------------
model.eval()
with torch.no_grad():
    yhat_std = model(Xts.to(device)).cpu().numpy()   # std-scale
y_true_std = Yts.numpy()

# inverse scaling
yhat = scaler_y.inverse_transform(yhat_std)
ytrue = scaler_y.inverse_transform(y_true_std)

mae = mean_absolute_error(ytrue, yhat)
mse = mean_squared_error(ytrue, yhat)

print(f"\n====  LSTM baseline  ====")
print(f"MAE = {mae:.3f}  |  MSE = {mse:.3f}")

Epoch 01  |  val MSE = 0.6986
Epoch 02  |  val MSE = 0.3234
Epoch 03  |  val MSE = 0.2495
Epoch 04  |  val MSE = 0.3970
Epoch 05  |  val MSE = 0.4434
Epoch 06  |  val MSE = 0.3555
Early stop triggered.

====  LSTM baseline  ====
MAE = 37.913  |  MSE = 3691.247


In [11]:
!bash scripts/LSTM.sh

Traceback (most recent call last):
  File "/home/RDC/inceemir/power/run.py", line 4, in <module>
    from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast
  File "/home/RDC/inceemir/power/exp/exp_long_term_forecasting.py", line 1, in <module>
    from data_provider.data_factory import data_provider
  File "/home/RDC/inceemir/power/data_provider/data_factory.py", line 3, in <module>
    from data_provider.data_loader import Dataset_Custom
  File "/home/RDC/inceemir/power/data_provider/data_loader.py", line 3, in <module>
    import pandas as pd
  File "/home/RDC/inceemir/power/.venv/lib64/python3.9/site-packages/pandas/__init__.py", line 22, in <module>
    from pandas.compat import is_numpy_dev as _is_numpy_dev  # pyright: ignore # noqa:F401
  File "/home/RDC/inceemir/power/.venv/lib64/python3.9/site-packages/pandas/compat/__init__.py", line 18, in <module>
    from pandas.compat.numpy import (
  File "/home/RDC/inceemir/power/.venv/lib64/python3.9/site-packages/pandas/compa

In [10]:
# ---------------------------- imports ----------------------------
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# ------------------------ reproducibility ------------------------
torch.manual_seed(2021)
np.random.seed(2021)

# ---------------------- 1. load the data -------------------------
CSV_PATH = "./data/causal_data.csv"        # adjust if needed
df = pd.read_csv(CSV_PATH, parse_dates=["date"])

cols_x = ["solar_forecast", "wind_forecast", "total_load"]
col_y  = "electricity_price"
df     = df[["date"] + cols_x + [col_y]].dropna()

# ------------------ 2. chronological splits ----------------------
train_end = "2023-12-31 23:00:00"
val_end   = "2024-06-30 23:00:00"

train_df = df[df["date"] <= train_end]
val_df   = df[(df["date"] > train_end) & (df["date"] <= val_end)]
test_df  = df[df["date"] > val_end]

# ------------------ 3. standardise each split --------------------
scaler_x = StandardScaler()
scaler_y = StandardScaler()

def scale_xy(split_df, fit=False):
    x = split_df[cols_x].values
    y = split_df[[col_y]].values            # keep 2-D shape
    if fit:
        x = scaler_x.fit_transform(x)
        y = scaler_y.fit_transform(y)
    else:
        x = scaler_x.transform(x)
        y = scaler_y.transform(y)
    return x.astype(np.float32), y.astype(np.float32)

x_train, y_train = scale_xy(train_df, fit=True)
x_val,   y_val   = scale_xy(val_df)
x_test,  y_test  = scale_xy(test_df)

# --------------- 4. make (X,Y) sequences for LSTM ---------------
SEQ_LEN  = 168     # look-back (hours)
HORIZON  = 24      # forecast window

def make_sequences(x, y):
    X_seq, Y_seq = [], []
    for i in range(len(x) - SEQ_LEN - HORIZON + 1):
        X_seq.append(x[i : i + SEQ_LEN])
        # collect the *vector* of next-24 values of y
        Y_seq.append(y[i + SEQ_LEN : i + SEQ_LEN + HORIZON].T)  # shape (1,24)
    return torch.tensor(np.stack(X_seq)), torch.tensor(np.concatenate(Y_seq))

Xtr, Ytr = make_sequences(x_train, y_train)
Xvl, Yvl = make_sequences(x_val,   y_val)
Xts, Yts = make_sequences(x_test,  y_test)

train_loader = DataLoader(TensorDataset(Xtr, Ytr), batch_size=64, shuffle=True)
val_loader   = DataLoader(TensorDataset(Xvl, Yvl), batch_size=64)

# --------------------- 5. LSTM baseline --------------------------
class PriceLSTM(nn.Module):
    def __init__(self, n_in: int, hidden: int = 128, n_layers: int = 2,
                 dropout: float = 0.1):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=n_in,
            hidden_size=hidden,
            num_layers=n_layers,
            dropout=dropout,
            batch_first=True,
        )
        self.proj = nn.Linear(hidden, HORIZON)   # 24-step head

    def forward(self, x):             # x : [B, 168, 3]
        _, (h_n, _) = self.lstm(x)
        out = self.proj(h_n[-1])      # [B, 24]
        return out

device = "cuda" if torch.cuda.is_available() else "cpu"
model  = PriceLSTM(n_in=len(cols_x)).to(device)

criterion = nn.MSELoss()
opt       = torch.optim.Adam(model.parameters(), lr=3e-3)
PATIENCE  = 5
best_val  = np.inf
pat_cnt   = 0

# -------------------- 6. training loop ---------------------------
for epoch in range(1, 101):   # up to 100 epochs
    # ---- train ----
    model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        opt.step()

    # ---- validate ----
    model.eval()
    val_losses = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            val_losses.append( criterion(model(xb), yb).item() )
    val_mse = np.mean(val_losses)
    print(f"Epoch {epoch:03d} | val MSE = {val_mse:.4f}")

    # early stopping
    if val_mse < best_val - 1e-4:
        best_val = val_mse
        pat_cnt  = 0
        best_state = model.state_dict()
    else:
        pat_cnt += 1
        if pat_cnt >= PATIENCE:
            print("Early stop.")
            break

# reload best weights
model.load_state_dict(best_state)

# ---------------- 7. evaluate on *test* set ----------------------
model.eval()
with torch.no_grad():
    yhat_std = model(Xts.to(device)).cpu().numpy()
y_true_std = Yts.numpy()

# ---- metrics in standardised space (same as other models) ----
mae_std = mean_absolute_error(y_true_std, yhat_std)
mse_std = mean_squared_error(y_true_std, yhat_std)
print("\n=====  LSTM baseline (standardised scale)  =====")
print(f"MAE = {mae_std:.4f}   |   MSE = {mse_std:.4f}")

# ------------- optional: metrics in real € scale ----------------
yhat_real  = scaler_y.inverse_transform(yhat_std)
ytrue_real = scaler_y.inverse_transform(y_true_std)
mae_real   = mean_absolute_error(ytrue_real, yhat_real)
mse_real   = mean_squared_error(ytrue_real, yhat_real)
print("\n(inverse-transformed – for intuition only)")
print(f"MAE = {mae_real:.2f} €   |   MSE = {mse_real:.2f}  €²")

Epoch 001 | val MSE = 0.5682
Epoch 002 | val MSE = 0.6094
Epoch 003 | val MSE = 0.6481
Epoch 004 | val MSE = 0.8603
Epoch 005 | val MSE = 0.7203
Epoch 006 | val MSE = 0.5282
Epoch 007 | val MSE = 0.5844
Epoch 008 | val MSE = 0.1443
Epoch 009 | val MSE = 0.3995
Epoch 010 | val MSE = 0.4903
Epoch 011 | val MSE = 0.4469
Epoch 012 | val MSE = 0.3456
Epoch 013 | val MSE = 0.3528
Early stop.

=====  LSTM baseline (standardised scale)  =====
MAE = 0.2951   |   MSE = 0.2468

(inverse-transformed – for intuition only)
MAE = 37.62 €   |   MSE = 4010.33  €²


In [8]:
# -------------------------------------------------------------
#  baseline_regression_and_mlp.py
#  ------------------------------------------------------------
#  - LinearRegression (scikit-learn)
#  - One-hidden-layer MLP (PyTorch)
# -------------------------------------------------------------
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime

# ------------------------------------------------------------------
# libraries for the two baselines
# ------------------------------------------------------------------
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# -------------------------- SETTINGS ------------------------------
CSV_PATH   = Path("data/causal_data.csv")
SEQ_LEN    = 168          # hours fed into the model
PRED_LEN   = 24           # hours predicted
BATCH_SIZE = 256          # only for the small MLP
EPOCHS     = 30
LR         = 1e-3
PATIENCE   = 3
DEVICE     = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ------------------------ LOAD & PREP DATA ------------------------
print("Loading CSV …")
df = (pd.read_csv(CSV_PATH, parse_dates=["date"])
        .sort_values("date")
        .set_index("date"))

cols_in   = ["electricity_price", "solar_forecast",
             "wind_forecast", "total_load"]
target_col = "electricity_price"

# -------------- split into train / val / test chronologically -----
train_end = "2023-12-31 23:00:00"
val_end   = "2024-06-30 23:00:00"

df_train = df.loc[:train_end,  cols_in]
df_val   = df.loc[train_end:val_end,  cols_in]
df_test  = df.loc[val_end:,  cols_in]

# --------------------------- SCALING ------------------------------
scaler_x = StandardScaler()
scaler_y = StandardScaler()

def scale_block(block):
    X = scaler_x.transform(block[cols_in])
    y = scaler_y.transform(block[[target_col]])
    return X, y

# fit on *training* only
scaler_x.fit(df_train[cols_in])
scaler_y.fit(df_train[[target_col]])

# ---------------- create rolling windows --------------------------
def make_windows(block):
    Xs, ys = [], []
    a = block.values
    for i in range(len(a) - SEQ_LEN - PRED_LEN + 1):
        seq_x = a[i:i+SEQ_LEN]                 # 168 × 4
        seq_y = a[i+SEQ_LEN:i+SEQ_LEN+PRED_LEN, 0]  # 24 ×  (price column)
        Xs.append(seq_x)                       # keep 2-D shape
        ys.append(seq_y)
    return np.stack(Xs), np.stack(ys)

X_tr, y_tr = make_windows(df_train)
X_va, y_va = make_windows(df_val)
X_te, y_te = make_windows(df_test)

# flatten sequences for the regression baseline
X_tr_flat = X_tr.reshape(len(X_tr), -1)
X_va_flat = X_va.reshape(len(X_va), -1)
X_te_flat = X_te.reshape(len(X_te), -1)

# ================================================================
# 1)  LINEAR-REGRESSION BASELINE
# ================================================================
print("\nTraining LinearRegression …")
lin = LinearRegression(n_jobs=-1).fit(X_tr_flat, y_tr)

y_pred_lr = lin.predict(X_te_flat)
mae_lr = mean_absolute_error(y_te, y_pred_lr)
mse_lr = mean_squared_error(y_te, y_pred_lr)

print(f"[Linear]  MAE = {mae_lr:.4f}   MSE = {mse_lr:.4f}")

# ================================================================
# 2)  MLP BASELINE  (one hidden layer)
# ================================================================
class TinyMLP(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(512, out_dim)
        )
    def forward(self, x):
        return self.net(x)

in_dim  = SEQ_LEN * len(cols_in)      # 168 × 4  -> 672
out_dim = PRED_LEN                    # 24

model = TinyMLP(in_dim, out_dim).to(DEVICE)
opt   = torch.optim.Adam(model.parameters(), lr=LR)
loss_fn = nn.MSELoss()

def to_loader(X, y, shuffle):
    X_t = torch.tensor(X.reshape(len(X), -1), dtype=torch.float32)
    y_t = torch.tensor(y,                 dtype=torch.float32)
    return DataLoader(TensorDataset(X_t, y_t),
                      batch_size=BATCH_SIZE, shuffle=shuffle)

dl_tr = to_loader(X_tr, y_tr, shuffle=True)
dl_va = to_loader(X_va, y_va, shuffle=False)

best_val = np.inf
stuck    = 0

print("\nTraining 1-hidden-layer MLP …")
for epoch in range(1, EPOCHS+1):
    model.train()
    for xb, yb in dl_tr:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        opt.zero_grad()
        pred = model(xb)
        loss = loss_fn(pred, yb)
        loss.backward()
        opt.step()

    # ---- validation ---
    model.eval()
    with torch.no_grad():
        val_pred = []
        for xb, _ in dl_va:
            xb = xb.to(DEVICE)
            val_pred.append(model(xb).cpu().numpy())
        val_pred = np.concatenate(val_pred, axis=0)
        val_loss = mean_squared_error(y_va, val_pred)

    print(f"Epoch {epoch:2d}  val-MSE = {val_loss:.4f}")

    if val_loss < best_val - 1e-6:
        best_val = val_loss
        best_state = model.state_dict()
        stuck = 0
    else:
        stuck += 1
        if stuck >= PATIENCE:
            print("Early-stopping.")
            break

# ---------- test set ----------
model.load_state_dict(best_state)
model.eval()
with torch.no_grad():
    X_te_flat = torch.tensor(X_te.reshape(len(X_te), -1),
                             dtype=torch.float32).to(DEVICE)
    y_pred_mlp = model(X_te_flat).cpu().numpy()

mae_mlp = mean_absolute_error(y_te, y_pred_mlp)
mse_mlp = mean_squared_error(y_te, y_pred_mlp)

print(f"[Tiny-MLP] MAE = {mae_mlp:.4f}   MSE = {mse_mlp:.4f}")

# ---------------------------------------------------------------
#  The MAE / MSE numbers above are what you put into the results
#  tables next to TimeXer, iTransformer, PatchTST, Crossformer …
# ---------------------------------------------------------------

Loading CSV …

Training LinearRegression …
[Linear]  MAE = 25.9635   MSE = 1894.7248

Training 1-hidden-layer MLP …
Epoch  1  val-MSE = 11941.0274
Epoch  2  val-MSE = 7718.5826
Epoch  3  val-MSE = 6999.1241
Epoch  4  val-MSE = 6673.1845
Epoch  5  val-MSE = 6491.6193
Epoch  6  val-MSE = 6383.7500
Epoch  7  val-MSE = 6306.0333
Epoch  8  val-MSE = 6250.1228
Epoch  9  val-MSE = 6204.0284
Epoch 10  val-MSE = 6179.6426
Epoch 11  val-MSE = 6169.9750
Epoch 12  val-MSE = 6150.2302
Epoch 13  val-MSE = 6133.3141
Epoch 14  val-MSE = 6117.1433
Epoch 15  val-MSE = 6114.1509
Epoch 16  val-MSE = 6136.4535
Epoch 17  val-MSE = 6123.0418
Epoch 18  val-MSE = 6112.8399
Epoch 19  val-MSE = 6119.3832
Epoch 20  val-MSE = 6123.4493
Epoch 21  val-MSE = 6114.8088
Early-stopping.
[Tiny-MLP] MAE = 92.3062   MSE = 12717.9125


In [1]:
import pandas as pd
import numpy as np
from seasonal_naive import seasonal_naive
from sklearn.metrics import mean_absolute_error, mean_squared_error

# ----------------------------------------------------------
# 1) Load your CSV exactly as you do for the neural models
# ----------------------------------------------------------
CSV_PATH = "./data/causal_data.csv"
df = pd.read_csv(CSV_PATH, parse_dates=["date"])

# keep only the price column of the *test* period
test_start = "2024-07-01 00:00:00"
test_prices = (
    df.loc[df["date"] >= test_start, "electricity_price"]
      .astype(np.float32)
      .values            # → 1-D ndarray
)

# ----------------------------------------------------------
# 2) Generate forecasts with the helper
# ----------------------------------------------------------
y_hat, y_true = seasonal_naive(
    series      = test_prices,
    horizon     = 24,       # predict 24 hours
    season_lag  = 24        # use yesterday’s values
)

# ----------------------------------------------------------
# 3) Evaluate
# ----------------------------------------------------------
mae = mean_absolute_error(y_true, y_hat)
mse = mean_squared_error(y_true, y_hat)

print(f"Seasonal-naïve  MAE = {mae:.2f} | MSE = {mse:.2f}")

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject