# Bidirectional LSTMs

Bidirectional LSTM to capture both forward and backward dependencies in the time series data

In [1]:
import pandas as pd

# Load the dataset
file_path = "../data/clean_FeatEng.csv" #"../data/ml-engineer-dataset-clean.csv"
df_cleaned = pd.read_csv(file_path)

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Select relevant features
features = ["demand-forecast", "wind-forecast", "solar-forecast", "temperature-forecast", "day-ahead-auction-price"]
target = "day-ahead-auction-price"

# Scale data
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df_cleaned[features])

# Convert data into sequences
def create_sequences(data, seq_length=24):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length, -1])  # Predict price
    return np.array(X), np.array(y)

SEQ_LENGTH = 24  # Use past 24 hours to predict next price
X, y = create_sequences(df_scaled, SEQ_LENGTH)

# Train-test split
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")


2025-03-13 22:31:12.921198: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Train shape: (31817, 24, 5), Test shape: (7955, 24, 5)


# Define the BiLSTM Model

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout

def build_bilstm():
    model = Sequential([
        Bidirectional(LSTM(64, return_sequences=True), input_shape=(SEQ_LENGTH, X_train.shape[2])),
        Dropout(0.3),
        Bidirectional(LSTM(32, return_sequences=False)),
        Dropout(0.3),
        Dense(16, activation='relu'),
        Dense(1)
    ])
    
    model.compile(optimizer='adam', loss='mae')
    return model


# Train the BiLSTM Model

In [5]:
bilstm_model = build_bilstm()

history = bilstm_model.fit(
    X_train, y_train, validation_data=(X_test, y_test),
    epochs=50, batch_size=32, verbose=1
)

y_pred = bilstm_model.predict(X_test)

# Reverse scaling
y_pred_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_pred.reshape(-1, 1))))[:, -1]
y_test_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_test.reshape(-1, 1))))[:, -1]



Epoch 1/50


  super().__init__(**kwargs)


[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 16ms/step - loss: 0.0831 - val_loss: 0.0342
Epoch 2/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0365 - val_loss: 0.0318
Epoch 3/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 0.0301 - val_loss: 0.0270
Epoch 4/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 0.0274 - val_loss: 0.0235
Epoch 5/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 17ms/step - loss: 0.0263 - val_loss: 0.0235
Epoch 6/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0250 - val_loss: 0.0228
Epoch 7/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0248 - val_loss: 0.0240
Epoch 8/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0244 - val_loss: 0.0228
Epoch 9/50
[1m995/995[0m [32m━━━

NameError: name 'mean_absolute_error' is not defined

In [6]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Calculate metrics
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))

print(f"BiLSTM Model - MAE: {mae:.2f}, RMSE: {rmse:.2f}")


BiLSTM Model - MAE: 9.82, RMSE: 14.61


| Model                                            | Mae lower is better | Rmse lower is better |
| :----------------------------------------------- | :------------------ | :------------------- |
| Historical Average                               | 30.37               | 40.85                |
| Initial Linear Regression                        | 17.7                | 23.81                |
| Improved Linear Regression (Feature Engineering) | 16.24               | 21.42                |
| XGBoost (Default Settings)                       | 15.12               | 20.06                |
| Tuned XGBoost (Hyperparameter Search)            | 14.86               | 19.73                |
| Stacked Model (XGBoost + LightGBM + Ridge)       | 15.33               | 20.39                |
| lightGBM (tuned)                                 | 15.93               | 20.81                |
| Stacked Model with tuned LGBM                    | 15.47               | 20.41                |
| LSTMs                                            | 11.74               | 15.19                |
| CNN-LSTM                                         | 12.27               | 17.58                |
| **BiLSTM**                                       | **9.82**            | **14.61**            |

## Generate Lag Features

In [8]:
df = df_cleaned

In [9]:
# Create lag features for day-ahead price, demand, wind, and solar forecasts
lag_features = ['day-ahead-auction-price', 'demand-forecast', 'wind-forecast', 'solar-forecast']

for feature in lag_features:
    for lag in [1, 2, 3, 7]:  # 1-hour, 2-hour, 3-hour, 1-day lags
        df[f'{feature}_lag{lag}'] = df[feature].shift(lag)


## Add Rolling Statistics

In [10]:
# Compute rolling mean & std for the last 3, 6, and 12 hours
for feature in lag_features:
    for window in [3, 6, 12]:
        df[f'{feature}_roll_mean{window}'] = df[feature].rolling(window=window).mean()
        df[f'{feature}_roll_std{window}'] = df[feature].rolling(window=window).std()


## Fourier Features for Seasonality

In [11]:
import numpy as np

def add_fourier_terms(df, period, K):
    """Add Fourier terms to capture periodic patterns."""
    time = np.arange(len(df))
    for k in range(1, K+1):
        df[f'sin_{k}_period{period}'] = np.sin(2 * np.pi * k * time / period)
        df[f'cos_{k}_period{period}'] = np.cos(2 * np.pi * k * time / period)
    return df

# Add weekly and daily seasonalities
df = add_fourier_terms(df, period=24, K=3)  # Daily
df = add_fourier_terms(df, period=24*7, K=3)  # Weekly


## Preprocess Data with New Features

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout

# Define features and target
target = 'day-ahead-auction-price'
features = df.columns.difference(['contract-delivery', target])  # Exclude non-numeric

# Scale data
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df[features])

# Convert to sequences for LSTM
SEQ_LENGTH = 24  # Use past 24 hours for prediction

def create_sequences(data, target, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(target[i+seq_length])
    return np.array(X), np.array(y)

X, y = create_sequences(df_scaled, df[target].values, SEQ_LENGTH)

# Train-test split
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


## Define & Train BiLSTM

In [14]:
# Define BiLSTM model
def build_bilstm():
    model = Sequential([
        Bidirectional(LSTM(64, return_sequences=True), input_shape=(SEQ_LENGTH, X_train.shape[2])),
        Dropout(0.3),
        Bidirectional(LSTM(32, return_sequences=False)),
        Dropout(0.3),
        Dense(16, activation='relu'),
        Dense(1)
    ])
    
    model.compile(optimizer='adam', loss='mae')
    return model

bilstm_model = build_bilstm()

# Train model
history = bilstm_model.fit(
    X_train, y_train, validation_data=(X_test, y_test),
    epochs=50, batch_size=32, verbose=1
)


Epoch 1/50


  super().__init__(**kwargs)


[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 18ms/step - loss: 0.0810 - val_loss: 0.0364
Epoch 2/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 0.0363 - val_loss: 0.0300
Epoch 3/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 0.0310 - val_loss: 0.0292
Epoch 4/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 0.0291 - val_loss: 0.0246
Epoch 5/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18ms/step - loss: 0.0269 - val_loss: 0.0274
Epoch 6/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 0.0258 - val_loss: 0.0244
Epoch 7/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 0.0247 - val_loss: 0.0224
Epoch 8/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 0.0243 - val_loss: 0.0232
Epoch 9/50
[1m995/995[0m [32m━━━

## Evaluate Model

In [15]:
# Predict
y_pred = bilstm_model.predict(X_test)

# Reverse scaling
y_pred_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_pred.reshape(-1, 1))))[:, -1]
y_test_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_test.reshape(-1, 1))))[:, -1]

# Compute error metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error

mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))

print(f"BiLSTM with Feature Engineering - MAE: {mae:.2f}, RMSE: {rmse:.2f}")


[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
BiLSTM with Feature Engineering - MAE: 14.13, RMSE: 18.67
