Hybrid Model using CNN-LSTM, which combines convolutional layers (for feature extraction) with LSTM layers (for sequential learning)

In [3]:
import pandas as pd

# Load the dataset
file_path = "../data/clean_FeatEng.csv" #"../data/ml-engineer-dataset-clean.csv"
df_cleaned = pd.read_csv(file_path)

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Select relevant features
features = ["demand-forecast", "wind-forecast", "solar-forecast", "temperature-forecast", "day-ahead-auction-price"]
target = "day-ahead-auction-price"

# Scale data
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df_cleaned[features])

# Convert data into sequences
def create_sequences(data, seq_length=24):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length, -1])  # Predict price
    return np.array(X), np.array(y)

SEQ_LENGTH = 24  # Use past 24 hours to predict next price
X, y = create_sequences(df_scaled, SEQ_LENGTH)

# Train-test split
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")


Train shape: (31817, 24, 5), Test shape: (7955, 24, 5)


# Define Model

In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout, Flatten

def build_cnn_lstm():
    model = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(SEQ_LENGTH, X_train.shape[2])),
        MaxPooling1D(pool_size=2),
        LSTM(64, return_sequences=True),
        Dropout(0.3),
        LSTM(32, return_sequences=False),
        Dropout(0.3),
        Dense(16, activation='relu'),
        Dense(1)
    ])
    
    model.compile(optimizer='adam', loss='mae')
    return model


2025-03-13 22:16:46.244651: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Train the CNN-LSTM Model

In [5]:
cnn_lstm_model = build_cnn_lstm()

history = cnn_lstm_model.fit(
    X_train, y_train, validation_data=(X_test, y_test),
    epochs=50, batch_size=32, verbose=1
)

y_pred = cnn_lstm_model.predict(X_test)

# Reverse scaling
y_pred_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_pred.reshape(-1, 1))))[:, -1]
y_test_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_test.reshape(-1, 1))))[:, -1]



Epoch 1/50


  super().__init__(


[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - loss: 0.0940 - val_loss: 0.0419
Epoch 2/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0401 - val_loss: 0.0346
Epoch 3/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0339 - val_loss: 0.0306
Epoch 4/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0314 - val_loss: 0.0327
Epoch 5/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0305 - val_loss: 0.0283
Epoch 6/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0295 - val_loss: 0.0269
Epoch 7/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0288 - val_loss: 0.0282
Epoch 8/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - loss: 0.0286 - val_loss: 0.0269
Epoch 9/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━

NameError: name 'mean_absolute_error' is not defined

In [6]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Calculate metrics
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))

print(f"CNN-LSTM Model - MAE: {mae:.2f}, RMSE: {rmse:.2f}")


CNN-LSTM Model - MAE: 12.27, RMSE: 17.58


| Model                                            | Mae lower is better | Rmse lower is better |
| :----------------------------------------------- | :------------------ | :------------------- |
| Historical Average                               | 30.37               | 40.85                |
| Initial Linear Regression                        | 17.7                | 23.81                |
| Improved Linear Regression (Feature Engineering) | 16.24               | 21.42                |
| XGBoost (Default Settings)                       | 15.12               | 20.06                |
| Tuned XGBoost (Hyperparameter Search)            | 14.86               | 19.73                |
| Stacked Model (XGBoost + LightGBM + Ridge)       | 15.33               | 20.39                |
| lightGBM (tuned)                                 | 15.93               | 20.81                |
| Stacked Model with tuned LGBM                    | 15.47               | 20.41                |
| **LSTMs**                                        | **11.74**           | **15.19**            |
| CNN-LSTM                                         | 12.27               | 17.58                |