<h2>Why LSTMs for This Problem?</h2>

    LSTMs can capture sequential dependencies in time-series data.
    Unlike tree-based models (XGBoost), they learn patterns across time rather than treating each hour independently.
    Can handle nonlinear relationships better when trained correctly.

In [1]:
import pandas as pd

# Load the dataset
file_path = "../data/clean_FeatEng.csv" #"../data/ml-engineer-dataset-clean.csv"
df_cleaned = pd.read_csv(file_path)

In [20]:
df_cleaned.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'contract-delivery', 'demand-forecast',
       'temperature-normal', 'temperature-forecast', 'solar-forecast',
       'day-ahead-auction-price', 'wind-forecast', 'day-ahead-auction-time',
       'hour', 'day_of_week', 'month', 'renewable_share', 'lag_1',
       'rolling_mean_7', 'rolling_std_7', 'renewable_ratio', 'hour_sin',
       'hour_cos'],
      dtype='object')

# Preprocessing for LSTM

LSTMs require: 
- Scaling the data (MinMaxScaler or StandardScaler)
- Converting data into sequences (past n timesteps → predict future price)

In [11]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Select relevant features
features = ["demand-forecast", "wind-forecast", "solar-forecast", "temperature-forecast", "day-ahead-auction-price"]
target = "day-ahead-auction-price"

# Scale data
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df_cleaned[features])

# Convert data into sequences
def create_sequences(data, seq_length=24):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length, -1])  # Predict price
    return np.array(X), np.array(y)

SEQ_LENGTH = 24  # Use past 24 hours to predict next price
X, y = create_sequences(df_scaled, SEQ_LENGTH)

# Train-test split
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")


Train shape: (31817, 24, 5), Test shape: (7955, 24, 5)


# Build Model

In [4]:
# Define LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(SEQ_LENGTH, X_train.shape[2])),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    Dense(16, activation="relu"),
    Dense(1)  # Predict single value (price)
])

# Compile model
model.compile(optimizer=Adam(learning_rate=0.001), loss="mae")

# Train model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=32, verbose=1)


Epoch 1/50


  super().__init__(**kwargs)


[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - loss: 0.0891 - val_loss: 0.0367
Epoch 2/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13ms/step - loss: 0.0367 - val_loss: 0.0270
Epoch 3/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - loss: 0.0288 - val_loss: 0.0241
Epoch 4/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - loss: 0.0263 - val_loss: 0.0225
Epoch 5/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - loss: 0.0253 - val_loss: 0.0226
Epoch 6/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - loss: 0.0246 - val_loss: 0.0250
Epoch 7/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - loss: 0.0242 - val_loss: 0.0225
Epoch 8/50
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - loss: 0.0235 - val_loss: 0.0224
Epoch 9/50
[1m995/995[0m [32m━━━

# Evaluation

In [5]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Predict
y_pred = model.predict(X_test)

# Reverse scaling
y_pred_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_pred.reshape(-1, 1))))[:, -1]
y_test_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_test.reshape(-1, 1))))[:, -1]

# Calculate MAE & RMSE
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))

print(f"LSTM Model - MAE: {mae:.2f}, RMSE: {rmse:.2f}")


[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
LSTM Model - MAE: 11.74, RMSE: 15.19


In [14]:
from keras.models import load_model
#model.save('../models/my_model.h5')  # creates a HDF5 file 'my_model.h5'
model.save('../models/my_model.keras')
#del model  # deletes the existing model

# returns a compiled model
# identical to the previous one
# model = load_model('my_model.h5')

| Model                                            | Mae lower is better | Rmse lower is better |
| :----------------------------------------------- | :------------------ | :------------------- |
| Historical Average                               | 30.37               | 40.85                |
| Initial Linear Regression                        | 17.7                | 23.81                |
| Improved Linear Regression (Feature Engineering) | 16.24               | 21.42                |
| XGBoost (Default Settings)                       | 15.12               | 20.06                |
| Tuned XGBoost (Hyperparameter Search)            | 14.86               | 19.73                |
| Stacked Model (XGBoost + LightGBM + Ridge)       | 15.33               | 20.39                |
| lightGBM (tuned)                                 | 15.93               | 20.81                |
| Stacked Model with tuned LGBM                    | 15.47               | 20.41                |
| **LSTMs**                                        | **11.74**           | **15.19**            |


# LSTM with hyperparameter tuning

In [4]:
from keras.models import load_model
model = load_model('../models/my_model.keras')

  trackable.load_own_variables(weights_store.get(inner_path))


In [7]:
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam


## Parameters Search function

In [8]:
def build_lstm_model(hp):
    model = Sequential([
        LSTM(
            units=hp.Int("lstm_units", min_value=32, max_value=128, step=32),
            return_sequences=True,
            input_shape=(SEQ_LENGTH, X_train.shape[2])
        ),
        Dropout(hp.Float("dropout_1", min_value=0.1, max_value=0.5, step=0.1)),
        LSTM(hp.Int("lstm_units_2", min_value=16, max_value=64, step=16), return_sequences=False),
        Dropout(hp.Float("dropout_2", min_value=0.1, max_value=0.5, step=0.1)),
        Dense(hp.Int("dense_units", min_value=8, max_value=32, step=8), activation="relu"),
        Dense(1)
    ])
    
    model.compile(
        optimizer=Adam(learning_rate=hp.Choice("learning_rate", [0.001, 0.0005, 0.0001])),
        loss="mae"
    )
    
    return model


# Run Hyperparameter Search

<h3> Too Long Stopped!</h3>

In [12]:
SEQ_LENGTH = 24  # Use past 24 hours to predict next price
tuner = kt.BayesianOptimization(
    build_lstm_model,
    objective="val_loss",
    max_trials=15,
    executions_per_trial=2,
    directory="lstm_tuning",
    project_name="power_price_forecast"
)

tuner.search(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=32)

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print("Best Hyperparameters Found:")
print(f"LSTM Units: {best_hps.get('lstm_units')}")
print(f"LSTM Units 2: {best_hps.get('lstm_units_2')}")
print(f"Dropout 1: {best_hps.get('dropout_1')}")
print(f"Dropout 2: {best_hps.get('dropout_2')}")
print(f"Dense Units: {best_hps.get('dense_units')}")
print(f"Learning Rate: {best_hps.get('learning_rate')}")


Trial 3 Complete [00h 59m 19s]
val_loss: 0.02359774988144636

Best val_loss So Far: 0.021222582086920738
Total elapsed time: 04h 22m 43s

Search: Running Trial #4

Value             |Best Value So Far |Hyperparameter
128               |96                |lstm_units
0.1               |0.1               |dropout_1
64                |48                |lstm_units_2
0.1               |0.5               |dropout_2
16                |8                 |dense_units
0.001             |0.001             |learning_rate

Epoch 1/50
[1m449/995[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m10s[0m 20ms/step - loss: 0.1046

KeyboardInterrupt: 

Best val_loss So Far: 0.021222582086920738
Total elapsed time: 04h 22m 43s

Search: Running Trial #4

Value             |Best Value So Far |Hyperparameter
128               |96                |lstm_units
0.1               |0.1               |dropout_1
64                |48                |lstm_units_2
0.1               |0.5               |dropout_2
16                |8                 |dense_units
0.001             |0.001             |learning_rate

# Train the Best Model

In [18]:
best_model = Sequential([
    LSTM(96, return_sequences=True, input_shape=(SEQ_LENGTH, X_train.shape[2])),
    Dropout(0.1),
    LSTM(48, return_sequences=False),
    Dropout(0.5),
    Dense(8, activation="relu"),
    Dense(1)  # Predict single value (price)
])
# Compile model
best_model.compile(optimizer=Adam(learning_rate=0.001), loss="mae")

In [None]:
best_model = tuner.hypermodel.build(best_hps)

In [19]:


history = best_model.fit(
    X_train, y_train, validation_data=(X_test, y_test),
    epochs=100, batch_size=32, verbose=1
)

y_pred = best_model.predict(X_test)

# Reverse scaling
y_pred_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_pred.reshape(-1, 1))))[:, -1]
y_test_rescaled = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_test.reshape(-1, 1))))[:, -1]

# Calculate metrics
mae = mean_absolute_error(y_test_rescaled, y_pred_rescaled)
rmse = np.sqrt(mean_squared_error(y_test_rescaled, y_pred_rescaled))

print(f"Tuned LSTM - MAE: {mae:.2f}, RMSE: {rmse:.2f}")


Epoch 1/100
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 16ms/step - loss: 0.0998 - val_loss: 0.0381
Epoch 2/100
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0364 - val_loss: 0.0263
Epoch 3/100
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0308 - val_loss: 0.0293
Epoch 4/100
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0287 - val_loss: 0.0235
Epoch 5/100
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17ms/step - loss: 0.0274 - val_loss: 0.0239
Epoch 6/100
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0269 - val_loss: 0.0227
Epoch 7/100
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0267 - val_loss: 0.0227
Epoch 8/100
[1m995/995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 16ms/step - loss: 0.0267 - val_loss: 0.0241
Epoch 9/100
[1m

KeyboardInterrupt: 