In [None]:
# Utility cell: hard-stops the kernel process so the notebook front end starts a fresh one.
# os._exit() ends the process immediately, without running cleanup handlers or flushing buffers.
# NOTE(review): run this cell manually only -- during "Restart & Run All" it terminates the
# kernel before any later cell can execute.
import os
os._exit(00)  # Force restart of the kernel (the literal 00 is just exit status 0)

In [10]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from scikeras.wrappers import KerasRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential

# Load data + Feature Engineering
# Built as a single chain so `df` is always a fresh frame: filtering with
# `df = df[mask]` and then assigning columns can raise SettingWithCopyWarning,
# and in-place mutation makes the cell non-idempotent on re-run.
# Downstream code treats column 0 as the prediction target, so the calendar
# features are appended AFTER the original CSV columns, in this order:
# hour, day_of_week, month.
df = (
    pd.read_csv("df_combined.csv", parse_dates=["timestamp"])
    .set_index("timestamp")
    # Keep only rows from 2024-10-02 onward.
    .loc[lambda frame: frame.index >= "2024-10-02"]
    # Calendar features derived from the timestamp index.
    .assign(
        hour=lambda frame: frame.index.hour,
        day_of_week=lambda frame: frame.index.dayofweek,
        month=lambda frame: frame.index.month,
    )
)

# Normalize data
# Fit the scaler on the leading (training) portion of the series only. Fitting on
# the full frame would let the min/max of the held-out tail leak into the scaled
# training features and targets, making the test metrics optimistic.
TRAIN_FRACTION = 0.8  # keep in sync with the 0.8 chronological split of the windows below
n_fit_rows = int(len(df) * TRAIN_FRACTION)

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df.iloc[:n_fit_rows])

# Every row is transformed with the training-period statistics; values in the
# held-out tail may therefore fall slightly outside [0, 1], which is expected.
scaled_data = scaler.transform(df)

# Sequence Creation Function
def create_sequences(data, seq_length, target_col=0):
    """Slice a 2-D array into overlapping windows and next-step targets.

    Window ``i`` consists of rows ``i .. i + seq_length - 1``; its target is the
    value of column ``target_col`` in row ``i + seq_length``.

    Args:
        data: 2-D array-like of shape (n_rows, n_features).
        seq_length: Number of consecutive rows in each input window.
        target_col: Column index whose next-step value is the target.
            Defaults to 0, matching the original hard-coded behaviour.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        (n_rows - seq_length, seq_length, n_features) and ``y`` has shape
        (n_rows - seq_length,). When there are not enough rows for a single
        window, correctly shaped empty arrays are returned (instead of a flat
        ``(0,)`` array for ``X``), so downstream shape checks keep working.
    """
    data = np.asarray(data)
    n_windows = len(data) - seq_length

    if n_windows <= 0:
        empty_X = np.empty((0, seq_length) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + seq_length] for start in range(n_windows)])
    # Copy so the targets do not alias the caller's array.
    y = data[seq_length:, target_col].copy()
    return X, y

# Window length: every sample holds this many consecutive rows of scaled data.
SEQ_LENGTH = 48
X, y = create_sequences(scaled_data, SEQ_LENGTH)

# Split data chronologically (no shuffling): the first 80% of the windows are
# used for training and the remaining 20% are held out for testing.
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

# Define LSTM Model with Parameterized Arguments
def build_model(units=50, dropout_rate=0.2):
    """Build and compile a stacked two-layer LSTM regressor.

    The input shape is taken from the module-level ``SEQ_LENGTH`` and the number
    of columns in ``scaled_data``.

    Args:
        units: Number of units in each of the two LSTM layers.
        dropout_rate: Rate passed to the Dropout layer that follows each LSTM layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, mean-squared-error loss)
        that outputs a single value per input window.
    """
    n_features = scaled_data.shape[1]

    net = Sequential()
    net.add(Input(shape=(SEQ_LENGTH, n_features)))

    # First LSTM emits the full sequence so the second LSTM can consume it.
    net.add(LSTM(units, return_sequences=True))
    net.add(Dropout(dropout_rate))

    # Second LSTM collapses the sequence to its final state.
    net.add(LSTM(units, return_sequences=False))
    net.add(Dropout(dropout_rate))

    # Dense head reducing to one regression output.
    net.add(Dense(25))
    net.add(Dense(1))

    net.compile(optimizer='adam', loss='mean_squared_error')
    return net

# Wrap Keras model for use with GridSearchCV
model = KerasRegressor(model=build_model, verbose=0)

# Define hyperparameter grid
# `model__*` keys are forwarded to build_model; the others configure fitting.
param_grid = {
    'model__units': [32, 64, 100, 150],
    'model__dropout_rate': [0.15, 0.2, 0.3],
    'batch_size': [32, 64, 128],
    'epochs': [30, 40, 50],
}

# Perform GridSearchCV
# The windows are time-ordered, so use forward-chaining folds: each fold trains on
# an initial segment and validates on the segment that follows it. A plain integer
# `cv` would use contiguous K-fold, which validates some folds on data that is
# OLDER than the training data and so gives optimistic, look-ahead-biased scores.
cv_splitter = TimeSeriesSplit(n_splits=3)
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=cv_splitter,
    scoring='neg_mean_absolute_error',
    verbose=2,
)
grid_result = grid.fit(X_train, y_train)

# Best Model and Parameters
# best_estimator_ is refit on all of X_train with the winning parameters.
best_model = grid_result.best_estimator_
print(f"✅ Best Parameters: {grid_result.best_params_}")


def _inverse_transform_target(scaled_values):
    """Map scaled column-0 values back to the original scale.

    The scaler was fitted on every column, so the values are placed in column 0
    of a zero-padded matrix with the same width as ``scaled_data``, inverted, and
    column 0 is extracted again. The padding values do not affect the result
    because MinMaxScaler rescales each column independently.

    Args:
        scaled_values: Array-like of scaled column-0 values, shape (n,) or (n, 1).

    Returns:
        1-D array of the values in the original units of column 0.
    """
    column = np.asarray(scaled_values).reshape(-1, 1)
    padding = np.zeros((len(column), scaled_data.shape[1] - 1))
    return scaler.inverse_transform(np.hstack((column, padding)))[:, 0]


# Predictions
predictions = _inverse_transform_target(best_model.predict(X_test))

# Convert actual values back to original scale
y_test_actual = _inverse_transform_target(y_test)

Fitting 3 folds for each of 108 candidates, totalling 324 fits
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=32; total time=   4.2s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=32; total time=   4.2s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=32; total time=   4.3s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=64; total time=   4.3s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=64; total time=   4.9s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=64; total time=   4.8s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=100; total time=   6.0s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=100; total time=   5.6s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, model__units=100; total time=   6.2s
[CV] END batch_size=32, epochs=30, model__dropout_rate=0.15, 

In [None]:
# Evaluation Metrics
mae = mean_absolute_error(y_test_actual, predictions)
rmse = np.sqrt(mean_squared_error(y_test_actual, predictions))

# MAPE: divide by the MAGNITUDE of the actual value, floored at epsilon to avoid
# division by zero. Clipping the signed value instead would turn every negative
# actual into epsilon and inflate the percentage error by many orders of magnitude.
epsilon = 1e-10
mape = np.mean(
    np.abs(y_test_actual - predictions) / np.maximum(np.abs(y_test_actual), epsilon)
) * 100

print("📊 Evaluation Metrics:")
print(f"✅ MAE: {mae:.2f}")
print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAPE: {mape:.2f}%")

# Plotting
# The test targets correspond to the last len(y_test) rows of df. Slicing from an
# explicit start position (rather than a negative index) stays correct even when
# the test set is empty, where `-0` would select the whole index.
test_index = df.index[len(df) - len(y_test):]

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_index, y_test_actual, label='Actual Price', color='blue')
ax.plot(test_index, predictions, label='Predicted Price', color='red')
ax.set_xlabel("Date")
ax.set_ylabel("Electricity Price (EUR/MWh)")
ax.set_title("Improved LSTM Model: Actual vs Predicted Electricity Prices")
ax.legend()
ax.grid(True)
plt.show()