# LSTM model for predicting the electricity prices

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.metrics import root_mean_squared_error, mean_absolute_error

from modelling import *
%load_ext autoreload
%autoreload 2

In [2]:
# run on Apple's Metal backend (MPS) when PyTorch reports it as available,
# otherwise fall back to the CPU
if torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: mps


## Loading the data

In [3]:
# path to the CSV file (relative to the notebook's working directory)
path = '../../data/fulldata.csv'

# read the whole file into a DataFrame; later cells keep rebinding the name `data`
data = pd.read_csv(path)

# preview the first rows and report the (n_rows, n_columns) shape
display(data.head())
print(data.shape)

Unnamed: 0,SpotPrice,from,to,temp_pca_1,temp_pca_2,temp_pca_3,wind_speed_pca_1,wind_speed_pca_2,wind_speed_pca_3,wind_speed_pca_4,...,mean_wind_dir_pca_39,hour_sin,hour_cos,day_sin,day_cos,month_sin,month_cos,oil_price,gas_price,constant
0,596.570007,2024-11-29 23:00:00+00:00,2024-11-30 00:00:00+00:00,-33.480249,7.339981,1.718044,3.248249,2.159318,0.612078,3.505092,...,-0.093112,-0.258819,0.965926,-0.433884,-0.900969,-0.5,0.866025,72.940002,47.811001,1
1,770.27002,2024-11-29 22:00:00+00:00,2024-11-29 23:00:00+00:00,-32.131543,8.173021,2.038769,2.387323,3.215623,0.288633,3.701821,...,-0.004524,-0.5,0.866025,-0.433884,-0.900969,-0.5,0.866025,72.940002,47.811001,1
2,848.200012,2024-11-29 21:00:00+00:00,2024-11-29 22:00:00+00:00,-30.659681,8.780246,1.967827,1.620036,2.482432,0.516816,3.950176,...,-0.081036,-0.707107,0.707107,-0.433884,-0.900969,-0.5,0.866025,72.940002,47.811001,1
3,836.049988,2024-11-29 20:00:00+00:00,2024-11-29 21:00:00+00:00,-29.321284,9.297498,1.494915,1.490229,2.968878,1.071656,3.674438,...,-0.073105,-0.866025,0.5,-0.433884,-0.900969,-0.5,0.866025,72.940002,47.811001,1
4,894.219971,2024-11-29 19:00:00+00:00,2024-11-29 20:00:00+00:00,-27.818541,9.207599,0.85338,2.309477,2.890789,0.990981,2.872615,...,-0.040381,-0.965926,0.258819,-0.433884,-0.900969,-0.5,0.866025,72.940002,47.811001,1


(62280, 68)


## Data preprocessing

In [4]:
# parse the interval start as a datetime and discard the interval end column
data['from'] = pd.to_datetime(data['from'])
data = data.drop(columns=['to'])

# NOTE(review): every shift below is row-based, so it relies on the frame holding
# one row per hour in a consistent order. With rows ordered newest-first (as the
# raw preview suggests - confirm), shift(-k) pulls in the value from k hours earlier.

# explicitly include lagged prices as features
# (longer lags such as -48, -72, -96, -120, -144, -168 are currently not used)
lag_hours = [0, -1, -2, -3, -4, -5, -6, -24]
for price_lag in lag_hours:
    data[f'price_lag_{price_lag}'] = data['SpotPrice'].shift(price_lag)

# lagged oil and gas prices (a -168 lag is currently not used);
# oil/gas columns are created pairwise per lag to keep the column order stable
lag_oil_gas = [-24, -48, -72]
for fuel_lag in lag_oil_gas:
    for fuel in ('oil_price', 'gas_price'):
        data[f'{fuel}_{fuel_lag}'] = data[fuel].shift(fuel_lag)

# offset the target, its timestamp and the cyclical time encodings by 24 rows
# (one day) so that they describe the hour being predicted
time_features = ['hour', 'day', 'month']
offset_columns = ['SpotPrice', 'from']
for unit in time_features:
    offset_columns += [f'{unit}_sin', f'{unit}_cos']
for column in offset_columns:
    data[column] = data[column].shift(24)

# dropna removes every row with a missing value: the 24 rows emptied by the
# target offset as well as the rows at the other end emptied by the lag features.
# Afterwards order the rows chronologically and renumber the index.
data = data.dropna().sort_values('from').reset_index(drop=True)

display(data.head())
print(data.shape)

Unnamed: 0,SpotPrice,from,temp_pca_1,temp_pca_2,temp_pca_3,wind_speed_pca_1,wind_speed_pca_2,wind_speed_pca_3,wind_speed_pca_4,wind_speed_pca_5,...,price_lag_-4,price_lag_-5,price_lag_-6,price_lag_-24,oil_price_-24,gas_price_-24,oil_price_-48,gas_price_-48,oil_price_-72,gas_price_-72
0,96.019997,2017-10-27 00:00:00+00:00,11.237675,0.831761,0.32318,6.760909,2.790222,0.3983,3.612175,4.192237,...,210.059998,219.059998,222.639999,95.199997,58.439999,18.110001,58.330002,17.959999,57.369999,18.09
1,95.349998,2017-10-27 01:00:00+00:00,11.377424,0.873795,0.021477,5.367659,1.404028,0.596286,4.109275,3.550561,...,201.720001,210.059998,219.059998,104.129997,58.439999,18.110001,58.330002,17.959999,57.369999,18.09
2,141.729996,2017-10-27 02:00:00+00:00,11.360804,0.662414,-0.153373,4.258495,1.098352,-0.898239,4.812269,2.074639,...,194.720001,201.720001,210.059998,126.760002,58.439999,18.110001,58.330002,17.959999,57.369999,18.09
3,178.720001,2017-10-27 03:00:00+00:00,11.961742,0.604866,-0.202105,3.781656,0.551939,-0.444115,4.825875,1.811022,...,186.460007,194.720001,201.720001,196.139999,58.439999,18.110001,58.330002,17.959999,57.369999,18.09
4,238.199997,2017-10-27 04:00:00+00:00,12.162359,0.136666,-0.113291,3.029801,1.184454,-0.552593,5.198467,2.016204,...,185.720001,186.460007,194.720001,258.070007,58.439999,18.110001,58.330002,17.959999,57.369999,18.09


(62184, 81)


In [5]:
# extract column names
cols = data.columns.tolist()

# select the target and the features by NAME instead of by position, so a
# change in the CSV column order cannot silently turn the timestamp (or the
# target itself) into a model input. With the current layout
# ('SpotPrice', 'from', <features...>) this yields exactly cols[:1] / cols[2:].
target = ['SpotPrice']
non_feature_cols = set(target) | {'from'}  # 'from' is only used for splitting
all_features = [col for col in cols if col not in non_feature_cols]

print(f'Count of features before interaction terms: {len(all_features)}')
print(f'Target variable: {target}')

Count of features before interaction terms: 79
Target variable: ['SpotPrice']


In [6]:
# chronological split on the (shifted) 'from' timestamp
timestamps = data['from']

# training data: everything before 1 Aug 2023
train_data = data.loc[timestamps < '2023-08-01']

# validation data: 1 Aug 2023 up to (but not including) 1 Aug 2024
val_data = data.loc[(timestamps >= '2023-08-01') & (timestamps < '2024-08-01')]

# test data: starts on 31 Jul 2024, i.e. one day before the validation window
# ends; per the original note the extra 24 hours are there so that the first
# test sequences have their preceding 24 hours available
test_data = data.loc[timestamps >= '2024-07-31']

In [7]:
# standardise the features: the scaler's statistics are estimated on the
# training split only and then re-used unchanged for validation and test
scaler = StandardScaler()
X_train = scaler.fit_transform(train_data[all_features].to_numpy())
X_val, X_test = (
    scaler.transform(split[all_features].to_numpy())
    for split in (val_data, test_data)
)

In [8]:
# extract target values
train_targets = train_data[target].values
val_targets = val_data[target].values
test_targets = test_data[target].values

## Model building

In [9]:
# fix the torch and numpy RNG seeds for reproducibility
torch.manual_seed(2024)
np.random.seed(2024)

# data / optimisation settings
seq_length = 24                 # use the past 24 hours to form a sequence
batch_size = 256
learning_rate = 0.001

# network dimensions: one input per feature column, a single output value
input_dim = X_train.shape[-1]
output_dim = 1

# hyperparameter grid; the evaluation cell loops over these values to locate
# the saved checkpoints, so the lists must cover every model to be loaded
hidden_dim_array = [128, 256, 512]
layer_dim_array = [2, 3]
lambda_l1_array = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]
lambda_l2_array = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]

In [10]:
# create datasets
train_dataset = LSTMdataset(X_train, train_targets, seq_length)
val_dataset = LSTMdataset(X_val, val_targets, seq_length)
test_dataset = LSTMdataset(X_test, test_targets, seq_length)

# create data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# loss evaluation function
criterion = nn.MSELoss()

## Hyperparameter tuning

In [11]:
# # training the model
# num_epochs = 100
# patience = 10  # for early stopping
# best_loss_global = np.inf

# for hidden_dim in hidden_dim_array:
#     for layer_dim in layer_dim_array:
#         for lambda_l1 in lambda_l1_array:
#             for lambda_l2 in lambda_l2_array:
#                 torch.manual_seed(2024)
#                 np.random.seed(2024)
#                 model = LSTMmodel(input_dim, hidden_dim, layer_dim, output_dim).to(device)
#                 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#                 # set seed for reproducibility
#                 best_loss = np.inf
#                 counter = 0

#                 # initialize lists to store loss values
#                 training_losses = []
#                 validation_losses = []

#                 for epoch in range(num_epochs):
#                     model.train()
#                     train_loss = 0
#                     for X_batch, y_batch in train_loader:
#                         X_batch, y_batch = X_batch.to(device), y_batch.to(device)
#                         optimizer.zero_grad()

#                         # forward pass
#                         outputs = model(X_batch)

#                         mse_loss = criterion(outputs.squeeze(), y_batch.squeeze())

#                         # L1 regularization
#                         l1_loss = l1_regularization(model, lambda_l1)
#                         l2_loss = l2_regularization(model, lambda_l2)

#                         # calc total loss
#                         loss = mse_loss + l1_loss + l2_loss

#                         # backward pass and optimization
#                         loss.backward()
#                         optimizer.step()

#                         train_loss += loss.item()


#                     # average training loss
#                     avg_train_loss = train_loss / len(train_loader)
#                     training_losses.append(avg_train_loss)

#                     # validation
#                     model.eval()
#                     val_losses = []
#                     with torch.no_grad():
#                         for X_batch, y_batch in val_loader:
#                             X_batch, y_batch = X_batch.to(device), y_batch.to(device)
#                             outputs = model(X_batch)
#                             mse_loss = criterion(outputs.squeeze(), y_batch.squeeze())
#                             loss = mse_loss
#                             val_losses.append(loss.item())

#                     avg_val_loss = np.mean(val_losses)
#                     validation_losses.append(avg_val_loss)
#                     if (epoch + 1) % 10 == 0:
#                         print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')
#                     # torch.save(model.state_dict(), 'output/nn-paths/last_model.pth')
                    
#                     # early stopping
#                     if avg_val_loss < best_loss:
#                         best_loss = avg_val_loss
#                         counter = 0
#                         # save the best model
#                         torch.save(model.state_dict(), f'output/nn-paths/best_model_{layer_dim}_{hidden_dim}_{int(np.abs(np.log10(lambda_l1)))}_{int(np.abs(np.log10(lambda_l2)))}.pth')
#                     else:
#                         counter += 1
#                         if counter >= patience:
#                             print(f"Early stopping after {epoch} epochs. Best validation loss: {best_loss:.4f}")
#                             break
                    
#                     if epoch == (num_epochs - 1):
#                         print(f"Training stopped after {epoch} epochs. Best validation loss: {best_loss:.4f}")


## Model evaluation

### Evaluating all models in the grid search

In [None]:
def _grid_checkpoint_path(layer_dim, hidden_dim, lambda_l1, lambda_l2):
    """Return the checkpoint path for one grid-search configuration.

    Each regularisation strength is encoded in the file name as
    int(|log10(lambda)|), e.g. 1e-3 -> 3.
    """
    l1_tag = int(np.abs(np.log10(lambda_l1)))
    l2_tag = int(np.abs(np.log10(lambda_l2)))
    return f'output/nn-paths/best_model_{layer_dim}_{hidden_dim}_{l1_tag}_{l2_tag}.pth'


# load all the saved grid-search models and report their validation loss
best_val_loss = np.inf
# initialised up front so that "no checkpoint found" is detected explicitly
# instead of surfacing later as a NameError
best_hidden_dim = best_layer_dim = best_lambda_l1 = best_lambda_l2 = None

for hidden_dim in hidden_dim_array:
    for layer_dim in layer_dim_array:
        for lambda_l1 in lambda_l1_array:
            for lambda_l2 in lambda_l2_array:
                checkpoint = _grid_checkpoint_path(layer_dim, hidden_dim, lambda_l1, lambda_l2)
                try:
                    # map_location lets checkpoints written on another device
                    # (e.g. MPS) be loaded on the current one
                    state_dict = torch.load(checkpoint, map_location=device)
                    model = LSTMmodel(input_dim, hidden_dim, layer_dim, output_dim).to(device)
                    model.load_state_dict(state_dict)
                except FileNotFoundError:
                    # this configuration has no saved checkpoint -> skip it
                    continue
                except Exception as exc:
                    # unreadable / incompatible checkpoint: keep evaluating the
                    # rest of the grid, but report it instead of hiding it
                    print(f'Skipping {checkpoint}: {exc}')
                    continue

                model.eval()
                val_losses = []
                with torch.no_grad():
                    for X_batch, y_batch in val_loader:
                        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                        outputs = model(X_batch)
                        mse_loss = criterion(outputs.squeeze(), y_batch.squeeze())
                        val_losses.append(mse_loss.item())

                # average of the per-batch validation MSE values
                avg_val_loss = np.mean(val_losses)

                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    best_hidden_dim = hidden_dim
                    best_lambda_l1 = lambda_l1
                    best_lambda_l2 = lambda_l2
                    best_layer_dim = layer_dim
                print(f'Hidden dim:  {hidden_dim}, Layer dim: {layer_dim},  L1:    {lambda_l1},  L2:  {lambda_l2},\nValidation Loss:     {avg_val_loss:.4f}')

if best_hidden_dim is None:
    raise RuntimeError(
        'No usable grid-search checkpoint was found under output/nn-paths/; '
        'run the hyperparameter tuning first.'
    )

print(f'Best: \n Hidden dim:  {best_hidden_dim}, Layer dim: {best_layer_dim},  L1:    {best_lambda_l1},  L2:  {best_lambda_l2},\nBest Validation Loss:     {best_val_loss:.4f}')

# save the best model as best_model.pth
model = LSTMmodel(input_dim, best_hidden_dim, best_layer_dim, output_dim).to(device)
model.load_state_dict(
    torch.load(
        _grid_checkpoint_path(best_layer_dim, best_hidden_dim, best_lambda_l1, best_lambda_l2),
        map_location=device,
    )
)
torch.save(model.state_dict(), 'output/nn-paths/best_model.pth')
print('Best model saved as output/nn-paths/best_model.pth')

Hidden dim:  128, Layer dim: 2,  L1:    0.1,  L2:  0.1,    Validation Loss:     81444.3724
RMSE: 365.5452, MAE: 278.4396
Hidden dim:  128, Layer dim: 2,  L1:    0.1,  L2:  0.01,    Validation Loss:     77399.2890
RMSE: 369.0497, MAE: 280.5470
Hidden dim:  128, Layer dim: 2,  L1:    0.1,  L2:  0.001,    Validation Loss:     72896.4416
RMSE: 358.6684, MAE: 274.9196
Hidden dim:  128, Layer dim: 2,  L1:    0.1,  L2:  0.0001,    Validation Loss:     80378.5820
RMSE: 372.9278, MAE: 288.5375
Hidden dim:  128, Layer dim: 2,  L1:    0.1,  L2:  1e-05,    Validation Loss:     79988.5750
RMSE: 358.8839, MAE: 270.3241
Hidden dim:  128, Layer dim: 2,  L1:    0.01,  L2:  0.1,    Validation Loss:     82258.0099
RMSE: 358.3621, MAE: 271.6883
Hidden dim:  128, Layer dim: 2,  L1:    0.01,  L2:  0.01,    Validation Loss:     80376.9750
RMSE: 369.5752, MAE: 278.8642
Hidden dim:  128, Layer dim: 2,  L1:    0.01,  L2:  0.001,    Validation Loss:     81129.4104
RMSE: 356.3283, MAE: 269.8279
Hidden dim:  128, 

### Re-training best model

In [13]:
# hyperparameters chosen by the grid-search evaluation cell
opt_hidden_dim, opt_layer_dim = best_hidden_dim, best_layer_dim
opt_lambda_l1, opt_lambda_l2 = best_lambda_l1, best_lambda_l2
# notes carried over from earlier manual experiments (informational only):
#   hidden_dim: 512 seemed to work best, 256 acceptable
#   layer_dim:  2 seemed to work best
#   lambda_l1:  1e-1 seemed to work best
#   lambda_l2:  1e-4, 1e-5 and 1e-6 all seemed to work well, maybe 1e-6 best

# reseed before building the network so the initial weights are reproducible
torch.manual_seed(2024)
np.random.seed(2024)

# fresh model with the selected architecture, placed on the active device
model = LSTMmodel(input_dim, opt_hidden_dim, opt_layer_dim, output_dim).to(device)

# Adam optimiser over all model parameters
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [14]:
# retrain the selected configuration and record its loss path
num_epochs = 150
patience = 25  # epochs without a better validation loss before stopping early
best_loss = np.inf
counter = 0

# per-epoch loss history (read by the plotting cell)
training_losses = []
validation_losses = []

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    train_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        optimizer.zero_grad()

        # forward pass and data-fit term
        outputs = model(X_batch)
        mse_loss = criterion(outputs.squeeze(), y_batch.squeeze())

        # L1 and L2 penalties on the model parameters
        l1_loss = l1_regularization(model, opt_lambda_l1)
        l2_loss = l2_regularization(model, opt_lambda_l2)

        # objective = MSE + both penalties, so the recorded training loss
        # includes the regularisation terms
        loss = mse_loss + l1_loss + l2_loss

        # backward pass and parameter update
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # mean training loss over the batches of this epoch
    avg_train_loss = train_loss / len(train_loader)
    training_losses.append(avg_train_loss)

    # ---- validation pass (plain MSE, no penalties, no gradients) ----
    model.eval()
    with torch.no_grad():
        val_losses = [
            criterion(model(val_X.to(device)).squeeze(), val_y.to(device).squeeze()).item()
            for val_X, val_y in val_loader
        ]

    avg_val_loss = np.mean(val_losses)
    validation_losses.append(avg_val_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}')

    # ---- early stopping ----
    if avg_val_loss < best_loss:
        # new best epoch: reset the patience counter and checkpoint the weights
        best_loss = avg_val_loss
        counter = 0
        torch.save(model.state_dict(), 'output/nn-paths/best_model.pth')
        continue

    counter += 1
    if counter >= patience:
        print("Early stopping")
        break

Epoch [1/150], Training Loss: 820204.2926, Validation Loss: 264223.9813
Epoch [2/150], Training Loss: 720552.9733, Validation Loss: 199643.7498
Epoch [3/150], Training Loss: 647908.3088, Validation Loss: 155791.2917
Epoch [4/150], Training Loss: 597923.4184, Validation Loss: 128693.2910
Epoch [5/150], Training Loss: 576356.9318, Validation Loss: 178868.0626
Epoch [6/150], Training Loss: 571664.0807, Validation Loss: 111391.0458
Epoch [7/150], Training Loss: 500836.2825, Validation Loss: 101637.1550
Epoch [8/150], Training Loss: 464650.0011, Validation Loss: 95562.7242
Epoch [9/150], Training Loss: 429469.2240, Validation Loss: 89579.8041
Epoch [10/150], Training Loss: 397481.3644, Validation Loss: 79679.5913
Epoch [11/150], Training Loss: 372431.4884, Validation Loss: 88724.4418
Epoch [12/150], Training Loss: 349803.7461, Validation Loss: 92122.8112
Epoch [13/150], Training Loss: 329407.0987, Validation Loss: 67862.7783
Epoch [14/150], Training Loss: 309741.2367, Validation Loss: 68184

In [15]:
# load the best model; map_location makes the checkpoint loadable even when it
# was written on a different device than the one in use now
model = LSTMmodel(input_dim, opt_hidden_dim, opt_layer_dim, output_dim).to(device)
model.load_state_dict(torch.load('output/nn-paths/best_model.pth', map_location=device))

# collect the predictions and the matching targets over the whole test set
predictions = []
actuals = []

model.eval()
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        outputs = model(X_batch)

        # reshape(-1) rather than squeeze(): squeeze() also removes the batch
        # dimension when the final batch holds a single sample, in which case
        # .tolist() returns a bare float and list.extend() raises a TypeError
        predictions.extend(outputs.reshape(-1).tolist())

        actuals.extend(y_batch.tolist())

# error metrics on the test set
rmse = root_mean_squared_error(actuals, predictions)
mae = mean_absolute_error(actuals, predictions)

print(f'Performance on test data:\nRMSE: {rmse:.4f}\nMAE: {mae:.4f}')

Performance on test data:
RMSE: 324.0369
MAE: 249.2722


In [16]:
# rescale both loss histories to thousands for a readable y-axis
train_losses = np.array(training_losses) / 1000
val_losses = np.array(validation_losses) / 1000

# loss curves over the epochs, drawn with the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(14, 8))
for curve, curve_label in ((train_losses, 'Training Loss'), (val_losses, 'Validation Loss')):
    ax.plot(range(1, len(curve) + 1), curve, label=curve_label, linewidth=3)
ax.set_xlabel('Epoch', fontsize=26)
ax.set_ylabel('Avg loss (x1,000)', fontsize=26)
ax.tick_params(axis='both', which='major', labelsize=24)
ax.legend(fontsize=24)
ax.grid(True)
fig.tight_layout()

# the figure is written to disk and closed without being shown inline
fig.savefig('output/loss.png')
plt.close(fig)



In [17]:
# turn the collected 'predictions' and 'actuals' lists into numpy arrays
predictions, actuals = np.array(predictions), np.array(actuals)

# write the predicted values to disk
np.save('output/forecast/forecast_lstm.npy', predictions)