# 00 LSTM trefor park

In [None]:
import torch
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import DataLoader
from core.util.plot_predictions import plot_predictions
from core.util.save_model import save_model, load_parameters
from core.util.get_datasets import cross_validation
from core.util.trefor_dataset import TreforData
from core.models import LSTM
from core.models.model_training import train_model, test_model
from core.util.hyperparameter_configuration import get_hyperparameter_configuration
from core.util.metrics import mae, rmse, smape

## Configuration
Parameters specific to this experiment

In [None]:
# Name of this experiment; reused below to load the stored hyperparameter
# configurations and to save the final model.
experiment_name = "00_LSTM_trefor_park"

# Additional input features passed to the dataset loader (none here).
features = {}

# One model input per additional feature plus one more.
# NOTE(review): the extra 1 presumably is the consumption series itself - confirm.
model_input_size = 1 + len(features)

Load in the 3 best hyperparameter configurations found by the grid search

In [None]:
# Collect the stored configurations "<experiment_name>_0" .. "<experiment_name>_2",
# printing each one as soon as it has been loaded.
parameters = []
for i in range(3):
    loaded_configuration = load_parameters(f"{experiment_name}_{i}")
    parameters.append(loaded_configuration)
    print(loaded_configuration)

Global hyperparameter configuration

In [None]:
# Fetch the configuration shared by every model trained in this notebook.
hyperparameters = get_hyperparameter_configuration()

# Model-related settings
hidden_size = hyperparameters["hidden_size"]
dropout_rate = hyperparameters["dropout_rate"]
horizon = hyperparameters["horizon"]

# Training-related settings
epochs = hyperparameters["epochs"]
loss_function = hyperparameters["loss_function"]
early_stopper = hyperparameters["early_stopper"]

# Sizes of the train / validation / test splits
train_days = hyperparameters["train_days"]
val_days = hyperparameters["val_days"]
test_days = hyperparameters["test_days"]

# Last expression of the cell, so the notebook displays the configuration.
hyperparameters

If CUDA is available on the host, the GPU is used for computation; otherwise the CPU is used.

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Last expression of the cell, so the notebook displays the chosen device.
device

### Creation of our simple LSTM model
The implementation consists of four layers, applied in the order below in the `forward` method.
1. LSTM
2. LeakyReLU
3. Linear
4. ReLU

In [None]:
def get_model(num_layers: int, lookback: int) -> nn.Module:
    """Create a fresh LSTM for one training run and move it to `device`.

    Args:
        num_layers: Forwarded to the LSTM's `num_layers` argument.
        lookback: Forwarded to the LSTM's `lookback` argument.

    Returns:
        The newly constructed model after it has been moved to `device`.

    The remaining constructor arguments (input size, hidden size, dropout
    rate and horizon) come from the notebook-level configuration.
    """
    lstm_arguments = {
        "input_size": model_input_size,
        "hidden_size": hidden_size,
        "num_layers": num_layers,
        "dropout_rate": dropout_rate,
        "horizon": horizon,
        "lookback": lookback,
    }
    network = LSTM(**lstm_arguments)
    network.to(device)
    return network

### Main loop
Iterate over all hyperparameter configurations to find the best one.

For each of these, we do the full iteration of epochs (unless early stop occurs) with training and validation.
Lastly, we run the test set on the given model to see how it performs using the metrics MAE, RMSE, and sMAPE.

In [None]:
# One entry per trained configuration: metrics, model, loss curves and the
# test targets/predictions needed for the plots further down.
results = []
for i, experiment_parameters in enumerate(parameters):
    # Reset the early stopper.
    # Otherwise it can carry information over from the previous training run
    # and stop too early.
    early_stopper.reset()

    # Hyperparameters of the i'th configuration
    learning_rate = experiment_parameters["learning_rate"]
    batch_size = experiment_parameters["batch_size"]
    lookback = experiment_parameters["lookback"]
    num_layers = experiment_parameters["num_layers"]
    torch.manual_seed(experiment_parameters["seed"])

    # Load the datasets inside the loop because they can differ between
    # configurations (different lookback).
    x_train, y_train, x_val, y_val, x_test, y_test = cross_validation(
        lookback=lookback,
        horizon=horizon,
        train_days=train_days,
        val_days=val_days,
        test_days=test_days,
        features=features,
    )

    # Show the shapes of the datasets
    # x_train, x_validation, and x_test: [datapoints, lookback, number of features]
    # y_train, y_validation, and y_test: [datapoints, horizon]
    print(f"x_train: {x_train.shape}")
    print(f"y_train: {y_train.shape}")
    print(f"x_val: {x_val.shape}")
    print(f"y_val: {y_val.shape}")
    print(f"x_test: {x_test.shape}")
    print(f"y_test: {y_test.shape}")

    # Wrap the splits in datasets that the dataloaders can consume
    train_dataset = TreforData(x_train, y_train, device)
    val_dataset = TreforData(x_val, y_val, device)
    test_dataset = TreforData(x_test, y_test, device)

    # Initialize the dataloaders, without shuffling the data between epochs
    training_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    validation_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    testing_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Train the i'th model. The run name is derived from `experiment_name`
    # so it cannot drift from the name used for loading and saving.
    experiment_iteration_name = f"{experiment_name}_iteration_{i}"
    best_train_loss, best_val_loss, best_model = train_model(
        epochs=epochs,
        model=get_model(num_layers, lookback),
        loss_function=loss_function,
        training_loader=training_loader,
        validation_loader=validation_loader,
        learning_rate=learning_rate,
        early_stopper=early_stopper,
    )

    # Evaluate the trained model on the held-out test set
    t_loss, predicted = test_model(
        best_model=best_model,
        loss_function=loss_function,
        testing_loader=testing_loader,
    )

    # Flatten the predictions and the test targets so the metrics can
    # compare them element by element.
    flattened_predicted = predicted.flatten()
    flattened_test = y_test.flatten()

    model_mae = mae(flattened_test, flattened_predicted).detach().item()
    # NOTE(review): unlike mae/smape, the rmse result is stored as returned -
    # confirm that it is already a plain number.
    model_rmse = rmse(flattened_test, flattened_predicted)
    model_smape = smape(flattened_test, flattened_predicted).detach().item()

    print(experiment_iteration_name)
    print(f"MAE: {model_mae}")
    print(f"RMSE: {model_rmse}")
    print(f"SMAPE: {model_smape} \n")

    # Append the results so we can pick out the second best afterwards
    results.append(
        {
            "sMAPE": model_smape,
            "MAE": model_mae,
            "RMSE": model_rmse,
            "model": best_model,
            "train loss": best_train_loss,
            "validation loss": best_val_loss,
            "y_test": y_test,
            "predicted": predicted,
            "parameter index": i,
        }
    )

In order to select the second best model we sort the results by lowest sMAPE.

sMAPE is the chosen metric because it is asymmetric: for the same absolute error, the loss is lower when the prediction is larger than the actual value than when the prediction is lower than the actual value.
$$\hat{y} > y: \text{lower loss} \\ \hat{y} < y: \text{greater loss}$$

In [None]:
# Sort the results by sMAPE (ascending) and take the entry at index 1,
# i.e. the second best configuration.
second_best = sorted(results, key=lambda d: d["sMAPE"])[1]

# The keys inside the replacement fields use single quotes: reusing the
# f-string's own double quotes is a SyntaxError before Python 3.12.
print(f"sMAPE: {second_best['sMAPE']}")
print(f"MAE: {second_best['MAE']}")
print(f"RMSE: {second_best['RMSE']}")
print(f"parameters: {parameters[second_best['parameter index']]}")

# Rebind the notebook-level names to the selected run so that the saving
# and plotting cells below operate on it rather than on the last loop pass.
model_smape = second_best["sMAPE"]
model_mae = second_best["MAE"]
model_rmse = second_best["RMSE"]
best_model = second_best["model"]
best_train_loss = second_best["train loss"]
best_val_loss = second_best["validation loss"]
y_test = second_best["y_test"]
predicted = second_best["predicted"]

# Persist the selected model together with its losses and metrics under the
# experiment name.
save_model(
    model=best_model,
    model_name=experiment_name,
    train_loss=best_train_loss,
    val_loss=best_val_loss,
    mae=model_mae,
    rmse=model_rmse,
    smape=model_smape,
    overwrite=True,
)

## Plotting
### Training- and validation loss
Visualize the training and validation loss.

In [None]:
# Start from an empty figure, then draw both loss curves.
plt.clf()
plt.plot(best_train_loss, label="Training Loss")
plt.plot(best_val_loss, label="Validation Loss")

# Mark the first position with the lowest validation loss.
lowest_val_loss = min(best_val_loss)
lowest_val_position = best_val_loss.index(lowest_val_loss)
plt.scatter(
    lowest_val_position,
    lowest_val_loss,
    color="red",
    marker=".",
    label="Chosen model",
    zorder=10,
)

plt.legend()
plt.show()

### Predictions- and actual consumption

Plot the actual values and predictions from the model on the test set.

In [None]:
# Plot actual values against the model's predictions for the range 0..300 of the test set.
# NOTE(review): the unit of the two range arguments (days vs. samples) and whether
# the end is inclusive are defined by plot_predictions - confirm.
plot_predictions(0, 300, y_test, predicted)

Plot predictions for the first 7 days in the test set

In [None]:
# Plot the start of the test set.
# NOTE(review): 0..6 covers seven days only if the arguments are day indices and
# the end is inclusive - confirm against plot_predictions.
plot_predictions(0, 6, y_test, predicted)

Plot predictions for the last 7 days in the test set

In [None]:
# Plot the end of the test set using negative (from-the-end) range arguments.
# NOTE(review): -7..-1 covers seven days only if the arguments are day indices and
# the end is inclusive - confirm against plot_predictions.
plot_predictions(-7, -1, y_test, predicted)

Plot predictions for 7 days in the middle of the test set.

In [None]:
# Number of test entries divided by 24.
# NOTE(review): presumably converts hourly samples to days - confirm.
days_count = len(y_test) / 24

# Range starting at the midpoint and ending seven units later.
middle_start = int(days_count / 2)
middle_end = int(days_count / 2 + 7)
plot_predictions(middle_start, middle_end, y_test, predicted)