In [None]:
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from core.util.plot_predictions import plot_predictions
from core.util.get_datasets import get_park_datasets
from core.util.trefor_dataset import TreforData
from core.models import LSTM
from core.models.model_training import train_model, test_model
from core.util.hyperparameter_configuration import get_hyperparameter_configuration
from core.util.early_stop import EarlyStop

Set global parameters.

In [None]:
# Hyperparameters selected from the grid search; update these when the search
# results change.
batch_size = 128
learning_rate = 0.001
num_layers = 1

# Hyperparameters that are not tuned come from the shared project configuration.
# `lookback` is one of the unpacked values, so it is intentionally not assigned
# by hand in this cell: any value set before this statement would be silently
# overwritten by the unpacking.
hidden_size, EPOCHS, horizon, lookback, loss_function, dropout_rate, folds = (
    get_hyperparameter_configuration()
)

Use CUDA (GPU) if available.

In [None]:
# Use the first CUDA device when PyTorch reports one; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Bare expression so the notebook displays which device was selected.
device

Split the data into training, validation, and test sets. The outputs of `get_park_datasets` are tensors.

In [None]:
# Fetch every split in one call, then unpack into the train / validation / test
# inputs and targets plus the trailing `indicies` value.
park_splits = get_park_datasets(lookback=lookback, horizon=horizon, folds=folds)
x_train, y_train, x_val, y_val, x_test, y_test, indicies = park_splits

Create the datasets for train, validation and test.

In [None]:
# Wrap each (inputs, targets) pair in a TreforData dataset bound to `device`.
# The order of the pairs matches the order of the names on the left-hand side.
train_dataset, val_dataset, test_dataset = (
    TreforData(inputs, targets, device)
    for inputs, targets in ((x_train, y_train), (x_val, y_val), (x_test, y_test))
)

Load the data into dataloaders with the batch size specified in the global parameters. Don't shuffle, as we use time series where order matters.

In [None]:
# One loader per split, all with the same batch size. Shuffling stays off so the
# samples are served in their original order.
training_loader, validation_loader, testing_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=False)
    for split in (train_dataset, val_dataset, test_dataset)
)

Initialize a very basic LSTM.

In [None]:
# Build the LSTM from the hyperparameters defined earlier in the notebook.
model = LSTM(
    # Window sizes.
    lookback=lookback,
    horizon=horizon,
    # Network shape. NOTE(review): input_size=1 presumably means one feature
    # per time step -- confirm against the LSTM class.
    input_size=1,
    hidden_size=hidden_size,
    num_layers=num_layers,
    dropout_rate=dropout_rate,
)

# Move the model to the selected device; as the last expression of the cell,
# the call's return value is also what the notebook displays.
model.to(device)

Train the model and validate it on the validation set for each epoch.

In [None]:
# Early-stopping criterion handed to the training routine.
# NOTE(review): 5 and 0.05 are presumably the patience and the minimum
# improvement threshold -- confirm against EarlyStop's signature.
early_stopper = EarlyStop(5, 0.05)

# Train the model; keep both loss histories for plotting and the best model for
# evaluation on the test set.
plot_train_loss, plot_val_loss, best_model = train_model(
    model=model,
    loss_function=loss_function,
    learning_rate=learning_rate,
    epochs=EPOCHS,
    training_loader=training_loader,
    validation_loader=validation_loader,
    early_stopper=early_stopper,
)

Evaluate the final model on the test data.

In [None]:
# Run the best model returned by training over the test loader.
t_loss, predicted = test_model(
    best_model=best_model,
    loss_function=loss_function,
    testing_loader=testing_loader,
)

# Report the loss value returned by test_model.
print(f"Avg loss: {t_loss:>8f} \n")

## Visualize model performance
Visualize the training and validation loss.

In [None]:
# Start from a clean figure, then draw both loss histories on the same axes.
plt.clf()
for loss_curve, curve_label in (
    (plot_train_loss, "Training Loss"),
    (plot_val_loss, "Validation Loss"),
):
    plt.plot(loss_curve, label=curve_label)

# The legend tells the two curves apart.
plt.legend()
plt.show()

Plot the actual values and the model's predictions on the test set.

Plot predictions for the first 7 days in the test set.

In [None]:
# Compare the test targets with the model's predictions for the range 0..6.
# NOTE(review): the first two arguments are presumably start/end day indices
# (the notebook text calls this "the first 7 days") -- confirm against
# plot_predictions.
plot_predictions(0, 6, y_test, predicted)

Plot predictions for the last 7 days in the test set.

In [None]:
# Compare the test targets with the model's predictions for the range -7..-1.
# NOTE(review): negative values presumably count days from the end of the data
# (the notebook text calls this "the last 7 days") -- confirm against
# plot_predictions.
plot_predictions(-7, -1, y_test, predicted)

Plot predictions for 7 days in the middle of the test set.

In [None]:
# Number of days covered by y_test, assuming 24 samples per day -- TODO confirm.
days_count = len(y_test) / 24

# Plot a window that starts at the midpoint day.
# NOTE(review): this window spans 7 (start .. start + 7) while the other two
# plots in this notebook span 6 (0..6 and -7..-1); verify which end-bound
# convention plot_predictions uses.
middle_day = int(days_count / 2)
plot_predictions(middle_day, int(days_count / 2 + 7), y_test, predicted)