Testing out PyTorch Forecasting

In [1]:
import numpy as np
import pytorch_lightning as pl
import torch
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_forecasting import TimeSeriesDataSet, Baseline, TemporalFusionTransformer, DeepAR
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.data.examples import get_stallion_data
from pytorch_forecasting.metrics import RMSE, SMAPE, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# Load the Stallion example dataset via pytorch_forecasting's examples module.
# Later cells index the result like a pandas DataFrame (data["date"].dt, data.volume).
data = get_stallion_data()


TypeError: Cannot create a consistent method resolution
order (MRO) for bases Callback, PyTorchLightningPruningCallback

In [ ]:
# Preprocess the data: derive the time index and a few helper features on `data`.

# Series.dt.weekofyear was removed in pandas 2.0; dt.isocalendar().week is its
# replacement. Cast to int64 so time_idx is a plain integer column.
iso_week = data["date"].dt.isocalendar().week.astype("int64")
# NOTE(review): year * 52 + ISO week is not strictly increasing around New Year
# (ISO week 53, early-January dates that belong to the previous ISO year) and
# leaves gaps if `date` is not sampled weekly — confirm the data's frequency.
data["time_idx"] = data["date"].dt.year * 52 + iso_week

# Calendar month as a string-valued categorical feature.
data["month"] = data.date.dt.month.astype(str).astype("category")

# The small offset keeps log() finite when volume is exactly 0.
data["log_volume"] = np.log(data.volume + 1e-8)

# NOTE(review): assumes the frame has an `order` column — verify against the dataset.
data["avg_basket_size"] = data["volume"] / data["order"]

# Replace missing values with 0 in numeric columns only. A frame-wide fillna(0)
# would also target categorical columns such as `month`, where 0 is not one of
# the categories. Non-numeric columns are left untouched.
numeric_columns = data.select_dtypes(include="number").columns
data[numeric_columns] = data[numeric_columns].fillna(0)

In [ ]:
# Train/validation split: rows whose time_idx lies within the last 24 index
# values are flagged by `mask`; the training dataset is built from the rest.
holdout_steps = 12 * 2
latest_time_idx = data["time_idx"].max()
mask = data["time_idx"] > latest_time_idx - holdout_steps

# Individual holiday columns that are presented to the model as one grouped
# categorical called "special_days".
special_day_columns = ["easter_day", "good_friday", "new_year", "christmas", "labor_day"]

# Keyword configuration for the dataset, kept separate from the frame it is applied to.
dataset_options = dict(
    time_idx="time_idx",
    target="volume",
    group_ids=["agency", "sku"],
    min_encoder_length=0,
    max_encoder_length=12,
    min_prediction_length=1,
    max_prediction_length=24,
    static_categoricals=["agency", "sku", "brand"],
    static_reals=["avg_population_2017", "avg_yearly_household_income_2017"],
    time_varying_known_categoricals=["special_days", "month"],
    variable_groups={"special_days": special_day_columns},
)

training_data = TimeSeriesDataSet(data[~mask], **dataset_options)

In [ ]:
# Build the validation dataset with the same configuration as the training
# dataset, from the held-out rows selected by `mask`.
validation_data = TimeSeriesDataSet.from_dataset(training_data, data[mask], predict=True, stop_randomization=True)

# Batch the validation samples through a DataLoader. Iterating the dataset
# directly yields one sample at a time, and concatenating those targets on
# dim 0 flattens them to 1-D, which does not line up with the batched
# (samples, horizon) predictions returned by Baseline().predict.
EVAL_BATCH_SIZE = 128
val_dataloader = validation_data.to_dataloader(train=False, batch_size=EVAL_BATCH_SIZE, num_workers=0)

# Calculate baseline absolute error.
# Each batch is (x, (target, weight)); keep only the target tensor.
actuals = torch.cat([target for _, (target, _) in iter(val_dataloader)], dim=0)
baseline_predictions = Baseline().predict(val_dataloader)

# Mean absolute error of the baseline; shown as the cell output.
(baseline_predictions - actuals).abs().mean().item()

In [ ]:
# Train the model

# Lightning's Trainer.fit consumes DataLoaders, not TimeSeriesDataSet objects,
# so wrap both datasets first.
BATCH_SIZE = 128
train_dataloader = training_data.to_dataloader(train=True, batch_size=BATCH_SIZE, num_workers=0)
val_dataloader = validation_data.to_dataloader(train=False, batch_size=BATCH_SIZE * 10, num_workers=0)

# Configure callbacks and logging.
# Stop when val_loss has not improved by at least 1e-4 for 10 validation runs.
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # write results to TensorBoard

# NOTE(review): `gpus` and `weights_summary` were removed in later
# pytorch_lightning releases — confirm they match the installed version.
trainer = pl.Trainer(
    max_epochs=100,
    gpus=1,  # use this to train on GPU
    weights_summary="top",
    gradient_clip_val=0.1,
    limit_train_batches=30,  # 30 training batches per epoch, so validation runs every 30 batches
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,  # previously created but never handed to the Trainer
)

tft = TemporalFusionTransformer.from_dataset(
    training_data,
    learning_rate=0.03,
    hidden_size=32,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # one output per quantile; QuantileLoss defaults to 7 quantiles
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Fit the model. The loaders are passed positionally because the keyword for
# the training loader was renamed (train_dataloader -> train_dataloaders)
# between pytorch_lightning versions.
trainer.fit(tft, train_dataloader, val_dataloader)

In [ ]:
# Predict on the validation set in raw mode. return_x=True is needed for the
# call to return the (raw output, network input) pair that is unpacked here.
raw_predictions, x = tft.predict(validation_data, mode="raw", return_x=True)

# NOTE(review): `std` and `mean` are not defined in any visible cell — confirm
# where they come from and whether the raw "prediction" still needs rescaling.
# .cpu() moves the tensor off the GPU before the NumPy conversion; it is a
# no-op when the tensor is already on the CPU.
final_preds = (raw_predictions["prediction"] * std + mean).detach().cpu().numpy().T