In [2]:
# Standard library
import copy
import warnings
from pathlib import Path

# Numerical stack
import numpy as np
import pandas as pd
import torch

# Forecasting / training frameworks. The `pf` / `pl` aliases are kept because
# later cells call e.g. `pl.Trainer` and `pl.seed_everything`.
import pytorch_forecasting as pf
import pytorch_lightning as pl

# NOTE: a `from <name>.sub import x` statement resolves <name> through the import
# system, not through local variables, so the aliases above cannot be used as
# module paths (`from pf.data import ...` raises ModuleNotFoundError). The real
# package names are spelled out instead.
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

## Load data

In [None]:
from pytorch_forecasting.data.examples import get_stallion_data

data = get_stallion_data()

# Time index: months since the earliest month present in the data
# (year * 12 + month, shifted so that the minimum is 0).
data["time_idx"] = (
    data["date"].dt.year * 12 + data["date"].dt.month
).pipe(lambda idx: idx - idx.min())

# Additional features.
# Categories have to be strings, hence the int -> str -> category conversion.
data["month"] = data["date"].dt.month.astype(str).astype("category")
# The small epsilon keeps the logarithm finite when volume is 0.
data["log_volume"] = np.log(data["volume"] + 1e-8)
# Mean volume per time step, aggregated across each SKU and across each agency.
data["avg_volume_by_sku"] = (
    data.groupby(["time_idx", "sku"], observed=True)["volume"].transform("mean")
)
data["avg_volume_by_agency"] = (
    data.groupby(["time_idx", "agency"], observed=True)["volume"].transform("mean")
)

# Holiday / event indicator columns.
special_days = [
    "easter_day",
    "good_friday",
    "new_year",
    "christmas",
    "labor_day",
    "independence_day",
    "revolution_day_memorial",
    "regional_games",
    "fifa_u_17_world_cup",
    "football_gold_cup",
    "beer_capital",
    "music_fest",
]
# Recode each indicator column: 0 -> "-", 1 -> the column's own name,
# then store the result as a categorical.
data[special_days] = (
    data[special_days]
    .apply(lambda col: col.map({0: "-", 1: col.name}))
    .astype("category")
)

# Show a reproducible random sample of rows.
data.sample(6, random_state=1)

In [None]:
# Display summary statistics of `data` as the cell output.
data.describe()

### Create dataset and dataloaders

In [5]:
# Forecast horizon and history window, both in time_idx steps.
max_prediction_length = 6
max_encoder_length = 24
# Rows after this time index are kept out of the training set.
training_cutoff = data["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    data[lambda frame: frame.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="volume",
    group_ids=["agency", "sku"],
    # Encoder (history) window: between half and all of max_encoder_length.
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    # Decoder (prediction) window.
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # Static covariates.
    static_categoricals=["agency", "sku"],
    static_reals=["avg_population_2017", "avg_yearly_household_income_2017"],
    # Known time-varying covariates; the individual special-day columns are
    # bundled into a single "special_days" variable through variable_groups.
    time_varying_known_categoricals=["special_days", "month"],
    variable_groups={"special_days": special_days},
    time_varying_known_reals=["time_idx", "price_regular", "discount_in_percent"],
    # Unknown time-varying covariates.
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        "volume",
        "log_volume",
        "industry_volume",
        "soda_volume",
        "avg_max_temp",
        "avg_volume_by_agency",
        "avg_volume_by_sku",
    ],
    # Target is normalized per (agency, sku) group with a softplus transformation.
    target_normalizer=GroupNormalizer(
        groups=["agency", "sku"],
        transformation="softplus",
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Validation set built from the training dataset's configuration, applied to
# the full frame.
validation = TimeSeriesDataSet.from_dataset(
    training,
    data,
    predict=True,
    stop_randomization=True,
)

# Dataloaders; validation uses a batch size 8x larger than training.
batch_size = 64
train_dataloader = training.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=batch_size * 8,
    num_workers=0,
)

## Create baseline model

In [6]:
# Collect the targets of every validation batch; each batch unpacks as
# (x, (y, weight)) and only y is needed here.
actuals = torch.cat([target for _, (target, _) in val_dataloader])
baseline_predictions = Baseline().predict(val_dataloader)
# Mean absolute error of the baseline, shown as the cell output.
torch.mean(torch.abs(actuals - baseline_predictions)).item()

293.0088195800781

### Find optimal learning rate

In [7]:
# Fix the random seed before the model is instantiated.
pl.seed_everything(42)

# Trainer used only for the learning-rate search in the next cell.
trainer = pl.Trainer(
    gpus=0,
    gradient_clip_val=0.1,
)

# Model for the learning-rate search, configured from the training dataset.
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.05,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.2,
    hidden_continuous_size=8,
    output_size=7,  # presumably one output per QuantileLoss quantile - TODO confirm
    loss=QuantileLoss(),
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Number of parameters in network: 29.7k


In [None]:
# Search for a learning rate between min_lr and max_lr.
res = trainer.tuner.lr_find(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    min_lr=1e-6,
    max_lr=10.0,
)
print(f"suggested learning rate: {res.suggestion()}")
# Plot the search result with the suggested learning rate marked.
res.plot(show=True, suggest=True).show()

### Train model

In [9]:
# Callbacks and logger for the training run.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=10,
    verbose=False,
    mode="min",
)
lr_logger = LearningRateMonitor()
logger = TensorBoardLogger("lightning_logs")

# Trainer for the actual fit (replaces the one used for the learning-rate search).
trainer = pl.Trainer(
    max_epochs=50,
    gpus=0,
    enable_model_summary=True,
    gradient_clip_val=0.1,
    limit_train_batches=16,
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)

# Fresh model instance for training, configured from the training dataset.
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # presumably one output per QuantileLoss quantile - TODO confirm
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


Number of parameters in network: 29.7k


In [None]:
# Train the model on the training dataloader, validating on the validation dataloader.
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)