### define Temporal Fusion Transformers model

#### create time series dataset using data frame

In [1]:
import pytorch_forecasting
import torch
import pandas as pd
from tqdm.autonotebook import tqdm
from pytorch_forecasting.data import (
    TimeSeriesDataSet,
    GroupNormalizer
)

  from tqdm.autonotebook import tqdm


#### for now, we create a continous time index but how to encode the fact that market is not opened on weekend

In [2]:
df = pd.read_csv("./data/Test万华化学.csv", sep=',')
# df_train = df.iloc[:480, :]
# df_test = df.iloc[480:, :]
df.insert(0, "time_idx", [i for i in range(df.shape[0])])
df.drop(columns='date', axis=1)
df = df.iloc[9:, ]
df.insert(7, "next_close", df["close"].shift(-1))
# df_train = df.iloc[:-1, ]
print(df["next_close"].describe())

df_train = df.iloc[:544, :]
df_test = df.iloc[544:, :]
# print(df_test.columns)
# print(df_train.columns)

count     553.000000
mean     3308.074977
std       377.249733
min      2504.389784
25%      3063.430361
50%      3274.300078
75%      3538.379105
max      4718.796057
Name: next_close, dtype: float64


In [3]:
max_prediction_length = 6
max_encoder_length = 24

training = TimeSeriesDataSet(
    data=df_train,
    time_idx="time_idx",
    target="next_close",
    group_ids=["code"],               # only one time series for now
    min_encoder_length=0,
    min_prediction_length=1,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["code"],
    static_reals=[],                 # ["code", , "tradestatus", "adjustflag"],       FIXME: how to solve this
    time_varying_unknown_categoricals=[],
    time_varying_known_categoricals=[],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["open", "high", "low", "close", "preclose", "volume", "amount", "turn", "pctChg", "next_close"],
    variable_groups={},
    target_normalizer=GroupNormalizer(
        groups=["code"],
        # transformation=""
    ),
    add_relative_time_idx=True,  # add as feature
    add_target_scales=True,  # add as feature
    add_encoder_length=True,  # add as feature
)

In [4]:
validation = TimeSeriesDataSet.from_dataset(
    training, df_train, predict=True, stop_randomization=True
)


In [5]:
batch_size = 128
train_dataloader = training.to_dataloader(
    train=True, batch_size=batch_size, num_workers=0
)
val_dataloader = validation.to_dataloader(
    train=False, batch_size=batch_size * 10, num_workers=0
)


In [6]:
print(type(training))
# print(type(train_dataloader))

<class 'pytorch_forecasting.data.timeseries.TimeSeriesDataSet'>


In [7]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.models import TemporalFusionTransformer
# stop training, when loss metric does not improve on validation set
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=10,
    verbose=False,
    mode="min"
)
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # log to tensorboard
# create trainer
trainer = pl.Trainer(
    max_epochs=30,
    # gpus=[0],  # train on CPU, use gpus = [0] to run on GPU
    gradient_clip_val=0.1,
    # early_stop_callback=early_stop_callback,
    limit_train_batches=30,  # running validation every 30 batches
    # fast_dev_run=True,  # comment in to quickly check for bugs
    callbacks=[lr_logger],
    logger=logger,
)
# initialise model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,  # biggest influence network size
    attention_head_size=1,
    dropout=0.01,
    hidden_continuous_size=8,
    output_size=7,  # QuantileLoss has 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,  # log example every 10 batches
    reduce_on_plateau_patience=4,  # reduce learning automatically
)
tft.size() # 29.6k parameters in model
# fit network
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 1     
3  | prescalers                         | ModuleDict                      | 240   
4  | static_variable_selection          | VariableSelectionNetwork        | 1.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 8.2 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 1.2 K 
7  | static_context_variable_selection  | GatedResidualNetwork 

Sanity Checking DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 29: 100%|██████████| 4/4 [00:03<00:00,  1.29it/s, v_num=2, train_loss_step=75.90, val_loss=43.00, train_loss_epoch=76.80]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 4/4 [00:03<00:00,  1.22it/s, v_num=2, train_loss_step=75.90, val_loss=43.00, train_loss_epoch=76.80]
Number of parameters in network: 25.4k


In [18]:
# # find optimal learning rate
# res = trainer.tuner.lr_find(
#     tft,
#     train_dataloader=train_dataloader,
#     val_dataloaders=val_dataloader,
#     max_lr=0.1,
#     min_lr=1e-7,
# )

# print(f"suggested learning rate: {res.suggestion()}")
# fig = res.plot(show=True, suggest=True)
# fig.show()

In [8]:
from pytorch_forecasting.metrics import MAE
# print(val_dataloader)
# load the best model according to the validation loss (given that
# we use early stopping, this is not necessarily the last epoch)
best_model_path = trainer.checkpoint_callback.best_model_path
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# calculate mean absolute error on validation set


# actuals = torch.cat([y for x, y in iter(val_dataloader)])
actuals = torch.cat([y[0] for _, y in iter(val_dataloader)])  # adjust index as needed
# print(actuals)
predictions = best_tft.predict(val_dataloader)
mae = MAE()
mae.update(predictions, actuals)
eval_result = mae.compute()

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


In [9]:
print(predictions[0])
print(actuals[0])


tensor([3175.2180, 3201.5142, 3222.9917, 3239.9805, 3254.3284, 3267.1741])
tensor([3204.1243, 3192.4854, 3153.4607, 3146.6143, 3148.6682, 3329.7561])


Convert tensor to a dataloader as new input to put into our model

In [10]:
print(eval_result)

tensor(61.5124)


In [11]:
import matplotlib.pyplot as plt

# df_test=df_test.drop(["next_close"],axis=1)
df_test = df_test.iloc[:-1, ]
print(df_test["next_close"].describe())

# df_test=df_test.drop(["next_close"],axis=1)
test = TimeSeriesDataSet.from_dataset(
    training, df_test, predict=True, stop_randomization=True
)
test_dataloader = test.to_dataloader(
    train=False, batch_size=batch_size * 10, num_workers=0
)
pred=best_tft.predict(df_test,return_x=True)
print(len(pred))
print(pred[0])
pred_numpy = pred[0].detach().numpy()
pred_numpy
# plt.plot(pred_numpy)
# plt.show()

count       9.000000
mean     3210.286057
std        80.740956
min      3106.220482
25%      3155.514701
50%      3200.701069
75%      3238.356376
max      3359.195679
Name: next_close, dtype: float64
5
tensor([[3198.8347, 3232.7832, 3255.1440, 3272.1821, 3286.2891, 3298.4563]])


  rank_zero_warn(


array([[3198.8347, 3232.7832, 3255.144 , 3272.1821, 3286.289 , 3298.4563]],
      dtype=float32)

In [24]:
actuals_test = torch.cat([y[0] for _, y in iter(test_dataloader)])  # adjust index as needed
# actuals_test = torch.cat(df_test['next_close'].tolist())
# numpy_array = df_test['next_close'].to_numpy()
# tensor = torch.from_numpy(numpy_array)
print(actuals_test)
mae1 = MAE()
mae1.update(pred[0], actuals_test)
eval_result = mae1.compute()
eval_result

tensor([[3238.3564, 3165.7844, 3106.2205, 3226.3752, 3140.1104, 3200.7012]])


tensor(38.2260)

In [None]:
# from pytorch_forecasting.metrics import SMAPE
# # calculate metric by which to display
# predictions, x = best_tft.predict(val_dataloader)
# mean_losses = SMAPE(reduction="none")(predictions, actuals).mean(1)
# indices = mean_losses.argsort(descending=True)  # sort losses
# raw_predictions, x = best_tft.predict(val_dataloader, mode="raw, return_x=True")
# # show only two examples for demonstration purposes
# for idx in range(2):
#     best_tft.plot_prediction(
#         x,
#         raw_predictions,
#         idx=indices[idx],
#         add_loss_to_title=SMAPE()
#     )

ValueError: not enough values to unpack (expected 2, got 1)

In [None]:
# interpretation = best_tft.interpret_output(
#     raw_predictions, reduction="sum"
# )
# best_tft.plot_interpretation(interpretation)

NameError: name 'raw_predictions' is not defined