### Import of libraries

In [111]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
import torch

from pytorch_forecasting import Baseline, DeepAR, TimeSeriesDataSet
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.data.examples import generate_ar_data
from pytorch_forecasting.metrics import SMAPE, MAE, NormalDistributionLoss

### Dataset creation

In [158]:
#We import the data of CAC 40 (obtained from Kaggle)
data = pd.read_csv("data.csv", index_col=[0])
data.head(10)

Unnamed: 0,date,temp,wetb,dewpt,vappr,rhum,station,series_idx,time_idx
0,2019-05-08 12:00:00,7.9,6.7,5.3,8.9,83.0,phoenix_park,0,0
1,2019-05-09 12:00:00,8.3,7.4,6.3,9.6,87.0,phoenix_park,0,1
2,2019-05-10 12:00:00,12.6,8.1,2.1,7.1,48.0,phoenix_park,0,2
3,2019-05-11 12:00:00,12.7,7.9,1.6,6.9,46.0,phoenix_park,0,3
4,2019-05-12 12:00:00,14.6,10.6,6.4,9.6,57.0,phoenix_park,0,4
5,2019-05-13 12:00:00,18.7,12.4,5.9,9.3,42.0,phoenix_park,0,5
6,2019-05-14 12:00:00,16.8,10.3,2.6,7.3,38.0,phoenix_park,0,6
7,2019-05-15 12:00:00,16.9,10.0,1.5,6.8,35.0,phoenix_park,0,7
8,2019-05-16 12:00:00,15.2,11.8,8.6,11.2,64.0,phoenix_park,0,8
9,2019-05-17 12:00:00,10.6,9.1,7.5,10.4,81.0,phoenix_park,0,9


In [159]:
data.isna().sum()

date          0
temp          0
wetb          0
dewpt         0
vappr         0
rhum          0
station       0
series_idx    0
time_idx      0
dtype: int64

In [160]:
data = data.astype(dict(series_idx=str))

## Essais modèles

In [161]:
# We consider 1000 time_idx.
# 800 for training, 100 for validation, 100 for test.

In [162]:
# create dataset and dataloaders
max_encoder_length = 60
max_prediction_length = 20

training_cutoff = 899

context_length = max_encoder_length
prediction_length = max_prediction_length

training = TimeSeriesDataSet(
    data = data[lambda x: x.time_idx <= 899],
    time_idx="time_idx", #indicateur de l'index de temps
    target="temp", #ce que l'on souhaite forecast
    group_ids=["series_idx"], #identificateur des différentes séries temporelles
    static_categoricals=["series_idx", "station"],  #variables catégorielles qui ne changent jamais
    #categorical_encoders={"idx": NaNLabelEncoder().fit(data.idx)},
    time_varying_unknown_reals=["temp"], #variables qui changent au cours du temps
    #time_varying_known_reals=["wetb","dewpt","vappr","rhum"], #covariables
    max_encoder_length=context_length,
    max_prediction_length=prediction_length,
    #predict_mode=True
)

validation = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx=training_cutoff + 1)


batch_size = 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

In [164]:
# calculate baseline absolute error
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
baseline_predictions = Baseline().predict(val_dataloader)
SMAPE()(baseline_predictions, actuals)

tensor(0.3198)

In [165]:
pl.seed_everything(42)
import pytorch_forecasting as ptf

Global seed set to 42


In [166]:
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
trainer = pl.Trainer(
    max_epochs=30,
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback],
    #limit_train_batches=50,
)


net = DeepAR.from_dataset(
    training,
    learning_rate=0.1,
    log_interval=10,
    log_val_interval=1,
    hidden_size=30,
    rnn_layers=2,
    loss=NormalDistributionLoss(),
)

trainer.fit(
    net,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name                   | Type                   | Params
------------------------------------------------------------------
0 | loss                   | NormalDistributionLoss | 0     
1 | logging_metrics        | ModuleList             | 0     
2 | embeddings             | MultiEmbedding         | 432   
3 | rnn                    | LSTM                   | 13.6 K
4 | distribution_projector | Linear                 | 62    
------------------------------------------------------------------
14.1 K    Trainable params
0         Non-trainable params
14.1 K    Total params
0.056     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


                                                                                

  rank_zero_warn(


Epoch 0:  91%|▉| 153/169 [00:38<00:04,  3.98it/s, loss=2.16, v_num=9, train_loss
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/16 [00:00<?, ?it/s][A
Epoch 0:  91%|▉| 154/169 [00:38<00:03,  3.97it/s, loss=2.16, v_num=9, train_loss[A
Epoch 0:  92%|▉| 155/169 [00:39<00:03,  3.96it/s, loss=2.16, v_num=9, train_loss[A
Epoch 0:  92%|▉| 156/169 [00:39<00:03,  3.95it/s, loss=2.16, v_num=9, train_loss[A
Epoch 0:  93%|▉| 157/169 [00:39<00:03,  3.94it/s, loss=2.16, v_num=9, train_loss[A
Epoch 0:  93%|▉| 158/169 [00:40<00:02,  3.93it/s, loss=2.16, v_num=9, train_loss[A
Epoch 0:  94%|▉| 159/169 [00:40<00:02,  3.92it/s, loss=2.16, v_num=9, train_loss[A
Epoch 0:  95%|▉| 160/169 [00:40<00:02,  3.92it/s, loss=2.16, v_num=9, train_loss[A
Epoch 0:  95%|▉| 161/169 [00:41<00:02,  3.92it/s, loss=2.16, v_num=9, train_loss[A
Epoch 0:  96%|▉| 162/169 [00:41<00:01,  3.91i

Epoch 5:  91%|▉| 153/169 [00:34<00:03,  4.41it/s, loss=1.77, v_num=9, train_loss[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/16 [00:00<?, ?it/s][A
Epoch 5:  91%|▉| 154/169 [00:34<00:03,  4.40it/s, loss=1.77, v_num=9, train_loss[A
Epoch 5:  92%|▉| 155/169 [00:35<00:03,  4.40it/s, loss=1.77, v_num=9, train_loss[A
Epoch 5:  92%|▉| 156/169 [00:35<00:02,  4.39it/s, loss=1.77, v_num=9, train_loss[A
Epoch 5:  93%|▉| 157/169 [00:35<00:02,  4.38it/s, loss=1.77, v_num=9, train_loss[A
Epoch 5:  93%|▉| 158/169 [00:36<00:02,  4.38it/s, loss=1.77, v_num=9, train_loss[A
Epoch 5:  94%|▉| 159/169 [00:36<00:02,  4.37it/s, loss=1.77, v_num=9, train_loss[A
Epoch 5:  95%|▉| 160/169 [00:36<00:02,  4.37it/s, loss=1.77, v_num=9, train_loss[A
Epoch 5:  95%|▉| 161/169 [00:36<00:01,  4.36it/s, loss=1.77, v_num=9, train_loss[A
Epoch 5:  96%|▉| 162/169 [00:37<00:01,  4.

Epoch 10:  91%|▉| 153/169 [00:36<00:03,  4.20it/s, loss=1.68, v_num=9, train_los[A
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/16 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/16 [00:00<?, ?it/s][A
Epoch 10:  91%|▉| 154/169 [00:36<00:03,  4.19it/s, loss=1.68, v_num=9, train_los[A
Epoch 10:  92%|▉| 155/169 [00:37<00:03,  4.19it/s, loss=1.68, v_num=9, train_los[A
Epoch 10:  92%|▉| 156/169 [00:37<00:03,  4.18it/s, loss=1.68, v_num=9, train_los[A
Epoch 10:  93%|▉| 157/169 [00:37<00:02,  4.18it/s, loss=1.68, v_num=9, train_los[A
Epoch 10:  93%|▉| 158/169 [00:37<00:02,  4.18it/s, loss=1.68, v_num=9, train_los[A
Epoch 10:  94%|▉| 159/169 [00:38<00:02,  4.17it/s, loss=1.68, v_num=9, train_los[A
Epoch 10:  95%|▉| 160/169 [00:38<00:02,  4.17it/s, loss=1.68, v_num=9, train_los[A
Epoch 10:  95%|▉| 161/169 [00:38<00:01,  4.16it/s, loss=1.68, v_num=9, train_los[A
Epoch 10:  96%|▉| 162/169 [00:38<00:01,  4

In [147]:
best_model_path = trainer.checkpoint_callback.best_model_path
best_model = DeepAR.load_from_checkpoint(best_model_path)

In [148]:
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
predictions = best_model.predict(val_dataloader)
(actuals - predictions).abs().mean()

tensor(3.0964)

In [157]:
validation.x_to_index(x)["series_idx"]

KeyError: 'decoder_time_idx'

In [151]:
raw_predictions, x = net.predict(val_dataloader, mode="raw", return_x=False, n_samples=24, return_index=True, fast_dev_run=True)

In [156]:
series = validation.x_to_index(x)["series_idx"]
for idx in range(24):  # plot 10 examples
    best_model.plot_prediction(x, raw_predictions, idx=idx, add_loss_to_title=True)
    plt.suptitle(f"Series: {series.iloc[idx]}")

KeyError: 'decoder_time_idx'

In [74]:
test = TimeSeriesDataSet(
    data_test,
    time_idx="time_idx", #indicateur de l'index de temps
    target="temp", #ce que l'on souhaite forecast
    group_ids=["series_idx"], #identificateur des différentes séries temporelles
    static_categoricals=["series_idx", "station"],  #variables catégorielles qui ne changent jamais
    #categorical_encoders={"idx": NaNLabelEncoder().fit(data.idx)},
    time_varying_unknown_reals=["temp"], #variables qui changent au cours du temps
    time_varying_known_reals=["wetb","dewpt","vappr","rhum"], #covariables
    max_encoder_length=context_length,
    max_prediction_length=prediction_length,
)

In [75]:
test_dataloader = test.to_dataloader(train=True, batch_size=batch_size, num_workers=0)

In [96]:
x

{'encoder_cat': tensor([[[14,  2],
          [14,  2],
          [14,  2],
          ...,
          [14,  2],
          [14,  2],
          [14,  2]],
 
         [[21, 22],
          [21, 22],
          [21, 22],
          ...,
          [21, 22],
          [21, 22],
          [21, 22]],
 
         [[ 0, 19],
          [ 0, 19],
          [ 0, 19],
          ...,
          [ 0, 19],
          [ 0, 19],
          [ 0, 19]],
 
         ...,
 
         [[16, 10],
          [16, 10],
          [16, 10],
          ...,
          [16, 10],
          [16, 10],
          [16, 10]],
 
         [[13,  1],
          [13,  1],
          [13,  1],
          ...,
          [13,  1],
          [13,  1],
          [13,  1]],
 
         [[22, 16],
          [22, 16],
          [22, 16],
          ...,
          [22, 16],
          [22, 16],
          [22, 16]]]),
 'encoder_cont': tensor([[[ 0.8132,  0.8204,  0.7739,  0.3904,  1.0772],
          [ 0.2858,  0.1883,  0.0720, -0.3361,  0.6760],
          [

In [102]:
raw_predictions[0].size()

torch.Size([480, 20, 24])