# Import

In [1]:
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", None)

from IPython.display import clear_output
import matplotlib.pyplot as plt

import torch
import pytorch_forecasting as pf
import lightning.pytorch as pl
# device = torch.device("cuda")

import copy
from pathlib import Path
import warnings

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
import numpy as np
import pandas as pd
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

  from .autonotebook import tqdm as notebook_tqdm


# Read data

In [2]:
# Visuelle
visuelle_gtrend = pd.read_csv("../visuelle/gtrends.csv", parse_dates=["date"])
visuelle_raw = pd.read_csv("../visuelle/train.csv", parse_dates=["release_date"]).sort_values("release_date")
visuelle_raw = visuelle_raw.rename(columns={str(i):str(i+52) for i in range(0, 12)})

scale = np.load("../visuelle/normalization_scale.npy")
visuelle_raw.iloc[:, :12] = visuelle_raw.iloc[:, :12] * scale
visuelle_raw.head()

Unnamed: 0,52,53,54,55,56,57,58,59,60,61,62,63,external_code,season,category,release_date,day,week,month,year,image_path,color,fabric,extra
0,5.0,78.0,66.0,71.0,49.0,46.0,28.0,21.0,13.0,15.0,11.0,10.0,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem
1,6.0,117.0,137.0,144.0,88.0,105.0,68.0,56.0,37.0,148.0,170.0,59.0,2,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00002.png,brown,acrylic,hem
2,3.0,221.0,189.0,102.0,44.0,32.0,16.0,7.0,11.0,6.0,3.0,2.0,3,SS17,culottes,2016-12-02,0.666667,0.923077,1.0,0.998514,PE17/00003.png,blue,scuba crepe,hem
3,1.0,47.0,50.0,44.0,30.0,34.0,34.0,25.0,18.0,30.0,22.0,10.0,4,SS17,long sleeve,2016-12-02,0.666667,0.923077,1.0,0.998514,PE17/00004.png,yellow,acrylic,sleeveless
4,7.0,105.0,134.0,128.0,73.0,50.0,46.0,37.0,32.0,31.0,36.0,10.0,5,SS17,long sleeve,2016-12-02,0.666667,0.923077,1.0,0.998514,PE17/00005.png,grey,acrylic,hem


In [3]:
def get_data(data, gtrend):
    # Get data
    def func(x, gtrend):
        cat, release_date = x
        gtrend_start = release_date - pd.DateOffset(weeks=52)
        gtrend = gtrend[
            (gtrend["date"] >= gtrend_start) 
            &(gtrend["date"] < release_date)
            ][cat]
        return gtrend.values
    
    data = data.copy()
    data[[str(i) for i in range(0, 52)]] = data[["category", "release_date"]].apply(lambda x: func(x, gtrend), axis=1, result_type="expand")
    return data

visuelle_melt = get_data(visuelle_raw, visuelle_gtrend)
visuelle_melt = pd.melt(visuelle_melt, id_vars=["external_code"], value_vars=[str(i) for i in range(64)])
visuelle_melt["variable"] = visuelle_melt["variable"].astype(int)
visuelle_melt = visuelle_melt.sort_values(["external_code", "variable"]).reset_index(drop=True)

In [4]:
visuelle = pd.merge(visuelle_raw.iloc[:, 12:], visuelle_melt, on="external_code")
visuelle

Unnamed: 0,external_code,season,category,release_date,day,week,month,year,image_path,color,fabric,extra,variable,value
0,1,SS17,long sleeve,2016-12-01,0.500000,0.923077,1.000000,0.998514,PE17/00001.png,yellow,acrylic,hem,0,64.0
1,1,SS17,long sleeve,2016-12-01,0.500000,0.923077,1.000000,0.998514,PE17/00001.png,yellow,acrylic,hem,1,60.0
2,1,SS17,long sleeve,2016-12-01,0.500000,0.923077,1.000000,0.998514,PE17/00001.png,yellow,acrylic,hem,2,48.0
3,1,SS17,long sleeve,2016-12-01,0.500000,0.923077,1.000000,0.998514,PE17/00001.png,yellow,acrylic,hem,3,49.0
4,1,SS17,long sleeve,2016-12-01,0.500000,0.923077,1.000000,0.998514,PE17/00001.png,yellow,acrylic,hem,4,53.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325115,5080,AW19,kimono dress,2019-10-25,0.666667,0.826923,0.833333,1.000000,AI19/05080.png,red,acrylic,hem,59,82.0
325116,5080,AW19,kimono dress,2019-10-25,0.666667,0.826923,0.833333,1.000000,AI19/05080.png,red,acrylic,hem,60,123.0
325117,5080,AW19,kimono dress,2019-10-25,0.666667,0.826923,0.833333,1.000000,AI19/05080.png,red,acrylic,hem,61,46.0
325118,5080,AW19,kimono dress,2019-10-25,0.666667,0.826923,0.833333,1.000000,AI19/05080.png,red,acrylic,hem,62,70.0


# Make Dataset

In [5]:
pd.set_option("display.max_rows", None)
display(visuelle.head(64))
pd.set_option("display.max_rows", 10)

Unnamed: 0,external_code,season,category,release_date,day,week,month,year,image_path,color,fabric,extra,variable,value
0,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,0,64.0
1,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,1,60.0
2,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,2,48.0
3,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,3,49.0
4,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,4,53.0
5,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,5,54.0
6,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,6,52.0
7,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,7,51.0
8,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,8,49.0
9,1,SS17,long sleeve,2016-12-01,0.5,0.923077,1.0,0.998514,PE17/00001.png,yellow,acrylic,hem,9,49.0


In [6]:
product_cutoff = 5000
train_dataset = pf.TimeSeriesDataSet(
    visuelle[visuelle["external_code"] < product_cutoff],
    time_idx = "variable",
    target = "value",
    group_ids = ["external_code"],
    min_encoder_length = 52,
    max_encoder_length = 52,
    min_prediction_length = 12,
    max_prediction_length = 12,
    time_varying_unknown_reals=["value"],
    target_normalizer = None,
)
valid_dataset = pf.TimeSeriesDataSet.from_dataset(train_dataset, visuelle, predict=True)

train_dataloader = train_dataset.to_dataloader(batch_size=16, shuffle=True)
valid_dataloader = valid_dataset.to_dataloader(batch_size=16, shuffle=False, train=False)



In [7]:
# configure network and trainer
pl.seed_everything(42)
trainer = pl.Trainer(
    accelerator="gpu",
    # clipping gradients is a hyperparameter and important to prevent divergance
    # of the gradient for recurrent neural networks
    gradient_clip_val=0.1,
)


tft = TemporalFusionTransformer.from_dataset(
    train_dataset,
    # not meaningful for finding the learning rate but otherwise very important
    learning_rate=0.03,
    hidden_size=8,  # most important hyperparameter apart from learning rate
    # number of attention heads. Set to up to 4 for large datasets
    attention_head_size=1,
    dropout=0.1,  # between 0.1 and 0.3 are good values
    hidden_continuous_size=8,  # set to <= hidden_size
    loss=QuantileLoss(),
    optimizer="Ranger"
    # reduce learning rate if no improvement in validation loss after x epochs
    # reduce_on_plateau_patience=1000,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 4.2k


  rank_zero_warn(
  rank_zero_warn(


In [8]:
# find optimal learning rate
from lightning.pytorch.tuner import Tuner

res = Tuner(trainer).lr_find(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
    max_lr=10.0,
    min_lr=1e-6,
)

print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)
fig.show()

  rank_zero_warn(
[rank: 0] Global seed set to 42
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2
  rank_zero_warn(
[rank: 1] Global seed set to 42
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 2 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA GeForce RTX 3090 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1]
  rank_zero_warn(
  rank_zero_warn(
Finding b

ProcessRaisedException: 

-- Process 0 terminated with the following error:
Traceback (most recent call last):
  File "/home/sh-sungho.park/anaconda3/envs/cudatest/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
    fn(i, *args)
  File "/home/sh-sungho.park/anaconda3/envs/cudatest/lib/python3.8/site-packages/lightning/pytorch/strategies/launchers/multiprocessing.py", line 153, in _wrapping_function
    results = function(*args, **kwargs)
  File "/home/sh-sungho.park/anaconda3/envs/cudatest/lib/python3.8/site-packages/lightning/pytorch/trainer/trainer.py", line 571, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/home/sh-sungho.park/anaconda3/envs/cudatest/lib/python3.8/site-packages/lightning/pytorch/trainer/trainer.py", line 960, in _run
    call._call_callback_hooks(self, "on_fit_start")
  File "/home/sh-sungho.park/anaconda3/envs/cudatest/lib/python3.8/site-packages/lightning/pytorch/trainer/call.py", line 195, in _call_callback_hooks
    fn(trainer, trainer.lightning_module, *args, **kwargs)
  File "/home/sh-sungho.park/anaconda3/envs/cudatest/lib/python3.8/site-packages/lightning/pytorch/callbacks/lr_finder.py", line 125, in on_fit_start
    self.lr_find(trainer, pl_module)
  File "/home/sh-sungho.park/anaconda3/envs/cudatest/lib/python3.8/site-packages/lightning/pytorch/callbacks/lr_finder.py", line 122, in lr_find
    raise _TunerExitException()
lightning.pytorch.utilities.exceptions._TunerExitException


In [10]:
# configure network and trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=50,
    accelerator="cpu",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    limit_train_batches=50,  # coment in for training, running valiation every 30 batches
    # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)

tft = TemporalFusionTransformer.from_dataset(
    train_dataset,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=2,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=QuantileLoss(),
    log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
    optimizer="Ranger",
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 14.5k


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


In [12]:
# fit network
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
)


   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 0     
3  | prescalers                         | ModuleDict                      | 16    
4  | static_variable_selection          | VariableSelectionNetwork        | 0     
5  | encoder_variable_selection         | VariableSelectionNetwork        | 528   
6  | decoder_variable_selection         | VariableSelectionNetwork        | 0     
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork            | 1.1 

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


IndexError: list index out of range