In [1]:
import numpy as np
import pandas as pd
import pytorch_lightning as pl

from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE

In [2]:
DATA_PATH = 'data/prepared'
SUBMISSION_PATH = 'data/submission'

USE_LOG = True

In [3]:
# Import pickled data
train_df = pd.read_pickle(f'{DATA_PATH}/train_df.pkl')
test_df = pd.read_pickle(f'{DATA_PATH}/test_df.pkl')

In [4]:
# Pytorch Forecasting needs a continuing index to indicate time steps
# Create it as the difference from the first day in the dataset in days
train_df = train_df.sort_values(['Store', 'Date'], ascending = True)
test_df = test_df.sort_values(['Store', 'Date'], ascending = True)

first_day = train_df['Date'].min()

train_df['time_idx'] = (train_df['Date'] - first_day).dt.days
test_df['time_idx'] = (test_df['Date'] - first_day).dt.days

In [5]:
# TFT can use embeddings for categorical features, therefore no need for
# the one-hot-encoding (need to reverse it)

def reverse_onehot(data, col):
    """
    Reverse one-hot encoding
    """
    # Extract a list of column names that match the regex
    cols = [x for x in data.columns if x.startswith(col)]

    data[col] = ''

    for col_name in cols:
        data.loc[data[col_name] == 1, col] = col_name

    data = data.drop(cols, axis=1)

    return data

train_df = reverse_onehot(train_df, 'StateHoliday')
train_df = reverse_onehot(train_df, 'StoreType')
train_df = reverse_onehot(train_df, 'Assortment')

test_df = reverse_onehot(test_df, 'StateHoliday')
test_df = reverse_onehot(test_df, 'StoreType')
test_df = reverse_onehot(test_df, 'Assortment')

In [6]:
# TFT needs special category dtypes, transform for relevant columns

def to_category(data, cols):
    """
    Convert columns to category
    """
    for col in cols:
        data[col] = data[col].astype(str).astype('category')

    return data

cat_cols_ls = ['Year', 'Month', 'DayOfWeek', 'WeekOfYear', 'Store']

train_df = to_category(train_df, cat_cols_ls)
test_df = to_category(test_df, cat_cols_ls)

In [7]:
# Predict on log scale to handle skewed target distribution
if USE_LOG:
    train_df['Sales'] = np.log1p(train_df['Sales'])

In [8]:
# Max prediction length is the length of the prediction task in test
max_prediction_length = int(test_df.groupby('Store').Store.count().unique()[0])

# Max encoder length defined as multiple of the prediction length
max_encoder_length = max_prediction_length * 4

training = TimeSeriesDataSet(
    train_df,
    time_idx='time_idx',
    target='Sales',
    group_ids=['Store'],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=[
        'Store', 
        'Assortment', 
        'StoreType' 
        ],
    static_reals=[
        'CompetitionDistance',
        'CompetitionOpenSinceMonth',
        'CompetitionOpenSinceYear',
    ],
    time_varying_known_categoricals=[
        'Year', 
        'Month', 
        'DayOfWeek', 
        'WeekOfYear'],
    time_varying_known_reals=[
        'time_idx',
        'Promo2', 
        'Promo2SinceWeek',
        'Promo2SinceYear',
        'CompetitionOpen', 
        'PromoOpen', 
        'IsPromoMonth'
        ],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[
        'Sales', 
        'Customers'
        ],
    target_normalizer=GroupNormalizer(
        groups=['Store'], transformation='softplus'
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True
)

# Create validation set (predict=True) which means to predict the 
# last max_prediction_length points in time for each series
validation = TimeSeriesDataSet.from_dataset(training, 
                                            train_df, 
                                            predict=True, 
                                            stop_randomization=True)

# Create dataloaders for model
batch_size = 64
train_dataloader = training.to_dataloader(train=True, 
                                          batch_size=batch_size, 
                                          num_workers=0)
val_dataloader = validation.to_dataloader(train=False, 
                                          batch_size=batch_size * 10, 
                                          num_workers=0)

In [9]:

# Training callbacks
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4, 
    patience=10, 
    verbose=False, 
    mode="min")
lr_logger = LearningRateMonitor()  
logger = TensorBoardLogger("lightning_logs")

# PyTorch Ligntning trainer
trainer = pl.Trainer(
    max_epochs=30,
    gpus=0,
    weights_summary="top",
    gradient_clip_val=0.1,
    limit_train_batches=30,
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
    default_root_dir='model_checkpoints'
)

# TFT Model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=1,
    loss=SMAPE(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Run training
trainer.fit(
    tft,
    train_dataloader=train_dataloader,
    val_dataloaders=val_dataloader,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_deprecation(

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | SMAPE                           | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 18.8 K
3  | prescalers                         | ModuleDict                      | 256   
4  | static_variable_selection          | VariableSelectionNetwork        | 4.3 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 7.9 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 6.3 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial

Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  target_scale = torch.tensor([batch[0]["target_scale"] for batch in batches], dtype=torch.float)


Validation sanity check:  50%|█████     | 1/2 [00:02<00:02,  2.40s/it]



Validation sanity check: 100%|██████████| 2/2 [00:03<00:00,  1.41s/it]



                                                                      

  rank_zero_warn(
  rank_zero_warn(


Epoch 0:   0%|          | 0/32 [00:00<?, ?it/s] 



Epoch 29: 100%|██████████| 32/32 [00:14<00:00,  2.17it/s, loss=0.346, v_num=6, train_loss_step=0.359, val_loss=0.299, train_loss_epoch=0.348]


In [11]:
# Stores to predict
stores_in_test = test_df['Store'].unique()

# Select the encoder data from the actual training data
encoder_df = train_df[lambda x: x.time_idx > x.time_idx.max() - max_encoder_length]
encoder_df = encoder_df[encoder_df['Store'].isin(stores_in_test)]

# Define decoder data (taken from test data)
decoder_df = test_df

# Combine encoder and decoder data
new_prediction_data = pd.concat([encoder_df, decoder_df], ignore_index=True)

# Set future unknown to arbitrary value
new_prediction_data[['Sales', 'Customers']] = new_prediction_data[['Sales', 'Customers']].fillna(0)

In [12]:
# Predict on test data
y_hat, y_hat_idx = tft.predict(new_prediction_data, mode='prediction', return_index=True)

In [13]:
# Create submission file
submission_df = pd.concat([y_hat_idx, pd.DataFrame(y_hat)], axis=1)
submission_df = pd.melt(submission_df, id_vars=['time_idx', 'Store'], var_name='step', value_name='Sales')

submission_df['time_idx'] = submission_df['time_idx'] + submission_df['step']
submission_df = submission_df.sort_values(by=['Store', 'time_idx'])

submission_df = pd.merge(submission_df, test_df[['Id', 'Store', 'time_idx']], on=['time_idx', 'Store'], how='left')

submission_df['Sales'] = np.expm1(submission_df['Sales']).clip(0, None)
submission_df = submission_df[['Id', 'Sales']]

# Quick check
assert submission_df.shape[0] == test_df.shape[0]

submission_df.to_csv(f'{SUBMISSION_PATH}/submission_tft.csv', index=False)