In [1]:
# import the libraries
from sklearn.preprocessing import MinMaxScaler
import os
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_forecasting import TimeSeriesDataSet, NBeats, Baseline
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.data.examples import generate_ar_data
from pytorch_forecasting.metrics import SMAPE

In [2]:
# Early-stopping callback: watches the logged 'val_loss' metric and ends
# training once it has stopped improving.
my_early_stop_callback = EarlyStopping(
    # what to watch, and in which direction "better" lies
    monitor='val_loss',
    mode='min',
    # how much improvement counts, and how many checks without it are tolerated
    min_delta=0.00,
    patience=3,
    # strict=True: error out if 'val_loss' is never logged; stay quiet otherwise
    strict=True,
    verbose=False,
)

In [3]:
# Absolute location of the input CSV.
# NOTE(review): this path only exists on the original author's machine.
file = "C:/Users/gurpr/Videos/BetaSci/Rossmann_Store1_Data_FULL.csv"

# Remember the starting directory, then switch into the CSV's folder so the
# file can be opened by its bare name. The working directory is NOT switched
# back in this cell; `pwd` is kept so it can be restored later.
pwd = os.getcwd()
csv_folder, csv_name = os.path.split(file)
os.chdir(csv_folder)

data = pd.read_csv(
    csv_name,
    usecols=[1],      # load only the second column of the file
    engine="python",  # skipfooter is not supported by the C parser
    skipfooter=3,     # ignore the last three lines of the file
)

data.head()

Unnamed: 0,Sale
0,4327
1,4486
2,4997
3,7176
4,5580


In [None]:
# NOTE(review): this import would normally live in the notebook's import cell;
# it is kept here so the cell stays self-contained.
from pytorch_forecasting.data import (
    TimeSeriesDataSet,
    GroupNormalizer,
)

# Window sizes, counted in rows of the series.
max_prediction_length = 195  # forecast of 195 days
max_encoder_length = 582  # using history of 582 days

# `data` as loaded above holds a single "Sale" column, whereas TimeSeriesDataSet
# needs an integer time index and at least one group id column. Both are
# derived here into a NEW frame, so `data` stays untouched and the cell can be
# re-run safely.
# NOTE(review): time_idx is taken from row order, i.e. this assumes the CSV
# rows are consecutive and in chronological order -- TODO confirm.
sales = data.assign(
    Sale=data["Sale"].astype("float64"),  # float target for normalisation
    time_idx=np.arange(len(data)),        # 0, 1, 2, ... one step per row
    store="store_1",                      # constant id: a single series
)
assert len(sales) > max_prediction_length, (
    "not enough rows to hold out max_prediction_length points for validation"
)

# Everything up to the cutoff is used for training; the final
# max_prediction_length steps are held out for validation.
training_cutoff = sales["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    sales[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="Sale",
    group_ids=["store"],
    min_encoder_length=0,  # allowing predictions without history
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # The only columns available are the time index (known in advance) and
    # the target itself (unknown in the future).
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Sale"],
    # softplus transformation + normalisation by group; `transformation`
    # replaces the older `coerce_positive` keyword.
    target_normalizer=GroupNormalizer(
        groups=["store"], transformation="softplus"
    ),
    add_relative_time_idx=True,  # add as feature
    add_target_scales=True,  # add as feature
    add_encoder_length=True,  # add as feature
)
# NOTE(review): if this dataset is meant for the NBeats model imported at the
# top, that model presumably needs fixed encoder/prediction lengths and none
# of the add_* features -- confirm against the NBeats documentation.

# creating validation set (predict=True) which means to predict the
# last max_prediction_length points in time for each series
validation = TimeSeriesDataSet.from_dataset(
    training, sales, predict=True, stop_randomization=True
)

# create dataloaders for model
batch_size = 128
train_dataloader = training.to_dataloader(
    train=True, batch_size=batch_size, num_workers=0
)
val_dataloader = validation.to_dataloader(
    train=False, batch_size=batch_size * 10, num_workers=0
)