In [None]:
import sys
import os
# Add the parent directory (relative to the working directory) to the module search path.
sys.path += ["../"]

In [None]:
import warnings
import time
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn import model_selection
from sklearn.metrics import mean_absolute_percentage_error



In [None]:
from uncertainty_estimation_mqcnn.uncertainty_estimation_models import Model, MQCNN
from uncertainty_estimation_mqcnn.constants import PredEnum

## 1 Data preparation

MQCNN requires a dataset without any gaps. Missing dates for some stores do not work with the existing code implementation. Missing features in some part of a sequence (i.e. some days exist for some time series but are missing in others) are also a problem for the encoder-decoder structure, because the time series then no longer cover the same dates.

In [None]:
import os

# Root directory: two levels above the current working directory.
ue_dir_path = os.path.dirname(os.path.split(os.getcwd())[0])

# Location of the pickled Rossmann frame inside the `datasets` folder.
full_df_with_zero_sales_path = os.path.join(
    ue_dir_path,
    'datasets',
    'rossmann_full_df_with_zero_sales.pickle',
)

# NOTE(review): unpickling can execute arbitrary code -- only load trusted files here.
full_df_with_zero_sales = pd.read_pickle(full_df_with_zero_sales_path)

## REMOVE BEFORE PUBLISHING

In [None]:
# Optional smoke-test subset, disabled by default.
#
# Leave DEBUG_MAX_STORES = None to work on the full dataset; the outputs recorded in this
# notebook (e.g. 1050330 rows) were produced that way.
# Set it to a small integer to keep only the first N stores (in order of appearance) with
# their COMPLETE history. A fixed row cut such as `iloc[:10000]` can slice a store in the
# middle of its series, and MQCNN needs gap-free series of equal coverage.
DEBUG_MAX_STORES = None

if DEBUG_MAX_STORES is not None:
    debug_stores = full_df_with_zero_sales['Store'].drop_duplicates().head(DEBUG_MAX_STORES)
    # .copy() so later column assignments do not act on a view of the full frame.
    full_df_with_zero_sales = full_df_with_zero_sales[
        full_df_with_zero_sales['Store'].isin(debug_stores)
    ].copy()

In [None]:
# Preview the first rows, transposed so every column is visible as a row.
full_df_with_zero_sales.head().transpose()

Unnamed: 0,0,1,2,3,4
index,0,1,2,3,4
Store,1,1,1,1,1
DayOfWeek,2,3,4,5,6
Date,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00
Sales,0.0,5530.0,4327.0,4486.0,4997.0
...,...,...,...,...,...
StateHoliday_bw,1.0,1.0,1.0,1.0,1.0
Promo_bw,0.0,0.0,0.0,0.0,0.0
SchoolHoliday_fw,7.0,7.0,7.0,7.0,7.0
StateHoliday_fw,1.0,0.0,0.0,0.0,0.0


In [None]:
# Number of rows in the loaded frame.
full_df_with_zero_sales.shape[0]

Out[9]: 1050330

After the data is loaded, it is sorted by entity (store) and date.

In [None]:
# Order the rows by store and then by date, and renumber the index from 0.
full_df_with_zero_sales = (
    full_df_with_zero_sales
    .sort_values(by=['Store', 'Date'])
    .reset_index(drop=True)
)

In [None]:
# Columns handled as categorical features.
cat_vars = [
    'Store', 'DayOfWeek', 'Year', 'Month', 'Day', 'StateHoliday',
    'StoreType', 'Assortment', 'PromoInterval',
    'CompetitionOpenSinceYear', 'Promo2SinceYear', 'Week',
    'Promo_fw', 'Promo_bw',
    'StateHoliday_fw', 'StateHoliday_bw',
    'SchoolHoliday_fw', 'SchoolHoliday_bw',
    'Missing',
]

# Columns handled as continuous features.
cont_vars = [
    'CompetitionDistance',
    'AfterStateHoliday',
    'BeforeStateHoliday',
    'Promo',
    'SchoolHoliday',
]

In [None]:
# Show the latest and then the earliest date present in the data.
display(
    full_df_with_zero_sales['Date'].max(),
    full_df_with_zero_sales['Date'].min(),
)

Timestamp('2015-07-31 00:00:00')Timestamp('2013-01-01 00:00:00')

In [None]:
# Row count after sorting (should be unchanged by the sort).
display(full_df_with_zero_sales.shape[0])

1050330

## 3 Model Application

In [None]:
# Target column to forecast, and the forecast horizon used by the model cells below.
TARGET, forecast_horizon = 'Sales', 48

## 3.8 MQCNN

For this neural network model, the target is not only inferred from the features in the same row, but also from the features of "previous" rows, as these are encoded to better predict the upcoming values.
We therefore need to provide not only the rows that correspond to the targets in our forecast horizon, but also the preceding features and target values over the lookback length in order to predict. This means that we do not split our dataframe the way we would for tabular data.
We create a `full_train_df`, that contains information from start until holdout date (train_val split will be done internally in fit method) and a `full_test_df` which contains information from start until end of holdout date.

In [None]:
# inputs required for neural network

# The hold-out window is the last `forecast_horizon` days of the data. Its start is derived
# from the data instead of being hard-coded, so the split stays consistent if
# `forecast_horizon` changes. For forecast_horizon = 48 and a last date of 2015-07-31 this
# gives 2015-06-14, i.e. the previously hard-coded cutoff.
# Assumes one row per store and calendar day -- TODO confirm if the frequency ever changes.
holdout_start = full_df_with_zero_sales['Date'].max() - pd.Timedelta(days=forecast_horizon - 1)

# Training frame: everything strictly before the hold-out window, ordered by store and date.
full_train_df = (
    full_df_with_zero_sales[full_df_with_zero_sales['Date'] < holdout_start]
    .sort_values(['Store', 'Date'])
)

# Test frame: the complete history including the hold-out window. It is an independent copy
# so later in-place casts do not touch `full_df_with_zero_sales`.
full_test_df = full_df_with_zero_sales.copy()

display(full_train_df.tail())

index,Store,DayOfWeek,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday,Missing,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,State,file,week,trend,file_DE,week_DE,trend_DE,Date_DE,State_DE,Month_DE,Day_DE,Dayofweek_DE,Dayofyear_DE,Is_month_end_DE,Is_month_start_DE,Is_quarter_end_DE,Is_quarter_start_DE,Is_year_end_DE,Is_year_start_DE,Elapsed_DE,Max_TemperatureC,Mean_TemperatureC,Min_TemperatureC,Dew_PointC,MeanDew_PointC,Min_DewpointC,Max_Humidity,Mean_Humidity,Min_Humidity,Max_Sea_Level_PressurehPa,Mean_Sea_Level_PressurehPa,Min_Sea_Level_PressurehPa,Max_VisibilityKm,Mean_VisibilityKm,Min_VisibilitykM,Max_Wind_SpeedKm_h,Mean_Wind_SpeedKm_h,Max_Gust_SpeedKm_h,Precipitationmm,CloudCover,Events,WindDirDegrees,StateName,CompetitionOpenSince,CompetitionDaysOpen,CompetitionMonthsOpen,Promo2Since,Promo2Days,Promo2Weeks,AfterSchoolHoliday,BeforeSchoolHoliday,AfterStateHoliday,BeforeStateHoliday,AfterPromo,BeforePromo,SchoolHoliday_bw,StateHoliday_bw,Promo_bw,SchoolHoliday_fw,StateHoliday_fw,Promo_fw
1050277,1115,2,2015-06-09T00:00:00.000+0000,5119.0,363.0,1.0,0.0,False,0.0,0,2015,6,24,9,1,160,False,False,False,False,False,False,1433808000,d,c,5350.0,1,1900,1,22,2012,"Mar,Jun,Sept,Dec",HE,Rossmann_DE_HE,2015-06-14 - 2015-06-20,85,Rossmann_DE,2015-06-14 - 2015-06-20,82,2015-06-14T00:00:00.000+0000,,6,14,6,165,False,False,False,False,False,False,1434240000,20,16,12,7,6,4,67,48,29,1026,1025,1024,10.0,10.0,10.0,32,24,47.0,0.0,6.0,,22,Hessen,1900-01-15T00:00:00.000+0000,0,0,2012-05-28T00:00:00.000+0000,1107,25,60,-48,5,0,4,-6,0.0,1.0,3.0,0.0,0.0,1.0
1050278,1115,3,2015-06-10T00:00:00.000+0000,4676.0,357.0,1.0,0.0,False,0.0,0,2015,6,24,10,2,161,False,False,False,False,False,False,1433894400,d,c,5350.0,1,1900,1,22,2012,"Mar,Jun,Sept,Dec",HE,Rossmann_DE_HE,2015-06-14 - 2015-06-20,85,Rossmann_DE,2015-06-14 - 2015-06-20,82,2015-06-14T00:00:00.000+0000,,6,14,6,165,False,False,False,False,False,False,1434240000,21,17,12,11,8,6,67,53,39,1026,1024,1022,10.0,10.0,10.0,26,16,,0.0,6.0,,59,Hessen,1900-01-15T00:00:00.000+0000,0,0,2012-05-28T00:00:00.000+0000,1108,25,61,-47,6,0,5,-5,0.0,1.0,2.0,0.0,0.0,2.0
1050279,1115,4,2015-06-11T00:00:00.000+0000,5216.0,380.0,1.0,0.0,False,0.0,0,2015,6,24,11,3,162,False,False,False,False,False,False,1433980800,d,c,5350.0,1,1900,1,22,2012,"Mar,Jun,Sept,Dec",HE,Rossmann_DE_HE,2015-06-14 - 2015-06-20,85,Rossmann_DE,2015-06-14 - 2015-06-20,82,2015-06-14T00:00:00.000+0000,,6,14,6,165,False,False,False,False,False,False,1434240000,24,21,17,12,9,8,64,47,28,1022,1019,1015,10.0,10.0,10.0,23,14,,0.0,5.0,Rain,51,Hessen,1900-01-15T00:00:00.000+0000,0,0,2012-05-28T00:00:00.000+0000,1109,25,62,-46,7,0,6,-4,0.0,0.0,1.0,0.0,0.0,3.0
1050280,1115,5,2015-06-12T00:00:00.000+0000,5315.0,378.0,1.0,0.0,False,0.0,0,2015,6,24,12,4,163,False,False,False,False,False,False,1434067200,d,c,5350.0,1,1900,1,22,2012,"Mar,Jun,Sept,Dec",HE,Rossmann_DE_HE,2015-06-14 - 2015-06-20,85,Rossmann_DE,2015-06-14 - 2015-06-20,82,2015-06-14T00:00:00.000+0000,,6,14,6,165,False,False,False,False,False,False,1434240000,31,22,14,16,12,9,78,54,25,1015,1012,1009,31.0,15.0,10.0,40,11,58.0,0.0,5.0,Rain,42,Hessen,1900-01-15T00:00:00.000+0000,0,0,2012-05-28T00:00:00.000+0000,1110,25,63,-45,8,0,7,-3,0.0,0.0,0.0,0.0,0.0,4.0
1050281,1115,6,2015-06-13T00:00:00.000+0000,7736.0,503.0,1.0,0.0,False,0.0,0,2015,6,24,13,5,164,False,False,False,False,False,False,1434153600,d,c,5350.0,1,1900,1,22,2012,"Mar,Jun,Sept,Dec",HE,Rossmann_DE_HE,2015-06-14 - 2015-06-20,85,Rossmann_DE,2015-06-14 - 2015-06-20,82,2015-06-14T00:00:00.000+0000,,6,14,6,165,False,False,False,False,False,False,1434240000,26,21,17,17,14,9,94,65,27,1011,1010,1008,31.0,13.0,10.0,29,11,39.0,0.0,6.0,Rain,240,Hessen,1900-01-15T00:00:00.000+0000,0,0,2012-05-28T00:00:00.000+0000,1111,25,64,-44,9,0,8,-2,0.0,0.0,0.0,0.0,0.0,5.0


In [None]:
from sklearn.preprocessing import StandardScaler, OrdinalEncoder

# DYNAMIC CONTINUOUS

# Column groups of the dynamic features; every group is cast the same way in both frames.
dynamic_bool_var = ['Promo', 'SchoolHoliday', 'StateHoliday', 'Missing']
dynamic_cont_var = ['AfterStateHoliday', 'BeforeStateHoliday'] #same as for TFT
dynamic_cat_var = [
    'DayOfWeek', 'Year', 'Month', 'Day', 'Week',
    'Promo_fw', 'Promo_bw',
    'StateHoliday_fw', 'StateHoliday_bw',
    'SchoolHoliday_fw', 'SchoolHoliday_bw',
]

for frame in (full_train_df, full_test_df):
    # flag-like columns -> int
    frame[dynamic_bool_var] = frame[dynamic_bool_var].astype('int')
    # real-valued columns -> float32
    frame[dynamic_cont_var] = frame[dynamic_cont_var].astype('float32')
    # calendar / window columns -> str -> float32 (the intermediate str cast is kept as is)
    frame[dynamic_cat_var] = frame[dynamic_cat_var].astype(str).astype('float32')

# All dynamic columns together are passed to the model as dynamic real features.
cont_vars = dynamic_bool_var + dynamic_cat_var + dynamic_cont_var


# STATIC CATEGORICAL

# 'PromoInterval' is left out as it leads to NaN loss values.
# Letter-coded categories are mapped to numeric codes first.
alphabetical_cat_vars = ['Assortment', 'StoreType']
oe = OrdinalEncoder()
# These are all known features, therefore the encoder is fitted on the test frame
# (which holds the complete history) and then applied to both frames.
oe.fit(full_test_df[alphabetical_cat_vars])
full_test_df[alphabetical_cat_vars] = oe.transform(full_test_df[alphabetical_cat_vars])
full_train_df[alphabetical_cat_vars] = oe.transform(full_train_df[alphabetical_cat_vars])

# Static categorical columns are stored with dtype object in both frames.
numerical_cat_vars = ['Store', 'CompetitionOpenSinceYear', 'Promo2SinceYear', 'Assortment', 'StoreType']
for frame in (full_train_df, full_test_df):
    frame[numerical_cat_vars] = frame[numerical_cat_vars].astype('object')

static_cat_vars = numerical_cat_vars

# Ground truth for the forecast horizon, obtained from the test frame via
# MQCNN.obtain_y_test_out_of_X_test().
mqcnn_y_test = MQCNN.obtain_y_test_out_of_X_test(
    X_test=full_test_df,
    forecast_horizon=forecast_horizon,
    timestamp="Date",
    target=TARGET,
    item_id="Store",
)

In [None]:
from gluonts.mx.trainer.learning_rate_scheduler import LearningRateReduction
from gluonts.mx.trainer.model_averaging import ModelAveraging, SelectNBestMean, save_epoch_info
from gluonts.mx.trainer import Trainer as MXTrainer


# Model-averaging callback configured with SelectNBestMean over a single model.
modelaveraging = ModelAveraging(avg_strategy=SelectNBestMean(num_models=1))

# if val metric is not improving for patience epochs then learning rate will be lowered by decay factor
# --> if this is then below min_lr train will stop immediately
scheduler = LearningRateReduction(
    base_lr=0.0001,
    min_lr=0.00009,
    decay_factor=0.1,
    patience=5,
    objective='min',
)

# GluonTS MXNet trainer: 100 epochs with 100 batches each, on the GPU context.
trainer = MXTrainer(
    ctx="gpu",
    epochs=100,
    num_batches_per_epoch=100,
    clip_gradient=10.0,
    weight_decay=1e-08,
    hybridize=False,
    add_default_callbacks=True,
    callbacks=[scheduler, modelaveraging],
)

# Hyper-parameters handed to MQCNN.fit via `params_mqcnn`; the trainer is the last entry.
mqcnn_params = dict(
    batch_size=256,
    num_forking=None,
    decoder_mlp_dim_seq=[32, 32],
    channels_seq=[32, 32, 32, 32, 32, 32],
    dilation_seq=[1, 2, 4, 8, 16, 32],
    kernel_size_seq=[2, 2, 2, 2, 2, 2],
    scaling_decoder_dynamic_feature=False,
    scaling=False,
    trainer=trainer,
)

# Wall-clock timer covering fit, predict and metric computation.
start_time = time.perf_counter()

# MQCNN on daily data: the look-back window is three times the forecast horizon, static
# categoricals get their cardinality from the training frame, and dynamic features are
# standardised with a StandardScaler.
mqcnn_reg = MQCNN(freq = "D", lookback=forecast_horizon*3, forecast_horizon=forecast_horizon, item_id="Store", 
            timestamp="Date", feat_static_cat=static_cat_vars, feat_dynamic_real=cont_vars,
            past_feat_dynamic_real=None, cardinality_static_cat=[len(full_train_df[var].unique()) for var in static_cat_vars], 
            dynamic_feature_scaler=StandardScaler(), quantiles=[0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95])
mqcnn_reg.fit(full_train_df, TARGET, params_mqcnn=mqcnn_params, verbose = True)
mqcnn_pred = mqcnn_reg.predict(full_test_df, prediction_types=[PredEnum.POINT_ESTIMATES, PredEnum.QUANTILES])

# Standard metrics; interval metrics use the 0.1 and 0.9 quantiles.
mqcnn_metrics = mqcnn_reg.metrics(mqcnn_y_test, mqcnn_pred, confidence_interval_quantiles=[0.1,0.9])

#Evaluate MAPE & RMSPE without zero values as in Kaggle competition
# Flatten both arrays to a single column. The shape is passed positionally because the
# `newshape` keyword of np.reshape is deprecated in recent NumPy releases, and -1 lets NumPy
# infer the length instead of recomputing it from the training frame.
y_test = np.reshape(mqcnn_y_test, (-1, 1))
predictions = np.reshape(mqcnn_pred[PredEnum.POINT_ESTIMATES], (-1, 1))
# Ground truth and point forecasts must line up element by element.
assert y_test.shape == predictions.shape, (
    f"ground truth {y_test.shape} and predictions {predictions.shape} differ in shape"
)
indices_nonzero = np.where(y_test!=0)
# Take only entries which have no zeros in ground truth
y_test = y_test[indices_nonzero]
predictions = predictions[indices_nonzero]
mqcnn_metrics['rmspe_only_nonzero'] = np.sqrt(np.mean(np.square((y_test - predictions) / (y_test))))
mqcnn_metrics['mape_only_nonzero'] = mean_absolute_percentage_error(y_test, predictions)

end_time = time.perf_counter()
# Elapsed seconds, rounded to two decimals.
full_time = np.round(end_time - start_time, 2)
mqcnn_metrics['time'] = full_time

57e45991598645db9d3ef7f4f5993007
  0%|          | 0/100 [00:00<?, ?it/s] 12%|█▏        | 12/100 [00:10<01:16,  1.15it/s, epoch=1/100, avg_epoch_loss=0.205] 26%|██▌       | 26/100 [00:20<00:57,  1.28it/s, epoch=1/100, avg_epoch_loss=0.146] 40%|████      | 40/100 [00:30<00:45,  1.32it/s, epoch=1/100, avg_epoch_loss=0.112] 54%|█████▍    | 54/100 [00:41<00:34,  1.34it/s, epoch=1/100, avg_epoch_loss=0.0929] 68%|██████▊   | 68/100 [00:51<00:23,  1.36it/s, epoch=1/100, avg_epoch_loss=0.0798] 82%|████████▏ | 82/100 [01:01<00:13,  1.37it/s, epoch=1/100, avg_epoch_loss=0.0705] 96%|█████████▌| 96/100 [01:11<00:02,  1.36it/s, epoch=1/100, avg_epoch_loss=0.0635]100%|██████████| 100/100 [01:14<00:00,  1.34it/s, epoch=1/100, avg_epoch_loss=0.0618]
0it [00:00, ?it/s]5it [00:01,  3.04it/s, epoch=1/100, validation_avg_epoch_loss=0.021]
  0%|          | 0/100 [00:00<?, ?it/s] 14%|█▍        | 14/100 [00:10<01:04,  1.33it/s, epoch=2/100, avg_epoch_loss=0.0185] 28%|██▊       | 28/100 [00:20<00

In [None]:
mqcnn_metrics

Out[23]: {'mse': 321945876.61010975,
 'mae': 9487.349447532193,
 'rmse': 17942.850292250387,
 'mape': 6.913688861124928e+16,
 'rmspe': 2.7120015997410954e+18,
 'avg_interval_length': 7522.446,
 'coverage': 0.12104525172267021,
 'time': 4342.16,
 'rmspe_only_nonzero': 2.480712651351668,
 'mape_only_nonzero': 1.5431685973214713}