In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings(action='ignore')
import datetime
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import statistics
np.random.seed(1)

In [2]:
import mxnet as mx
from mxnet import gluon
from gluonts.dataset.common import ListDataset
from gluonts.dataset.util import to_pandas
from gluonts.model import deepar
from gluonts.mx.trainer import Trainer
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator

In [3]:
import GPy, GPyOpt

### Setup

In [4]:
data = pd.read_csv("data.csv", index_col=0) 
agg_mat_df = pd.read_csv("agg_mat.csv", index_col=0) # matrix of aggregated data with bottom time series

In [5]:
data

Unnamed: 0,Total,de,en,fr,ja,ru,zh,de_AAC,de_DES,de_MOB,...,zh_DES_AAG_054,zh_DES_AAG_056,zh_DES_AAG_068,zh_DES_AAG_089,zh_DES_AAG_139,zh_MOB_AAG_005,zh_MOB_AAG_028,zh_MOB_AAG_031,zh_MOB_AAG_036,zh_MOB_AAG_138
2016-01-01,156508,15342,63319,33489,17242,12286,14830,10343,932,4067,...,228,874,329,335,251,6,42,230,262,66
2016-01-02,129902,16782,46894,15613,19981,14283,16349,11767,1127,3888,...,179,855,334,471,316,13,62,287,320,88
2016-01-03,138203,12662,47014,18796,18793,15537,25401,7188,1318,4156,...,200,1028,551,297,513,11,57,226,2184,99
2016-01-04,115017,12305,42230,14975,18418,14484,12605,7251,2162,2892,...,309,1208,679,322,415,22,39,213,423,65
2016-01-05,126042,14036,50473,14786,16794,17602,12351,9311,1935,2790,...,303,1319,716,262,351,13,32,156,309,96
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-12-27,147461,11449,45257,17133,48520,11160,13942,6208,1169,4072,...,237,1211,737,104,1828,25,22,490,136,69
2016-12-28,152287,12407,43929,17097,52437,10479,15938,5210,1221,5976,...,300,1083,649,78,1158,13,15,400,117,64
2016-12-29,137953,11042,51460,23959,22600,10331,18561,5978,1213,3851,...,313,912,466,113,905,11,21,406,133,94
2016-12-30,113121,10375,44781,14478,17949,9994,15544,5792,1011,3572,...,185,981,411,141,635,15,21,465,112,99


In [18]:
# Split the data
### pivot data such as index is the name of columns
#data = data.pivot(index='date', columns='symbol', values='close')
pivot_df = data.T

#X_train = pivot_df.iloc[:,:20]
#X_val = data.iloc[nb_train:nb_train+nb_val,:]
#X_test = pivot_df.iloc[:,8:28]

#y_train = pivot_df.iloc[:,20:28]
#y_val = data.iloc[nb_train+nb_val,:]
y_test = pivot_df.iloc[:,365:]

prediction_length = 1
pred_length = prediction_length
start_date = '2016-01-01'
freq = "1D"

In [19]:
level0total = 1
level1total = 6
level2total = 6*3
level3total = 24
level4total = 150

In [74]:
levels_left = [0, level0total, level0total+level1total, level0total+level1total+level2total, level0total+level1total+level2total+level3total]
levels_right = [0, level1total, level1total+level2total, level1total+level2total+level3total, level1total+level2total+level3total+level4total]
nb_ts_levels = [level0total, level1total, level2total, level3total, level4total]
total_ts = [0,level0total,level0total+level1total,level0total+level1total+level2total,level0total+level1total+level2total+level3total, level0total+level1total+level2total+level3total+level4total]
lengths = nb_ts_levels

In [76]:
def calculate_wmape(actual_values, forecasted_values):
    n = len(actual_values)
    num = np.sum(np.abs(actual_values - forecasted_values))
    den = np.sum(np.abs(actual_values))
    wmape = 100*num/den
    return wmape

In [77]:
data_for_model = data

## DeepAR

In [78]:
def deepAR_ds(data_for_model, prediction_length, freq, start):
    # train dataset: cut the last window of length "prediction_length", add "target" and "start" fields
    train_ds = ListDataset([{'target': data_for_model[x][:-prediction_length], 'start': start}
                        #'feat_static_cat':feat_static_cat[x].values}
                        #'feat_dynamic_cat':[feat_dynamic_cat_month[x][:-prediction_length]]}
                        for x in data_for_model.columns],
                        freq=freq)
    # test dataset: use the whole dataset, add "target" and "start" fields
    test_ds = ListDataset([{'target': data_for_model[x].values, 'start': start}
                        #'feat_static_cat':feat_static_cat[x].values}
                        #'feat_dynamic_cat':[feat_dynamic_cat_month[x].values]}
                        for x in data_for_model.columns],
                        freq=freq)
    return train_ds, test_ds

In [79]:
def deepAR_fit(train_ds, test_ds, prediction_length, freq, learning_rate, cell_type, num_layers, num_cells, num_epochs, num_batches_per_epoch):
    
    trainer = Trainer(epochs=num_epochs, learning_rate=learning_rate,num_batches_per_epoch=num_batches_per_epoch)
    
    estimator = deepar.DeepAREstimator(
        freq=freq, prediction_length=prediction_length, trainer=trainer, cell_type=cell_type,
        num_layers=num_layers,num_cells=num_cells)
    
    predictor = estimator.train(training_data=train_ds)
    
    return predictor

In [80]:
def deepAR_predict(predictor, test_ds):

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_ds,  # test dataset
        predictor=predictor,  # predictor
        num_samples=100,  # number of sample paths we want for evaluation
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)
    
    return forecasts,tss

In [81]:
def run_deepAR(data_for_model, prediction_length, freq, start, learning_rate, cell_type, num_layers, num_cells, num_epochs, num_batches_per_epoch):
    train_ds, test_ds = deepAR_ds(data_for_model, prediction_length, freq, start)
    predictor = deepAR_fit(train_ds, test_ds, prediction_length, freq, learning_rate, cell_type, num_layers, num_cells, num_epochs, num_batches_per_epoch)
    forecasts,tss = deepAR_predict(predictor, test_ds)
    return forecasts,tss

#### b) Fine-Tuning

In [82]:
def run_deepAR_FT(data_for_model, prediction_length, freq, start, learning_rate, cell_type, num_layers, num_cells, num_epochs, num_batches_per_epoch):
    train_ds, test_ds = deepAR_ds(data_for_model, prediction_length, freq, start)
    predictor = deepAR_fit(train_ds, test_ds, prediction_length, freq, learning_rate, cell_type, num_layers, num_cells, num_epochs, num_batches_per_epoch)
    forecasts,tss = deepAR_predict(predictor, test_ds)
    evaluator = Evaluator()
    agg_metrics, item_metrics = evaluator(tss, forecasts)
    return agg_metrics['RMSE']

In [83]:
def optimize_on_metric(data_for_model, prediction_length, freq, start):
    # bounds for hyper-parameters
    # the bounds dict should be in order of continuous type and then discrete type
    bounds = [{'name': 'learning_rate', 'type': 'discrete',  'domain': (0.001, 0.05, 0.01)},
              {'name': 'num_layers', 'type': 'discrete',    'domain': (3, 4, 5)},
              {'name': 'num_cells', 'type': 'discrete',    'domain': (50,60)},
              {'name': 'num_batches_per_epoch', 'type': 'discrete',    'domain': (5, 10, 32)},
              {'name': 'epochs', 'type': 'discrete', 'domain': (10, 50, 100, 150)}
              #{'name': 'cell_type', 'type': 'discrete', 'domain': ("lstm", "gru")}               
              ]

    def f(x):
        print(x)
        evaluation = run_deepAR_FT(
            data_for_model, prediction_length, freq, start,
            learning_rate = float(x[:,0]),  
            cell_type= "lstm",
            num_layers= int(x[:,1]), 
            num_cells= int(x[:,2]),
            num_batches_per_epoch= int(x[:,3]), 
            num_epochs=  int(x[:,4]))
        print("LOSS:\t{0}".format(evaluation))
        print(evaluation)
        return evaluation

    opt_transformer = GPyOpt.methods.BayesianOptimization(f=f, domain=bounds)

    opt_transformer.run_optimization(max_iter=10)

    print("RESULTS:")
    print(opt_transformer.x_opt) 
    print(opt_transformer.fx_opt)

    return opt_transformer

In [20]:
opt_sol = optimize_on_metric(data_for_model, prediction_length, freq, start_date)

[[1.e-03 4.e+00 6.e+01 1.e+01 1.e+01]]


100%|██████████| 10/10 [00:00<00:00, 14.12it/s, epoch=1/10, avg_epoch_loss=7.83]
100%|██████████| 10/10 [00:00<00:00, 19.72it/s, epoch=2/10, avg_epoch_loss=7.57]
100%|██████████| 10/10 [00:00<00:00, 19.94it/s, epoch=3/10, avg_epoch_loss=7.49]
100%|██████████| 10/10 [00:00<00:00, 20.79it/s, epoch=4/10, avg_epoch_loss=7.58]
100%|██████████| 10/10 [00:00<00:00, 19.79it/s, epoch=5/10, avg_epoch_loss=7.21]
100%|██████████| 10/10 [00:00<00:00, 19.99it/s, epoch=6/10, avg_epoch_loss=7.41]
100%|██████████| 10/10 [00:00<00:00, 18.01it/s, epoch=7/10, avg_epoch_loss=7.26]
100%|██████████| 10/10 [00:00<00:00, 21.16it/s, epoch=8/10, avg_epoch_loss=7.59]
100%|██████████| 10/10 [00:00<00:00, 19.23it/s, epoch=9/10, avg_epoch_loss=7.4]
100%|██████████| 10/10 [00:00<00:00, 19.13it/s, epoch=10/10, avg_epoch_loss=7.44]
Running evaluation: 89it [00:00, 181.45it/s]


LOSS:	1249.660653872944
1249.660653872944
[[5.0e-02 4.0e+00 6.0e+01 3.2e+01 5.0e+01]]


100%|██████████| 32/32 [00:02<00:00, 13.89it/s, epoch=1/50, avg_epoch_loss=10.3]
100%|██████████| 32/32 [00:03<00:00,  8.50it/s, epoch=2/50, avg_epoch_loss=8.01]
100%|██████████| 32/32 [00:03<00:00,  9.06it/s, epoch=3/50, avg_epoch_loss=7.67]
100%|██████████| 32/32 [00:03<00:00,  8.08it/s, epoch=4/50, avg_epoch_loss=7.64]
100%|██████████| 32/32 [00:03<00:00,  8.47it/s, epoch=5/50, avg_epoch_loss=7.81]
100%|██████████| 32/32 [00:03<00:00,  9.32it/s, epoch=6/50, avg_epoch_loss=7.95]
100%|██████████| 32/32 [00:02<00:00, 10.88it/s, epoch=7/50, avg_epoch_loss=7.6]
100%|██████████| 32/32 [00:03<00:00,  9.48it/s, epoch=8/50, avg_epoch_loss=7.76]
100%|██████████| 32/32 [00:04<00:00,  6.97it/s, epoch=9/50, avg_epoch_loss=7.48]
100%|██████████| 32/32 [00:05<00:00,  6.28it/s, epoch=10/50, avg_epoch_loss=7.64]
100%|██████████| 32/32 [00:05<00:00,  5.58it/s, epoch=11/50, avg_epoch_loss=7.63]
100%|██████████| 32/32 [00:04<00:00,  7.12it/s, epoch=12/50, avg_epoch_loss=7.54]
100%|██████████| 32/32 [00

LOSS:	907.2551228836478
907.2551228836478
[[1.e-02 5.e+00 6.e+01 1.e+01 1.e+01]]


100%|██████████| 10/10 [00:00<00:00, 10.73it/s, epoch=1/10, avg_epoch_loss=9.97]
100%|██████████| 10/10 [00:00<00:00, 14.60it/s, epoch=2/10, avg_epoch_loss=7.65]
100%|██████████| 10/10 [00:01<00:00,  8.55it/s, epoch=3/10, avg_epoch_loss=7.4]
100%|██████████| 10/10 [00:00<00:00, 11.02it/s, epoch=4/10, avg_epoch_loss=7.57]
100%|██████████| 10/10 [00:01<00:00,  9.92it/s, epoch=5/10, avg_epoch_loss=7.22]
100%|██████████| 10/10 [00:01<00:00,  8.87it/s, epoch=6/10, avg_epoch_loss=7.51]
100%|██████████| 10/10 [00:00<00:00, 11.70it/s, epoch=7/10, avg_epoch_loss=7.56]
100%|██████████| 10/10 [00:00<00:00, 17.68it/s, epoch=8/10, avg_epoch_loss=7.48]
100%|██████████| 10/10 [00:00<00:00, 17.90it/s, epoch=9/10, avg_epoch_loss=7.3]
100%|██████████| 10/10 [00:00<00:00, 16.86it/s, epoch=10/10, avg_epoch_loss=7.3]
Running evaluation: 89it [00:00, 166.93it/s]


LOSS:	1193.92475486046
1193.92475486046
[[1.e-02 3.e+00 6.e+01 1.e+01 1.e+01]]


100%|██████████| 10/10 [00:00<00:00, 10.78it/s, epoch=1/10, avg_epoch_loss=8.43]
100%|██████████| 10/10 [00:00<00:00, 17.03it/s, epoch=2/10, avg_epoch_loss=7.6]
100%|██████████| 10/10 [00:00<00:00, 21.23it/s, epoch=3/10, avg_epoch_loss=7.37]
100%|██████████| 10/10 [00:00<00:00, 12.32it/s, epoch=4/10, avg_epoch_loss=7.47]
100%|██████████| 10/10 [00:00<00:00, 12.12it/s, epoch=5/10, avg_epoch_loss=7.32]
100%|██████████| 10/10 [00:00<00:00, 14.17it/s, epoch=6/10, avg_epoch_loss=7.28]
100%|██████████| 10/10 [00:00<00:00, 11.30it/s, epoch=7/10, avg_epoch_loss=7.41]
100%|██████████| 10/10 [00:00<00:00, 11.94it/s, epoch=8/10, avg_epoch_loss=7.22]
100%|██████████| 10/10 [00:00<00:00, 20.82it/s, epoch=9/10, avg_epoch_loss=7.3]
100%|██████████| 10/10 [00:00<00:00, 22.12it/s, epoch=10/10, avg_epoch_loss=7.18]
Running evaluation: 89it [00:01, 88.58it/s]


LOSS:	852.5637959111086
852.5637959111086
[[1.e-03 5.e+00 6.e+01 5.e+00 5.e+01]]


100%|██████████| 5/5 [00:00<00:00,  6.78it/s, epoch=1/50, avg_epoch_loss=7.83]
100%|██████████| 5/5 [00:00<00:00, 17.17it/s, epoch=2/50, avg_epoch_loss=7.76]
100%|██████████| 5/5 [00:00<00:00, 16.31it/s, epoch=3/50, avg_epoch_loss=7.6]
100%|██████████| 5/5 [00:00<00:00, 14.55it/s, epoch=4/50, avg_epoch_loss=7.57]
100%|██████████| 5/5 [00:00<00:00,  7.63it/s, epoch=5/50, avg_epoch_loss=7.45]
100%|██████████| 5/5 [00:00<00:00,  9.18it/s, epoch=6/50, avg_epoch_loss=7.37]
100%|██████████| 5/5 [00:00<00:00,  9.14it/s, epoch=7/50, avg_epoch_loss=7.56]
100%|██████████| 5/5 [00:00<00:00, 13.89it/s, epoch=8/50, avg_epoch_loss=7.67]
100%|██████████| 5/5 [00:00<00:00, 16.97it/s, epoch=9/50, avg_epoch_loss=7.28]
100%|██████████| 5/5 [00:00<00:00, 16.74it/s, epoch=10/50, avg_epoch_loss=7.36]
100%|██████████| 5/5 [00:00<00:00, 11.29it/s, epoch=11/50, avg_epoch_loss=7.47]
100%|██████████| 5/5 [00:00<00:00,  8.54it/s, epoch=12/50, avg_epoch_loss=7.21]
100%|██████████| 5/5 [00:00<00:00, 17.55it/s, epoc

LOSS:	1211.3238779935803
1211.3238779935803
[[5.e-02 3.e+00 6.e+01 1.e+01 1.e+01]]


100%|██████████| 10/10 [00:00<00:00, 18.07it/s, epoch=1/10, avg_epoch_loss=11]
100%|██████████| 10/10 [00:00<00:00, 22.62it/s, epoch=2/10, avg_epoch_loss=9.3]
100%|██████████| 10/10 [00:00<00:00, 12.73it/s, epoch=3/10, avg_epoch_loss=8.14]
100%|██████████| 10/10 [00:00<00:00, 18.27it/s, epoch=4/10, avg_epoch_loss=7.87]
100%|██████████| 10/10 [00:00<00:00, 23.32it/s, epoch=5/10, avg_epoch_loss=7.66]
100%|██████████| 10/10 [00:00<00:00, 17.84it/s, epoch=6/10, avg_epoch_loss=7.59]
100%|██████████| 10/10 [00:00<00:00, 13.14it/s, epoch=7/10, avg_epoch_loss=7.46]
100%|██████████| 10/10 [00:00<00:00, 23.82it/s, epoch=8/10, avg_epoch_loss=7.5]
100%|██████████| 10/10 [00:00<00:00, 23.88it/s, epoch=9/10, avg_epoch_loss=7.45]
100%|██████████| 10/10 [00:00<00:00, 13.77it/s, epoch=10/10, avg_epoch_loss=7.47]
Running evaluation: 89it [00:00, 193.11it/s]


LOSS:	1447.6961896917153
1447.6961896917153
[[5.0e-02 4.0e+00 6.0e+01 3.2e+01 5.0e+01]]


100%|██████████| 32/32 [00:02<00:00, 13.04it/s, epoch=1/50, avg_epoch_loss=9.42]
100%|██████████| 32/32 [00:02<00:00, 13.54it/s, epoch=2/50, avg_epoch_loss=7.54]
100%|██████████| 32/32 [00:02<00:00, 14.30it/s, epoch=3/50, avg_epoch_loss=7.57]
100%|██████████| 32/32 [00:02<00:00, 15.10it/s, epoch=4/50, avg_epoch_loss=7.56]
100%|██████████| 32/32 [00:02<00:00, 12.86it/s, epoch=5/50, avg_epoch_loss=7.64]
100%|██████████| 32/32 [00:02<00:00, 13.71it/s, epoch=6/50, avg_epoch_loss=7.32]
100%|██████████| 32/32 [00:02<00:00, 15.25it/s, epoch=7/50, avg_epoch_loss=7.62]
100%|██████████| 32/32 [00:02<00:00, 14.32it/s, epoch=8/50, avg_epoch_loss=7.48]
100%|██████████| 32/32 [00:02<00:00, 14.11it/s, epoch=9/50, avg_epoch_loss=7.47]
100%|██████████| 32/32 [00:02<00:00, 13.39it/s, epoch=10/50, avg_epoch_loss=7.42]
100%|██████████| 32/32 [00:02<00:00, 10.99it/s, epoch=11/50, avg_epoch_loss=7.5]
100%|██████████| 32/32 [00:03<00:00,  8.05it/s, epoch=12/50, avg_epoch_loss=7.57]
100%|██████████| 32/32 [00

LOSS:	1268.5501676868473
1268.5501676868473
[[5.0e-02 4.0e+00 6.0e+01 3.2e+01 5.0e+01]]


100%|██████████| 32/32 [00:03<00:00,  9.62it/s, epoch=1/50, avg_epoch_loss=9.11]
100%|██████████| 32/32 [00:02<00:00, 11.10it/s, epoch=2/50, avg_epoch_loss=7.65]
100%|██████████| 32/32 [00:01<00:00, 16.30it/s, epoch=3/50, avg_epoch_loss=7.43]
100%|██████████| 32/32 [00:02<00:00, 14.38it/s, epoch=4/50, avg_epoch_loss=7.48]
100%|██████████| 32/32 [00:02<00:00, 13.47it/s, epoch=5/50, avg_epoch_loss=7.35]
100%|██████████| 32/32 [00:02<00:00, 14.28it/s, epoch=6/50, avg_epoch_loss=7.2]
100%|██████████| 32/32 [00:04<00:00,  7.14it/s, epoch=7/50, avg_epoch_loss=7.89]
100%|██████████| 32/32 [00:03<00:00,  9.35it/s, epoch=8/50, avg_epoch_loss=7.39]
100%|██████████| 32/32 [00:03<00:00,  8.66it/s, epoch=9/50, avg_epoch_loss=7.32]
100%|██████████| 32/32 [00:03<00:00,  8.63it/s, epoch=10/50, avg_epoch_loss=7.31]
100%|██████████| 32/32 [00:03<00:00,  8.03it/s, epoch=11/50, avg_epoch_loss=7.4]
100%|██████████| 32/32 [00:03<00:00,  8.14it/s, epoch=12/50, avg_epoch_loss=7.24]
100%|██████████| 32/32 [00:

LOSS:	727.4693941612077
727.4693941612077
RESULTS:
[5.0e-02 4.0e+00 6.0e+01 3.2e+01 5.0e+01]
727.4693941612077





#### c) Final Prediction

In [59]:
### Prediction for first class
### learning rate, cell type, num of layers, num of cells, num of epochs, batch size
forecasts0,tss0 = run_deepAR(data, prediction_length, freq, start_date, 0.05, "lstm", 4, 60, 100, 16)

  0%|          | 0/16 [00:00<?, ?it/s]

100%|██████████| 16/16 [00:01<00:00, 13.93it/s, epoch=1/100, avg_epoch_loss=8.82]
100%|██████████| 16/16 [00:00<00:00, 18.15it/s, epoch=2/100, avg_epoch_loss=6.91]
100%|██████████| 16/16 [00:01<00:00, 13.39it/s, epoch=3/100, avg_epoch_loss=6.61]
100%|██████████| 16/16 [00:01<00:00, 13.86it/s, epoch=4/100, avg_epoch_loss=5.77]
100%|██████████| 16/16 [00:01<00:00, 14.25it/s, epoch=5/100, avg_epoch_loss=5.93]
100%|██████████| 16/16 [00:01<00:00, 12.86it/s, epoch=6/100, avg_epoch_loss=5.23]
100%|██████████| 16/16 [00:01<00:00, 13.87it/s, epoch=7/100, avg_epoch_loss=5.71]
100%|██████████| 16/16 [00:01<00:00, 15.34it/s, epoch=8/100, avg_epoch_loss=5.54]
100%|██████████| 16/16 [00:01<00:00, 14.86it/s, epoch=9/100, avg_epoch_loss=5.41]
100%|██████████| 16/16 [00:01<00:00, 14.15it/s, epoch=10/100, avg_epoch_loss=5.49]
100%|██████████| 16/16 [00:01<00:00, 14.54it/s, epoch=11/100, avg_epoch_loss=5.46]
100%|██████████| 16/16 [00:01<00:00, 13.91it/s, epoch=12/100, avg_epoch_loss=6.24]
100%|████████

In [84]:
### round to int value of array
def round_array(array):
    for i in range(len(array)):
        array[i] = round(array[i])
        if array[i] <= 0:
            array[i] = 0
    return array

In [85]:
### create dataframe with predictions
def create_df_deepar(forecast, data_for_model):
    ### dataframe with name of columns same as in data_for_model_000
    df = pd.DataFrame(columns=data_for_model.columns)
    for i,col in enumerate(data_for_model.columns):
        df[col] = round_array(forecast[i].median)
    return df

In [86]:
y_predict = create_df_deepar(forecasts0, data)
y_predict = y_predict.T
y_predict.columns = y_test.columns

In [87]:
y_predict

Unnamed: 0,2016-12-31
Total,121422.0
de,11083.0
en,47919.0
fr,15266.0
ja,18884.0
...,...
zh_MOB_AAG_005,17.0
zh_MOB_AAG_028,22.0
zh_MOB_AAG_031,565.0
zh_MOB_AAG_036,113.0


In [88]:
y_test

Unnamed: 0,2016-12-31
Total,113839
de,14854
en,42342
fr,11835
ja,16716
...,...
zh_MOB_AAG_005,13
zh_MOB_AAG_028,17
zh_MOB_AAG_031,555
zh_MOB_AAG_036,110


#### WMAPE

In [96]:
def wmape_level(actual_value, forecasted_value, total_ts, lengths):
    nb_levels = len(lengths)
    wmapes = []
    for l in range(nb_levels):
        actual_value_ts = actual_value[total_ts[l]:total_ts[l+1], :]
        forecasted_value_ts = forecasted_value[total_ts[l]:total_ts[l+1], :]
        wmapes.append(calculate_wmape(actual_value_ts, forecasted_value_ts))
    return wmapes

In [97]:
wmape_level(y_test.to_numpy(), y_predict.to_numpy(), total_ts, lengths)

[6.6611618162492645,
 17.36487495498028,
 22.989485150080377,
 26.590184383207863,
 38.06077003487381]

In [98]:
calculate_wmape(y_test.to_numpy(), y_predict.to_numpy())

22.33329526787832

### RMSSE

In [68]:
### I have an array of shape (89,5)
### create dataframe with predictions
def create_df(y_predict, pred_length, data):
    ### dataframe with name of columns same as in data_for_model_000
    ### create a dataframe based on data, remove last pred_length rows, and add y_predict
    ### return dataframe
    y_predict_df = y_predict.astype(np.float32)
    y_predict_df = pd.DataFrame(y_predict_df)
    y_predict_df = y_predict_df.T
    df = data.copy()
    for i,col in enumerate(data.columns):
        df[col][-(pred_length):] = y_predict_df[:][i]
    return df

In [69]:
data_pred = create_df(y_predict.to_numpy(), pred_length, data)

In [70]:
def rmsse_ts(pred_length, data, data_pred, ts):
    H = pred_length
    T = data.shape[0] - H
    ts_array = data.iloc[:,ts].values
    ts_array_pred = data_pred.iloc[:,ts].values
    e = (1/H)*np.sum((ts_array[t] - ts_array_pred[t])**2 for t in range(T, T+H))
    e_naive = (1/(T-1))*np.sum((ts_array[t] - ts_array[t-1])**2 for t in range(1, T))
    return np.sqrt(e/e_naive)

In [71]:
total_ts = [0,1,5,5+28,5+28+56]
lengths = [1, 4, 28, 56]
def rmsse_level(pred_length, data, data_pred, total_ts, lengths):
    nb_levels = len(lengths)
    R = 0
    r_l = [0]*nb_levels
    for l in range(nb_levels):
        for j in range(total_ts[l], total_ts[l+1]):
            r_l[l] += (1/lengths[l])*rmsse_ts(pred_length, data, data_pred, j)
            #print(l, j)
    print(r_l)
    R += np.mean(r_l)
    return R

In [72]:
rmsse_level(pred_length, data, data_pred, total_ts, lengths)

[0.13641538003574646, 0.31638487909891044, 0.26427348425376634, 0.3654991693043279]


0.2706432281731878