In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import itertools
import os
import warnings
import category_encoders
import gluonts
import mxnet
import altair as alt
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder
from category_encoders.hashing import HashingEncoder
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.evaluation import Evaluator, MultivariateEvaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.deepvar import DeepVAREstimator
from gluonts.model.predictor import Predictor
from gluonts.mx.distribution import (
    LowrankMultivariateGaussianOutput,
    NegativeBinomialOutput, 
    PoissonOutput,
)
from gluonts.mx.trainer import Trainer
from mxnet.context import num_gpus
from utils.evaluation import WRMSSEEvaluator


# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Seed both random number generators used in this notebook.
np.random.seed(42)
mxnet.random.seed(42)

# Record the library versions the results below were produced with.
print(
    "<VERSION>\n"
    f"category_encoders: {category_encoders.__version__}, "
    f"gluonts: {gluonts.__version__}, "
    f"mxnet: {mxnet.__version__}"
)

<VERSION>
category_encoders: 2.2.2, gluonts: 0.8.0, mxnet: 1.8.0


The Kaggle dataset was saved in the local directory `~/data/mofc-demand-forecast` in advance.

In [3]:
# Locations of the input CSV files and of the serialized models.
DATA_PATH = "../../data/mofc-demand-forecast"
MODEL_PATH = "models"

# Read the five competition files in one pass; the order of the names on the
# left matches the order of the file names on the right.
calendar, selling_prices, df_train_valid, df_train_eval, sample_submission = (
    pd.read_csv(os.path.join(DATA_PATH, csv_name))
    for csv_name in (
        "calendar.csv",
        "sell_prices.csv",
        "sales_train_validation.csv",
        "sales_train_evaluation.csv",
        "sample_submission.csv",
    )
)

In [4]:
# Identifier columns of each series and the names of the daily sales columns
# (d_1 ... d_1941).
key_names = ["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]
all_ids = df_train_eval["id"].unique()
date_names = ["d_" + str(i) for i in range(1, 1942)]
dates = calendar["date"].unique()
test_steps = 28

# Every (id, date) combination, ids varying slowest. Built directly in pandas
# so that no intermediate Python list of len(all_ids) * len(dates) tuples has
# to be materialised.
key_pairs = pd.MultiIndex.from_product(
    [all_ids, dates], names=["id", "date"]
).to_frame(index=False)

# Fraction of the series used for model evaluation (1.0 = use all of them).
test_sample_ratio = 0.1

if test_sample_ratio == 1.0:
    # Keep the same type (list) as the sampled branch below.
    test_sampled_ids = all_ids.tolist()
else:
    test_sampled_ids = np.random.choice(
        all_ids, round(test_sample_ratio * len(all_ids)), replace=False
    ).tolist()

print(
    f"{len(test_sampled_ids)} out of {len(all_ids)} IDs were selected for testing."
)

3049 out of 30490 IDs were selected for testing.


# Data Preprocessing

In [5]:
# Mapping from day label ("d_1", ...) to calendar date.
date_dict = calendar.set_index("d")["date"].to_dict()

# Sales as a (date x id) frame: one row per calendar date, one column per series.
target = (
    df_train_eval.set_index("id")[date_names]
    .T
    .rename(index=date_dict)
    .rename_axis(index="date", columns=None)
)

In [6]:
# Calendar event columns; missing events become the literal category "NA".
feature_names = ["event_name_1", "event_type_1", "event_name_2", "event_type_2"]
events = calendar[["date"] + feature_names].fillna("NA")

# The encoder is fitted only on dates before dates[-2 * test_steps].
train = events[events["date"] < dates[-2 * test_steps]][feature_names]

encoder = HashingEncoder(drop_invariant=True)
_ = encoder.fit(train)
encoded = encoder.transform(events[feature_names])
events = pd.concat([events[["date"]], encoded], axis=1)

# Take an explicit copy before adding a column: assigning into a slice of
# `calendar` is chained assignment (SettingWithCopyWarning).
time_related = calendar[["date", "wday", "month"]].copy()
# Day of month, parsed from the third "-"-separated field of the date string.
time_related["day"] = time_related["date"].map(
    lambda date_str: int(date_str.split("-")[2])
)

feat_dynamic_cat = events.merge(time_related).set_index("date")

# Min-max scale every calendar feature column.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(feat_dynamic_cat)
feat_dynamic_cat = pd.DataFrame(
    scaled, columns=feat_dynamic_cat.columns, index=feat_dynamic_cat.index
)
n_feat_dynamic_cat = feat_dynamic_cat.shape[1]

In [7]:
# Weekly selling price for every series id.
prices = (
    df_train_eval[["id", "store_id", "item_id"]]
    .merge(selling_prices, how="left")
    .drop(["store_id", "item_id"], axis=1)
)
# Expand the weekly prices to one row per calendar date.
week_to_date = calendar[["date", "wm_yr_wk"]].drop_duplicates()
prices = week_to_date.merge(prices, how="left").drop(["wm_yr_wk"], axis=1)

# The scaler is fitted only on dates before dates[-2 * test_steps].
scaler = MinMaxScaler()
train = prices[prices["date"] < dates[-2 * test_steps]][["sell_price"]]

_ = scaler.fit(train)
prices["sell_price"] = scaler.transform(prices[["sell_price"]])
prices = prices.pivot(index="date", columns="id", values="sell_price")
# Missing prices take the next available price of the same series.
# `.bfill()` replaces the deprecated `fillna(method="bfill")`.
prices = prices.bfill()

# SNAP flags per state, renamed so the column labels match `state_id` values.
snap = calendar[["date", "snap_CA", "snap_TX", "snap_WI"]].rename(
    columns={"snap_CA": "CA", "snap_TX": "TX", "snap_WI": "WI"}
)
snap = pd.melt(
    snap,
    id_vars="date",
    value_vars=["CA", "TX", "WI"],
    var_name="state_id",
    value_name="snap",
)
# Attach each series' state, then look up that state's flag for every date.
snap = key_pairs.merge(df_train_eval[["id", "state_id"]], how="left").merge(
    snap, on=["date", "state_id"], how="left"
)
snap = snap.pivot(index="date", columns="id", values="snap")

# Both frames use the series id as column label, so selecting one id from the
# concatenation yields one column per dynamic real feature.
feat_dynamic_real = pd.concat([prices, snap], axis=1)
n_feat_dynamic_real = feat_dynamic_real.shape[1] // target.shape[1]

In [8]:
# Static categorical attributes of each series.
feature_names = ["item_id", "dept_id", "cat_id", "store_id", "state_id"]
# Copy first: the columns are overwritten below, and writing into a slice of
# `df_train_eval` is chained assignment (SettingWithCopyWarning).
feat_static_cat = df_train_eval[["id"] + feature_names].copy()

# Replace every category by its integer ordinal code.
encoder = OrdinalEncoder()
feat_static_cat[feature_names] = encoder.fit_transform(
    feat_static_cat[feature_names]
).astype(int)
# One column per series id, one row per static feature.
feat_static_cat = feat_static_cat.set_index("id").T

# Number of distinct categories of each static feature, in `feature_names` order.
cardinality = [len(category) for category in encoder.categories_]

In [9]:
def split_into_n_array(x, n):
    """Flatten ``x`` column by column and cut the result into ``n`` equal arrays.

    ``x`` is a pandas object exposing ``.values``. Its values are transposed
    and flattened, so for a frame all values of the first column come first,
    then the second column, and so on. The flat array is then split into ``n``
    equally sized pieces, returned as a list of arrays.
    """
    flattened = x.values.T.reshape(-1)
    return np.split(flattened, n)


# Training entries: the target drops its last `test_steps` observations and
# the dynamic features drop the last 2 * test_steps rows.
train_list = [
    {
        FieldName.TARGET: target[series_id].iloc[:-test_steps].values,
        FieldName.START: target.index[0],
        FieldName.FEAT_DYNAMIC_REAL: (
            split_into_n_array(
                feat_dynamic_cat.iloc[: -2 * test_steps],
                n_feat_dynamic_cat,
            )
            + split_into_n_array(
                feat_dynamic_real[series_id].iloc[: -2 * test_steps],
                n_feat_dynamic_real,
            )
        ),
        FieldName.FEAT_STATIC_CAT: feat_static_cat[series_id].values,
    }
    for series_id in test_sampled_ids
]

# Test entries: the full target, with the dynamic features dropping only the
# last `test_steps` rows.
test_list = [
    {
        FieldName.TARGET: target[series_id].values,
        FieldName.START: target.index[0],
        FieldName.FEAT_DYNAMIC_REAL: (
            split_into_n_array(
                feat_dynamic_cat.iloc[:-test_steps],
                n_feat_dynamic_cat,
            )
            + split_into_n_array(
                feat_dynamic_real[series_id].iloc[:-test_steps],
                n_feat_dynamic_real,
            )
        ),
        FieldName.FEAT_STATIC_CAT: feat_static_cat[series_id].values,
    }
    for series_id in test_sampled_ids
]

train_dataset = ListDataset(train_list, freq="D")
test_dataset = ListDataset(test_list, freq="D")

# DeepAR: Model Training

In [10]:
# Use the GPU context when MXNet reports at least one GPU, else the CPU.
device = "cpu" if num_gpus() == 0 else "gpu"

trainer = Trainer(
    epochs=200,
    num_batches_per_epoch=50,
    learning_rate=0.001,
    hybridize=True,
    ctx=device,
)

In [11]:
deepar_estimator = DeepAREstimator(
    # Forecasting task: daily data, `test_steps` days ahead.
    freq="D",
    prediction_length=test_steps,
    context_length=2 * test_steps,
    # Network.
    cell_type="lstm",
    num_layers=2,
    num_cells=40,
    dropout_rate=0.1,
    # Input features.
    use_feat_dynamic_real=True,
    use_feat_static_cat=True,
    use_feat_static_real=False,
    cardinality=cardinality,
    # Output distribution (PoissonOutput() is imported as an alternative).
    distr_output=NegativeBinomialOutput(),
    # Optimisation.
    trainer=trainer,
    batch_size=30,
)

In [12]:
%%time
deepar_predictor = deepar_estimator.train(train_dataset)

# Persist the trained predictor under MODEL_PATH/deepar.
(Path(MODEL_PATH) / "deepar").mkdir(parents=True, exist_ok=True)
deepar_predictor.serialize(Path(MODEL_PATH) / "deepar")

100%|██████████| 50/50 [00:04<00:00, 10.18it/s, epoch=1/200, avg_epoch_loss=0.935]
100%|██████████| 50/50 [00:04<00:00, 10.97it/s, epoch=2/200, avg_epoch_loss=0.87]
100%|██████████| 50/50 [00:04<00:00, 10.91it/s, epoch=3/200, avg_epoch_loss=0.886]
100%|██████████| 50/50 [00:04<00:00, 10.94it/s, epoch=4/200, avg_epoch_loss=0.876]
100%|██████████| 50/50 [00:04<00:00, 10.98it/s, epoch=5/200, avg_epoch_loss=0.865]
100%|██████████| 50/50 [00:04<00:00, 10.91it/s, epoch=6/200, avg_epoch_loss=0.894]
100%|██████████| 50/50 [00:04<00:00, 10.95it/s, epoch=7/200, avg_epoch_loss=0.835]
100%|██████████| 50/50 [00:04<00:00, 10.90it/s, epoch=8/200, avg_epoch_loss=0.834]
100%|██████████| 50/50 [00:04<00:00, 11.00it/s, epoch=9/200, avg_epoch_loss=0.878]
100%|██████████| 50/50 [00:04<00:00, 10.97it/s, epoch=10/200, avg_epoch_loss=0.924]
100%|██████████| 50/50 [00:04<00:00, 11.01it/s, epoch=11/200, avg_epoch_loss=0.865]
100%|██████████| 50/50 [00:04<00:00, 11.01it/s, epoch=12/200, avg_epoch_loss=0.875]
10

Early stopping based on learning rate scheduler callback (min_lr was reached).
CPU times: user 6min 11s, sys: 10.2 s, total: 6min 21s
Wall time: 6min 12s


# DeepAR: Model Evaluation

In [13]:
# Reload the predictor from MODEL_PATH/deepar.
deepar_predictor = Predictor.deserialize(Path(MODEL_PATH) / "deepar")

In [14]:
%%time
forecast_iter, ts_iter = make_evaluation_predictions(
    dataset=test_dataset, predictor=deepar_predictor, num_samples=100
)
# Materialise both iterators (forecasts first) so they can be reused below.
forecasts, tss = list(forecast_iter), list(ts_iter)

CPU times: user 1min 32s, sys: 11.4 s, total: 1min 43s
Wall time: 1min 30s


In [15]:
# Number of evaluated series (one per sampled id).
num_series = len(test_sampled_ids)

# Aggregate and per-item metrics at the 10%, 50% and 90% quantiles.
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(
    iter(tss),
    iter(forecasts),
    num_series=num_series,
)

Running evaluation: 100%|██████████| 3049/3049 [00:00<00:00, 48787.91it/s]


In [16]:
# One "name: value" line per aggregate metric, skipping metrics that are NaN.
# The lines are joined with "\n" so there is no trailing newline to trim:
# the earlier `string[:-2]` also cut off the last digit of the final metric.
string = "\n".join(
    f"{key}: {value:.4f}"
    for key, value in agg_metrics.items()
    if not np.isnan(value)
)

print(string)

MSE: 4.9731
abs_error: 81637.0000
abs_target_sum: 117963.0000
abs_target_mean: 1.3818
seasonal_error: 0.9078
MASE: 1.4058
MAPE: 0.7671
sMAPE: 1.5382
MSIS: 11.7334
QuantileLoss[0.1]: 22262.4000
Coverage[0.1]: 0.0078
QuantileLoss[0.5]: 81637.0000
Coverage[0.5]: 0.1660
QuantileLoss[0.9]: 58147.2000
Coverage[0.9]: 0.7299
RMSE: 2.2300
NRMSE: 1.6139
ND: 0.6921
wQuantileLoss[0.1]: 0.1887
wQuantileLoss[0.5]: 0.6921
wQuantileLoss[0.9]: 0.4929
mean_absolute_QuantileLoss: 54015.5333
mean_wQuantileLoss: 0.4579
MAE_Coverage: 0.198


In [17]:
# Rows of the sampled series, in the same order as `test_sampled_ids`.
sampled_df_train_eval = (
    df_train_eval.set_index("id").loc[test_sampled_ids].reset_index()
)

# Key columns plus every day except the last `test_steps`, and the last
# `test_steps` days on their own.
sampled_df_train = sampled_df_train_eval[key_names + date_names[:-test_steps]]
sampled_df_test = sampled_df_train_eval[date_names[-test_steps:]]

wrmsse_evaluator = WRMSSEEvaluator(
    sampled_df_train,
    sampled_df_test,
    calendar,
    selling_prices,
    test_steps,
)

  0%|          | 0/12 [00:00<?, ?it/s]

In [18]:
# Point forecast of each series = mean of its forecast.
predictions = [sample_forecast.mean for sample_forecast in forecasts]
sampled_df_pred = pd.DataFrame(predictions, columns=sampled_df_test.columns)

wrmsse = wrmsse_evaluator.score(sampled_df_pred)
print(f"DeepAR WRMSSE: {wrmsse:.6f}")

DeepAR WRMSSE: 0.698085


In [19]:
def plot_forecast(source, test_steps, plot_id=None, model_name=None, start_date=None):
    """Build a layered Altair chart of observed sales and a forecast with bands.

    Parameters
    ----------
    source : pandas.DataFrame
        Must contain the columns ``time``, ``y``, ``fcst``, ``fcst_lower_05``,
        ``fcst_lower_25``, ``fcst_upper_75`` and ``fcst_upper_95``.
    test_steps : int
        A dashed red vertical rule is drawn at the ``time`` of the row that is
        ``test_steps`` rows from the end of ``source`` (after filtering).
    plot_id : optional
        If given, appended to the chart title as ``for '<plot_id>'``.
    model_name : optional
        If given, prepended to the chart title as ``<model_name>: ``.
    start_date : optional
        If given, only rows with ``time >= start_date`` are plotted.

    Returns
    -------
    The layered chart (points, forecast line, 90% band, 50% band, rule),
    1000 x 300 pixels, with the title set.
    """
    if start_date is not None:
        source = source[source["time"] >= start_date]

    # Observed values as black dots.
    points = (
        alt.Chart(source)
        .mark_circle(size=10.0, color="#000000")
        .encode(
            x=alt.X("time:T", axis=alt.Axis(title="Date")),
            y=alt.Y("y", axis=alt.Axis(title="Sales")),
            tooltip=["time:T", "y:Q"],
        )
    )

    # Point forecast.
    line = (
        alt.Chart(source)
        .mark_line(size=1.0, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst",
        )
    )

    # Band between the 5% and 95% forecast quantiles.
    band_90 = (
        alt.Chart(source)
        .mark_area(opacity=0.25, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst_lower_05",
            y2="fcst_upper_95",
        )
    )

    # Band between the 25% and 75% forecast quantiles.
    band_50 = (
        alt.Chart(source)
        .mark_area(opacity=0.5, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst_lower_25",
            y2="fcst_upper_75",
        )
    )

    # Single row marking the start of the last `test_steps` rows. With
    # test_steps == 1 the stop `-test_steps + 1` would be 0 and the slice
    # would be empty, so the stop is left open in that case.
    rule_stop = None if test_steps == 1 else -test_steps + 1
    rule = (
        alt.Chart(source[["time"]].iloc[-test_steps:rule_stop])
        .mark_rule(size=1.0, color="#FF0000", strokeDash=[2, 2])
        .encode(x="time:T")
    )

    title = "Sales Forecast"
    if plot_id is not None:
        title += f" for '{plot_id}'"
    if model_name is not None:
        title = f"{model_name}: " + title

    return (points + line + band_90 + band_50 + rule).properties(title=title, width=1000, height=300)

In [20]:
plot_index = 2  # 2, 4, 8
plot_id = test_sampled_ids[plot_index]

# Observed sales of the selected series as a two-column (time, y) frame.
y = target[plot_id].rename("y").rename_axis("time").reset_index()
y["time"] = pd.to_datetime(y["time"])

# Mean forecast plus the 5/25/75/95% quantiles, one row per forecast date.
forecast = pd.DataFrame(
    [
        forecasts[plot_index].mean,
        *(forecasts[plot_index].quantile(p) for p in (0.05, 0.25, 0.75, 0.95)),
    ],
    columns=forecasts[plot_index].index,
).T.reset_index()
forecast.columns = [
    "time",
    "fcst",
    "fcst_lower_05",
    "fcst_lower_25",
    "fcst_upper_75",
    "fcst_upper_95",
]

# Left join keeps every observed date; forecast columns are NaN elsewhere.
source = y.merge(forecast, how="left")
plot_forecast(
    source,
    test_steps,
    plot_id=plot_id,
    model_name="DeepAR",
    start_date="2015-05-23",
)

# DeepVAR: Model Training

In [21]:
# One grouper per dataset, each limited to `num_series` target dimensions.
train_grouper = MultivariateGrouper(max_target_dim=num_series)
test_grouper = MultivariateGrouper(max_target_dim=num_series)

# NOTE: this rebinds `train_dataset` / `test_dataset` to the grouped versions,
# so re-running the cell would pass already-grouped data to the groupers.
train_dataset = train_grouper(train_dataset)
test_dataset = test_grouper(test_dataset)

In [22]:
# Fresh trainer for DeepVAR, created with the same settings as the DeepAR one.
trainer = Trainer(
    epochs=200,
    num_batches_per_epoch=50,
    learning_rate=0.001,
    hybridize=True,
    ctx=device,
)

In [23]:
deepvar_estimator = DeepVAREstimator(
    # Forecasting task: daily data, `test_steps` days ahead, all series jointly.
    freq="D",
    prediction_length=test_steps,
    context_length=2 * test_steps,
    target_dim=num_series,
    # Network.
    cell_type="lstm",
    num_layers=2,
    num_cells=40,
    dropout_rate=0.2,
    cardinality=cardinality,
    # Low-rank (rank 5) multivariate Gaussian over all `num_series` dimensions.
    distr_output=LowrankMultivariateGaussianOutput(dim=num_series, rank=5),
    # Optimisation.
    trainer=trainer,
    batch_size=30,
)

In [24]:
%%time
deepvar_predictor = deepvar_estimator.train(train_dataset)

# Persist the trained predictor under MODEL_PATH/deepvar.
(Path(MODEL_PATH) / "deepvar").mkdir(parents=True, exist_ok=True)
deepvar_predictor.serialize(Path(MODEL_PATH) / "deepvar")

100%|██████████| 50/50 [05:10<00:00,  6.21s/it, epoch=1/200, avg_epoch_loss=1.61e+18]
100%|██████████| 50/50 [05:10<00:00,  6.21s/it, epoch=2/200, avg_epoch_loss=3.95e+17]
100%|██████████| 50/50 [05:16<00:00,  6.32s/it, epoch=3/200, avg_epoch_loss=2.32e+17]
100%|██████████| 50/50 [05:10<00:00,  6.21s/it, epoch=4/200, avg_epoch_loss=1.95e+17]
100%|██████████| 50/50 [05:12<00:00,  6.25s/it, epoch=5/200, avg_epoch_loss=1.6e+17] 
100%|██████████| 50/50 [05:08<00:00,  6.17s/it, epoch=6/200, avg_epoch_loss=1.31e+17]
100%|██████████| 50/50 [05:05<00:00,  6.10s/it, epoch=7/200, avg_epoch_loss=1.35e+17]
100%|██████████| 50/50 [05:03<00:00,  6.06s/it, epoch=8/200, avg_epoch_loss=9.67e+16]
100%|██████████| 50/50 [05:03<00:00,  6.06s/it, epoch=9/200, avg_epoch_loss=5.91e+16]
100%|██████████| 50/50 [05:02<00:00,  6.05s/it, epoch=10/200, avg_epoch_loss=1.38e+17]
100%|██████████| 50/50 [05:03<00:00,  6.07s/it, epoch=11/200, avg_epoch_loss=4.51e+16]
100%|██████████| 50/50 [05:04<00:00,  6.10s/it, epoc

Early stopping based on learning rate scheduler callback (min_lr was reached).




CPU times: user 6h 20min 5s, sys: 13min 17s, total: 6h 33min 23s
Wall time: 6h 32min 43s


# DeepVAR: Model Evaluation

In [25]:
# Reload the predictor from MODEL_PATH/deepvar.
deepvar_predictor = Predictor.deserialize(Path(MODEL_PATH) / "deepvar")

In [26]:
%%time
forecast_iter, ts_iter = make_evaluation_predictions(
    dataset=test_dataset, predictor=deepvar_predictor, num_samples=100
)
# Materialise both iterators (forecasts first) so they can be reused below.
forecasts, tss = list(forecast_iter), list(ts_iter)

CPU times: user 1.95 s, sys: 827 ms, total: 2.78 s
Wall time: 2.63 s


In [27]:
# Multivariate metrics at the 10%, 50% and 90% quantiles, combined with np.mean.
evaluator = MultivariateEvaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics = evaluator.calculate_aggregate_multivariate_metrics(
    iter(tss),
    iter(forecasts),
    np.mean,
)

Running evaluation: 1it [00:00, 25.80it/s]


In [28]:
# One "name: value" line per aggregate metric, skipping metrics that are NaN.
# The lines are joined with "\n" so there is no trailing newline to trim:
# the earlier `string[:-2]` also cut off the last digit of the final metric.
string = "\n".join(
    f"{key}: {value:.4f}"
    for key, value in agg_metrics.items()
    if not np.isnan(value)
)

print(string)

MSE: 3.7794
abs_error: 53.8521
abs_target_sum: 38.6891
abs_target_mean: 1.3818
seasonal_error: 0.1393
MASE: 13.8039
MAPE: 1.4066
sMAPE: 2.0000
MSIS: 383.5658
QuantileLoss[0.1]: 13.0227
Coverage[0.1]: 0.0000
QuantileLoss[0.5]: 53.8521
Coverage[0.5]: 0.0000
QuantileLoss[0.9]: 76.6139
Coverage[0.9]: 0.0000
RMSE: 1.9441
NRMSE: 1.4070
ND: 1.3919
wQuantileLoss[0.1]: 0.3366
wQuantileLoss[0.5]: 1.3919
wQuantileLoss[0.9]: 1.9802
mean_absolute_QuantileLoss: 47.8296
mean_wQuantileLoss: 1.2363
MAE_Coverage: 0.500


In [29]:
# Mean of each forecast; only the first entry is used below, transposed so its
# columns line up with the columns of `sampled_df_test`.
predictions = [sample_forecast.mean for sample_forecast in forecasts]
sampled_df_pred = pd.DataFrame(predictions[0].T, columns=sampled_df_test.columns)

wrmsse = wrmsse_evaluator.score(sampled_df_pred)
print(f"DeepVAR WRMSSE: {wrmsse:.6f}")

DeepVAR WRMSSE: 6.429227


In [30]:
plot_index = 2  # 2, 4, 8
plot_id = test_sampled_ids[plot_index]

# Observed sales of the selected series as a two-column (time, y) frame.
y = target[[plot_id]].reset_index()
y.columns = ["time", "y"]
y["time"] = pd.to_datetime(y["time"])

# The multivariate forecast arrays are laid out as (forecast step, series):
# the WRMSSE cell has to transpose `mean` to get one row per series. The
# series therefore has to be selected on the second axis. Indexing the first
# axis (`mean[plot_index][-test_steps:]`) would instead return a single
# forecast step for the last `test_steps` series.
forecast = pd.DataFrame(
    [forecasts[0].mean[:, plot_index]]
    + [forecasts[0].quantile(p)[:, plot_index] for p in [0.05, 0.25, 0.75, 0.95]],
    columns=forecasts[0].index,
).T.reset_index()
forecast.columns = [
    "time",
    "fcst",
    "fcst_lower_05",
    "fcst_lower_25",
    "fcst_upper_75",
    "fcst_upper_95",
]

# Left join keeps every observed date; forecast columns are NaN elsewhere.
source = y.merge(forecast, how="left")
plot_forecast(
    source, test_steps, plot_id=plot_id, model_name="DeepVAR", start_date="2015-05-23"
)

# DeepAR: Prediction

In [31]:
# Calendar event columns; missing events become the literal category "NA".
feature_names = ["event_name_1", "event_type_1", "event_name_2", "event_type_2"]
events = calendar[["date"] + feature_names].fillna("NA")

# For the final model the encoder is fitted on dates before dates[-test_steps].
train = events[events["date"] < dates[-test_steps]][feature_names]

encoder = HashingEncoder(drop_invariant=True)
_ = encoder.fit(train)
encoded = encoder.transform(events[feature_names])
events = pd.concat([events[["date"]], encoded], axis=1)

# `time_related` (date, wday, month, day) comes from the preprocessing section.
feat_dynamic_cat = events.merge(time_related).set_index("date")

# Min-max scale every calendar feature column.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(feat_dynamic_cat)
feat_dynamic_cat = pd.DataFrame(
    scaled, columns=feat_dynamic_cat.columns, index=feat_dynamic_cat.index
)
# The encoder was refitted with drop_invariant=True on a different window, so
# the number of retained columns may differ from the earlier fit. Recompute the
# count so the later split into per-feature arrays uses the current width.
n_feat_dynamic_cat = feat_dynamic_cat.shape[1]

In [32]:
# Weekly selling price for every series id.
prices = (
    df_train_eval[["id", "store_id", "item_id"]]
    .merge(selling_prices, how="left")
    .drop(["store_id", "item_id"], axis=1)
)
# Expand the weekly prices to one row per calendar date.
week_to_date = calendar[["date", "wm_yr_wk"]].drop_duplicates()
prices = week_to_date.merge(prices, how="left").drop(["wm_yr_wk"], axis=1)

# For the final model the scaler is fitted on dates before dates[-test_steps].
scaler = MinMaxScaler()
train = prices[prices["date"] < dates[-test_steps]][["sell_price"]]

_ = scaler.fit(train)
prices["sell_price"] = scaler.transform(prices[["sell_price"]])
prices = prices.pivot(index="date", columns="id", values="sell_price")
# Missing prices take the next available price of the same series.
# `.bfill()` replaces the deprecated `fillna(method="bfill")`.
prices = prices.bfill()

# `snap` (date x id SNAP flags) is reused from the preprocessing section.
feat_dynamic_real = pd.concat([prices, snap], axis=1)

In [33]:
# Training entries for the final model: the full observed target of every
# series, with the dynamic features dropping the last `test_steps` rows.
train_list = [
    {
        FieldName.TARGET: target[series_id].values,
        FieldName.START: target.index[0],
        FieldName.FEAT_DYNAMIC_REAL: (
            split_into_n_array(
                feat_dynamic_cat.iloc[:-test_steps],
                n_feat_dynamic_cat,
            )
            + split_into_n_array(
                feat_dynamic_real[series_id].iloc[:-test_steps],
                n_feat_dynamic_real,
            )
        ),
        FieldName.FEAT_STATIC_CAT: feat_static_cat[series_id].values,
    }
    for series_id in all_ids
]

# Prediction entries: the target is extended with `test_steps` NaN values and
# the dynamic features are used in full.
test_list = [
    {
        FieldName.TARGET: np.append(
            target[series_id].values, np.repeat(np.nan, test_steps)
        ),
        FieldName.START: target.index[0],
        FieldName.FEAT_DYNAMIC_REAL: (
            split_into_n_array(feat_dynamic_cat, n_feat_dynamic_cat)
            + split_into_n_array(feat_dynamic_real[series_id], n_feat_dynamic_real)
        ),
        FieldName.FEAT_STATIC_CAT: feat_static_cat[series_id].values,
    }
    for series_id in all_ids
]

train_dataset = ListDataset(train_list, freq="D")
test_dataset = ListDataset(test_list, freq="D")

In [34]:
# Fresh trainer for the final DeepAR model, with the same settings as before.
trainer = Trainer(
    epochs=200,
    num_batches_per_epoch=50,
    learning_rate=0.001,
    hybridize=True,
    ctx=device,
)

In [35]:
deepar_estimator = DeepAREstimator(
    # Forecasting task: daily data, `test_steps` days ahead.
    freq="D",
    prediction_length=test_steps,
    context_length=2 * test_steps,
    # Network.
    cell_type="lstm",
    num_layers=2,
    num_cells=40,
    # NOTE(review): the DeepAR estimator evaluated in the "DeepAR: Model
    # Training" section uses dropout_rate=0.1 -- confirm 0.2 is intended here.
    dropout_rate=0.2,
    # Input features.
    use_feat_dynamic_real=True,
    use_feat_static_cat=True,
    use_feat_static_real=False,
    cardinality=cardinality,
    # Output distribution (PoissonOutput() is imported as an alternative).
    distr_output=NegativeBinomialOutput(),
    # Optimisation.
    trainer=trainer,
    batch_size=30,
)

In [36]:
%%time
# Train the final DeepAR model on every series. Unlike the evaluation models
# above, this predictor is not serialized to MODEL_PATH.
deepar_predictor = deepar_estimator.train(train_dataset)

100%|██████████| 50/50 [00:05<00:00,  9.48it/s, epoch=1/200, avg_epoch_loss=0.875]
100%|██████████| 50/50 [00:04<00:00, 10.52it/s, epoch=2/200, avg_epoch_loss=1.23]
100%|██████████| 50/50 [00:04<00:00, 10.66it/s, epoch=3/200, avg_epoch_loss=0.847]
100%|██████████| 50/50 [00:04<00:00, 10.69it/s, epoch=4/200, avg_epoch_loss=0.902]
100%|██████████| 50/50 [00:04<00:00, 10.72it/s, epoch=5/200, avg_epoch_loss=0.974]
100%|██████████| 50/50 [00:04<00:00, 10.73it/s, epoch=6/200, avg_epoch_loss=1.21]
100%|██████████| 50/50 [00:04<00:00, 10.72it/s, epoch=7/200, avg_epoch_loss=0.718]
100%|██████████| 50/50 [00:04<00:00, 10.75it/s, epoch=8/200, avg_epoch_loss=0.859]
100%|██████████| 50/50 [00:04<00:00, 10.79it/s, epoch=9/200, avg_epoch_loss=0.753]
100%|██████████| 50/50 [00:04<00:00, 10.75it/s, epoch=10/200, avg_epoch_loss=0.879]
100%|██████████| 50/50 [00:04<00:00, 10.82it/s, epoch=11/200, avg_epoch_loss=0.925]
100%|██████████| 50/50 [00:04<00:00, 10.80it/s, epoch=12/200, avg_epoch_loss=0.942]
100

Early stopping based on learning rate scheduler callback (min_lr was reached).
CPU times: user 6min 8s, sys: 14.1 s, total: 6min 22s
Wall time: 6min 12s





In [37]:
%%time
forecast_iter, ts_iter = make_evaluation_predictions(
    dataset=test_dataset, predictor=deepar_predictor, num_samples=100
)
# Materialise both iterators (forecasts first) so they can be reused below.
forecasts, tss = list(forecast_iter), list(ts_iter)

CPU times: user 15min 39s, sys: 2min 20s, total: 18min
Wall time: 15min 51s


In [38]:
# Submission columns F1 ... F<test_steps>, derived from the forecast horizon
# instead of a hard-coded 28.
column_names = ["F" + str(i) for i in range(1, test_steps + 1)]

# "validation" rows: the last `test_steps` observed days of every series.
# Copy first so the edits below do not write into a slice of `df_train_eval`.
valid_submission = df_train_eval.loc[
    :, ["id"] + date_names[-test_steps:]
].copy()
valid_submission.columns = ["id"] + column_names
# Literal substring replacement; regex=False avoids relying on the
# pandas-version-dependent default.
valid_submission["id"] = valid_submission["id"].str.replace(
    "evaluation", "validation", regex=False
)

# "evaluation" rows: mean forecast of every series, in `all_ids` order.
eval_submission = (
    pd.DataFrame(
        [forecast.mean for forecast in forecasts],
        columns=column_names,
        index=all_ids,
    )
    .rename_axis("id")
    .reset_index()
)

submission = pd.concat([valid_submission, eval_submission], ignore_index=True)
submission.to_csv("submission.csv", index=False)