In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import itertools
import os
import warnings
import category_encoders
import gluonts
import mxnet
import numpy as np
import pandas as pd
import altair as alt
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder
from category_encoders.hashing import HashingEncoder
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.evaluation import Evaluator, MultivariateEvaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.deepvar import DeepVAREstimator
from gluonts.model.predictor import Predictor
from gluonts.mx.distribution import (
    LowrankMultivariateGaussianOutput,
    NegativeBinomialOutput, 
)
from gluonts.mx.trainer import Trainer
from mxnet.context import num_gpus
from utils.evaluation import calc_eval_metric, WRMSSEEvaluator

# Seed the NumPy and MXNet global random number generators so that the random
# ID sampling below (and, presumably, model training) is repeatable.
SEED = 42
np.random.seed(SEED)
mxnet.random.seed(SEED)

# Silence every Python warning for the rest of the notebook.
warnings.filterwarnings(action="ignore")

The Kaggle dataset must be downloaded in advance and saved in a local directory (e.g. `~/data/mofc-demand-forecast`); `DATA_PATH` in the next cell must point to that directory.

In [None]:
# Location of the raw CSV files and of the directory trained models are saved to.
DATA_PATH = "../../data/mofc-demand-forecast"
MODEL_PATH = "models"


def _load_table(file_name):
    """Read one CSV file located directly under DATA_PATH into a DataFrame."""
    return pd.read_csv(os.path.join(DATA_PATH, file_name))


calendar = _load_table("calendar.csv")
selling_prices = _load_table("sell_prices.csv")
df_train_valid = _load_table("sales_train_validation.csv")
df_train_eval = _load_table("sales_train_evaluation.csv")
sample_submission = _load_table("sample_submission.csv")

In [None]:
# Identifier columns of the sales table, and the distinct series IDs.
key_names = ["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]
all_ids = df_train_eval["id"].unique()
# Day-column labels d_1 ... d_1941 of the sales table.
date_names = ["d_" + str(i) for i in range(1, 1942)]
dates = calendar["date"].unique()
# Length of the forecast horizon, in days.
test_steps = 28

# Every (id, date) combination, ids varying slowest. Built through a
# MultiIndex so that no intermediate Python list holding one tuple per
# combination has to be materialised.
key_pairs = pd.MultiIndex.from_product(
    [all_ids, dates], names=["id", "date"]
).to_frame(index=False)

# Fraction of the series used for the backtest; 1.0 uses all of them.
test_sample_ratio = 0.1

if test_sample_ratio == 1.0:
    test_sampled_ids = all_ids
else:
    # Sample without replacement so that no series is selected twice.
    test_sampled_ids = np.random.choice(
        all_ids, round(test_sample_ratio * len(all_ids)), replace=False
    ).tolist()

print(
    f"{len(test_sampled_ids)} out of {len(all_ids)} IDs were selected for testing."
)

# Data Preprocessing

In [None]:
# Lookup from the calendar's day label (column "d") to its date.
date_dict = dict(zip(calendar["d"], calendar["date"]))

# Reshape the sales table to one row per day and one column per series ID,
# then replace the day labels by their dates and use them as the index.
target = df_train_eval.loc[:, ["id", *date_names]].set_index("id").T.reset_index()
target["index"] = target["index"].replace(date_dict)
target.columns = ["date", *target.columns[1:]]
target = target.set_index("date")

In [None]:
# --- Event features -------------------------------------------------------
feature_names = ["event_name_1", "event_type_1", "event_name_2", "event_type_2"]
events = calendar[["date"] + feature_names].fillna("NA")
# Fit the encoder only on rows dated before the last 2 * test_steps calendar
# dates. The comparison is on the raw "date" values
# (assumes ISO-formatted date strings, so lexical order is chronological — TODO confirm).
train = events[events["date"] < dates[-2 * test_steps]][feature_names]

encoder = HashingEncoder(drop_invariant=True)
_ = encoder.fit(train)
encoded = encoder.transform(events[feature_names])
events = pd.concat([events[["date"]], encoded], axis=1)

# --- Calendar features ----------------------------------------------------
# Take an explicit copy: adding a column to a bare column selection of
# `calendar` is chained assignment and triggers SettingWithCopyWarning.
time_related = calendar[["date", "wday", "month"]].copy()
# Third "-"-separated component of the date string, as an integer.
time_related["day"] = time_related["date"].map(lambda x: int(x.split("-")[2]))

feat_dynamic_cat = events.merge(time_related).set_index("date")

# NOTE(review): unlike the encoder, this scaler is fit on every calendar row,
# not only on the rows selected into `train` above.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(feat_dynamic_cat)
feat_dynamic_cat = pd.DataFrame(
    scaled, columns=feat_dynamic_cat.columns, index=feat_dynamic_cat.index
)
n_feat_dynamic_cat = feat_dynamic_cat.shape[1]

In [None]:
# --- Selling prices ---------------------------------------------------------
# Attach each series ID to its price rows (joined on the shared columns).
prices = (
    df_train_eval[["id", "store_id", "item_id"]]
    .merge(selling_prices, how="left")
    .drop(["store_id", "item_id"], axis=1)
)
# Expand the weekly prices to one row per calendar date.
week_to_date = calendar[["date", "wm_yr_wk"]].drop_duplicates()
prices = week_to_date.merge(prices, how="left").drop(
    ["wm_yr_wk"], axis=1
)

# Fit the scaler only on rows dated before the last 2 * test_steps calendar dates.
scaler = MinMaxScaler()
train = prices[prices["date"] < dates[-2 * test_steps]][["sell_price"]]

_ = scaler.fit(train)
prices["sell_price"] = scaler.transform(prices[["sell_price"]])
prices = prices.pivot(index="date", columns="id", values="sell_price")
# Back-fill missing prices with the next available one.
# `DataFrame.bfill()` replaces the deprecated `fillna(method="bfill")`.
prices = prices.bfill()

# --- SNAP indicators --------------------------------------------------------
# `rename` returns a new frame instead of relabelling a selection of `calendar`.
snap = calendar[["date", "snap_CA", "snap_TX", "snap_WI"]].rename(
    columns={"snap_CA": "CA", "snap_TX": "TX", "snap_WI": "WI"}
)
snap = pd.melt(
    snap,
    id_vars="date",
    value_vars=["CA", "TX", "WI"],
    var_name="state_id",
    value_name="snap",
)
# Give every (id, date) pair the indicator of the state the ID belongs to.
snap = key_pairs.merge(df_train_eval[["id", "state_id"]], how="left").merge(
    snap, on=["date", "state_id"], how="left"
)
snap = snap.pivot(index="date", columns="id", values="snap")

# Both blocks are keyed by ID, so selecting one ID from the concatenation
# returns one column per block.
feat_dynamic_real = pd.concat([prices, snap], axis=1)
n_feat_dynamic_real = feat_dynamic_real.shape[1] // target.shape[1]

In [None]:
feature_names = ["item_id", "dept_id", "cat_id", "store_id", "state_id"]
# Work on an explicit copy: assigning columns to a bare column selection of
# `df_train_eval` is chained assignment and triggers SettingWithCopyWarning.
feat_static_cat = df_train_eval[["id"] + feature_names].copy()

# Replace every category label by its ordinal code, stored as integers.
encoder = OrdinalEncoder()
feat_static_cat[feature_names] = encoder.fit_transform(feat_static_cat[feature_names])
feat_static_cat[feature_names] = feat_static_cat[feature_names].astype(int)
# One column per series ID, one row per static feature.
feat_static_cat = feat_static_cat.set_index("id").T

# Number of distinct categories of each static feature, in `feature_names` order.
cardinality = [len(category) for category in encoder.categories_]

In [None]:
def split_into_n_array(x, n):
    """Flatten `x` column by column and cut the result into `n` equal arrays.

    `x` is a pandas object exposing `.values`. Its transposed values are
    flattened to one dimension and split into `n` equally sized pieces, which
    are returned as a list of 1-D arrays. For a DataFrame with `n` columns this
    yields one array per column.
    """
    flattened = np.ravel(np.transpose(x.values))
    return np.split(flattened, n)


def _backtest_entry(series_id, target_values, covariate_stop):
    """Assemble the dataset entry of one series.

    `target_values` becomes the target; the dynamic features are the calendar
    features followed by the per-series features of `series_id`, both cut off
    at row position `covariate_stop`.
    """
    calendar_part = split_into_n_array(
        feat_dynamic_cat.iloc[:covariate_stop],
        n_feat_dynamic_cat,
    )
    series_part = split_into_n_array(
        feat_dynamic_real[series_id].iloc[:covariate_stop],
        n_feat_dynamic_real,
    )
    return {
        FieldName.TARGET: target_values,
        FieldName.START: target.index[0],
        FieldName.FEAT_DYNAMIC_REAL: calendar_part + series_part,
        FieldName.FEAT_STATIC_CAT: feat_static_cat[series_id].values,
    }


# Training entries: the target without its last `test_steps` values and the
# features without their last 2 * test_steps rows.
train_list = []
for test_sampled_id in test_sampled_ids:
    dict_by_id = _backtest_entry(
        test_sampled_id,
        target[test_sampled_id].iloc[:-test_steps].values,
        -2 * test_steps,
    )
    train_list.append(dict_by_id)

# Test entries: the complete target and the features without their last
# `test_steps` rows.
test_list = []
for test_sampled_id in test_sampled_ids:
    dict_by_id = _backtest_entry(
        test_sampled_id,
        target[test_sampled_id].values,
        -test_steps,
    )
    test_list.append(dict_by_id)

train_dataset = ListDataset(train_list, freq="D")
test_dataset = ListDataset(test_list, freq="D")

# DeepAR: Model Training

In [None]:
# Use the GPU context when MXNet reports at least one GPU, otherwise the CPU.
if num_gpus() > 0:
    device = "gpu"
else:
    device = "cpu"

# Training-loop settings shared by the estimator defined next.
trainer = Trainer(
    ctx=device,
    learning_rate=0.001,
    epochs=200,
    num_batches_per_epoch=50,
    hybridize=True,
)

In [None]:
deepar_estimator = DeepAREstimator(
    # Data layout: daily series, forecasting `test_steps` days from a context
    # window twice that long.
    freq="D",
    prediction_length=test_steps,
    context_length=2 * test_steps,
    # Which feature fields of the dataset entries are used.
    use_feat_dynamic_real=True,
    use_feat_static_cat=True,
    use_feat_static_real=False,
    cardinality=cardinality,
    # Network architecture.
    cell_type="lstm",
    num_layers=2,
    num_cells=40,
    dropout_rate=0.1,
    # Output distribution and optimisation.
    distr_output=NegativeBinomialOutput(),
    trainer=trainer,
    batch_size=30,
)

In [None]:
%%time
deepar_predictor = deepar_estimator.train(train_dataset)

# Store the trained predictor in MODEL_PATH/deepar, creating the directory
# (and any missing parents) first.
deepar_dir = Path(MODEL_PATH) / "deepar"
deepar_dir.mkdir(parents=True, exist_ok=True)
deepar_predictor.serialize(deepar_dir)

# DeepAR: Model Evaluation

In [None]:
# Reload the predictor that was serialized to MODEL_PATH/deepar.
deepar_predictor = Predictor.deserialize(Path(MODEL_PATH) / "deepar")

In [None]:
%%time
# Forecast the backtest dataset with the DeepAR predictor, 100 samples per
# forecast.
forecast_iter, ts_iter = make_evaluation_predictions(
    predictor=deepar_predictor,
    dataset=test_dataset,
    num_samples=100,
)
# Materialise both results as lists (forecasts first, then the series).
forecasts, tss = list(forecast_iter), list(ts_iter)

In [None]:
# Number of series in the backtest sample.
num_series = len(test_sampled_ids)

# Aggregate and per-item metrics of the DeepAR forecasts, with quantile
# metrics at the 10%, 50% and 90% levels.
evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics, item_metrics = evaluator(
    iter(tss),
    iter(forecasts),
    num_series=num_series,
)

In [None]:
# Report every aggregate metric that is not NaN, one "name: value" per line.
string = ""
for key, value in agg_metrics.items():
    if not np.isnan(value):
        string += f"{key}: {value:.4f}\n"

# Drop only the trailing newline; slicing two characters off the end would
# also cut the last digit of the final metric.
print(string.rstrip("\n"))

In [None]:
# Sales rows of the sampled IDs, in the order of `test_sampled_ids`.
df_sampled = df_train_eval.set_index("id").loc[test_sampled_ids].reset_index()

# Identifier columns plus every day except the last `test_steps` ...
train_columns = key_names + date_names[:-test_steps]
# ... and the last `test_steps` days on their own.
test_columns = date_names[-test_steps:]
df_train_sampled = df_sampled[train_columns]
df_test_sampled = df_sampled[test_columns]

# Project helper from utils.evaluation, set up on the sampled split.
wrmsse_evaluator = WRMSSEEvaluator(
    df_train_sampled,
    df_test_sampled,
    calendar,
    selling_prices,
    test_steps,
)

In [None]:
# Mean forecast of every sampled series, one row per series, labelled with
# the day columns of the test frame.
predictions = [forecast.mean for forecast in forecasts]
df_pred_sampled = pd.DataFrame(
    data=predictions,
    columns=df_test_sampled.columns,
)

wrmsse = wrmsse_evaluator.score(df_pred_sampled)

print(f"DeepAR WRMSSE: {wrmsse:.6f}")

In [None]:
def plot_forecast(source, test_steps, plot_id=None, model_name=None, start_date=None):
    """Build a layered Altair chart of observed demand and its forecast.

    Parameters
    ----------
    source : pandas.DataFrame
        Data to plot. The columns ``time``, ``y``, ``fcst``,
        ``fcst_lower_05``, ``fcst_lower_25``, ``fcst_upper_75`` and
        ``fcst_upper_95`` are read.
    test_steps : int
        Position, counted from the end of `source`, of the row at which the
        dashed red vertical rule is drawn.
    plot_id : str, optional
        Appended to the chart title as ``for '<plot_id>'`` when given.
    model_name : str, optional
        Prepended to the chart title as ``<model_name>: `` when given.
    start_date : optional
        When given, only rows with ``time >= start_date`` are plotted.

    Returns
    -------
    The layered chart (points, forecast line, two bands and the rule),
    1000 x 300 in size.
    """
    if start_date is not None:
        source = source[source["time"] >= start_date]

    # Observed values as black dots.
    points = (
        alt.Chart(source)
        .mark_circle(size=10.0, color="#000000")
        .encode(
            x=alt.X("time:T", axis=alt.Axis(title="Date")),
            y=alt.Y("y", axis=alt.Axis(title="Demand")),
            tooltip=["time:T", "y:Q"],
        )
    )

    # Point forecast as a blue line.
    line = (
        alt.Chart(source)
        .mark_line(size=1.0, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst",
        )
    )

    # Area between the 5% and 95% forecast columns.
    band_90 = (
        alt.Chart(source)
        .mark_area(opacity=0.25, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst_lower_05",
            y2="fcst_upper_95",
        )
    )

    # Area between the 25% and 75% forecast columns.
    band_50 = (
        alt.Chart(source)
        .mark_area(opacity=0.5, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst_lower_25",
            y2="fcst_upper_75",
        )
    )

    # Single row `test_steps` from the end. `-test_steps + 1` is 0 when
    # test_steps == 1, and `iloc[-1:0]` would be empty (no rule drawn), so an
    # open-ended slice is used in that case.
    rule_stop = -test_steps + 1 or None
    rule = (
        alt.Chart(source[["time"]].iloc[-test_steps:rule_stop])
        .mark_rule(size=1.0, color="#FF0000", strokeDash=[2, 2])
        .encode(x="time:T")
    )

    title = "Demand Forecast"
    if plot_id is not None:
        title += f" for '{plot_id}'"
    if model_name is not None:
        title = f"{model_name}: " + title

    return (points + line + band_90 + band_50 + rule).properties(title=title, width=1000, height=300)

In [None]:
# Positions (within `test_sampled_ids` / `forecasts`) of the series to plot.
plot_indices = [2, 4, 8]
# Quantiles shown as bands, and the column labels plot_forecast() reads.
quantile_levels = [0.05, 0.25, 0.75, 0.95]
forecast_columns = [
    "time",
    "fcst",
    "fcst_lower_05",
    "fcst_lower_25",
    "fcst_upper_75",
    "fcst_upper_95",
]
plots = []

for plot_index in plot_indices:
    plot_id = test_sampled_ids[plot_index]
    series_forecast = forecasts[plot_index]

    # Observed demand of this series with a datetime "time" column.
    y = target[[plot_id]].reset_index()
    y.columns = ["time", "y"]
    y["time"] = pd.to_datetime(y["time"])

    # One row for the mean and one per quantile, transposed so that every
    # forecast time step becomes a row.
    rows = [series_forecast.mean]
    rows.extend(series_forecast.quantile(level) for level in quantile_levels)
    forecast = pd.DataFrame(rows, columns=series_forecast.index).T.reset_index()
    forecast.columns = forecast_columns

    # Keep every observed row; forecast columns stay empty where no forecast matches.
    source = y.merge(forecast, how="left")
    p = plot_forecast(
        source,
        test_steps,
        plot_id=plot_id,
        model_name="DeepAR",
        start_date="2015-05-23",
    )

    plots.append(p)

alt.VConcatChart(vconcat=plots)

# DeepVAR: Model Training

In [None]:
# One grouper per dataset, each allowing up to `num_series` target dimensions.
train_grouper = MultivariateGrouper(max_target_dim=num_series)
test_grouper = MultivariateGrouper(max_target_dim=num_series)

# NOTE: both names are rebound to the grouped datasets, so this cell must be
# run exactly once after the univariate datasets were built.
train_dataset = train_grouper(train_dataset)
test_dataset = test_grouper(test_dataset)

In [None]:
# Training-loop settings for the DeepVAR estimator defined next.
trainer = Trainer(
    ctx=device,
    learning_rate=0.001,
    epochs=200,
    num_batches_per_epoch=50,
    hybridize=True,
)

In [None]:
deepvar_estimator = DeepVAREstimator(
    # Data layout: daily data, `num_series` target dimensions, forecasting
    # `test_steps` days from a context window twice that long.
    freq="D",
    target_dim=num_series,
    prediction_length=test_steps,
    context_length=2 * test_steps,
    cardinality=cardinality,
    # Network architecture.
    cell_type="lstm",
    num_layers=2,
    num_cells=40,
    dropout_rate=0.2,
    # Output distribution and optimisation.
    distr_output=LowrankMultivariateGaussianOutput(dim=num_series, rank=5),
    trainer=trainer,
    batch_size=30,
)

In [None]:
%%time
deepvar_predictor = deepvar_estimator.train(train_dataset)

# Store the trained predictor in MODEL_PATH/deepvar, creating the directory
# (and any missing parents) first.
deepvar_dir = Path(MODEL_PATH) / "deepvar"
deepvar_dir.mkdir(parents=True, exist_ok=True)
deepvar_predictor.serialize(deepvar_dir)

# DeepVAR: Model Evaluation

In [None]:
# Reload the predictor that was serialized to MODEL_PATH/deepvar.
deepvar_predictor = Predictor.deserialize(Path(MODEL_PATH) / "deepvar")

In [None]:
%%time
# Forecast the grouped backtest dataset with the DeepVAR predictor,
# 100 samples per forecast.
forecast_iter, ts_iter = make_evaluation_predictions(
    predictor=deepvar_predictor,
    dataset=test_dataset,
    num_samples=100,
)
# Materialise both results as lists (forecasts first, then the series).
forecasts, tss = list(forecast_iter), list(ts_iter)

In [None]:
# Aggregate metrics of the DeepVAR forecasts, with quantile metrics at the
# 10%, 50% and 90% levels; `np.mean` is passed as the aggregation function.
evaluator = MultivariateEvaluator(quantiles=[0.1, 0.5, 0.9])
agg_metrics = evaluator.calculate_aggregate_multivariate_metrics(
    iter(tss),
    iter(forecasts),
    np.mean,
)

In [None]:
# Report every aggregate metric that is not NaN, one "name: value" per line.
string = ""
for key, value in agg_metrics.items():
    if not np.isnan(value):
        string += f"{key}: {value:.4f}\n"

# Drop only the trailing newline; slicing two characters off the end would
# also cut the last digit of the final metric.
print(string.rstrip("\n"))

In [None]:
predictions = [forecast.mean for forecast in forecasts]
# Only the first forecast is used. It is transposed so that its columns line
# up with the day columns of the test frame.
joint_mean = predictions[0].T
df_pred_sampled = pd.DataFrame(
    data=joint_mean,
    columns=df_test_sampled.columns,
)

wrmsse = wrmsse_evaluator.score(df_pred_sampled)

print(f"DeepVAR WRMSSE: {wrmsse:.6f}")

In [None]:
# Positions (within `test_sampled_ids`) of the series to plot.
plot_indices = [2, 4, 8]
plots = []

for plot_index in plot_indices:
    plot_id = test_sampled_ids[plot_index]

    # Observed demand of this series with a datetime "time" column.
    y = target[[plot_id]].reset_index()
    y.columns = ["time", "y"]
    y["time"] = pd.to_datetime(y["time"])

    # The joint forecast arrays are laid out as (time step, series): the WRMSSE
    # cell transposes the same mean array to obtain one column per forecast
    # day. Select the series along the second axis; indexing the first axis
    # would pick a single time step across all series instead.
    forecast = pd.DataFrame(
        [forecasts[0].mean[:, plot_index]]
        + [forecasts[0].quantile(p)[:, plot_index] for p in [0.05, 0.25, 0.75, 0.95]],
        columns=forecasts[0].index,
    ).T.reset_index()
    forecast.columns = [
        "time",
        "fcst",
        "fcst_lower_05",
        "fcst_lower_25",
        "fcst_upper_75",
        "fcst_upper_95",
    ]

    # Keep every observed row; forecast columns stay empty where no forecast matches.
    source = y.merge(forecast, how="left")
    p = plot_forecast(
        source, test_steps, plot_id=plot_id, model_name="DeepVAR", start_date="2015-05-23"
    )

    plots.append(p)

alt.VConcatChart(vconcat=plots)

# DeepAR: Prediction

In [None]:
# --- Event features, refit for the final prediction -------------------------
feature_names = ["event_name_1", "event_type_1", "event_name_2", "event_type_2"]
events = calendar[["date"] + feature_names].fillna("NA")
# Fit the encoder on rows dated before the last `test_steps` calendar dates.
train = events[events["date"] < dates[-test_steps]][feature_names]

encoder = HashingEncoder(drop_invariant=True)
_ = encoder.fit(train)
encoded = encoder.transform(events[feature_names])
events = pd.concat([events[["date"]], encoded], axis=1)

# `time_related` is the calendar-feature frame built in the preprocessing section.
feat_dynamic_cat = events.merge(time_related).set_index("date")

scaler = MinMaxScaler()
scaled = scaler.fit_transform(feat_dynamic_cat)
feat_dynamic_cat = pd.DataFrame(
    scaled, columns=feat_dynamic_cat.columns, index=feat_dynamic_cat.index
)
# Recompute the column count: the encoder was refit with drop_invariant=True
# on a different window, so the count computed earlier may no longer apply.
n_feat_dynamic_cat = feat_dynamic_cat.shape[1]

In [None]:
# Attach each series ID to its price rows (joined on the shared columns).
prices = (
    df_train_eval[["id", "store_id", "item_id"]]
    .merge(selling_prices, how="left")
    .drop(["store_id", "item_id"], axis=1)
)
# Expand the weekly prices to one row per calendar date.
week_to_date = calendar[["date", "wm_yr_wk"]].drop_duplicates()
prices = week_to_date.merge(prices, how="left").drop(
    ["wm_yr_wk"], axis=1
)

# Fit the scaler on rows dated before the last `test_steps` calendar dates.
scaler = MinMaxScaler()
train = prices[prices["date"] < dates[-test_steps]][["sell_price"]]

_ = scaler.fit(train)
prices["sell_price"] = scaler.transform(prices[["sell_price"]])
prices = prices.pivot(index="date", columns="id", values="sell_price")
# Back-fill missing prices with the next available one.
# `DataFrame.bfill()` replaces the deprecated `fillna(method="bfill")`.
prices = prices.bfill()

# `snap` is reused unchanged from the preprocessing section.
feat_dynamic_real = pd.concat([prices, snap], axis=1)

In [None]:
def _submission_entry(series_id, target_values, covariate_stop):
    """Assemble the dataset entry of one series.

    `target_values` becomes the target; the dynamic features are the calendar
    features followed by the per-series features of `series_id`, both cut off
    at row position `covariate_stop` (`None` keeps every row).
    """
    calendar_part = split_into_n_array(
        feat_dynamic_cat.iloc[:covariate_stop],
        n_feat_dynamic_cat,
    )
    series_part = split_into_n_array(
        feat_dynamic_real[series_id].iloc[:covariate_stop],
        n_feat_dynamic_real,
    )
    return {
        FieldName.TARGET: target_values,
        FieldName.START: target.index[0],
        FieldName.FEAT_DYNAMIC_REAL: calendar_part + series_part,
        FieldName.FEAT_STATIC_CAT: feat_static_cat[series_id].values,
    }


# Training entries: the complete target and the features without their last
# `test_steps` rows.
train_list = []
for each_id in all_ids:
    dict_by_id = _submission_entry(
        each_id,
        target[each_id].values,
        -test_steps,
    )
    train_list.append(dict_by_id)

# Prediction entries: the target padded with `test_steps` NaNs and all
# feature rows.
test_list = []
for each_id in all_ids:
    dict_by_id = _submission_entry(
        each_id,
        np.append(target[each_id].values, np.repeat(np.nan, test_steps)),
        None,
    )
    test_list.append(dict_by_id)

train_dataset = ListDataset(train_list, freq="D")
test_dataset = ListDataset(test_list, freq="D")

In [None]:
# Training-loop settings for the final DeepAR estimator defined next.
trainer = Trainer(
    ctx=device,
    learning_rate=0.001,
    epochs=200,
    num_batches_per_epoch=50,
    hybridize=True,
)

In [None]:
deepar_estimator = DeepAREstimator(
    # Data layout: daily series, forecasting `test_steps` days from a context
    # window twice that long.
    freq="D",
    prediction_length=test_steps,
    context_length=2 * test_steps,
    # Which feature fields of the dataset entries are used.
    use_feat_dynamic_real=True,
    use_feat_static_cat=True,
    use_feat_static_real=False,
    cardinality=cardinality,
    # Network architecture.
    cell_type="lstm",
    num_layers=2,
    num_cells=40,
    dropout_rate=0.2,
    # Output distribution and optimisation.
    distr_output=NegativeBinomialOutput(),
    trainer=trainer,
    batch_size=30,
)

In [None]:
%%time
# Train the final DeepAR model on the dataset built from all series IDs.
# Unlike the backtest run, this predictor is not serialized to MODEL_PATH.
deepar_predictor = deepar_estimator.train(train_dataset)

In [None]:
%%time
# Forecast every series of the prediction dataset with the final DeepAR
# predictor, 100 samples per forecast.
forecast_iter, ts_iter = make_evaluation_predictions(
    predictor=deepar_predictor,
    dataset=test_dataset,
    num_samples=100,
)
# Materialise both results as lists (forecasts first, then the series).
forecasts, tss = list(forecast_iter), list(ts_iter)

In [None]:
# Submission columns F1 ... F<test_steps>, derived from the forecast horizon
# instead of a hard-coded 28.
column_names = ["F" + str(i) for i in range(1, test_steps + 1)]

# "validation" rows: the last `test_steps` observed days of every series.
# Copy the selection so the assignments below do not target a slice of
# `df_train_eval`.
valid_submission = df_train_eval.loc[
    :, ["id"] + date_names[-test_steps:]
].copy()
valid_submission.columns = ["id"] + column_names
# Literal (non-regex) replacement of the ID suffix.
valid_submission["id"] = valid_submission["id"].str.replace(
    "evaluation", "validation", regex=False
)

# "evaluation" rows: the mean forecast of every series, in `all_ids` order.
eval_submission = pd.DataFrame(
    [forecast.mean for forecast in forecasts],
    columns=column_names,
    index=all_ids,
).reset_index()
eval_submission.columns = ["id"] + eval_submission.columns[1:].tolist()

submission = pd.concat([valid_submission, eval_submission])
submission.to_csv("submission.csv", index=False)