In [1]:
# Enable IPython's autoreload extension in mode 2 so edited modules
# (e.g. the local `utils` package imported below) are reloaded automatically.
%load_ext autoreload
%autoreload 2

In [2]:
import multiprocessing
import os
import warnings
import numpy as np
import pandas as pd
import altair as alt
import kats.utils.time_series_parameter_tuning as tspt
from sklearn.metrics import (
    mean_absolute_error, 
    mean_squared_error, 
    mean_tweedie_deviance,
)
from sklearn.model_selection import TimeSeriesSplit
from kats.consts import SearchMethodEnum, TimeSeriesData
from kats.models.prophet import ProphetModel, ProphetParams
from kats.utils.backtesters import BackTesterSimple
from kats.utils.parameter_tuning_utils import (
    get_default_prophet_parameter_search_space,
    get_default_var_parameter_search_space,
)
from kats.models.var import VARModel, VARParams
from pandarallel import pandarallel
from utils.evaluation import calc_eval_metric, WRMSSEEvaluator
from utils.misc import dump_pickle, load_pickle

# Seed NumPy's global RNG; the ID sampling further down draws from it via np.random.choice.
np.random.seed(42)
# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")

The Kaggle dataset was downloaded in advance to the local directory `~/data/mofc-demand-forecast`; `DATA_PATH` below must point to that directory.

In [3]:
# Notebook-wide configuration.
DATA_PATH = "../../data/mofc-demand-forecast"  # directory holding the Kaggle CSV files
MODEL_PATH = "models"  # directory where tuning results are stored
TUNE_PARAMS = True  # False: reuse the pickled tuning results instead of re-tuning

# Only these three files are needed here; sales_train_validation.csv and
# sample_submission.csv are intentionally not loaded.
calendar, selling_prices, df_train_eval = (
    pd.read_csv(os.path.join(DATA_PATH, csv_name))
    for csv_name in (
        "calendar.csv",
        "sell_prices.csv",
        "sales_train_evaluation.csv",
    )
)

In [4]:
# Identifier columns of the sales table and the day columns d_1 .. d_1941.
key_names = ["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]
date_names = ["d_" + str(i) for i in range(1, 1942)]
all_ids = df_train_eval["id"].unique()
test_steps = 28  # number of trailing days held out as the forecast horizon

# Fractions of all series used for hyperparameter tuning and for the final evaluation.
valid_sample_ratio = 0.001
test_sample_ratio = 0.01


def _sample_ids(ids, ratio):
    """Return IDs as a list: all of them when ``ratio`` is 1.0, else a random subset.

    The subset is drawn without replacement from NumPy's global RNG and has
    ``round(ratio * len(ids))`` elements. Always returning a list keeps the
    type of the result independent of the ratio.
    """
    if ratio == 1.0:
        return list(ids)
    return np.random.choice(ids, round(ratio * len(ids)), replace=False).tolist()


# Draw the validation sample first, then the test sample, so that the sequence
# of draws from the seeded global RNG stays the same.
valid_sampled_ids = _sample_ids(all_ids, valid_sample_ratio)
test_sampled_ids = _sample_ids(all_ids, test_sample_ratio)

print(
    f"{len(valid_sampled_ids)} out of {len(all_ids)} IDs were selected for validation, and {len(test_sampled_ids)} out of {len(all_ids)} IDs were selected for testing."
)

30 out of 30490 IDs were selected for validation, and 305 out of 30490 IDs were selected for testing.


# Data Preprocessing

In [5]:
# Lookup from the day label in calendar column "d" to its calendar date.
date_dict = calendar.set_index("d")["date"].to_dict()

# Drop the last `test_steps` days, then transpose so that every row is one
# day and every column is one series id.
df_train = (
    df_train_eval[key_names[:1] + date_names[:-test_steps]]
    .set_index("id")
    .T.reset_index()
)

# Translate the day labels in the former column index into dates and expose
# them as the leading "time" column.
df_train["index"] = df_train["index"].replace(date_dict)
df_train.columns = ["time", *df_train.columns[1:]]
df_train.index.name = ""

# Shared time axis, re-attached to individual series by the model helpers.
series_time = df_train["time"]

In [6]:
# Calendar rows that carry at least one event name.
indices = calendar[["event_name_1", "event_name_2"]].dropna(how="all").index

# One row per (event, date): stack both event columns, drop the empty slots,
# order by date and rename to the "holiday"/"ds" layout passed to ProphetParams.
holidays = (
    calendar.loc[indices, ["date", "event_name_1", "event_name_2"]]
    .melt(
        id_vars="date",
        value_vars=["event_name_1", "event_name_2"],
        value_name="holiday",
    )
    .dropna()
    .loc[:, ["holiday", "date"]]
    .sort_values("date")
    .reset_index(drop=True)
    .rename(columns={"date": "ds"})
    # Holiday window columns: from the event day itself through one day after it.
    .assign(lower_window=0, upper_window=1)
)

# Baseline: Model Evaluation

In [7]:
# Rows of the sampled test series, in the order given by test_sampled_ids.
df_sampled = df_train_eval.set_index("id").loc[test_sampled_ids].reset_index()
# History: key columns plus every day except the last `test_steps` days.
df_train_sampled = df_sampled.loc[:, key_names + date_names[:-test_steps]]
# Ground truth: the last `test_steps` days only.
df_test_sampled = df_sampled.loc[:, date_names[-test_steps:]]

# Project helper from utils.evaluation; it is reused below to score every model.
# NOTE(review): presumably precomputes the WRMSSE weights/scales for the sampled series — confirm in utils.evaluation.
evaluator = WRMSSEEvaluator(df_train_sampled, df_test_sampled, calendar, selling_prices, test_steps)

  0%|          | 0/12 [00:00<?, ?it/s]

In [8]:
# Mean method: every horizon day is forecast as the series' historical mean.
df_pred_sampled = pd.DataFrame(
    np.tile(
        df_train_sampled[date_names[:-test_steps]].mean(axis=1).to_numpy()[:, None],
        (1, test_steps),
    ),
    columns=df_test_sampled.columns,
)
wrmsse = evaluator.score(df_pred_sampled)
eval_metrics = calc_eval_metric(df_test_sampled, df_pred_sampled)

print(f"Mean Method WRMSSE: {wrmsse:.6f}")
display(eval_metrics.describe())

Mean Method WRMSSE: 1.598445


Unnamed: 0,mae,rmse,smape,mase
count,305.0,305.0,305.0,293.0
mean,1.199681,1.535152,1.461574,1.070848
std,1.646749,1.952234,0.459922,1.069985
min,0.031364,0.031364,0.453855,0.516798
25%,0.418658,0.564439,1.079742,0.744331
50%,0.723004,0.95103,1.607044,0.864268
75%,1.245538,1.635186,1.88978,1.015874
max,14.438877,16.306426,2.0,14.95777


In [9]:
# Naive method: every horizon day repeats the last observed training value.
df_pred_sampled = pd.DataFrame(
    np.tile(
        df_train_sampled[date_names[-test_steps - 1 : -test_steps]].to_numpy(),
        (1, test_steps),
    ),
    columns=df_test_sampled.columns,
)
wrmsse = evaluator.score(df_pred_sampled)
eval_metrics = calc_eval_metric(df_test_sampled, df_pred_sampled)

print(f"Naive Method WRMSSE: {wrmsse:.6f}")
display(eval_metrics.describe())

Naive Method WRMSSE: 1.342978


Unnamed: 0,mae,rmse,smape,mase
count,305.0,305.0,293.0,293.0
mean,1.372951,1.743987,1.507373,1.107744
std,1.797061,1.976978,0.539195,0.926748
min,0.0,0.0,0.333798,0.482143
25%,0.321429,0.681385,0.990476,0.642857
50%,0.821429,1.08562,1.690476,0.821429
75%,1.607143,1.88035,2.0,1.145089
max,13.928571,14.711512,2.0,8.747449


# Prophet: Hyperparameter Tuning

In [10]:
def fit_and_predict(df, steps, params, include_history=False):
    """Fit a Kats Prophet model on ``df`` and forecast ``steps`` daily periods.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``time`` column; it is wrapped in a ``TimeSeriesData``
        sorted by time.
    steps : int
        Number of periods to forecast.
    params : ProphetParams
        Parameters handed to ``ProphetModel``.
    include_history : bool, default False
        Forwarded unchanged to ``ProphetModel.predict``.

    Returns
    -------
    tuple
        ``(fitted_model, forecast)`` where ``forecast`` is whatever
        ``ProphetModel.predict`` returns.
    """
    series = TimeSeriesData(df=df, sort_by_time=True, time_col_name="time")

    fitted = ProphetModel(data=series, params=params)
    fitted.fit()

    return fitted, fitted.predict(
        steps=steps,
        include_history=include_history,
        freq="D",
    )
    

def get_func(prophet_params, train_index, test_index):
    """Build a per-series loss function for one cross-validation fold.

    Parameters
    ----------
    prophet_params : ProphetParams
        Candidate hyperparameters to evaluate.
    train_index, test_index : array-like of int
        Positional row indices of the fold (as produced by
        ``TimeSeriesSplit.split``). The forecast horizon is
        ``len(test_index)`` steps after the last training row.

    Returns
    -------
    callable
        ``calc_model_loss(y)``: takes one demand series aligned with the
        module-level ``series_time`` and returns the mean Tweedie deviance
        (power 1.5) of the Prophet forecast on the test rows.
    """

    def calc_model_loss(y):
        eps = 1e-6

        df = pd.concat([series_time, y], axis=1)
        df.columns = ["time", "y"]

        # The fold indices are positions, not labels, so select with .iloc;
        # .loc only gave the same rows while the index was 0..n-1.
        _, forecast = fit_and_predict(
            df.iloc[train_index],
            len(test_index),
            prophet_params,
        )

        y_true = df["y"].iloc[test_index].to_numpy()
        # The deviance is evaluated on strictly positive predictions only,
        # so values below eps are raised to eps.
        y_pred = np.clip(forecast["fcst"].to_numpy(), eps, None)

        return mean_tweedie_deviance(y_true, y_pred, power=1.5)

    return calc_model_loss


def evaluation_function(params):
    """Score one hyperparameter candidate by 3-fold time-series cross-validation.

    Parameters
    ----------
    params : dict
        Must contain ``n_changepoints``, ``changepoint_range``,
        ``holidays_prior_scale``, ``seasonality_mode``,
        ``seasonality_prior_scale`` and ``changepoint_prior_scale``.

    Returns
    -------
    float
        Mean over the folds of the mean per-series loss, computed on the
        columns ``valid_sampled_ids`` of the module-level ``df_train``.
    """
    candidate = ProphetParams(
        n_changepoints=params["n_changepoints"],
        changepoint_range=params["changepoint_range"],
        yearly_seasonality="auto",
        weekly_seasonality="auto",
        daily_seasonality="auto",
        holidays=holidays,
        holidays_prior_scale=params["holidays_prior_scale"],
        seasonality_mode=params["seasonality_mode"],
        seasonality_prior_scale=params["seasonality_prior_scale"],
        changepoint_prior_scale=params["changepoint_prior_scale"],
        floor=0.0,
    )

    splitter = TimeSeriesSplit(n_splits=3)
    validation_series = df_train[valid_sampled_ids]

    # One entry per fold: the loss averaged over all validation series.
    fold_losses = [
        validation_series.apply(get_func(candidate, train_index, test_index)).mean()
        for train_index, test_index in splitter.split(df_train)
    ]

    return np.mean(fold_losses)

In [11]:
%%time
%%capture
os.makedirs(MODEL_PATH, exist_ok=True)

if TUNE_PARAMS:
    parameters = get_default_prophet_parameter_search_space()
    parameters = parameters[:1] + parameters[4:]
    parameters[0]["values"] = parameters[0]["values"] + [25.0, 50.0, 100.0]
    parameters.append(
        {
            "name": "n_changepoints",
            "type": "choice",
            "value_type": "int",
            "values": [5, 10, 25, 50, 100],
            "is_ordered": True,
        }
    )
    parameters.append(
        {
            "name": "holidays_prior_scale",
            "type": "choice",
            "value_type": "float",
            "values": [0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5, 10.0, 25.0, 50.0, 100.0],
            "is_ordered": True,
        }
    )

    parameter_tuner = tspt.SearchMethodFactory.create_search_method(
        parameters=parameters,
        evaluation_function=evaluation_function,
        bootstrap_size=10,
        selected_search_method=SearchMethodEnum.BAYES_OPT,
        seed=42,
        multiprocessing=True,
    )

    parameter_tuner.generate_evaluate_new_parameter_values(
        evaluation_function=evaluation_function,
        arm_count=20,
    )

    tuning_results = parameter_tuner.list_parameter_value_scores()
    
    os.makedirs(os.path.join(MODEL_PATH, "prophet"), exist_ok=True)
    dump_pickle(os.path.join(MODEL_PATH, "prophet", "prophet_tuning_results.pkl"), tuning_results)

else:
    tuning_results = load_pickle(os.path.join(MODEL_PATH, "prophet", "prophet_tuning_results.pkl"))

best_params = tuning_results.loc[tuning_results["mean"].argmin(), "parameters"]

CPU times: user 19 s, sys: 3.15 s, total: 22.1 s
Wall time: 1h 15min 30s


In [12]:
# Flatten the per-trial parameter dicts into columns and put the trial's
# mean score next to them under the name "loss".
summary = pd.concat(
    [pd.json_normalize(tuning_results["parameters"]), tuning_results["mean"]],
    axis=1,
).rename(columns={"mean": "loss"})

# Ten best trials, lowest loss first.
display(summary.sort_values("loss").head(10))

Unnamed: 0,seasonality_prior_scale,changepoint_prior_scale,changepoint_range,n_changepoints,holidays_prior_scale,seasonality_mode,loss
11,0.01,0.046416,0.93,5,100.0,multiplicative,684.15531
12,0.01,0.046416,0.88,5,100.0,multiplicative,695.869638
16,0.01,0.1,0.92,5,100.0,multiplicative,763.245191
1,0.01,0.1,0.91,5,100.0,multiplicative,783.640619
15,0.01,0.1,0.91,5,100.0,multiplicative,783.640619
14,0.01,0.1,0.9,5,100.0,multiplicative,799.021134
3,0.464159,0.1,0.82,10,0.25,additive,863.044868
6,0.1,0.1,0.87,5,1.0,additive,982.879381
0,0.215443,0.215443,0.89,5,0.05,multiplicative,986.245485
4,0.464159,0.002154,0.88,25,0.01,multiplicative,1000.860651


# Prophet: Model Evaluation

In [13]:
# Final Prophet configuration: the tuned values come from `best_params`; the
# remaining settings are the same fixed ones used during tuning.
prophet_params = ProphetParams(
    yearly_seasonality="auto",
    weekly_seasonality="auto",
    daily_seasonality="auto",
    holidays=holidays,
    floor=0.0,
    **{
        tuned_name: best_params[tuned_name]
        for tuned_name in (
            "n_changepoints",
            "changepoint_range",
            "seasonality_mode",
            "seasonality_prior_scale",
            "holidays_prior_scale",
            "changepoint_prior_scale",
        )
    },
)

In [14]:
def backtest(y):
    """Run a simple Kats backtest of the Prophet model on one demand series.

    Reads the module-level ``series_time``, ``train_percentage``,
    ``test_percentage``, ``error_methods`` and ``prophet_params``.

    Parameters
    ----------
    y : pandas.Series
        Demand values aligned with ``series_time``.

    Returns
    -------
    list
        The values of ``backtester.errors`` in the dictionary's own order.
        NOTE(review): callers label these with ``error_methods`` — confirm
        that Kats keeps the errors in that order.
    """
    frame = pd.concat([series_time, y], axis=1)
    frame.columns = ["time", "y"]

    runner = BackTesterSimple(
        train_percentage=train_percentage,
        test_percentage=test_percentage,
        error_methods=error_methods,
        data=TimeSeriesData(df=frame, sort_by_time=True, time_col_name="time"),
        params=prophet_params,
        model_class=ProphetModel,
    )
    runner.run_backtest()

    return list(runner.errors.values())


def predict(y):
    """Fit Prophet on one demand series and return its point forecasts.

    Reads the module-level ``series_time``, ``test_steps`` and
    ``prophet_params``.

    Parameters
    ----------
    y : pandas.Series
        Demand values aligned with ``series_time``.

    Returns
    -------
    numpy.ndarray
        The ``fcst`` column of the forecast for the next ``test_steps`` periods.
    """
    frame = pd.concat([series_time, y], axis=1)
    frame.columns = ["time", "y"]

    _, horizon_forecast = fit_and_predict(frame, test_steps, prophet_params)

    return horizon_forecast["fcst"].values

In [15]:
%%time
%%capture
pandarallel.initialize(
    nb_workers=multiprocessing.cpu_count() - 1,
    progress_bar=False,
    verbose=0,
)

train_percentage = 100 * len(date_names) / (len(date_names) + test_steps)
test_percentage = 100 - train_percentage
error_methods = ["mape", "smape", "mae", "mase", "mse", "rmse"]

backtests = df_train[test_sampled_ids].parallel_apply(backtest, result_type="reduce")

parsed = dict()
for index, values in backtests.iteritems():
    parsed[index] = list(values)
backtests = pd.DataFrame(parsed, index=error_methods).T

display(backtests.describe())

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=Tru

CPU times: user 801 ms, sys: 310 ms, total: 1.11 s
Wall time: 16min 34s


In [16]:
%%time
%%capture
predictions = df_train[test_sampled_ids].parallel_apply(predict, result_type="reduce")

parsed = dict()
for index, values in predictions.iteritems():
    parsed[index] = list(values)
predictions = pd.DataFrame(parsed).iloc[-test_steps:, :]

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=Tru

CPU times: user 660 ms, sys: 267 ms, total: 927 ms
Wall time: 13min 30s


In [17]:
# Arrange the Prophet forecasts as one row per sampled series (in the order of
# test_sampled_ids) with the same day columns as the ground truth.
df_pred_sampled = predictions.T
df_pred_sampled = df_pred_sampled.loc[test_sampled_ids]
df_pred_sampled.columns = df_test_sampled.columns
df_pred_sampled.index = range(len(test_sampled_ids))

wrmsse = evaluator.score(df_pred_sampled)
eval_metrics = calc_eval_metric(df_test_sampled, df_pred_sampled)

# The label previously read "Propeht".
print(f"Prophet WRMSSE: {wrmsse:.6f}")
display(eval_metrics.describe())

Propeht WRMSSE: 1.081953


Unnamed: 0,mae,rmse,smape,mase
count,305.0,305.0,305.0,293.0
mean,1.101399,1.441043,1.417434,1.026926
std,1.180822,1.55816,0.495334,0.611804
min,0.02641,0.026411,0.253061,0.531564
25%,0.438196,0.566929,0.975264,0.763699
50%,0.764916,0.983803,1.552525,0.865755
75%,1.273994,1.68852,1.883987,1.069642
max,7.388481,10.758758,2.38113,6.087151


In [18]:
def plot_forecast(source, test_steps, plot_id=None, model_name=None, start_date=None):
    """Build a layered Altair chart of observed demand and its forecast.

    Parameters
    ----------
    source : pandas.DataFrame
        Must provide the columns ``time``, ``y``, ``fcst``, ``fcst_lower``
        and ``fcst_upper``.
    test_steps : int
        Number of trailing rows that form the forecast horizon. A dashed red
        rule marks the first of these rows; no rule is drawn when
        ``test_steps`` is 0 or exceeds the number of (filtered) rows.
    plot_id : str, optional
        Series identifier appended to the chart title.
    model_name : str, optional
        Model name prepended to the chart title.
    start_date : optional
        When given, only rows with ``time >= start_date`` are plotted.

    Returns
    -------
    altair layered chart
        Points (observations), line (forecast), band (forecast interval) and
        the boundary rule, 1000x300 pixels.
    """
    if start_date is not None:
        source = source[source["time"] >= start_date]

    points = (
        alt.Chart(source)
        .mark_circle(size=10.0, color="#000000")
        .encode(
            x=alt.X("time:T", axis=alt.Axis(title="Date")),
            y=alt.Y("y", axis=alt.Axis(title="Demand")),
            tooltip=["time:T", "y:Q"],
        )
    )

    line = (
        alt.Chart(source)
        .mark_line(size=1.0, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst",
        )
    )

    band = (
        alt.Chart(source)
        .mark_area(opacity=0.25, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst_lower",
            y2="fcst_upper",
        )
    )

    # Locate the first horizon row by absolute position. The former negative
    # slice ``iloc[-test_steps : -test_steps + 1]`` was empty for
    # test_steps == 1 and selected the very first row for test_steps == 0.
    split_pos = len(source) - test_steps
    if split_pos >= 0:
        boundary = source[["time"]].iloc[split_pos : split_pos + 1]
    else:
        boundary = source[["time"]].iloc[:0]

    rule = (
        alt.Chart(boundary)
        .mark_rule(size=1.0, color="#FF0000", strokeDash=[2, 2])
        .encode(x="time:T")
    )

    title = "Demand Forecast"
    if plot_id is not None:
        title += f" for '{plot_id}'"
    if model_name is not None:
        title = f"{model_name}: " + title

    return (points + line + band + rule).properties(title=title, width=1000, height=300)

In [19]:
# Positions (within test_sampled_ids) of the series to visualise.
plot_indices = [2, 4, 8]
plots = []

for plot_index in plot_indices:
    plot_id = test_sampled_ids[plot_index]

    # Training history of this series in the (time, y) layout used by fit_and_predict.
    df = df_train[["time", plot_id]]
    df.columns = ["time", "y"]

    model, forecast = fit_and_predict(df, test_steps, prophet_params, include_history=True)

    # Observed demand over all days (including the hold-out), keyed by date.
    y = df_train_eval.loc[df_train_eval["id"] == plot_id, date_names].T
    y.columns = ["y"]
    y = calendar.merge(y, left_on="d", right_index=True)[["date", "y"]]
    y["time"] = pd.to_datetime(y["date"])

    # Left-merge so every observed day is kept even without a forecast value.
    source = y.merge(forecast, how="left").drop(columns=["date"])
    p = plot_forecast(
        source,
        test_steps,
        plot_id=plot_id,
        model_name="Prophet",
        start_date="2015-05-23",
    )

    plots.append(p)

alt.VConcatChart(vconcat=plots)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


# VAR: Model Evaluation

In [20]:
%%time
var_params = VARParams(trend="ct")

df = pd.concat([series_time, df_train[test_sampled_ids]], axis=1)
sales_ts = TimeSeriesData(
    df=df, sort_by_time=True, time_col_name="time"
)

model = VARModel(data=sales_ts, params=var_params)
model.fit()

forecast = model.predict(
    steps=test_steps,
    include_history=True,
    freq="D",
)

CPU times: user 18.4 s, sys: 524 ms, total: 18.9 s
Wall time: 5.56 s


* The Kats `backtesters` module currently supports only univariate models, so it was not used for *VAR*, which is a multivariate model.

In [21]:
# Collect the "fcst" values of every sampled series from the VAR forecast.
parsed = {
    test_sampled_id: forecast[test_sampled_id].to_dataframe()["fcst"].values.tolist()
    for test_sampled_id in test_sampled_ids
}
# The forecast includes history, so keep only the last `test_steps` rows.
predictions = pd.DataFrame(parsed).iloc[-test_steps:, :]

In [22]:
# One row per sampled series (ordered like test_sampled_ids), labelled with
# the same day columns as the ground truth and a fresh 0..n-1 index.
df_pred_sampled = predictions.T.loc[test_sampled_ids]
df_pred_sampled.columns = df_test_sampled.columns
df_pred_sampled = df_pred_sampled.reset_index(drop=True)

wrmsse = evaluator.score(df_pred_sampled)
eval_metrics = calc_eval_metric(df_test_sampled, df_pred_sampled)

print(f"VAR WRMSSE: {wrmsse:.6f}")
display(eval_metrics.describe())

VAR WRMSSE: 1.287645


Unnamed: 0,mae,rmse,smape,mase
count,305.0,305.0,305.0,293.0
mean,1.554544,1.950154,2.381751,1.68714
std,1.665845,2.053308,3.503832,1.748101
min,0.190502,0.239445,0.318231,0.569255
25%,0.628939,0.806562,1.360025,0.998723
50%,1.049182,1.323544,1.87317,1.233765
75%,1.791731,2.257092,2.075877,1.688981
max,11.733371,13.7719,41.027928,18.139414


In [23]:
# Positions (within test_sampled_ids) of the series to visualise.
plot_indices = [2, 4, 8]
plots = []

for plot_index in plot_indices:
    plot_id = test_sampled_ids[plot_index]

    # Observed demand over all days (including the hold-out), keyed by date.
    y = df_train_eval.loc[df_train_eval["id"] == plot_id, date_names].T
    y.columns = ["y"]
    y = calendar.merge(y, left_on="d", right_index=True)[["date", "y"]]
    y["time"] = pd.to_datetime(y["date"])

    # Left-merge so every observed day is kept even without a forecast value.
    source = y.merge(forecast[plot_id].to_dataframe(), how="left").drop(columns=["date"])

    p = plot_forecast(
        source,
        test_steps,
        plot_id=plot_id,
        model_name="VAR",
        start_date="2015-05-23",
    )
    plots.append(p)

alt.VConcatChart(vconcat=plots)