In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import multiprocessing
import os
import warnings
import altair as alt
import numpy as np
import pandas as pd
import kats.utils.time_series_parameter_tuning as tspt
from sklearn.metrics import mean_tweedie_deviance
from sklearn.model_selection import TimeSeriesSplit
from kats.consts import SearchMethodEnum, TimeSeriesData
from kats.models.prophet import ProphetModel, ProphetParams
from kats.utils.backtesters import BackTesterSimple
from kats.utils.parameter_tuning_utils import (
    get_default_prophet_parameter_search_space,
    get_default_var_parameter_search_space,
)
from kats.models.var import VARModel, VARParams
from pandarallel import pandarallel
from utils.evaluation import WRMSSEEvaluator
from utils.misc import dump_pickle, load_pickle

# Seed NumPy's global RNG so the np.random.choice sampling further down is repeatable.
np.random.seed(42)
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# NOTE(review): only the altair version is read at runtime; the kats and pandarallel
# versions are hard-coded in this string and may drift from what is actually installed.
print(f"<VERSION>\naltair: {alt.__version__}, kats: 0.1.0, pandarallel: 1.5.2")

<VERSION>
altair: 4.1.0, kats: 0.1.0, pandarallel: 1.5.2


The Kaggle dataset was saved in the local directory `~/data/mofc-sales-forecast` in advance.

In [3]:
# Directory holding the competition CSV files (relative to the notebook's working directory).
DATA_PATH = "../../data/mofc-sales-forecast"
# Directory under which tuning results are pickled and re-loaded.
MODEL_PATH = "models"
# True: run the hyperparameter search; False: load previously pickled tuning results.
TUNE_PARAMS = True

# calendar supplies the `d` -> `date` mapping and the event names used for holidays below.
calendar = pd.read_csv(os.path.join(DATA_PATH, "calendar.csv"))
# selling_prices is only handed to the WRMSSE evaluator in this notebook.
selling_prices = pd.read_csv(os.path.join(DATA_PATH, "sell_prices.csv"))
# Not loaded: the validation file is not used anywhere in this notebook.
# df_train_valid = pd.read_csv(os.path.join(DATA_PATH, "sales_train_validation.csv"))
# Wide sales table: one row per `id`, one column per day (`d_1`, `d_2`, ...).
df_train_eval = pd.read_csv(os.path.join(DATA_PATH, "sales_train_evaluation.csv"))
# Not loaded: no submission file is produced by this notebook.
# sample_submission = pd.read_csv(os.path.join(DATA_PATH, "sample_submission.csv"))

In [4]:
# Identifier columns of the wide sales table, and the day columns d_1 .. d_1941.
key_names = ["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]
date_names = ["d_" + str(i) for i in range(1, 1942)]
all_ids = df_train_eval["id"].unique()
# Forecast horizon: the last `test_steps` day columns are held out for evaluation.
test_steps = 28

# Fraction of series used for hyperparameter tuning / final evaluation respectively.
valid_sample_ratio = 0.01
test_sample_ratio = 0.1


def sample_ids(ids, ratio):
    """Return a random subset of `ids` as a plain Python list.

    Args:
        ids: Array-like of unique series identifiers.
        ratio: Fraction of `ids` to keep. Exactly 1.0 keeps everything and
            does not touch the random number generator.

    Returns:
        list: The selected identifiers. Always a list, so downstream code
        (list concatenation, label indexing) behaves the same for every ratio.
    """
    if ratio == 1.0:
        return list(ids)
    return np.random.choice(ids, round(ratio * len(ids)), replace=False).tolist()


# Order matters: both draws consume the global NumPy RNG seeded at the top of the
# notebook, so the validation sample must be drawn before the test sample.
valid_sampled_ids = sample_ids(all_ids, valid_sample_ratio)
test_sampled_ids = sample_ids(all_ids, test_sample_ratio)

print(
    f"{len(valid_sampled_ids)} out of {len(all_ids)} IDs were selected for a validation, and {len(test_sampled_ids)} out of {len(all_ids)} IDs were selected for a test."
)

305 out of 30490 IDs were selected for a validation, and 3049 out of 30490 IDs were selected for a test.


# Data Preprocessing

In [5]:
# Training window: the `id` column plus every day column except the last `test_steps`.
train_columns = key_names[:1] + date_names[:-test_steps]

# Lookup from the day label (`d`) to the calendar date string.
date_dict = calendar.set_index("d")["date"].to_dict()

# Transpose so that each row is a day and each column is one series id.
df_train = df_train_eval[train_columns].set_index("id").T.reset_index()
# After reset_index the day labels live in a column called "index"; swap in real dates.
df_train["index"] = df_train["index"].replace(date_dict)
df_train.columns = ["time"] + df_train.columns[1:].tolist()
df_train.index.name = ""

# Shared time axis, re-attached to individual series by the modelling functions.
series_time = df_train["time"]

In [6]:
# Calendar rows that carry at least one named event.
event_columns = ["event_name_1", "event_name_2"]
indices = calendar[event_columns].dropna(how="all").index

# Long format: one row per (event, date), columns named the way ProphetParams'
# `holidays` frame is built here (holiday, ds, lower_window, upper_window).
holidays = (
    calendar.loc[indices, ["date"] + event_columns]
    .melt(id_vars="date", value_vars=event_columns, value_name="holiday")
    .dropna()[["holiday", "date"]]
    .sort_values("date")
    .reset_index(drop=True)
    .rename(columns={"date": "ds"})
    .assign(lower_window=0, upper_window=1)
)

# Baseline: Model Evaluation

In [7]:
# Keep only the sampled test series, in the order of `test_sampled_ids`.
sampled_df_train_eval = (
    df_train_eval.set_index("id").loc[test_sampled_ids].reset_index()
)

# Split the day columns into the training window and the held-out horizon.
train_date_names = date_names[:-test_steps]
test_date_names = date_names[-test_steps:]
sampled_df_train = sampled_df_train_eval.loc[:, key_names + train_date_names]
sampled_df_test = sampled_df_train_eval.loc[:, test_date_names]

evaluator = WRMSSEEvaluator(
    sampled_df_train, sampled_df_test, calendar, selling_prices, test_steps
)

  0%|          | 0/12 [00:00<?, ?it/s]

In [8]:
# Mean method: forecast every horizon day with the series' mean over the training window.
train_mean = sampled_df_train[date_names[:-test_steps]].mean(axis=1).to_numpy()
sampled_df_pred = pd.DataFrame(
    np.repeat(train_mean[:, np.newaxis], test_steps, axis=1),
    columns=sampled_df_test.columns,
)
wrmsse = evaluator.score(sampled_df_pred)

print(f"Mean Method WRMSSE: {wrmsse:.6f}")

Mean Method WRMSSE: 1.532600


In [9]:
# Naive method: repeat the last observed training day across the whole horizon.
last_train_day = date_names[-test_steps - 1]
last_observed = sampled_df_train[[last_train_day]].to_numpy()
sampled_df_pred = pd.DataFrame(
    np.repeat(last_observed, test_steps, axis=1),
    columns=sampled_df_test.columns,
)
wrmsse = evaluator.score(sampled_df_pred)

print(f"Naive Method WRMSSE: {wrmsse:.6f}")

Naive Method WRMSSE: 1.393950


# Prophet: Hyperparameter Tuning

In [10]:
def fit_and_predict_model(df, steps, params, include_history=False):
    """Fit a Prophet model on one series and forecast `steps` daily periods.

    Args:
        df: DataFrame with a "time" column and the series values.
        steps: Number of future periods to forecast.
        params: ProphetParams used to configure the model.
        include_history: Forwarded to `model.predict`.

    Returns:
        tuple: `(model, forecast)` - the fitted ProphetModel and the value
        returned by its `predict` call.
    """
    model = ProphetModel(
        data=TimeSeriesData(df=df, sort_by_time=True, time_col_name="time"),
        params=params,
    )
    model.fit()

    forecast = model.predict(steps=steps, include_history=include_history, freq="D")
    return model, forecast
    

def get_func(prophet_params, train_index, test_index):
    """Build a per-series loss function for one cross-validation fold.

    Args:
        prophet_params: ProphetParams to fit each series with.
        train_index: Positional row indices of the training part of the fold.
        test_index: Positional row indices of the test part of the fold.

    Returns:
        callable: `calc_model_loss(y)`, which takes one series of values,
        fits on the training rows, forecasts `len(test_index)` steps and
        returns the mean Tweedie deviance (power=1.5) on the test rows.
        It reads the module-level `series_time` for the time axis.
    """
    eps = 1e-6

    def calc_model_loss(y):
        df = pd.concat([series_time, y], axis=1)
        df.columns = ["time", "y"]

        # The fold indices are positions (as produced by TimeSeriesSplit), so
        # select rows with .iloc; label-based .loc is only equivalent while
        # the frame happens to carry a default 0..n-1 index.
        _, forecast = fit_and_predict_model(
            df.iloc[train_index],
            len(test_index),
            prophet_params,
        )

        y_true = df.iloc[test_index, 1].values
        # Floor the forecast at a small positive value; the power=1.5 Tweedie
        # deviance is only defined for strictly positive predictions.
        y_pred = np.maximum(forecast["fcst"].values, eps)

        return mean_tweedie_deviance(y_true, y_pred, power=1.5)

    return calc_model_loss


def evaluation_function(params):
    """Score one hyperparameter candidate for the search.

    Args:
        params: Mapping holding the Prophet hyperparameters under test.

    Returns:
        The mean over 3 time-series folds of the mean per-series loss,
        computed on the `valid_sampled_ids` columns of `df_train`.
    """
    tuned_keys = (
        "n_changepoints",
        "changepoint_range",
        "yearly_seasonality",
        "weekly_seasonality",
        "daily_seasonality",
        "holidays_prior_scale",
        "seasonality_mode",
        "seasonality_prior_scale",
        "changepoint_prior_scale",
    )
    prophet_params = ProphetParams(
        holidays=holidays,
        floor=0.0,
        **{key: params[key] for key in tuned_keys},
    )

    # One mean loss per fold; each fold fits every sampled validation series.
    fold_losses = [
        df_train[valid_sampled_ids]
        .apply(get_func(prophet_params, train_index, test_index))
        .mean()
        for train_index, test_index in TimeSeriesSplit(n_splits=3).split(df_train)
    ]

    return np.array(fold_losses).mean()

In [11]:
%%time
os.makedirs(MODEL_PATH, exist_ok=True)
# Single definition of the cache location, shared by the save and load branches.
tuning_results_path = os.path.join(MODEL_PATH, "prophet", "prophet_tuning_results.pkl")

if TUNE_PARAMS:
    parameters = get_default_prophet_parameter_search_space()
    # Widen the first default parameter's grid with larger values.
    # NOTE(review): relies on entry 0 being `seasonality_prior_scale` - confirm
    # against the installed kats version.
    parameters[0]["values"] = parameters[0]["values"] + [25.0, 50.0, 100.0]
    # Two extra dimensions that this cell adds to the default search space.
    parameters.append(
        {
            "name": "n_changepoints",
            "type": "choice",
            "value_type": "int",
            "values": [5, 10, 25, 50, 100],
            "is_ordered": True,
        }
    )
    parameters.append(
        {
            "name": "holidays_prior_scale",
            "type": "choice",
            "value_type": "float",
            "values": [0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5, 10.0, 25.0, 50.0, 100.0],
            "is_ordered": True,
        }
    )

    parameter_tuner = tspt.SearchMethodFactory.create_search_method(
        parameters=parameters,
        evaluation_function=evaluation_function,
        bootstrap_size=10,
        selected_search_method=SearchMethodEnum.BAYES_OPT,
        seed=42,
        multiprocessing=True,
    )

    parameter_tuner.generate_evaluate_new_parameter_values(
        evaluation_function=evaluation_function,
        arm_count=20,
    )

    tuning_results = parameter_tuner.list_parameter_value_scores()

    # Cache the results so the (very long) search can be skipped with TUNE_PARAMS = False.
    os.makedirs(os.path.dirname(tuning_results_path), exist_ok=True)
    dump_pickle(tuning_results_path, tuning_results)

else:
    # Only load pickles produced by this notebook; unpickling untrusted files can run code.
    tuning_results = load_pickle(tuning_results_path)

# idxmin() returns the index *label* of the lowest loss, which is what .loc expects.
# (argmin() returns a position and only matches .loc on a default 0..n-1 index.)
best_params = tuning_results.loc[tuning_results["mean"].idxmin(), "parameters"]



CPU times: user 42.8 s, sys: 11.1 s, total: 54 s
Wall time: 15h 48min 11s


In [12]:
# Expand the per-trial parameter dicts into columns and append the score column.
parameter_table = pd.json_normalize(tuning_results["parameters"])
summary = pd.concat([parameter_table, tuning_results["mean"]], axis=1)
# Rename the last column (the mean score) to "loss", by position.
summary.columns = [*summary.columns[:-1], "loss"]
summary.sort_values("loss").head(10)

Unnamed: 0,seasonality_prior_scale,changepoint_prior_scale,changepoint_range,n_changepoints,holidays_prior_scale,yearly_seasonality,weekly_seasonality,daily_seasonality,seasonality_mode,loss
14,0.464159,0.004642,0.9,50,0.05,False,True,False,multiplicative,517.939237
27,0.215443,0.004642,0.89,50,0.1,False,True,False,multiplicative,523.325939
22,0.215443,0.01,0.85,50,0.025,False,True,False,additive,537.71192
10,0.215443,0.004642,0.88,50,0.05,False,True,False,additive,540.256855
28,1.0,0.002154,0.82,25,0.5,False,True,True,additive,542.567333
21,50.0,0.004642,0.84,10,0.05,False,True,True,additive,543.606909
25,0.215443,0.004642,0.91,50,0.05,False,True,False,additive,545.664801
20,2.154435,0.002154,0.81,10,2.5,False,True,True,additive,548.978916
24,25.0,0.002154,0.84,5,1.0,False,True,True,additive,551.946418
2,0.464159,0.01,0.87,50,0.05,False,True,False,additive,559.769725


# Prophet: Model Evaluation

In [13]:
# Rebuild ProphetParams from the best trial; the holidays frame and the floor are
# fixed, everything else comes from the tuned values.
best_param_names = (
    "n_changepoints",
    "changepoint_range",
    "yearly_seasonality",
    "weekly_seasonality",
    "daily_seasonality",
    "seasonality_mode",
    "seasonality_prior_scale",
    "holidays_prior_scale",
    "changepoint_prior_scale",
)
prophet_params = ProphetParams(
    holidays=holidays,
    floor=0.0,
    **{name: best_params[name] for name in best_param_names},
)

In [14]:
def backtest(y):
    """Run a simple backtest of the tuned Prophet model on one series.

    Reads the module-level `series_time`, `all_errors`, `prophet_params`,
    `train_percentage` and `test_percentage`.

    Args:
        y: One series of values, aligned with `series_time`.

    Returns:
        list: The values of `backtester.errors` after the backtest has run.
    """
    frame = pd.concat([series_time, y], axis=1)
    frame.columns = ["time", "y"]

    backtester = BackTesterSimple(
        train_percentage=train_percentage,
        test_percentage=test_percentage,
        error_methods=all_errors,
        data=TimeSeriesData(df=frame, sort_by_time=True, time_col_name="time"),
        params=prophet_params,
        model_class=ProphetModel,
    )
    backtester.run_backtest()

    return list(backtester.errors.values())


def predict(y):
    """Fit the tuned Prophet model on one series and forecast the test horizon.

    Reads the module-level `series_time`, `prophet_params` and `test_steps`.

    Args:
        y: One series of values, aligned with `series_time`.

    Returns:
        The "fcst" values of the forecast for the next `test_steps` periods.
    """
    frame = pd.concat([series_time, y], axis=1)
    frame.columns = ["time", "y"]

    _, forecast = fit_and_predict_model(frame, test_steps, prophet_params)
    return forecast["fcst"].values

In [15]:
%%time
# Leave one core free, but never request fewer than one worker (single-core machines).
pandarallel.initialize(
    nb_workers=max(1, multiprocessing.cpu_count() - 1),
    progress_bar=False,
    verbose=0,
)

all_errors = ["mape", "smape", "mae", "mase", "mse", "rmse"]
# NOTE(review): `date_names` includes the held-out days while `df_train` does not,
# so this split gives slightly fewer than `test_steps` test rows per series -
# confirm this is the intended backtest window.
train_percentage = 100 * len(date_names) / (len(date_names) + test_steps)
test_percentage = 100 - train_percentage

backtests = df_train[test_sampled_ids].parallel_apply(backtest, result_type="reduce")

# One list of error values per series id. `.items()` is used because
# `.iteritems()` was removed in pandas 2.0.
parsed = {index: list(values) for index, values in backtests.items()}
# Rows are the error metrics, columns the series; average each metric over all series.
backtests = pd.DataFrame(parsed, index=all_errors).mean(axis=1)

CPU times: user 1.16 s, sys: 426 ms, total: 1.59 s
Wall time: 44min 39s


In [16]:
# One "metric: value" line per metric, skipping metrics whose average is NaN.
# Joining with "\n" leaves no trailing newline, so nothing has to be sliced off.
# (The previous `string[:-2]` also removed the last digit of the final value.)
string = "\n".join(
    f"{key}: {value:.4f}" for key, value in backtests.items() if not np.isnan(value)
)

print(string)

mape: inf
smape: 1.4120
mae: 1.1167
mase: 1.6707
mse: 5.6493
rmse: 1.417


In [17]:
%%time
predictions = df_train[test_sampled_ids].parallel_apply(predict, result_type="reduce")

# One list of forecast values per series id. `.items()` is used because
# `.iteritems()` was removed in pandas 2.0.
parsed = {index: list(values) for index, values in predictions.items()}
# Columns are series ids, rows are forecast steps; keep the last `test_steps` rows.
predictions = pd.DataFrame(parsed).iloc[-test_steps:, :]

CPU times: user 1.18 s, sys: 400 ms, total: 1.58 s
Wall time: 45min 5s


In [18]:
# Reshape to one row per series (ordered like `test_sampled_ids`) and one column
# per horizon day, with the same column labels and row index as `sampled_df_test`.
sampled_df_pred = predictions.T
sampled_df_pred = sampled_df_pred.loc[test_sampled_ids]
sampled_df_pred.columns = sampled_df_test.columns
sampled_df_pred.index = range(len(test_sampled_ids))

wrmsse = evaluator.score(sampled_df_pred)

# Label spelling fixed: "Propeht" -> "Prophet".
print(f"Prophet WRMSSE: {wrmsse:.6f}")

Propeht WRMSSE: 0.868968


In [19]:
def plot_forecast(source, test_steps, plot_id=None, model_name=None, start_date=None):
    """Plot observed sales, the forecast with its band, and the forecast start.

    Args:
        source: DataFrame with the columns "time", "y", "fcst", "fcst_lower"
            and "fcst_upper".
        test_steps: Number of trailing rows that form the forecast horizon; a
            dashed vertical rule is drawn at the first of them.
        plot_id: Optional series identifier appended to the title.
        model_name: Optional model name prefixed to the title.
        start_date: Optional lower bound; rows with an earlier "time" are dropped.

    Returns:
        The layered Altair chart (points + line + band + rule).
    """
    if start_date is not None:
        source = source[source["time"] >= start_date]

    # Observed values as small black dots.
    points = (
        alt.Chart(source)
        .mark_circle(size=10.0, color="#000000")
        .encode(
            x=alt.X("time:T", axis=alt.Axis(title="Date")),
            y=alt.Y("y", axis=alt.Axis(title="Sales")),
            tooltip=["time:T", "y:Q"],
        )
    )

    # Point forecast.
    line = (
        alt.Chart(source)
        .mark_line(size=1.0, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst",
        )
    )

    # Area between the lower and upper forecast bounds.
    band = (
        alt.Chart(source)
        .mark_area(opacity=0.25, color="#4267B2")
        .encode(
            x="time:T",
            y="fcst_lower",
            y2="fcst_upper",
        )
    )

    # Single row marking the first day of the horizon. For test_steps == 1 the
    # stop bound would be 0, which makes the slice empty; `or None` turns it into
    # an open-ended slice so the last row is still selected.
    rule_stop = (-test_steps + 1) or None
    rule = (
        alt.Chart(source[["time"]].iloc[-test_steps:rule_stop])
        .mark_rule(size=1.0, color="#FF0000", strokeDash=[2, 2])
        .encode(x="time:T")
    )

    title = "Sales Forecast"
    if plot_id is not None:
        title += f" for '{plot_id}'"
    if model_name is not None:
        title = f"{model_name}: " + title

    return (points + line + band + rule).properties(title=title, width=1000, height=300)

In [20]:
plot_index = 2  # 2, 4, 8
plot_id = test_sampled_ids[plot_index]

# Training frame for the chosen series, with the value column renamed to "y".
df = df_train[["time", plot_id]].rename(columns={plot_id: "y"})

model, forecast = fit_and_predict_model(
    df, test_steps, prophet_params, include_history=True
)

# Observed sales over the full period (training + horizon), keyed by calendar date.
actual_sales = df_train_eval.loc[df_train_eval["id"] == plot_id, date_names].T
actual_sales.columns = ["y"]
y = calendar.merge(actual_sales, left_on="d", right_index=True)[["date", "y"]]
y = y.assign(time=pd.to_datetime(y["date"]))

# Left-merge keeps every observed day; the forecast columns join on the shared columns.
source = y.merge(forecast, how="left").drop(columns=["date"])
plot_forecast(
    source, test_steps, plot_id=plot_id, model_name="Prophet", start_date="2015-05-23"
)

# VAR: Model Evaluation

In [21]:
%%time
# NOTE(review): trend="ct" presumably requests a constant plus linear trend term -
# confirm against the kats / statsmodels VAR documentation.
var_params = VARParams(trend="ct")

# One multivariate frame: the shared time column plus one column per sampled test series.
df = pd.concat([series_time, df_train[test_sampled_ids]], axis=1)
sales_ts = TimeSeriesData(
    df=df, sort_by_time=True, time_col_name="time"
)

# A single VAR model is fitted jointly on all sampled series.
model = VARModel(data=sales_ts, params=var_params)
model.fit()

# `forecast` is indexed by series id further down (forecast[series_id]).
forecast = model.predict(
    steps=test_steps,
    include_history=True,
    freq="D",
)

CPU times: user 13min 22s, sys: 49.5 s, total: 14min 11s
Wall time: 3min 10s


* The `backtesters` module currently supports only univariate models, so it was not used for *VAR*, which is a multivariate model.

In [22]:
# Pull the "fcst" values of every sampled series out of the per-series VAR forecasts.
parsed = {
    series_id: forecast[series_id].to_dataframe()["fcst"].values.tolist()
    for series_id in test_sampled_ids
}
# The forecast was requested with history, so keep only the last `test_steps` rows.
predictions = pd.DataFrame(parsed).iloc[-test_steps:, :]

In [23]:
# One row per series (ordered like `test_sampled_ids`), one column per horizon day,
# relabelled to match `sampled_df_test`.
sampled_df_pred = predictions.T.loc[test_sampled_ids]
sampled_df_pred.columns = sampled_df_test.columns
sampled_df_pred.index = range(len(test_sampled_ids))

wrmsse = evaluator.score(sampled_df_pred)

print(f"VAR WRMSSE: {wrmsse:.6f}")

VAR WRMSSE: 0.752849


In [24]:
plot_index = 2  # 2, 4, 8
plot_id = test_sampled_ids[plot_index]

# Observed sales over the full period (training + horizon), keyed by calendar date.
actual_sales = df_train_eval.loc[df_train_eval["id"] == plot_id, date_names].T
actual_sales.columns = ["y"]
y = calendar.merge(actual_sales, left_on="d", right_index=True)[["date", "y"]]
y = y.assign(time=pd.to_datetime(y["date"]))

# Attach this series' VAR forecast columns to the observed values.
var_forecast = forecast[plot_id].to_dataframe()
source = y.merge(var_forecast, how="left").drop(columns=["date"])

plot_forecast(source, test_steps, plot_id=plot_id, model_name="VAR", start_date="2015-05-23")