# Forecasting Holiday Effects With Prophet

In [None]:
import pandas as pd
from workalendar.asia import Israel
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from darts_config import *
from darts_functions import count_zeroes, get_relevent_columns, preprocess_data, calc_nMAPE_series
from prophet.plot import plot_plotly, plot_components_plotly
from prophet import Prophet
import plotly.offline as py
import warnings

# Silence every warning category for the rest of the notebook.
warnings.filterwarnings("ignore")
# NOTE(review): the blanket filter above already matches FutureWarning, so this
# second filter looks redundant — confirm before removing.
warnings.simplefilter("ignore", category=FutureWarning)

## Load Dataset

In [None]:
# Load the raw workbook. `file_name` is not defined in this notebook; it is
# presumably supplied by the star import from darts_config.
file_path = file_name
raw_df = pd.read_excel(file_path)

# Read the forecast date from the first row of the "Forecast Date" column.
# .iloc[0] is positional, so the lookup does not depend on the index labels.
FORECAST_DATE = raw_df["Forecast Date"].iloc[0]
df_sliced = get_relevent_columns(data=raw_df, forecast_date=FORECAST_DATE)

# Preprocess data frame
df = preprocess_data(raw_data=raw_df, data_df=df_sliced, column_index="Item Code")

# Sanity check for date range and values
print(f"Actual sales dates range from {df.index[0]} to {df.index[-1]}")
print(f"Number of Zeros: {count_zeroes(df)}\nNumber of NaNs: {df.isna().sum().sum()}")
print(f"Data includes {df.shape[0]} months and {df.shape[1]} products")

## Manipulate Dataset for Prophet

In [None]:
# Move the index into a regular column (still mutating the existing frame),
# then clear the name of the column axis and call the former index column
# "ds", the date column name Prophet requires.
df.reset_index(inplace=True)
df = df.rename_axis(None, axis="columns").rename(columns={"index": "ds"})

Select a single SKU and find its coefficient of variation

In [None]:
# Position of the SKU to model among the product columns (index 4 has many outliers).
SKU_INDEX = 2
# Offset by one because column 0 is the former index inserted by reset_index.
SKU_NAME = df.columns[SKU_INDEX + 1]
# Keep only the date column and the chosen SKU, renamed to "y" for Prophet.
SKU = df[["ds", SKU_NAME]].rename(columns={SKU_NAME: "y"})
SKU.head()

## Default Prophet

In [None]:
# Coefficient of variation (CV) = standard deviation / mean: a unit-free measure
# of how volatile the series is. This is not a covariance.
std_dev = SKU["y"].std()
mean_val = SKU["y"].mean()
coef_of_variation = std_dev / mean_val
# Backward-compatible alias for any cell that still reads the old name.
covariance = coef_of_variation
print(f"Coefficient of variation: {coef_of_variation:.2f}")

Fit model

In [None]:
# Baseline model without holidays; yearly seasonality uses 20 Fourier terms.
# Prophet.fit returns the fitted model, so construction and fitting are chained.
m = Prophet(yearly_seasonality=20).fit(SKU)

# Extend the history by 6 month-start ("MS") periods and predict over the whole range.
future = m.make_future_dataframe(periods=6, freq="MS")
forecast = m.predict(future)

# Show the last rows of the point forecast together with its uncertainty bounds.
forecast.loc[:, ["ds", "yhat", "yhat_lower", "yhat_upper"]].tail()

Plot decomposition

In [None]:
# Interactive component plot for the baseline model `m` and its forecast.
plot_components_plotly(m, forecast)

Plot forecast

In [None]:
# Interactive plot of the baseline model's forecast.
plot_plotly(m, forecast)

## Prophet Model with Adjustments

Set holidays dataframe

In [None]:
# Israeli holiday calendar for every year from 2019 through 2025.
cal = Israel()
years = range(2019, 2026)

# One (holiday, ds) frame per year, stacked into a single table.
all_holidays = pd.concat(
    (
        pd.DataFrame(cal.holidays(year), columns=["ds", "holiday"]).loc[:, ["holiday", "ds"]]
        for year in years
    ),
    ignore_index=True,
)

# Snap each holiday date to the first day of its month, keep the first holiday
# found for each month, and attach the same day-based window to every row.
# NOTE(review): Prophet windows are expressed in days while the series is
# monthly — confirm that -21/+1 has the intended effect on month-start dates.
all_holidays = (
    all_holidays.assign(ds=lambda frame: pd.to_datetime(frame["ds"]).dt.to_period("M").dt.to_timestamp())
    .drop_duplicates(subset=["ds"])
    .assign(lower_window=-21, upper_window=1)
)

# Sanity check
# YEAR = 2022
# all_holidays[all_holidays["ds"].dt.year == YEAR].head(20)

all_holidays.head(50)

Fit model

In [None]:
# Same yearly seasonality as the baseline, plus the holiday table.
# Prophet.fit returns the fitted model, so construction and fitting are chained.
m2 = Prophet(yearly_seasonality=20, holidays=all_holidays).fit(SKU)

# Extend the history by 8 month-start ("MS") periods and predict over the whole range.
future = m2.make_future_dataframe(periods=8, freq="MS")
forecast = m2.predict(future)

# Show the last rows of the point forecast together with its uncertainty bounds.
forecast.loc[:, ["ds", "yhat", "yhat_lower", "yhat_upper"]].tail()

Plot decomposition

In [None]:
# Interactive component plot for the holiday-aware model `m2` and its forecast.
plot_components_plotly(m2, forecast)

Plot forecast

In [None]:
# Interactive plot of the holiday-aware model's forecast.
plot_plotly(m2, forecast)

## Model Evaluation

Try first with one SKU

In [None]:
# Hold out the most recent PERIODS rows for validation and train on everything before them.
PERIODS = 6
train_set = SKU.iloc[:-PERIODS]
val_set = SKU.iloc[-PERIODS:]
# The first count is the training set (the message used to call it "Test set").
print(f"Train set includes {len(train_set)} months and Val Set includes {len(val_set)} months")

In [None]:
# Fit on the training rows only, then forecast exactly the held-out horizon.
model = Prophet(yearly_seasonality=20, holidays=all_holidays)
model.fit(train_set)
future = model.make_future_dataframe(periods=PERIODS, freq="MS")
forecast = model.predict(future)

# The last PERIODS forecast rows are the months held out in val_set.
val_pred = forecast["yhat"].iloc[-PERIODS:]

# mean_squared_error returns the MSE; take the square root to report a true RMSE.
rmse_value = mean_squared_error(val_set["y"], val_pred) ** 0.5
mape_value = mean_absolute_percentage_error(val_set["y"], val_pred)

print(f"RMSE value is {rmse_value:.2f}")
print(f"MAPE value is {mape_value:.2f}")

Now let's loop over the SKUs and evaluate each one

In [None]:
def evaluate_prophet(
    df: pd.DataFrame,
    num_values: int,
    fourier: int,
    plot: bool,
    holidays: pd.DataFrame = None,
    change_prior_scale: float = 0.05,
    seasonal_prior_scale: float = 10.0,
) -> list:
    """Back-test a Prophet model on the first ``num_values`` SKU columns of ``df``.

    For each SKU the last ``PERIODS`` rows are held out, a model is fitted on
    the remaining rows only, and the held-out rows are scored with
    ``calc_nMAPE_series``. The forecast also extends ``FORECAST_PERIODS``
    month-start steps past the end of the data so the optional plot shows the
    future horizon. ``PERIODS`` and ``FORECAST_PERIODS`` are read from the
    enclosing module/notebook scope.

    Args:
        df: Frame with a ``ds`` date column; columns 1..``num_values`` are the
            SKU series to evaluate.
        num_values: Number of SKU columns to evaluate, starting at column 1.
        fourier: Passed to Prophet as ``yearly_seasonality``.
        plot: When true, draw the Plotly forecast figure for every SKU.
        holidays: Holiday table passed to Prophet, or ``None`` for no holidays.
        change_prior_scale: Passed to Prophet as ``changepoint_prior_scale``.
        seasonal_prior_scale: Passed to Prophet as ``seasonality_prior_scale``.

    Returns:
        A list with one summary string per SKU (name, column index, nMAPE and
        coefficient of variation, the latter labelled ``COV``).
    """
    results = []
    # Forecast far enough to cover the held-out rows plus the future horizon.
    horizon = PERIODS + FORECAST_PERIODS

    for index in range(1, num_values + 1):
        sku_name = df.columns[index]
        sku = df[["ds", sku_name]].rename(columns={sku_name: "y"})

        # Coefficient of variation (std / mean), reported as "COV" in the summary.
        cov_value = sku["y"].std() / sku["y"].mean()

        # Train/validation split: the last PERIODS rows are held out.
        train_set, val_set = sku.iloc[:-PERIODS], sku.iloc[-PERIODS:]
        if index == 1:
            print(f"Train Set includes {len(train_set)} months and Validation Set includes {len(val_set)} months")

        model = Prophet(
            yearly_seasonality=fourier,
            holidays=holidays,
            changepoint_prior_scale=change_prior_scale,
            seasonality_prior_scale=seasonal_prior_scale,
        )
        # Fit on the training rows only so the validation score is out-of-sample.
        model.fit(train_set)
        future = model.make_future_dataframe(periods=horizon, freq="MS")
        forecast = model.predict(future)

        # The last `horizon` rows are the forecast steps after the training
        # data; the first PERIODS of them line up with val_set.
        val_pred = forecast["yhat"].iloc[-horizon:].iloc[:PERIODS]
        nmape_value = calc_nMAPE_series(val_set["y"], val_pred)
        results.append(f"Name: {sku_name}, Index: {index}, nMAPE: {nmape_value:.2f} COV: {cov_value:.2f}")

        print("SKU:", sku_name)
        if plot:
            fig = plot_plotly(model, forecast)
            py.iplot(fig)

    return results

## Evaluate Model Metrics

Optimal Model

In [None]:
# Score the first three SKU columns with the holiday table, without plotting.
results = evaluate_prophet(
    df=df,
    num_values=3,
    fourier=20,
    plot=False,
    holidays=all_holidays,
)
for summary in results:
    print(summary)

Test without holidays

Note: a low Fourier order (e.g. 10) cannot be combined with holidays.

In [None]:
# Prophet accepts only a DataFrame or None for `holidays` (anything else is
# rejected by its input validation); None disables holiday effects.
results = evaluate_prophet(df=df, num_values=3, fourier=20, plot=False, holidays=None)
for result in results:
    print(result)

### Run on seasonal SKUs

In [None]:
# Column names (item codes) of the SKUs evaluated in this cell.
code_1 = "7288014"
code_2 = "7288015"
code_3 = "834500"

# Keep the date column plus the three selected SKU columns.
subset = df[["ds", code_1, code_2, code_3]]

# Evaluate all three columns with plots; the returned summaries are the cell output.
evaluate_prophet(subset, num_values=3, fourier=20, plot=True, holidays=all_holidays)

More Testing

In [None]:
# Same SKU subset as before, with the seasonality prior scale set to 1.
results = evaluate_prophet(
    df=subset,
    num_values=3,
    fourier=20,
    plot=True,
    holidays=all_holidays,
    seasonal_prior_scale=1,
)
for summary in results:
    print(summary)