In [None]:
#### Grid search for Prophet univariate indicator selection #####

In [1]:
# ============================================================
# Grid search Prophet (univariate) hyperparams for rolling cutoffs
# Selection window: cutoffs >= 2024-03-01
# Objective: minimize mean WAPE%; tie-breaker abs(mean Bias%)
# ============================================================

import itertools
import warnings
import numpy as np
import pandas as pd

from prophet import Prophet

# -----------------------------
# Config
# -----------------------------
# Input CSV, loaded with pd.read_csv in the "Read + prep data" section.
# NOTE(review): absolute UNC path to a network share; the cell only runs where that share is reachable.
PATH = r"\\Sherwood\accounting\Share\Finance Meetings & Reports\Monthly Reports\BCM Report Monthly\Power BI Report\Forecasting Method\0_data_input_monthly.csv"

# Column parsed as the date (snapped to month start and used as the index).
DATE_COL = "ACC_MONTH"
# Column coerced to numeric and used as the forecast target.
Y_COL = "TOT_PAID"

# Forecast horizon: number of month-start periods predicted after each cutoff.
H = 18
# Number of trailing rows (up to and including the cutoff) used for training.
TRAIN_WINDOW_MONTHS = 60
# Minimum non-NaN training points required; a cutoff with fewer is skipped.
# Equal to TRAIN_WINDOW_MONTHS, so only cutoffs with a full window are evaluated.
MIN_TRAIN_MONTHS = 60

# Earliest cutoff month that takes part in the selection.
SELECTION_CUTOFF_START = pd.Timestamp("2024-03-01")

# outlier handling (match your pipeline)
# Values in [OUTLIER_START, OUTLIER_END] (inclusive label slice) are replaced
# by a trailing moving average via fill_outlier_with_past_ma.
OUTLIER_START = pd.Timestamp("2020-04-01")
OUTLIER_END   = pd.Timestamp("2021-03-01")

# -----------------------------
# Hyperparameter grid (EDIT THESE)
# Keep it small first, then expand.
# The grid search fits every combination (Cartesian product) of these lists
# once per eligible cutoff.
# -----------------------------
CP_GRID = [0.05, 0.10, 0.15, 0.20]  # changepoint_prior_scale
SP_GRID = [10.0, 15.0, 20.0]  # seasonality_prior_scale
# NOTE(review): with a 60-point training window Prophet presumably caps
# n_changepoints below the number of observations inside changepoint_range,
# so 60 may behave like a smaller value -- confirm against Prophet's docs.
N_CP_GRID = [25, 40, 60]  # n_changepoints
CPR_GRID = [0.80, 0.90, 0.95]  # changepoint_range

# -----------------------------
# Metrics
# -----------------------------
def wape_pct(y_true, y_pred):
    """Weighted absolute percentage error, expressed in percent.

    WAPE% = 100 * sum(|pred - actual|) / sum(|actual|), evaluated only on the
    positions where both the actual and the prediction are finite.

    Returns NaN when no position is usable, or when the sum of absolute
    actuals is zero or non-finite.
    """
    actual = np.asarray(y_true, dtype=float)
    forecast = np.asarray(y_pred, dtype=float)

    # Score only the positions where both sides hold a finite number.
    usable = np.isfinite(actual) & np.isfinite(forecast)
    if not usable.any():
        return np.nan

    actual = actual[usable]
    forecast = forecast[usable]

    total_actual = np.nansum(np.abs(actual))
    if total_actual == 0 or not np.isfinite(total_actual):
        return np.nan

    total_abs_error = np.nansum(np.abs(forecast - actual))
    return float(100.0 * total_abs_error / total_actual)

def bias_wape_pct(y_true, y_pred):
    """Bias%: 100 * sum(pred-actual) / sum(|actual|)

    Signed counterpart of WAPE%: positive means over-forecasting, negative
    means under-forecasting. Only positions where both inputs are finite are
    used. Returns NaN when there is no such position, or when the sum of
    absolute actuals is zero or non-finite.
    """
    truth = np.asarray(y_true, dtype=float)
    pred = np.asarray(y_pred, dtype=float)

    both_finite = np.isfinite(truth) & np.isfinite(pred)
    if both_finite.sum() == 0:
        return np.nan

    scale = np.nansum(np.abs(truth[both_finite]))
    scale_is_valid = np.isfinite(scale) and scale != 0
    if not scale_is_valid:
        return np.nan

    # Errors keep their sign so over- and under-forecasts cancel out.
    signed_error = np.nansum(pred[both_finite] - truth[both_finite])
    return float(100.0 * signed_error / scale)

# -----------------------------
# Outlier handling
# -----------------------------
def fill_outlier_with_past_ma(s: pd.Series, start: pd.Timestamp, end: pd.Timestamp, window_months: int = 24) -> pd.Series:
    """Replace every value in ``[start, end]`` with a trailing moving average.

    Works on a float copy of ``s``; the input series is not modified. Each
    date in the (inclusive) label slice ``start:end`` is overwritten with the
    mean of the non-NaN values from ``window_months`` months before it up to
    one month before it. Dates are processed in index order and written back
    immediately, so values already replaced feed the windows of later dates.

    If that window holds no data, the mean of all earlier non-NaN values is
    used instead; with no earlier data at all the value becomes NaN.
    """
    filled = s.astype(float).copy()
    one_month = pd.DateOffset(months=1)
    lookback = pd.DateOffset(months=window_months)

    for stamp in filled.loc[start:end].index:
        window_end = stamp - one_month
        recent = filled.loc[stamp - lookback:window_end].dropna()

        if len(recent) > 0:
            replacement = float(recent.tail(window_months).mean())
        else:
            # Nothing inside the look-back window: fall back to all prior history.
            earlier = filled.loc[:window_end].dropna()
            replacement = float(earlier.mean()) if len(earlier) else np.nan

        filled.loc[stamp] = replacement

    return filled

# -----------------------------
# Prophet forecast
# -----------------------------
def prophet_univar_forecast(train_series: pd.Series,
                            future_index: pd.DatetimeIndex,
                            cp: float, sp: float,
                            n_changepoints: int,
                            changepoint_range: float) -> pd.Series:
    """Fit a univariate Prophet model and forecast the dates in ``future_index``.

    The model uses additive yearly seasonality only (weekly and daily
    seasonality are switched off).

    Parameters
    ----------
    train_series : series of training values; its index supplies the ``ds``
        column. NaN values are dropped before fitting.
    future_index : dates to predict; also the index of the returned series.
    cp : passed as ``changepoint_prior_scale``.
    sp : passed as ``seasonality_prior_scale``.
    n_changepoints : passed as ``n_changepoints``.
    changepoint_range : passed as ``changepoint_range``.

    Returns
    -------
    Float series of ``yhat`` values indexed by ``future_index``. When fewer
    than 12 non-NaN training points remain, an all-NaN series is returned
    without fitting a model.
    """
    history = train_series.astype(float).dropna()

    # Too little history to fit anything: return an all-NaN forecast.
    if len(history) < 12:
        return pd.Series(np.nan, index=future_index)

    # Prophet takes a two-column frame: ds (timestamp) and y (value).
    fit_frame = history.reset_index()
    fit_frame.columns = ["ds", "y"]

    settings = {
        "yearly_seasonality": True,
        "weekly_seasonality": False,
        "daily_seasonality": False,
        "seasonality_mode": "additive",
        "changepoint_prior_scale": float(cp),
        "seasonality_prior_scale": float(sp),
        "n_changepoints": int(n_changepoints),
        "changepoint_range": float(changepoint_range),
    }
    model = Prophet(**settings)

    # Silence Python warnings raised while fitting.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model.fit(fit_frame)

    forecast = model.predict(pd.DataFrame({"ds": future_index}))
    return pd.Series(forecast["yhat"].values, index=future_index, dtype=float)

# -----------------------------
# Read + prep data
# -----------------------------
# Load the monthly input file and strip stray whitespace from the headers.
df = pd.read_csv(PATH)
df.columns = [c.strip() for c in df.columns]

# Parse the date column; unparseable dates become NaT and those rows are
# dropped. Every remaining date is snapped to the first day of its month.
df[DATE_COL] = pd.to_datetime(df[DATE_COL], errors="coerce")
df = df.dropna(subset=[DATE_COL]).copy()
df[DATE_COL] = df[DATE_COL].dt.to_period("M").dt.to_timestamp(how="start")

# Non-numeric target values become NaN (removed further down).
df[Y_COL] = pd.to_numeric(df[Y_COL], errors="coerce")

# asfreq() reindexes onto a regular month-start grid and cannot do so when the
# same month appears twice; fail early with a message that names the months
# instead of pandas' generic duplicate-label reindex error.
dup_mask = df[DATE_COL].duplicated(keep=False)
if dup_mask.any():
    dup_labels = sorted(df.loc[dup_mask, DATE_COL].dt.strftime("%Y-%m").unique())
    raise ValueError(
        f"{DATE_COL} has more than one row for month(s) {dup_labels[:5]}; "
        "aggregate to one row per month before building the monthly index."
    )

# Regular month-start index; months absent from the file appear as NaN rows.
df = df.sort_values(DATE_COL).set_index(DATE_COL).asfreq("MS")
# Replace the configured outlier window with a trailing 24-month moving average.
df[Y_COL] = fill_outlier_with_past_ma(df[Y_COL], OUTLIER_START, OUTLIER_END, window_months=24)
# Keep only months that have a usable target value.
df = df.dropna(subset=[Y_COL]).copy()

all_months = df.index.dropna()
last_date = all_months.max()

# Eligible cutoffs must have the full H-month window inside available actuals
cutoffs = [
    d for d in all_months
    if (d >= SELECTION_CUTOFF_START) and (d + pd.DateOffset(months=H) <= last_date)
]

print(f"Last actual month: {last_date.date()}")
print(f"Selection cutoff start: {SELECTION_CUTOFF_START.date()}")
print(f"Eligible cutoffs: {len(cutoffs)}")
if not cutoffs:
    # Report the configured horizon rather than a hard-coded number.
    raise ValueError(f"No eligible cutoffs found. Check SELECTION_CUTOFF_START vs last_date and H={H}.")

# -----------------------------
# Grid search
# -----------------------------
# Every combination of the four hyperparameter lists.
param_grid = list(itertools.product(CP_GRID, SP_GRID, N_CP_GRID, CPR_GRID))
print(f"Grid size: {len(param_grid)} combinations")

# The training window, forecast dates and actuals depend only on the cutoff,
# not on the hyperparameters, so they are built once here instead of once per
# combination. Cutoffs with too little training history are dropped up front.
eval_windows = []
for cutoff in cutoffs:
    train = df.loc[:cutoff, Y_COL].tail(TRAIN_WINDOW_MONTHS).copy()
    if train.dropna().shape[0] < MIN_TRAIN_MONTHS:
        continue
    future_idx = pd.date_range(cutoff + pd.DateOffset(months=1), periods=H, freq="MS")
    # Actuals for the same horizon
    y_true = df[Y_COL].reindex(future_idx)
    eval_windows.append((cutoff, train, future_idx, y_true))

results = []
# One entry per fit that raised, so skipped fits are visible after the run.
fit_failures = []

# NOTE(review): the cmdstanpy INFO lines that flood the cell output come from
# that library's logger; consider raising its log level if the noise is unwanted.
for i, (cp, sp, ncp, cpr) in enumerate(param_grid, start=1):
    wapes = []
    biases = []

    for cutoff, train, future_idx, y_true in eval_windows:
        # Forecast
        try:
            yhat = prophet_univar_forecast(
                train, future_idx,
                cp=cp, sp=sp,
                n_changepoints=ncp,
                changepoint_range=cpr
            )
        except Exception as exc:
            # Best effort: a setting that fails to fit skips this cutoff, but
            # the failure is recorded instead of being silently discarded.
            fit_failures.append({
                "cp": cp,
                "sp": sp,
                "n_changepoints": ncp,
                "changepoint_range": cpr,
                "cutoff": cutoff,
                "error": repr(exc),
            })
            continue

        # Compute metrics (skip if missing)
        w = wape_pct(y_true.values, yhat.values)
        b = bias_wape_pct(y_true.values, yhat.values)

        if np.isfinite(w) and np.isfinite(b):
            wapes.append(w)
            biases.append(b)

    n_cutoffs_used = len(wapes)
    if n_cutoffs_used == 0:
        mean_wape = np.nan
        mean_bias = np.nan
        abs_mean_bias = np.nan
    else:
        mean_wape = float(np.nanmean(wapes))
        mean_bias = float(np.nanmean(biases))
        abs_mean_bias = float(abs(mean_bias))

    results.append({
        "cp": cp,
        "sp": sp,
        "n_changepoints": ncp,
        "changepoint_range": cpr,
        "n_cutoffs_used": n_cutoffs_used,
        "wape_mean_selection": mean_wape,
        "bias_mean_selection": mean_bias,
        "abs_bias_mean_selection": abs_mean_bias,
    })

    if i % 10 == 0 or i == len(param_grid):
        print(f"Done {i}/{len(param_grid)}")

if fit_failures:
    print(f"WARNING: {len(fit_failures)} fit(s) raised and were skipped. First failure: {fit_failures[0]}")

res_df = pd.DataFrame(results)

# Keep only valid rows
res_ok = res_df[np.isfinite(res_df["wape_mean_selection"]) & (res_df["n_cutoffs_used"] > 0)].copy()
if res_ok.empty:
    raise ValueError("All parameter combos failed or produced no valid evaluation rows.")

# Sort by objective: mean WAPE asc, tie-break abs(mean bias) asc, then more cutoffs desc
res_ok = res_ok.sort_values(
    by=["wape_mean_selection", "abs_bias_mean_selection", "n_cutoffs_used"],
    ascending=[True, True, False]
).reset_index(drop=True)

# Take the winner through a one-row frame: res_ok.iloc[0] would become a
# single float Series and turn the integer columns (n_changepoints,
# n_cutoffs_used) into floats such as 40.0.
best = res_ok.iloc[[0]].to_dict(orient="records")[0]

print("\nTop 10 settings:")
print(res_ok.head(10).to_string(index=False))

print("\n✅ Best setting (min mean WAPE; tie-break abs mean Bias):")
print(best)

# If you want to save results for Power BI / review:
# OUT_GRID = r"\\Sherwood\...\Output_SingleTrend\prophet_univariate_gridsearch_selection_window.csv"
# res_ok.to_csv(OUT_GRID, index=False)
# print("Saved:", OUT_GRID)


Last actual month: 2025-12-01
Selection cutoff start: 2024-03-01
Eligible cutoffs: 4
Grid size: 108 combinations


16:40:07 - cmdstanpy - INFO - Chain [1] start processing
16:40:08 - cmdstanpy - INFO - Chain [1] done processing
16:40:08 - cmdstanpy - INFO - Chain [1] start processing
16:40:08 - cmdstanpy - INFO - Chain [1] done processing
16:40:08 - cmdstanpy - INFO - Chain [1] start processing
16:40:09 - cmdstanpy - INFO - Chain [1] done processing
16:40:09 - cmdstanpy - INFO - Chain [1] start processing
16:40:10 - cmdstanpy - INFO - Chain [1] done processing
16:40:10 - cmdstanpy - INFO - Chain [1] start processing
16:40:11 - cmdstanpy - INFO - Chain [1] done processing
16:40:12 - cmdstanpy - INFO - Chain [1] start processing
16:40:12 - cmdstanpy - INFO - Chain [1] done processing
16:40:13 - cmdstanpy - INFO - Chain [1] start processing
16:40:13 - cmdstanpy - INFO - Chain [1] done processing
16:40:14 - cmdstanpy - INFO - Chain [1] start processing
16:40:14 - cmdstanpy - INFO - Chain [1] done processing
16:40:15 - cmdstanpy - INFO - Chain [1] start processing
16:40:15 - cmdstanpy - INFO - Chain [1]

Done 10/108


16:40:37 - cmdstanpy - INFO - Chain [1] start processing
16:40:37 - cmdstanpy - INFO - Chain [1] done processing
16:40:37 - cmdstanpy - INFO - Chain [1] start processing
16:40:38 - cmdstanpy - INFO - Chain [1] done processing
16:40:38 - cmdstanpy - INFO - Chain [1] start processing
16:40:38 - cmdstanpy - INFO - Chain [1] done processing
16:40:39 - cmdstanpy - INFO - Chain [1] start processing
16:40:40 - cmdstanpy - INFO - Chain [1] done processing
16:40:40 - cmdstanpy - INFO - Chain [1] start processing
16:40:40 - cmdstanpy - INFO - Chain [1] done processing
16:40:40 - cmdstanpy - INFO - Chain [1] start processing
16:40:41 - cmdstanpy - INFO - Chain [1] done processing
16:40:41 - cmdstanpy - INFO - Chain [1] start processing
16:40:41 - cmdstanpy - INFO - Chain [1] done processing
16:40:42 - cmdstanpy - INFO - Chain [1] start processing
16:40:42 - cmdstanpy - INFO - Chain [1] done processing
16:40:42 - cmdstanpy - INFO - Chain [1] start processing
16:40:42 - cmdstanpy - INFO - Chain [1]

Done 20/108


16:41:04 - cmdstanpy - INFO - Chain [1] start processing
16:41:04 - cmdstanpy - INFO - Chain [1] done processing
16:41:05 - cmdstanpy - INFO - Chain [1] start processing
16:41:05 - cmdstanpy - INFO - Chain [1] done processing
16:41:05 - cmdstanpy - INFO - Chain [1] start processing
16:41:05 - cmdstanpy - INFO - Chain [1] done processing
16:41:06 - cmdstanpy - INFO - Chain [1] start processing
16:41:06 - cmdstanpy - INFO - Chain [1] done processing
16:41:06 - cmdstanpy - INFO - Chain [1] start processing
16:41:07 - cmdstanpy - INFO - Chain [1] done processing
16:41:07 - cmdstanpy - INFO - Chain [1] start processing
16:41:07 - cmdstanpy - INFO - Chain [1] done processing
16:41:08 - cmdstanpy - INFO - Chain [1] start processing
16:41:08 - cmdstanpy - INFO - Chain [1] done processing
16:41:08 - cmdstanpy - INFO - Chain [1] start processing
16:41:09 - cmdstanpy - INFO - Chain [1] done processing
16:41:09 - cmdstanpy - INFO - Chain [1] start processing
16:41:09 - cmdstanpy - INFO - Chain [1]

Done 30/108


16:41:30 - cmdstanpy - INFO - Chain [1] start processing
16:41:31 - cmdstanpy - INFO - Chain [1] done processing
16:41:31 - cmdstanpy - INFO - Chain [1] start processing
16:41:32 - cmdstanpy - INFO - Chain [1] done processing
16:41:32 - cmdstanpy - INFO - Chain [1] start processing
16:41:32 - cmdstanpy - INFO - Chain [1] done processing
16:41:33 - cmdstanpy - INFO - Chain [1] start processing
16:41:33 - cmdstanpy - INFO - Chain [1] done processing
16:41:33 - cmdstanpy - INFO - Chain [1] start processing
16:41:34 - cmdstanpy - INFO - Chain [1] done processing
16:41:34 - cmdstanpy - INFO - Chain [1] start processing
16:41:34 - cmdstanpy - INFO - Chain [1] done processing
16:41:35 - cmdstanpy - INFO - Chain [1] start processing
16:41:35 - cmdstanpy - INFO - Chain [1] done processing
16:41:36 - cmdstanpy - INFO - Chain [1] start processing
16:41:36 - cmdstanpy - INFO - Chain [1] done processing
16:41:37 - cmdstanpy - INFO - Chain [1] start processing
16:41:37 - cmdstanpy - INFO - Chain [1]

Done 40/108


16:42:01 - cmdstanpy - INFO - Chain [1] done processing
16:42:02 - cmdstanpy - INFO - Chain [1] start processing
16:42:02 - cmdstanpy - INFO - Chain [1] done processing
16:42:02 - cmdstanpy - INFO - Chain [1] start processing
16:42:03 - cmdstanpy - INFO - Chain [1] done processing
16:42:03 - cmdstanpy - INFO - Chain [1] start processing
16:42:03 - cmdstanpy - INFO - Chain [1] done processing
16:42:04 - cmdstanpy - INFO - Chain [1] start processing
16:42:04 - cmdstanpy - INFO - Chain [1] done processing
16:42:04 - cmdstanpy - INFO - Chain [1] start processing
16:42:05 - cmdstanpy - INFO - Chain [1] done processing
16:42:05 - cmdstanpy - INFO - Chain [1] start processing
16:42:05 - cmdstanpy - INFO - Chain [1] done processing
16:42:06 - cmdstanpy - INFO - Chain [1] start processing
16:42:06 - cmdstanpy - INFO - Chain [1] done processing
16:42:06 - cmdstanpy - INFO - Chain [1] start processing
16:42:07 - cmdstanpy - INFO - Chain [1] done processing
16:42:07 - cmdstanpy - INFO - Chain [1] 

Done 50/108


16:42:30 - cmdstanpy - INFO - Chain [1] start processing
16:42:30 - cmdstanpy - INFO - Chain [1] done processing
16:42:31 - cmdstanpy - INFO - Chain [1] start processing
16:42:31 - cmdstanpy - INFO - Chain [1] done processing
16:42:31 - cmdstanpy - INFO - Chain [1] start processing
16:42:32 - cmdstanpy - INFO - Chain [1] done processing
16:42:32 - cmdstanpy - INFO - Chain [1] start processing
16:42:32 - cmdstanpy - INFO - Chain [1] done processing
16:42:33 - cmdstanpy - INFO - Chain [1] start processing
16:42:33 - cmdstanpy - INFO - Chain [1] done processing
16:42:34 - cmdstanpy - INFO - Chain [1] start processing
16:42:34 - cmdstanpy - INFO - Chain [1] done processing
16:42:35 - cmdstanpy - INFO - Chain [1] start processing
16:42:35 - cmdstanpy - INFO - Chain [1] done processing
16:42:35 - cmdstanpy - INFO - Chain [1] start processing
16:42:36 - cmdstanpy - INFO - Chain [1] done processing
16:42:36 - cmdstanpy - INFO - Chain [1] start processing
16:42:37 - cmdstanpy - INFO - Chain [1]

Done 60/108


16:43:02 - cmdstanpy - INFO - Chain [1] start processing
16:43:02 - cmdstanpy - INFO - Chain [1] done processing
16:43:03 - cmdstanpy - INFO - Chain [1] start processing
16:43:03 - cmdstanpy - INFO - Chain [1] done processing
16:43:03 - cmdstanpy - INFO - Chain [1] start processing
16:43:04 - cmdstanpy - INFO - Chain [1] done processing
16:43:04 - cmdstanpy - INFO - Chain [1] start processing
16:43:05 - cmdstanpy - INFO - Chain [1] done processing
16:43:05 - cmdstanpy - INFO - Chain [1] start processing
16:43:06 - cmdstanpy - INFO - Chain [1] done processing
16:43:06 - cmdstanpy - INFO - Chain [1] start processing
16:43:07 - cmdstanpy - INFO - Chain [1] done processing
16:43:07 - cmdstanpy - INFO - Chain [1] start processing
16:43:07 - cmdstanpy - INFO - Chain [1] done processing
16:43:08 - cmdstanpy - INFO - Chain [1] start processing
16:43:09 - cmdstanpy - INFO - Chain [1] done processing
16:43:09 - cmdstanpy - INFO - Chain [1] start processing
16:43:10 - cmdstanpy - INFO - Chain [1]

Done 70/108


16:43:53 - cmdstanpy - INFO - Chain [1] start processing
16:43:54 - cmdstanpy - INFO - Chain [1] done processing
16:43:54 - cmdstanpy - INFO - Chain [1] start processing
16:43:55 - cmdstanpy - INFO - Chain [1] done processing
16:43:55 - cmdstanpy - INFO - Chain [1] start processing
16:43:56 - cmdstanpy - INFO - Chain [1] done processing
16:43:56 - cmdstanpy - INFO - Chain [1] start processing
16:43:59 - cmdstanpy - INFO - Chain [1] done processing
16:43:59 - cmdstanpy - INFO - Chain [1] start processing
16:44:00 - cmdstanpy - INFO - Chain [1] done processing
16:44:01 - cmdstanpy - INFO - Chain [1] start processing
16:44:02 - cmdstanpy - INFO - Chain [1] done processing
16:44:02 - cmdstanpy - INFO - Chain [1] start processing
16:44:03 - cmdstanpy - INFO - Chain [1] done processing
16:44:04 - cmdstanpy - INFO - Chain [1] start processing
16:44:06 - cmdstanpy - INFO - Chain [1] done processing
16:44:06 - cmdstanpy - INFO - Chain [1] start processing
16:44:07 - cmdstanpy - INFO - Chain [1]

Done 80/108


16:44:53 - cmdstanpy - INFO - Chain [1] start processing
16:44:54 - cmdstanpy - INFO - Chain [1] done processing
16:44:55 - cmdstanpy - INFO - Chain [1] start processing
16:44:56 - cmdstanpy - INFO - Chain [1] done processing
16:44:56 - cmdstanpy - INFO - Chain [1] start processing
16:44:58 - cmdstanpy - INFO - Chain [1] done processing
16:44:58 - cmdstanpy - INFO - Chain [1] start processing
16:45:01 - cmdstanpy - INFO - Chain [1] done processing
16:45:02 - cmdstanpy - INFO - Chain [1] start processing
16:45:03 - cmdstanpy - INFO - Chain [1] done processing
16:45:04 - cmdstanpy - INFO - Chain [1] start processing
16:45:04 - cmdstanpy - INFO - Chain [1] done processing
16:45:05 - cmdstanpy - INFO - Chain [1] start processing
16:45:05 - cmdstanpy - INFO - Chain [1] done processing
16:45:06 - cmdstanpy - INFO - Chain [1] start processing
16:45:07 - cmdstanpy - INFO - Chain [1] done processing
16:45:07 - cmdstanpy - INFO - Chain [1] start processing
16:45:08 - cmdstanpy - INFO - Chain [1]

Done 90/108


16:45:44 - cmdstanpy - INFO - Chain [1] start processing
16:45:44 - cmdstanpy - INFO - Chain [1] done processing
16:45:45 - cmdstanpy - INFO - Chain [1] start processing
16:45:45 - cmdstanpy - INFO - Chain [1] done processing
16:45:45 - cmdstanpy - INFO - Chain [1] start processing
16:45:46 - cmdstanpy - INFO - Chain [1] done processing
16:45:46 - cmdstanpy - INFO - Chain [1] start processing
16:45:46 - cmdstanpy - INFO - Chain [1] done processing
16:45:47 - cmdstanpy - INFO - Chain [1] start processing
16:45:47 - cmdstanpy - INFO - Chain [1] done processing
16:45:48 - cmdstanpy - INFO - Chain [1] start processing
16:45:48 - cmdstanpy - INFO - Chain [1] done processing
16:45:49 - cmdstanpy - INFO - Chain [1] start processing
16:45:49 - cmdstanpy - INFO - Chain [1] done processing
16:45:50 - cmdstanpy - INFO - Chain [1] start processing
16:45:51 - cmdstanpy - INFO - Chain [1] done processing
16:45:51 - cmdstanpy - INFO - Chain [1] start processing
16:45:51 - cmdstanpy - INFO - Chain [1]

Done 100/108


16:46:20 - cmdstanpy - INFO - Chain [1] start processing
16:46:20 - cmdstanpy - INFO - Chain [1] done processing
16:46:20 - cmdstanpy - INFO - Chain [1] start processing
16:46:21 - cmdstanpy - INFO - Chain [1] done processing
16:46:21 - cmdstanpy - INFO - Chain [1] start processing
16:46:21 - cmdstanpy - INFO - Chain [1] done processing
16:46:22 - cmdstanpy - INFO - Chain [1] start processing
16:46:22 - cmdstanpy - INFO - Chain [1] done processing
16:46:23 - cmdstanpy - INFO - Chain [1] start processing
16:46:23 - cmdstanpy - INFO - Chain [1] done processing
16:46:23 - cmdstanpy - INFO - Chain [1] start processing
16:46:23 - cmdstanpy - INFO - Chain [1] done processing
16:46:24 - cmdstanpy - INFO - Chain [1] start processing
16:46:24 - cmdstanpy - INFO - Chain [1] done processing
16:46:24 - cmdstanpy - INFO - Chain [1] start processing
16:46:25 - cmdstanpy - INFO - Chain [1] done processing
16:46:25 - cmdstanpy - INFO - Chain [1] start processing
16:46:26 - cmdstanpy - INFO - Chain [1]

Done 108/108

Top 10 settings:
  cp   sp  n_changepoints  changepoint_range  n_cutoffs_used  wape_mean_selection  bias_mean_selection  abs_bias_mean_selection
0.05 20.0              40               0.90               4            15.286770            -2.278865                 2.278865
0.05 10.0              25               0.90               4            15.290461            -2.242768                 2.242768
0.05 20.0              25               0.90               4            15.290512            -2.258829                 2.258829
0.05 10.0              40               0.95               4            15.291347            -2.234769                 2.234769
0.05 10.0              60               0.90               4            15.291967            -2.250410                 2.250410
0.05 10.0              25               0.95               4            15.293006            -2.185030                 2.185030
0.05 20.0              60               0.90               4            1