# Planet | Capacity Forecast v15 (Stability‑First, Auditable)

This notebook provides a **clean, minimal, and fully auditable** capacity forecasting pipeline.

**Scope**
- **Monthly forecast**: 12 months ahead (department-level, allocated from vertical forecasts)
- **Daily plan**: 90 days ahead (department-level) using a historical **day-of-week (DOW) profile** (monthly forecast remains untouched)
- **Languages**: Only `English, Spanish, Portuguese, French, German, Italian`. Any other language is mapped to **English**.

**Modeling strategy (stability-first)**
1. Aggregate incoming tickets to **monthly volumes per vertical**
2. Forecast each vertical using **ETS (ExponentialSmoothing) only**
3. Apply **vertical level recalibration** (last 3 months actual vs fitted)
4. Allocate vertical forecast to departments using **EWMA shares** (renormalized per vertical-month)
5. Compute **dept accuracy** via a clean rolling backtest (WAPE → Accuracy_staffing_%)

Output Excel is saved under `outputs/`.


In [1]:
# -----------------------------
# 0) Setup (interactive folder selector)
# -----------------------------
import os
from pathlib import Path

BASE_DIR = r"C:\Users\pt3canro\Desktop\CAPACITY"
BASE_DIR = str(Path(BASE_DIR).expanduser().resolve())

INPUT_DIR  = str(Path(BASE_DIR) / "input_model")
OUTPUT_DIR = str(Path(BASE_DIR) / "outputs")
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

INCOMING_SOURCE_PATH = os.path.join(INPUT_DIR, "Incoming_new.xlsx")  # Sheet 'Main'
INCOMING_SHEET = "Main"

DEPT_MAP_PATH = os.path.join(INPUT_DIR, "department.xlsx")
DEPT_MAP_SHEET = "map"

PRODUCTIVITY_PATH = os.path.join(INPUT_DIR, "productivity_agents.xlsx")  # optional

OUTPUT_XLSX = os.path.join(OUTPUT_DIR, "capacity_forecast_v15.xlsx")

# Horizons
H_MONTHS = 12
DAILY_HORIZON_DAYS = 90

# Prediction interval
PI_ALPHA = 0.05  # 95% PI

# Backtest settings
BT_MIN_TRAIN_MONTHS = 12
BT_EVAL_MONTHS = 9
BT_HORIZON_MONTHS = 1
BT_MAX_SPLITS = 9

# Governance
SUPPORTED_LANGUAGES = ["English","Spanish","Portuguese","French","German","Italian"]
DEFAULT_LANGUAGE = "English"

CRITICAL_VERTICALS = ["Payments","Hospitality","Partners"]  # adjust if needed

VERTICAL_LEVEL_ADJ = {
    "enabled": True,
    "lookback_months": 3,
    "clip_min": 0.70,
    "clip_max": 1.10,
}

DEPT_SHARE_EWMA_ALPHA = 0.50

# DOW profile
DOW_LOOKBACK_DAYS = 180
DOW_MIN_OBS = 30
WEEKEND_OPEN_THRESHOLD = 0.05  # if weekend share < 5%, treat dept as closed weekends for daily plan

print("BASE_DIR:", BASE_DIR)
print("INPUT_DIR:", INPUT_DIR)
print("OUTPUT_DIR:", OUTPUT_DIR)
print("Output file:", OUTPUT_XLSX)


BASE_DIR: C:\Users\pt3canro\Desktop\CAPACITY
INPUT_DIR: C:\Users\pt3canro\Desktop\CAPACITY\input_model
OUTPUT_DIR: C:\Users\pt3canro\Desktop\CAPACITY\outputs
Output file: C:\Users\pt3canro\Desktop\CAPACITY\outputs\capacity_forecast_v15.xlsx


In [2]:
# -----------------------------
# 1) Imports
# -----------------------------
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from typing import Optional

from statsmodels.tsa.holtwinters import ExponentialSmoothing


## 2) Data loaders & standardization

In [3]:
def load_incoming(path: str, sheet: str = "Main") -> pd.DataFrame:
    df = pd.read_excel(path, sheet_name=sheet)

    if "Date" not in df.columns:
        for c in ["date", "created_date", "created_at"]:
            if c in df.columns:
                df = df.rename(columns={c: "Date"})
                break
    if "Date" not in df.columns:
        raise ValueError("Incoming_new.xlsx must contain a 'Date' column (or a recognizable alternative).")

    df["Date"] = pd.to_datetime(df["Date"])

    # Ticket weight: if absent, each row is 1 ticket
    if "ticket_total" not in df.columns:
        df["ticket_total"] = 1.0
    else:
        df["ticket_total"] = pd.to_numeric(df["ticket_total"], errors="coerce").fillna(1.0)

    for col in ["department_id","department_name","vertical"]:
        if col not in df.columns:
            df[col] = np.nan

    if "language" not in df.columns:
        df["language"] = DEFAULT_LANGUAGE
    df["language"] = df["language"].fillna(DEFAULT_LANGUAGE).astype(str)
    df.loc[~df["language"].isin(SUPPORTED_LANGUAGES), "language"] = DEFAULT_LANGUAGE

    df["department_id"] = df["department_id"].astype(str)
    return df

def load_dept_map(path: str, sheet: str = "map") -> pd.DataFrame:
    m = pd.read_excel(path, sheet_name=sheet)
    if "department_id" not in m.columns:
        raise ValueError("department.xlsx must contain 'department_id'")
    m["department_id"] = m["department_id"].astype(str)
    if "vertical" not in m.columns and "vertical_name" in m.columns:
        m = m.rename(columns={"vertical_name":"vertical"})
    return m

def load_productivity(path: str) -> pd.DataFrame:
    p = pd.read_excel(path)
    if "Date" not in p.columns:
        raise ValueError("productivity_agents.xlsx must contain 'Date'")
    p["Date"] = pd.to_datetime(p["Date"])
    if "prod_total_model" in p.columns:
        p["prod_total_model"] = pd.to_numeric(p["prod_total_model"], errors="coerce").fillna(0.0)
    else:
        p["prod_total_model"] = 0.0
    for col in ["department_id","department_name"]:
        if col not in p.columns:
            p[col] = np.nan
    p["department_id"] = p["department_id"].astype(str)
    return p


## 3) Monthly aggregates & shares

In [4]:
def monthly_vertical_series(incoming: pd.DataFrame) -> pd.DataFrame:
    d = incoming.copy()
    d["month"] = d["Date"].dt.to_period("M")
    vm = d.groupby(["vertical","month"], as_index=False)["ticket_total"].sum()
    return vm.sort_values(["vertical","month"])

def monthly_dept_series(incoming: pd.DataFrame) -> pd.DataFrame:
    d = incoming.copy()
    d["month"] = d["Date"].dt.to_period("M")
    dm = d.groupby(["vertical","department_id","month"], as_index=False)["ticket_total"].sum()
    return dm.sort_values(["vertical","department_id","month"])

def dept_share_ewma_within_vertical(incoming: pd.DataFrame, alpha: float = 0.5) -> pd.DataFrame:
    dm = monthly_dept_series(incoming)
    vm = monthly_vertical_series(incoming).rename(columns={"ticket_total":"vertical_total"})
    x = dm.merge(vm, on=["vertical","month"], how="left")
    x["share"] = np.where(x["vertical_total"]>0, x["ticket_total"]/x["vertical_total"], 0.0)
    x = x.sort_values(["vertical","department_id","month"])

    x["share_ewma"] = x.groupby(["vertical","department_id"])["share"].transform(
        lambda s: s.ewm(alpha=alpha, adjust=False).mean()
    )
    denom = x.groupby(["vertical","month"])["share_ewma"].transform("sum")
    x["share_final"] = np.where(denom>0, x["share_ewma"]/denom, 0.0)

    return x[["vertical","department_id","month","share_final"]]


## 4) Vertical forecasting (ETS-only) + Level recalibration

In [5]:
def fit_ets_monthly(y_ts: pd.Series):
    y_ts = pd.Series(y_ts).dropna().astype(float).sort_index()
    if len(y_ts) < 6:
        return None, float(y_ts.std()) if len(y_ts) else 0.0

    try:
        seasonal = "add" if len(y_ts) >= 24 else None
        sp = 12 if seasonal else None
        model = ExponentialSmoothing(
            y_ts,
            trend="add",
            seasonal=seasonal,
            seasonal_periods=sp,
            initialization_method="estimated"
        )
        fit = model.fit(optimized=True)
        resid = (y_ts - fit.fittedvalues).dropna()
        sigma = float(resid.std()) if len(resid) else float(y_ts.std())
        return fit, sigma
    except Exception:
        return None, float(y_ts.std()) if len(y_ts) else 0.0

def forecast_vertical_final(incoming: pd.DataFrame, periods: int = 12, alpha: float = 0.05) -> pd.DataFrame:
    vm = monthly_vertical_series(incoming)
    rows = []

    lookback = int(VERTICAL_LEVEL_ADJ["lookback_months"])
    clip_min = float(VERTICAL_LEVEL_ADJ["clip_min"])
    clip_max = float(VERTICAL_LEVEL_ADJ["clip_max"])
    enabled = bool(VERTICAL_LEVEL_ADJ["enabled"])
    z = 1.96  # ~95%

    for v, g in vm.groupby("vertical"):
        y = g.set_index("month")["ticket_total"].sort_index()
        y_ts = y.copy()
        y_ts.index = y_ts.index.to_timestamp()

        fit, sigma = fit_ets_monthly(y_ts)

        last_m = y.index.max()
        future_m = pd.period_range(last_m + 1, periods=periods, freq="M")
        future_idx = future_m.to_timestamp()

        if fit is None:
            last = float(y.iloc[-1]) if len(y) else 0.0
            fc = pd.Series([last]*periods, index=future_idx)
            fitted = pd.Series([last]*len(y_ts), index=y_ts.index)
        else:
            fc_vals = fit.forecast(periods)
            fc = pd.Series(fc_vals.values, index=future_idx)
            fitted = fit.fittedvalues

        fc = fc.clip(lower=0.0)
        p05 = (fc - z*sigma).clip(lower=0.0)
        p95 = (fc + z*sigma).clip(lower=0.0)

        factor = 1.0
        if enabled and (v in CRITICAL_VERTICALS) and len(y_ts) >= lookback:
            actual_last = float(y_ts.tail(lookback).mean())
            fitted_last = float(fitted.tail(lookback).mean()) if len(fitted) else actual_last
            if fitted_last > 0:
                factor = float(np.clip(actual_last / fitted_last, clip_min, clip_max))

        rows.append(pd.DataFrame({
            "vertical": v,
            "month": future_m,
            "forecast_monthly_vertical": (fc*factor).values,
            "forecast_p05_vertical": (p05*factor).values,
            "forecast_p95_vertical": (p95*factor).values,
            "vertical_level_factor": factor,
            "model_used": "ETS"
        }))

    return pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()


## 5) Allocation to department (EWMA shares, renormalized)

In [6]:
def allocate_vertical_to_dept(fc_vertical: pd.DataFrame, shares_vd: pd.DataFrame) -> pd.DataFrame:
    last_share = shares_vd.sort_values("month").groupby(["vertical","department_id"]).tail(1)
    last_share = last_share[["vertical","department_id","share_final"]].rename(columns={"share_final":"share_cf"})

    fc = fc_vertical.copy()
    fc = fc.merge(last_share, on="vertical", how="left")  # expands to depts per vertical
    fc["share_cf"] = fc["share_cf"].fillna(0.0)

    out = fc.assign(
        forecast_monthly_dept = fc["forecast_monthly_vertical"] * fc["share_cf"],
        forecast_p05_dept     = fc["forecast_p05_vertical"]     * fc["share_cf"],
        forecast_p95_dept     = fc["forecast_p95_vertical"]     * fc["share_cf"],
    )
    return out[["vertical","department_id","month",
                "forecast_monthly_dept","forecast_p05_dept","forecast_p95_dept",
                "vertical_level_factor","model_used","share_cf"]]


## 6) Daily plan (90d) using DOW profile (monthly remains untouched)

In [7]:
def compute_dept_dow_profile(incoming: pd.DataFrame, lookback_days: int = 180, min_obs: int = 30) -> pd.DataFrame:
    d = incoming.copy()
    d = d[d["Date"] >= (d["Date"].max() - pd.Timedelta(days=lookback_days))]
    daily = d.groupby(["department_id","Date"], as_index=False)["ticket_total"].sum()
    daily["dow"] = pd.to_datetime(daily["Date"]).dt.dayofweek

    prof = daily.groupby(["department_id","dow"], as_index=False)["ticket_total"].sum()
    tot = prof.groupby("department_id")["ticket_total"].transform("sum")
    prof["dow_share"] = np.where(tot>0, prof["ticket_total"]/tot, 0.0)

    idx = pd.MultiIndex.from_product([prof["department_id"].unique(), range(7)], names=["department_id","dow"])
    prof = prof.set_index(["department_id","dow"]).reindex(idx).reset_index()
    prof["dow_share"] = prof["dow_share"].fillna(0.0)

    obs = daily.groupby("department_id")["Date"].nunique().rename("n_days").reset_index()
    prof = prof.merge(obs, on="department_id", how="left")

    def _fix(g):
        if float(g["n_days"].iloc[0] or 0) < min_obs:
            g["dow_share"] = 0.0
            g.loc[g["dow"].isin([0,1,2,3,4]), "dow_share"] = 0.2
        else:
            s = g["dow_share"].sum()
            if s > 0:
                g["dow_share"] = g["dow_share"] / s
        return g

    prof = prof.groupby("department_id", group_keys=False).apply(_fix)
    return prof[["department_id","dow","dow_share"]]

def daily_plan_from_monthly(fc_dept_monthly: pd.DataFrame, dow_profile: pd.DataFrame,
                            start_date: Optional[pd.Timestamp] = None, horizon_days: int = 90) -> pd.DataFrame:
    if start_date is None:
        start_date = pd.Timestamp.today().normalize()

    end_date = start_date + pd.Timedelta(days=horizon_days-1)
    days = pd.date_range(start_date, end_date, freq="D")

    day_df = pd.DataFrame({"Date": days})
    day_df["month"] = day_df["Date"].dt.to_period("M")
    day_df["dow"] = day_df["Date"].dt.dayofweek

    m = fc_dept_monthly.copy()
    out = m.merge(day_df, on="month", how="right")
    out["department_id"] = out["department_id"].astype(str)

    out = out.merge(dow_profile, on=["department_id","dow"], how="left")
    out["dow_share"] = out["dow_share"].fillna(0.0)

    wk = dow_profile.copy()
    wk["is_weekend"] = wk["dow"].isin([5,6])
    wk_sum = wk.groupby("department_id").apply(lambda g: float(g.loc[g["is_weekend"],"dow_share"].sum())).rename("weekend_share").reset_index()
    out = out.merge(wk_sum, on="department_id", how="left")
    out["weekend_share"] = out["weekend_share"].fillna(0.0)

    closed = out["weekend_share"] < WEEKEND_OPEN_THRESHOLD
    out.loc[closed & out["dow"].isin([5,6]), "dow_share"] = 0.0

    denom = out.groupby(["department_id","month"])["dow_share"].transform("sum")
    out["dow_share_adj"] = np.where(denom>0, out["dow_share"]/denom, 0.0)

    out["forecast_daily_dept"] = out["forecast_monthly_dept"] * out["dow_share_adj"]
    out["p05_daily_dept"]      = out["forecast_p05_dept"]     * out["dow_share_adj"]
    out["p95_daily_dept"]      = out["forecast_p95_dept"]     * out["dow_share_adj"]

    return out[["Date","vertical","department_id","month","dow",
                "forecast_daily_dept","p05_daily_dept","p95_daily_dept",
                "forecast_monthly_dept","forecast_p05_dept","forecast_p95_dept"]]


## 7) Backtest (clean, same pipeline, WAPE → Accuracy_staffing_%)

In [8]:
def compute_wape(y_true: pd.Series, y_pred: pd.Series) -> float:
    y_true = pd.Series(y_true).astype(float)
    y_pred = pd.Series(y_pred).astype(float)
    denom = float(y_true.sum())
    if denom <= 0:
        return np.nan
    return float(np.abs(y_true - y_pred).sum() / denom)

def backtest_dept_accuracy(
    incoming: pd.DataFrame,
    min_train_months: int = 12,
    eval_months: int = 9,
    horizon_months: int = 1,
    max_splits: int = 9,
) -> pd.DataFrame:

    inc = incoming.copy()
    inc["Date"] = pd.to_datetime(inc["Date"], errors="coerce")
    inc = inc.dropna(subset=["Date"])
    inc["month"] = inc["Date"].dt.to_period("M")

    all_months = sorted(inc["month"].unique())
    if len(all_months) < (min_train_months + horizon_months + 1):
        return pd.DataFrame()

    eval_targets = all_months[-eval_months:]
    splits = eval_targets[-max_splits:]
    results = []

    # cache dept->vertical
    dept_vertical = None
    if "vertical" in inc.columns:
        dept_vertical = (
            inc[["department_id", "vertical"]]
            .drop_duplicates("department_id")
            .copy()
        )

    for target_month in splits:

        train_end = target_month - horizon_months
        train = inc[inc["month"] <= train_end].copy()
        test = inc[inc["month"] == target_month].copy()

        if train["month"].nunique() < min_train_months:
            continue

        shares_vd = dept_share_ewma_within_vertical(
            train, alpha=DEPT_SHARE_EWMA_ALPHA
        )

        fc_vert = forecast_vertical_final(
            train, periods=horizon_months, alpha=PI_ALPHA
        ).copy()

        if not pd.api.types.is_period_dtype(fc_vert["month"]):
            fc_vert["month"] = pd.to_datetime(fc_vert["month"]).dt.to_period("M")

        fc_vert = fc_vert[fc_vert["month"] == target_month]
        if fc_vert.empty:
            continue

        fc_dept = allocate_vertical_to_dept(fc_vert, shares_vd)

        # Ensure vertical in fc_dept
        if "vertical" not in fc_dept.columns:
            if "vertical" in shares_vd.columns:
                vd = shares_vd[["department_id", "vertical"]].drop_duplicates()
                fc_dept = fc_dept.merge(vd, on="department_id", how="left")

        if "vertical" not in fc_dept.columns and dept_vertical is not None:
            fc_dept = fc_dept.merge(dept_vertical, on="department_id", how="left")

        if "vertical" not in fc_dept.columns:
            fc_dept["vertical"] = "Unknown"

        fc_dept["vertical"] = (
            fc_dept["vertical"].fillna("Unknown").astype(str)
        )

        # Ensure vertical in test
        if "vertical" not in test.columns and dept_vertical is not None:
            test = test.merge(dept_vertical, on="department_id", how="left")

        if "vertical" not in test.columns:
            test["vertical"] = "Unknown"

        test["vertical"] = test["vertical"].fillna("Unknown").astype(str)

        actual = (
            test.groupby(["vertical", "department_id"], as_index=False)[
                "ticket_total"
            ]
            .sum()
            .rename(columns={"ticket_total": "actual"})
        )

        pred = (
            fc_dept.groupby(["vertical", "department_id"], as_index=False)[
                "forecast_monthly_dept"
            ]
            .sum()
            .rename(columns={"forecast_monthly_dept": "forecast"})
        )

        m = actual.merge(
            pred, on=["vertical", "department_id"], how="outer"
        ).fillna(0.0)

        m["month"] = target_month
        results.append(m)

    if not results:
        return pd.DataFrame()

    bt = pd.concat(results, ignore_index=True)

    def _metrics(g: pd.DataFrame) -> pd.Series:
        y = g["actual"].values.astype(float)
        yhat = g["forecast"].values.astype(float)

        mae = float(np.mean(np.abs(y - yhat)))
        bias = (
            float((yhat.sum() - y.sum()) / y.sum() * 100)
            if y.sum() > 0
            else np.nan
        )
        wape = (
            compute_wape(y, yhat) * 100
            if y.sum() > 0
            else np.nan
        )
        acc = (
            max(0.0, 100.0 - wape)
            if np.isfinite(wape)
            else np.nan
        )

        return pd.Series(
            {
                "MAE": mae,
                "Bias_%": bias,
                "WAPE_%": wape,
                "Accuracy_staffing_%": acc,
            }
        )

    out = (
        bt.groupby(["vertical", "department_id"])
        .apply(_metrics)
        .reset_index()
    )

    out["Eval_Months"] = (
        bt.groupby(["vertical", "department_id"])["month"]
        .nunique()
        .values
    )

    return out


## 8) Run end-to-end pipeline

In [9]:
# Load data
incoming = load_incoming(INCOMING_SOURCE_PATH, INCOMING_SHEET)
mapping = load_dept_map(DEPT_MAP_PATH, DEPT_MAP_SHEET)

incoming = incoming.merge(mapping, on="department_id", how="left", suffixes=("","_map"))



# -----------------------------
# SAFE COALESCE: vertical + department_name (v15.1)
# Guarantees columns exist and prevents KeyError: 'vertical'
# -----------------------------
if "vertical" not in incoming.columns:
    incoming["vertical"] = pd.NA
if "department_name" not in incoming.columns:
    incoming["department_name"] = pd.NA

if "vertical_map" in incoming.columns:
    incoming["vertical"] = incoming["vertical_map"].combine_first(incoming["vertical"])
if "department_name_map" in incoming.columns:
    incoming["department_name"] = incoming["department_name_map"].combine_first(incoming["department_name"])

incoming["vertical"] = incoming["vertical"].fillna("Unknown").astype(str)
incoming["department_name"] = incoming["department_name"].fillna("Unknown").astype(str)

if "vertical_map" in incoming.columns:
    incoming["vertical"] = incoming["vertical_map"].combine_first(incoming["vertical"])
if "department_name_map" in incoming.columns:
    incoming["department_name"] = incoming["department_name_map"].combine_first(incoming["department_name"])

incoming["vertical"] = incoming["vertical"].fillna("Unknown").astype(str)
incoming["department_name"] = incoming["department_name"].fillna("Unknown").astype(str)

print("Incoming rows:", len(incoming))
print("Verticals:", sorted(incoming["vertical"].unique().tolist()))
print("Departments:", incoming["department_id"].nunique())

# Shares
shares_vd = dept_share_ewma_within_vertical(incoming, alpha=DEPT_SHARE_EWMA_ALPHA)
san = shares_vd.groupby(["vertical","month"])["share_final"].sum().reset_index(name="share_sum")
print("Share sum (min/max):", float(san["share_sum"].min()), float(san["share_sum"].max()))

# Forecasts
fc_vertical = forecast_vertical_final(incoming, periods=H_MONTHS, alpha=PI_ALPHA)
fc_dept = allocate_vertical_to_dept(fc_vertical, shares_vd).merge(mapping, on="department_id", how="left")

# -----------------------------
# v15.1 GUARDAESPALDAS: fc_dept SIEMPRE con 'vertical'
# -----------------------------
if "vertical" not in fc_dept.columns:
    # 1) Preferencia: traértelo desde shares_vd (vertical-dept real del share)
    if "vertical" in shares_vd.columns:
        vd = shares_vd[["department_id", "vertical"]].drop_duplicates("department_id")
        fc_dept = fc_dept.merge(vd, on="department_id", how="left")

# 2) Si aún no está, fallback a mapping
if "vertical" not in fc_dept.columns:
    if "vertical" in mapping.columns:
        fc_dept = fc_dept.merge(
            mapping[["department_id", "vertical"]],
            on="department_id",
            how="left",
            suffixes=("", "_map")
        )

# 3) Normaliza nombre si quedó como vertical_map
if "vertical" not in fc_dept.columns and "vertical_map" in fc_dept.columns:
    fc_dept["vertical"] = fc_dept["vertical_map"]

# 4) Último recurso
if "vertical" not in fc_dept.columns:
    fc_dept["vertical"] = "Unknown"

fc_dept["vertical"] = fc_dept["vertical"].fillna("Unknown").astype(str)

# Daily plan
dow_profile = compute_dept_dow_profile(incoming, lookback_days=DOW_LOOKBACK_DAYS, min_obs=DOW_MIN_OBS)
needed = ["vertical","department_id","month","forecast_monthly_dept","forecast_p05_dept","forecast_p95_dept"]
missing = [c for c in needed if c not in fc_dept.columns]
if missing:
    raise ValueError(f"fc_dept missing columns for daily plan: {missing}. Columns={list(fc_dept.columns)}")

daily_plan = daily_plan_from_monthly(
    fc_dept_monthly=fc_dept[needed],
    dow_profile=dow_profile,
    start_date=pd.Timestamp.today().normalize(),
    horizon_days=DAILY_HORIZON_DAYS
).merge(mapping, on="department_id", how="left")


# --- v15.1: asegurar vertical en daily_plan ---
if "vertical" not in daily_plan.columns:
    if "vertical" in fc_dept.columns:
        daily_plan = daily_plan.merge(
            fc_dept[["department_id", "vertical"]].drop_duplicates("department_id"),
            on="department_id",
            how="left",
        )
    elif "vertical" in mapping.columns:
        daily_plan = daily_plan.merge(
            mapping[["department_id", "vertical"]],
            on="department_id",
            how="left",
            suffixes=("", "_map"),
        )
        if "vertical_map" in daily_plan.columns:
            daily_plan["vertical"] = daily_plan.get("vertical").fillna(daily_plan["vertical_map"])

if "vertical" not in daily_plan.columns:
    daily_plan["vertical"] = "Unknown"

daily_plan["vertical"] = daily_plan["vertical"].fillna("Unknown").astype(str)

# Backtest
acc_dept = backtest_dept_accuracy(
    incoming,
    min_train_months=BT_MIN_TRAIN_MONTHS,
    eval_months=BT_EVAL_MONTHS,
    horizon_months=BT_HORIZON_MONTHS,
    max_splits=BT_MAX_SPLITS
)

# Merge mapping (keep dept metadata) without breaking vertical
acc_dept = acc_dept.merge(mapping, on="department_id", how="left", suffixes=("", "_map"))

# Consolidate vertical if merge created duplicates
if "vertical" not in acc_dept.columns:
    if "vertical_x" in acc_dept.columns or "vertical_y" in acc_dept.columns:
        vx = acc_dept["vertical_x"] if "vertical_x" in acc_dept.columns else pd.Series([pd.NA]*len(acc_dept))
        vy = acc_dept["vertical_y"] if "vertical_y" in acc_dept.columns else pd.Series([pd.NA]*len(acc_dept))
        acc_dept["vertical"] = vx.fillna(vy)
    elif "vertical_map" in acc_dept.columns:
        acc_dept["vertical"] = acc_dept["vertical_map"]

if "vertical" not in acc_dept.columns:
    acc_dept["vertical"] = "Unknown"

acc_dept["vertical"] = acc_dept["vertical"].fillna("Unknown").astype(str)

# Clean duplicates if present
drop_cols = [c for c in ["vertical_x", "vertical_y", "vertical_map"] if c in acc_dept.columns]
if drop_cols:
    acc_dept = acc_dept.drop(columns=drop_cols)

acc_dept = acc_dept.sort_values(["vertical", "department_id"]) if not acc_dept.empty else acc_dept

print("\nVertical factors (sanity):")
display(fc_vertical.groupby("vertical", as_index=False)["vertical_level_factor"].mean().sort_values("vertical_level_factor"))

print("\nAccuracy table (top 15 rows):")
display(acc_dept.head(15))


Incoming rows: 478923
Verticals: ['Hospitality', 'Partners', 'Payments']
Departments: 21
Share sum (min/max): 0.9999999999999999 1.0000000000000002


  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction


Vertical factors (sanity):


  return get_prediction_index(


Unnamed: 0,vertical,vertical_level_factor
0,Hospitality,0.83387
2,Payments,0.856383
1,Partners,1.031163



Accuracy table (top 15 rows):


Unnamed: 0,vertical,department_id,MAE,Bias_%,WAPE_%,Accuracy_staffing_%,Eval_Months,department_name,department_group,team_hierarchy
0,Hospitality,10,143.645609,20.133689,25.156849,74.843151,9,CS_PMSP_PREM_L2,Planet Hospitality - PMS,Protel PMS
1,Hospitality,11,123.691947,27.222585,37.169533,62.830467,9,CS_PMSP_CLOUD_L2,Planet Hospitality - PMS,Protel PMS
2,Hospitality,23,149.121024,10.92539,23.620014,76.379986,9,CS_PMSP_FRANCE,Planet Hospitality - PMS,Protel PMS
3,Hospitality,4,89.714356,4249.627394,4249.627394,0.0,9,CS_PMSP_DIST,Planet Hospitality - PMS,Protel PMS
4,Hospitality,44,1.331229,1198.106173,1198.106173,0.0,9,CS_PMSP_PROJ,Planet Partners Support,Protel PMS
5,Hospitality,5,255.764087,18.269768,20.108996,79.891004,9,CS_PMSP_INTEG,Planet Hospitality - PMS,Protel PMS
6,Hospitality,6,125.355426,7.761858,127.9137,0.0,9,CS_PMSP_KEY,Planet Hospitality - PMS,Protel PMS
7,Hospitality,7,249.307922,5.879805,17.370684,82.629316,9,CS_PMSH_L1,Planet Hospitality - PMS,Hoist PMS
8,Hospitality,8,132.800738,14.117133,19.619282,80.380718,9,CS_PMSP_CLOUD_L1,Planet Hospitality - PMS,Protel PMS
9,Hospitality,9,165.429111,8.934268,12.553643,87.446357,9,CS_PMSP_PREM_L1,Planet Hospitality - PMS,Protel PMS


## 9) Export to Excel

In [10]:
with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as w:
    fc_vertical.sort_values(["vertical","month"]).to_excel(w, "forecast_vertical_monthly", index=False)
    fc_dept.sort_values(["vertical","department_id","month"]).to_excel(w, "forecast_dept_monthly", index=False)
    daily_plan.sort_values(["vertical","department_id","Date"]).to_excel(w, "daily_plan_90d", index=False)
    shares_vd.sort_values(["vertical","department_id","month"]).to_excel(w, "dept_share_ewma", index=False)
    dow_profile.sort_values(["department_id","dow"]).to_excel(w, "dept_dow_profile", index=False)
    acc_dept.sort_values(["vertical","department_id"]).to_excel(w, "accuracy_dept_monthly", index=False)

print("Saved:", OUTPUT_XLSX)


Saved: C:\Users\pt3canro\Desktop\CAPACITY\outputs\capacity_forecast_v15.xlsx
