## Forecast departments according to incoming


________________________________________
Incoming tickets daily forecasting and staffing plan by language
Author: Antonio Romero (Continuous Improvement Lead)
Version: Forecast v4_20260113

What this script does:
1) Loads incoming tickets, keeps the target verticals, and builds a DAILY series per vertical.
2) Fills missing days, handles simple outliers (optional), and trains a seasonal model (m=7).
3) Produces a bi-annual (26 weeks) DAILY forecast per vertical.
4) Deterministically allocates daily totals to languages using Hamilton (largest remainder).
5) Converts tickets/day/language into agents needed given AHT, occupancy, and shrinkage.
6) Provides optional weekly (Thu–Wed) views for reporting consistency.
"""

## 0. Import libraries

In [31]:

# Cell 1: Imports & Setup

from pathlib import Path
import warnings
import numpy as np
import pandas as pd
from typing import Dict, Tuple

# Prophet import (supports both modern and legacy names)
try:
    from prophet import Prophet
except Exception:
    from fbprophet import Prophet  # fallback if older package is installed

# Optional visuals (off by default)
import plotly.express as px
import plotly.graph_objects as go

warnings.filterwarnings("ignore")



## 1. Configuration and statements

In [32]:

CONFIG = {
    # --- Paths (adjust to your environment) ---
    "incoming_path": r"C:\Users\pt3canro\Desktop\CAPACITY\Incoming_new.xlsx",
    "call_perf_path": r"C:\Users\pt3canro\Desktop\CAPACITY\call_performance.xlsx",
    "einstein_path": r"C:\Users\pt3canro\Desktop\CAPACITY\einstein.xlsx",

    # --- Business scope ---
    "verticals": ["Hospitality", "Payments", "Partners"],  # optional filter; set [] for all

    # --- Forecast horizon (bi-annual ≈ 26 weeks) ---
    "horizon_days": 26 * 7,

    # --- Test window for evaluation (rolling) ---
    "test_size_days": 28,

    # --- Language shares (fractions sum ≈ 1.0) ---
    "language_shares": {
        "English": 0.6435,
        "French": 0.0741,
        "German": 0.0860,
        "Italian": 0.0667,
        "Portuguese": 0.0162,
        "Spanish": 0.1135
    },

    # --- Default ticket AHT by language (seconds) (fallback only) ---
    "aht_ticket_lang_sec": {
        "English": 600, "French": 660, "German": 660,
        "Italian": 630, "Portuguese": 600, "Spanish": 620
    },

    # --- Convert call AHT -> ticket AHT (calls usually shorter than tickets) ---
    "ticket_over_call": 1.35,     # multiplier (tune if needed)
    "blend_alpha_prod": 0.70,     # if later you blend with productivity AHT (prod has weight α)

    # --- Staffing parameters ---
    "work_hours_effective": 7.0,
    "occupancy_target": 0.85,
    "shrinkage": 0.30,

    # --- Prophet & robustness ---
    "spike_window": 7,
    "spike_z": 6.0,
    "cap_extremes": True,
    "cap_quantile": 0.99,     # cap training series at this quantile (per vertical)
    "clip_quantile": 0.995,   # clip future forecasts to plausible upper bound
    "holiday_countries": ["ES","UK","US","DE","FR","IT","PT"],

    # --- FTE planning scenarios (coverage ratio of agents_needed baseline) ---
    "fte_scenarios": [0.8, 0.9, 1.0, 1.1],

    # --- Output dir ---
    "export_dir": Path("outputs")
}


## 2. Cleaning and Utilities

In [33]:

def audit_dataframe(df: pd.DataFrame) -> dict:
    """Quick audit for shape, types, nulls, duplicates."""
    return {
        "rows": len(df),
        "cols": df.shape[1],
        "dtypes": df.dtypes.astype(str).to_dict(),
        "nulls": df.isnull().sum().sort_values(ascending=False).to_dict(),
        "duplicates": int(df.duplicated().sum())
    }

def ensure_dir(path: Path):
    """Create dir if missing."""
    path.mkdir(parents=True, exist_ok=True)

def allocate_by_language(total: int, shares: dict) -> dict:
    """Deterministic rounding using Hamilton method (largest remainder)."""
    total = int(total) if pd.notna(total) else 0
    if total <= 0:
        return {k: 0 for k in shares.keys()}
    ssum = float(sum(shares.values()))
    if not (0.99 <= ssum <= 1.01):
        shares = {k: float(v) / ssum for k, v in shares.items()}
    raw = {k: total * float(v) for k, v in shares.items()}
    base = {k: int(np.floor(v)) for k, v in raw.items()}
    remainder = int(total - sum(base.values()))
    if remainder > 0:
        fracs = sorted(((k, raw[k] - base[k]) for k in shares.keys()),
                       key=lambda x: x[1], reverse=True)
        for k, _ in fracs[:remainder]:
            base[k] += 1
    return base

def agents_needed(tickets: int, aht_sec: int,
                  work_hours_effective: float,
                  occupancy: float,
                  shrinkage: float) -> int:
    """Agents/day using ticket AHT, occupancy, and shrinkage."""
    work_sec = work_hours_effective * 3600.0
    base = (tickets * aht_sec) / (work_sec * occupancy) if work_sec > 0 and occupancy > 0 else 0
    return int(np.ceil(base) * (1.0 + shrinkage))

# Robust metrics and baselines
def smape(y_true, y_pred, eps=1e-8):
    y_true = np.array(y_true, dtype=float)
    y_pred = np.array(y_pred, dtype=float)
    denom = (np.abs(y_true) + np.abs(y_pred)) + eps
    return float(np.mean(2.0 * np.abs(y_pred - y_true) / denom) * 100.0)

def mape_masked(y_true, y_pred, eps=1e-8):
    y_true = np.array(y_true, dtype=float)
    y_pred = np.array(y_pred, dtype=float)
    mask = y_true > 0
    if mask.sum() == 0:
        return np.nan
    return float(np.mean(np.abs((y_true[mask] - y_pred[mask]) / (y_true[mask] + eps))) * 100.0)

def seasonal_naive_forecast(train: pd.Series, horizon_days: int) -> np.ndarray:
    """Seasonal naive: repeat last 7-day pattern."""
    train = train.sort_index().astype(float)
    if len(train) < 8:
        last = train.iloc[-1] if len(train) else 0.0
        return np.full(horizon_days, last, dtype=float)
    last_week = train.iloc[-7:].values
    reps = (horizon_days + 6) // 7
    return np.tile(last_week, reps)[:horizon_days]

def moving_median_forecast(train: pd.Series, horizon_days: int, window: int = 7) -> np.ndarray:
    """Rolling median baseline."""
    train = train.sort_index().astype(float)
    med = float(train.rolling(window, min_periods=1).median().iloc[-1]) if len(train) else 0.0
    return np.full(horizon_days, med, dtype=float)

# Regressors for Prophet (weekend, month start/end, spike)
def detect_spikes_robust(ts: pd.Series, window: int = 7, z: float = 6.0) -> pd.Series:
    """Robust spike detection around a rolling median using MAD."""
    ts = ts.astype(float)
    rm = ts.rolling(window=window, center=True, min_periods=max(3, window//2)).median()
    mad = (np.abs(ts - rm)).rolling(window=window, center=True,
                                    min_periods=max(3, window//2)).median() * 1.4826
    mad = mad.replace(0, np.nan)
    robust_z = (ts - rm) / mad
    spikes = (np.abs(robust_z) >= z).astype(int)
    return spikes.fillna(0).astype(int)

def build_regressors(date_index: pd.DatetimeIndex, base_series: pd.Series,
                     window: int, z: float) -> pd.DataFrame:
    """Build exogenous regressors for Prophet."""
    reg = pd.DataFrame({"ds": pd.to_datetime(date_index)})
    reg["is_weekend"] = reg["ds"].dt.weekday.isin([5, 6]).astype(int)
    reg["is_month_start"] = reg["ds"].dt.is_month_start.astype(int)
    reg["is_month_end"] = reg["ds"].dt.is_month_end.astype(int)
    tmp = pd.Series(base_series.values, index=pd.to_datetime(base_series.index))
    spikes = detect_spikes_robust(tmp, window=window, z=z).reindex(reg["ds"]).fillna(0).astype(int)
    reg["spike"] = spikes.values
    return reg

# Prophet robust training (grid + baseline fallback)
def prophet_forecast_with_regressors(
    daily_v: pd.DataFrame,
    horizon_days: int,
    test_days: int,
    countries: list,
    spike_window: int,
    spike_z: float,
    cap_extremes: bool,
    cap_q: float,
    grid_cp=(0.3, 0.5, 0.8),
    grid_seas=(5.0, 10.0, 15.0),
    clip_quantile=0.995,
    use_smape=True
):
    """
    Train Prophet with regressors (weekend, month flags, spike) for incoming total.
    Compare vs seasonal-naive baseline; fallback to baseline if no gain.
    Returns (forecast_df, meta).
    """
    # Prepare daily series
    series = (daily_v.set_index("Date")["tickets"]
                      .asfreq("D").fillna(0).astype(float))
    series[series < 0] = 0.0

    # Cap extremes to reduce explosive behavior
    if cap_extremes and len(series) > 0:
        cap_val = float(series.quantile(cap_q))
        series = series.clip(upper=cap_val)

    # Split
    test_days = int(test_days)
    if len(series) > test_days:
        train = series.iloc[:-test_days]
        test = series.iloc[-test_days:]
    else:
        train, test = series, pd.Series(dtype=float)

    # Baseline on test
    if not test.empty:
        bl_pred = seasonal_naive_forecast(train, len(test))
        bl_rmse = float(np.sqrt(np.mean((test.values - bl_pred)**2)))
        bl_smape = smape(test.values, bl_pred)
    else:
        bl_rmse = bl_smape = np.nan

    # If insufficient history, return baseline for future
    if len(train) < 8:
        fc_baseline = seasonal_naive_forecast(train, horizon_days)
        fc_idx = pd.date_range(start=series.index[-1] + pd.Timedelta(days=1),
                               periods=horizon_days, freq="D")
        fc = np.clip(fc_baseline, 0, float(train.quantile(clip_quantile)) if len(train) else 0)
        fc = np.maximum(0, np.round(fc).astype(int))
        return (pd.DataFrame({"Date": fc_idx, "tickets": fc}),
                {"rmse": bl_rmse, "smape": bl_smape, "model": None, "chosen": "baseline"})

    # Train regressors
    reg_train = build_regressors(train.index, train, window=spike_window, z=spike_z)

    # Grid Prophet
    best_model, best_score, best_meta = None, np.inf, {}
    for cp in grid_cp:
        for seas in grid_seas:
            # Train DF
            train_df = reg_train.copy()
            train_df["y"] = np.log1p(train.values)
            train_df = train_df.dropna(subset=["y"])
            if len(train_df) < 2:
                continue

            m = Prophet(
                growth="linear",
                changepoint_prior_scale=float(cp),
                seasonality_prior_scale=float(seas),
                seasonality_mode="additive",
                weekly_seasonality=True,
                yearly_seasonality=True,
                daily_seasonality=False
            )
            for c in countries:
                try:
                    m.add_country_holidays(country_name=c)
                except Exception:
                    pass
            for reg in ["is_weekend", "is_month_start", "is_month_end", "spike"]:
                m.add_regressor(reg, mode="additive")
            m.fit(train_df)

            # Evaluate
            if not test.empty:
                test_reg = build_regressors(test.index, series, window=spike_window, z=spike_z)
                yhat_test = m.predict(test_reg)["yhat"].values
                test_pred = np.expm1(yhat_test)
                rmse = float(np.sqrt(np.mean((test.values - test_pred)**2)))
                s = smape(test.values, test_pred)
                score = s if use_smape else rmse
            else:
                rmse = s = score = np.nan

            if not np.isnan(score) and score < best_score:
                best_score = score
                best_model = m
                best_meta = {"rmse": rmse, "smape": s, "cp": cp, "seas": seas}

    # Decide model vs baseline
    prophet_better = best_model is not None and not np.isnan(best_score) and not np.isnan(bl_smape) and (best_score <= bl_smape * 0.95)

    # Future regressors and forecast
    future_dates = pd.date_range(start=series.index[-1] + pd.Timedelta(days=1),
                                 periods=horizon_days, freq="D")
    future_reg = build_regressors(future_dates, series, window=spike_window, z=spike_z)
    future_reg["spike"] = 0

    if (best_model is None) or (not prophet_better):
        fc_baseline = seasonal_naive_forecast(train, horizon_days)
        ub = float(train.quantile(clip_quantile))
        fc = np.clip(fc_baseline, 0, ub)
        fc = np.maximum(0, np.round(fc).astype(int))
        return (pd.DataFrame({"Date": future_reg["ds"].values, "tickets": fc}),
                {"rmse": bl_rmse, "smape": bl_smape, "model": None, "chosen": "baseline"})

    fc_df = best_model.predict(future_reg)
    fc = np.expm1(fc_df["yhat"].values)
    ub = float(train.quantile(clip_quantile))
    fc = np.clip(fc, 0, ub)
    fc = np.maximum(0, np.round(fc).astype(int))
    return (pd.DataFrame({"Date": future_reg["ds"].values, "tickets": fc}),
            {"rmse": float(best_meta["rmse"]), "smape": float(best_meta["smape"]),
             "model": best_model, "chosen": f"prophet(cp={best_meta['cp']}, seas={best_meta['seas']})"})


## 3. Load and prepare data

In [34]:

INCOMING_PATH = CONFIG["incoming_path"]
df = pd.read_excel(INCOMING_PATH)

# Basic schema expectation
expected = {"Date", "vertical", "total_incoming"}
missing = expected - set(df.columns)
if missing:
    raise ValueError(f"Missing columns in Incoming_new.xlsx: {missing}")

# Filter verticals if configured
if CONFIG["verticals"]:
    df = df[df["vertical"].isin(CONFIG["verticals"])].copy()

# Build daily by vertical
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df = df.dropna(subset=["Date"]).sort_values("Date")

daily = (
    df.groupby(["vertical", pd.Grouper(key="Date", freq="D")])["total_incoming"]
      .sum()
      .rename("tickets")
      .reset_index()
)

# Fill missing days (safe)
daily = (
    daily.set_index("Date")
         .groupby("vertical", group_keys=False)
         .apply(lambda g: g.asfreq("D").fillna({"tickets": 0}))
         .reset_index()
)

# Clean types
daily["tickets"] = daily["tickets"].fillna(0).clip(lower=0).round().astype(int)
daily["vertical"] = daily["vertical"].astype(str).str.strip()

print("Audit incoming daily:", audit_dataframe(daily))
display(daily.tail())


Audit incoming daily: {'rows': 1131, 'cols': 3, 'dtypes': {'Date': 'datetime64[ns]', 'vertical': 'object', 'tickets': 'int64'}, 'nulls': {'Date': 0, 'vertical': 0, 'tickets': 0}, 'duplicates': 0}


Unnamed: 0,Date,vertical,tickets
1126,2026-01-08,Payments,350
1127,2026-01-09,Payments,346
1128,2026-01-10,Payments,94
1129,2026-01-11,Payments,94
1130,2026-01-12,Payments,401


## 4. Prophet forecast (incoming total) per vertical

In [35]:
results, metrics = [], {}
for v in sorted([x for x in daily["vertical"].unique() if isinstance(x, str)]):
    g = daily.loc[daily["vertical"] == v, ["Date", "tickets"]].copy()
    fc, meta = prophet_forecast_with_regressors(
        daily_v=g,
        horizon_days=CONFIG["horizon_days"],
        test_days=CONFIG["test_size_days"],
        countries=CONFIG["holiday_countries"],
        spike_window=CONFIG["spike_window"],
        spike_z=CONFIG["spike_z"],
        cap_extremes=CONFIG["cap_extremes"],
        cap_q=CONFIG["cap_quantile"],
        grid_cp=(0.3, 0.5, 0.8),
        grid_seas=(5.0, 10.0, 15.0),
        clip_quantile=CONFIG["clip_quantile"],
        use_smape=True
    )
    fc["vertical"] = v
    results.append(fc)
    metrics[v] = meta
    print(f"[{v}] chosen={meta.get('chosen','n/a')} | RMSE={meta.get('rmse',np.nan):.2f} | sMAPE={meta.get('smape',np.nan):.2f}%")

forecast_daily = pd.concat(results, ignore_index=True)
forecast_daily.rename(columns={"tickets":"tickets_total"}, inplace=True)

display(forecast_daily.head())


Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
09:17:01 - cmdstanpy - INFO - Chain [1] start processing
09:17:01 - cmdstanpy - INFO - Chain [1] done processing
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
09:17:01 - cmdstanpy - INFO - Chain [1] start processing
09:17:02 - cmdstanpy - INFO - Chain [1] done processing
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing 

[Hospitality] chosen=baseline | RMSE=151.73 | sMAPE=43.34%


09:17:05 - cmdstanpy - INFO - Chain [1] done processing
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
09:17:05 - cmdstanpy - INFO - Chain [1] start processing
09:17:05 - cmdstanpy - INFO - Chain [1] done processing
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
09:17:05 - cmdstanpy - INFO - Chain [1] start processing
09:17:05 - cmdstanpy - INFO - Chain [1] done processing
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'

[Partners] chosen=baseline | RMSE=22.15 | sMAPE=41.73%


09:17:08 - cmdstanpy - INFO - Chain [1] done processing
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
09:17:08 - cmdstanpy - INFO - Chain [1] start processing
09:17:08 - cmdstanpy - INFO - Chain [1] done processing
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
09:17:08 - cmdstanpy - INFO - Chain [1] start processing
09:17:09 - cmdstanpy - INFO - Chain [1] done processing
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'

[Payments] chosen=baseline | RMSE=110.53 | sMAPE=33.34%


Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing country holidays from 'FR' to 'IT'.
Changing country holidays from 'IT' to 'PT'.
Changing country holidays from 'ES' to 'UK'.
Changing country holidays from 'UK' to 'US'.
Changing country holidays from 'US' to 'DE'.
Changing country holidays from 'DE' to 'FR'.
Changing c

[nan] chosen=prophet(cp=0.3, seas=5.0) | RMSE=0.00 | sMAPE=0.00%


Unnamed: 0,Date,tickets_total,vertical
0,2026-01-13,586,Hospitality
1,2026-01-14,321,Hospitality
2,2026-01-15,358,Hospitality
3,2026-01-16,351,Hospitality
4,2026-01-17,95,Hospitality


## 5. Allocate forecast to languages determinalistically (Hamilton)

In [36]:

alloc_rows = []
tmp = forecast_daily[["Date","vertical","tickets_total"]].copy()
tmp["tickets_total"] = tmp["tickets_total"].fillna(0).clip(lower=0).round().astype(int)

for _, row in tmp.iterrows():
    alloc = allocate_by_language(int(row["tickets_total"]), CONFIG["language_shares"])
    for lang, t in alloc.items():
        alloc_rows.append({
            "Date": row["Date"], "vertical": row["vertical"],
            "language": lang, "tickets": int(t)
        })

forecast_daily_lang = pd.DataFrame(alloc_rows)
display(forecast_daily_lang.head())


Unnamed: 0,Date,vertical,language,tickets
0,2026-01-13,Hospitality,English,377
1,2026-01-13,Hospitality,French,43
2,2026-01-13,Hospitality,German,50
3,2026-01-13,Hospitality,Italian,39
4,2026-01-13,Hospitality,Portuguese,10


## 6. Load call_performance.xlsx and derive ticket AHT per language

In [37]:
CALL_PATH = CONFIG["call_perf_path"]
cp_raw = pd.read_excel(CALL_PATH)

# Updated column mapping based on your real file:
# call_date        -> Date
# language_name    -> language
# AHT_sec or aht   -> aht_seconds
colmap = {
    "call_date": "Date",
    "language_name": "language",
    "AHT_sec": "aht_seconds",
    "aht": "aht_seconds"
}

# Apply renaming only if columns exist
cp_raw.rename(columns={k: v for k, v in colmap.items() if k in cp_raw.columns}, inplace=True)

# Validate required columns exist after rename
required_cols = {"Date", "language", "aht_seconds"}
missing = required_cols - set(cp_raw.columns)
if missing:
    raise ValueError(f"Missing expected columns after renaming: {missing}")

# Parse dates
cp_raw["Date"] = pd.to_datetime(cp_raw["Date"], errors="coerce")
cp_raw = cp_raw.dropna(subset=["Date","language","aht_seconds"]).copy()

# Clean and normalize language
cp_raw["language"] = cp_raw["language"].astype(str).str.strip()

# Ensure numeric AHT
cp_raw["aht_seconds"] = pd.to_numeric(cp_raw["aht_seconds"], errors="coerce")
cp_raw = cp_raw[cp_raw["aht_seconds"].notna()]

# Clamp AHT for calls to reasonable range (10s .. 3600s)
cp_raw["aht_seconds"] = cp_raw["aht_seconds"].clip(10, 3600)

# Compute median daily AHT per language
cp_lang_daily = (cp_raw
                 .groupby(["Date","language"], as_index=False)
                 .agg(aht_calls_sec=("aht_seconds","median")))

# Smooth AHT with rolling median (14-day)
cp_lang_daily = cp_lang_daily.sort_values(["language","Date"])
cp_lang_daily["aht_calls_sm"] = (
    cp_lang_daily
    .groupby("language")["aht_calls_sec"]
    .transform(lambda s: s.rolling(14, min_periods=5).median())
)

# Fallback if smoothing fails
cp_lang_daily["aht_calls_sm"] = cp_lang_daily["aht_calls_sm"].fillna(cp_lang_daily["aht_calls_sec"])

# Clamp to reasonable ticket analysis bounds (3 min .. 60 min) = 180s .. 3600s
cp_lang_daily["aht_calls_sm"] = cp_lang_daily["aht_calls_sm"].clip(180, 3600)

# Convert call AHT → ticket AHT
TICKET_OVER_CALL = CONFIG["ticket_over_call"]

recent_days = 30
recent = cp_lang_daily[cp_lang_daily["Date"] >= cp_lang_daily["Date"].max() - pd.Timedelta(days=recent_days)]
aht_call_by_lang = recent.groupby("language")["aht_calls_sm"].median().dropna().to_dict()

# Convert to ticket AHT
AHT_updated = {lang: int(round(float(aht) * TICKET_OVER_CALL)) for lang, aht in aht_call_by_lang.items()}

# Clamp ticket AHT (3 min .. 90 min) to avoid wild values
for lang in list(AHT_updated.keys()):
    AHT_updated[lang] = int(np.clip(AHT_updated[lang], 180, 5400))

# Ensure all languages in CONFIG exist in AHT_updated (fallback)
for lang in CONFIG["language_shares"]:
    if lang not in AHT_updated:
        AHT_updated[lang] = CONFIG["aht_ticket_lang_sec"].get(lang, 900)

print("Ticket AHT (seconds) by language after integration of call_performance:")
AHT_updated


Ticket AHT (seconds) by language after integration of call_performance:


{'Arabic': 597,
 'Czech': 687,
 'Danish': 585,
 'English': 642,
 'Finnish': 603,
 'French': 641,
 'German': 535,
 'Italian': 658,
 'Portuguese': 599,
 'Spanish': 654,
 'Swedish': 613}

## 7. Load einstein.xlsx and compute human-demand; compute agents (baseline)

In [38]:

EIN_PATH = CONFIG["einstein_path"]
try:
    ein = pd.read_excel(EIN_PATH)
    ein["Date"] = pd.to_datetime(ein["Date"], errors="coerce")
    ein = ein.dropna(subset=["Date"])
    ein_daily = ein.groupby("Date").size().rename("einstein_resolved").reset_index()
except Exception:
    ein_daily = pd.DataFrame(columns=["Date","einstein_resolved"])

# Build Einstein forecast (language-level)
def project_einstein_series(series: pd.Series, horizon_index: pd.DatetimeIndex) -> pd.Series:
    """Seasonal-naive 7-day projection; fallback median; else zeros."""
    series = series.sort_index().asfreq("D").fillna(0)
    if len(series) >= 7:
        last_week = series.iloc[-7:].values
        reps = int(np.ceil(len(horizon_index)/7))
        return pd.Series(np.tile(last_week, reps)[:len(horizon_index)], index=horizon_index)
    elif len(series) >= 3:
        med = float(series.rolling(7, min_periods=3).median().iloc[-1])
        return pd.Series(np.full(len(horizon_index), med), index=horizon_index)
    else:
        return pd.Series(np.zeros(len(horizon_index)), index=horizon_index)

incoming_lang = (forecast_daily_lang
                 .groupby(["Date","language"], as_index=False)["tickets"]
                 .sum()
                 .rename(columns={"tickets":"incoming_forecast"}))

future_dates = pd.date_range(start=incoming_lang["Date"].min(),
                             end=incoming_lang["Date"].max(), freq="D")

if not ein_daily.empty:
    # if ein has no language, allocate to languages by shares
    if "language" not in ein_daily.columns:
        alloc_rows_e = []
        for _, row in ein_daily.iterrows():
            alloc = allocate_by_language(int(row["einstein_resolved"]), CONFIG["language_shares"])
            for lang, t in alloc.items():
                alloc_rows_e.append({"Date": row["Date"], "language": lang, "einstein_resolved": int(t)})
        ein_hist = pd.DataFrame(alloc_rows_e)
    else:
        ein_hist = ein_daily.copy()  # already language-level

    ein_hist["Date"] = pd.to_datetime(ein_hist["Date"])
    ein_fc_rows = []
    for lang in incoming_lang["language"].unique():
        s = (ein_hist[ein_hist["language"] == lang].set_index("Date")["einstein_resolved"]
             if "language" in ein_hist.columns else
             ein_hist.set_index("Date")["einstein_resolved"])
        fc = project_einstein_series(s, future_dates)
        ein_fc_rows.append(pd.DataFrame({"Date": fc.index, "language": lang,
                                         "einstein_resolved_forecast": fc.values}))
    ein_fc = pd.concat(ein_fc_rows, ignore_index=True)
else:
    ein_fc = incoming_lang[["Date","language"]].copy()
    ein_fc["einstein_resolved_forecast"] = 0.0

# Merge and compute human tickets
dash = incoming_lang.merge(ein_fc, on=["Date","language"], how="left")
dash["einstein_resolved_forecast"] = dash["einstein_resolved_forecast"].fillna(0).clip(lower=0)
dash["tickets_human"] = (dash["incoming_forecast"] - dash["einstein_resolved_forecast"]).clip(lower=0).astype(int)

# Attach AHT and compute agents
AHT_MAP = AHT_updated.copy()
dash["aht_sec"] = dash["language"].map(AHT_MAP).fillna(900).astype(int)

WH = CONFIG["work_hours_effective"]
OCC = CONFIG["occupancy_target"]
SHR = CONFIG["shrinkage"]

dash["agents_needed"] = dash.apply(
    lambda r: agents_needed(r["tickets_human"], r["aht_sec"], WH, OCC, SHR),
    axis=1
)

dash = dash.sort_values(["Date","language"]).reset_index(drop=True)
display(dash.head())


Unnamed: 0,Date,language,incoming_forecast,einstein_resolved_forecast,tickets_human,aht_sec,agents_needed
0,2025-07-21,English,0,30.0,0,642,0
1,2025-07-21,French,0,4.0,0,641,0
2,2025-07-21,German,0,4.0,0,535,0
3,2025-07-21,Italian,0,3.0,0,658,0
4,2025-07-21,Portuguese,0,1.0,0,599,0


## 8. Build FTE planning scenarios and compute SLA gap

In [39]:
ensure_dir(CONFIG["export_dir"])

SCENARIOS = CONFIG["fte_scenarios"]  # e.g., [0.8, 0.9, 1.0, 1.1]
work_sec = WH * 3600.0

scenario_rows = []
for ratio in SCENARIOS:
    df = dash.copy()
    # Planned FTE: round up to ensure we don't undercount fractional heads
    df["fte_planned"] = np.ceil(df["agents_needed"] * float(ratio)).astype(int)
    # Closable tickets from planned FTE:
    df["closable_tickets"] = ((df["fte_planned"] * work_sec * OCC) / df["aht_sec"]) * (1.0 - SHR)
    df["closable_tickets"] = df["closable_tickets"].fillna(0).round().astype(int)
    # Gaps
    df["gap_tickets"] = (df["tickets_human"] - df["closable_tickets"]).clip(lower=0).astype(int)
    df["gap_agents"] = (df["agents_needed"] - df["fte_planned"]).clip(lower=0).astype(int)
    df["scenario"] = f"{int(ratio*100)}%"  # label
    scenario_rows.append(df)

dash_scen = pd.concat(scenario_rows, ignore_index=True)

# Export scenario results
out_csv = CONFIG["export_dir"] / "dashboard_daily_language_with_scenarios.csv"
dash_scen.to_csv(out_csv, index=False)
print("Saved scenarios dashboard:", out_csv)

# Quick aggregation per scenario (total gaps)
agg = (dash_scen.groupby("scenario")[["tickets_human","closable_tickets","gap_tickets","agents_needed","fte_planned"]]
                .sum()
                .reset_index())
display(agg)


Saved scenarios dashboard: outputs\dashboard_daily_language_with_scenarios.csv


Unnamed: 0,scenario,tickets_human,closable_tickets,gap_tickets,agents_needed,fte_planned
0,100%,111930,107848,8710,4550,4550
1,110%,111930,137878,0,4550,5798
2,80%,111930,95082,21294,4550,4004
3,90%,111930,103610,12948,4550,4368


## 09. Quick plots: total agents & gaps by scenario

In [40]:

try:
    # Total agents per day by scenario
    agents_daily = (dash_scen.groupby(["scenario","Date"])["fte_planned"].sum()
                            .reset_index())
    fig_a = px.line(agents_daily, x="Date", y="fte_planned", color="scenario",
                    title="Total planned FTE per day by scenario")
    fig_a.show()

    # Total gap tickets per day by scenario
    gap_daily = (dash_scen.groupby(["scenario","Date"])["gap_tickets"].sum()
                          .reset_index())
    fig_g = px.area(gap_daily, x="Date", y="gap_tickets", color="scenario",
                    title="Daily SLA gap (tickets) by scenario")
    fig_g.show()
except Exception as e:
    print("Skipping plots:", e)


## 10. Export base dashboard and quick summaries

In [41]:
base_csv = CONFIG["export_dir"] / "dashboard_daily_language.csv"
dash[["Date","language","incoming_forecast","einstein_resolved_forecast",
      "tickets_human","aht_sec","agents_needed"]].to_csv(base_csv, index=False)
print("Saved base dashboard:", base_csv)

sum_agents_by_lang = dash.groupby("language")["agents_needed"].sum().sort_values(ascending=False)
print("\nTotal agents over horizon by language (baseline):")
print(sum_agents_by_lang)


Saved base dashboard: outputs\dashboard_daily_language.csv

Total agents over horizon by language (baseline):
language
English       2886
Spanish        494
French         338
German         338
Italian        312
Portuguese     182
Name: agents_needed, dtype: int64


## 11. Partners weekly piple (Thu-Wed) and weekday profile

In [42]:
# Guard: ensure 'daily' exists and has Partners
if "daily" not in globals():
    raise RuntimeError("Missing 'daily'. Build it from Incoming_new.xlsx first.")
if "Partners" not in set(daily["vertical"].unique()):
    raise RuntimeError("'Partners' vertical not found in 'daily'.")

partners_daily_hist = (daily[daily["vertical"] == "Partners"]
                       .copy()
                       .sort_values("Date"))
partners_daily_hist["Date"] = pd.to_datetime(partners_daily_hist["Date"], errors="coerce")

# --- Weekly (Thu–Wed) totals
partners_weekly = (partners_daily_hist
                   .set_index("Date")["tickets"]
                   .asfreq("D").fillna(0)
                   .resample("W-WED")   # business week ends Wednesday → Thu–Wed
                   .sum()
                   .rename("tickets_week")
                   .to_frame())

# --- Weekday profile (weights within Thu–Wed week)
# Build weekday index with custom week ending on Wed:
ts = partners_daily_hist.set_index("Date")["tickets"].asfreq("D").fillna(0)
# Map each day to week id (ending on Wed) and weekday number (Thu=0 ... Wed=6)
week_id = ts.index.to_period("W-WED")  # weekly period ending on Wednesday
weekday_num = ((ts.index.weekday - 3) % 7)  # Thu=0, Fri=1, ... Wed=6

df_wd = pd.DataFrame({
    "week": week_id.astype(str),
    "weekday": weekday_num,
    "tickets": ts.values
})
wk_totals = df_wd.groupby("week", as_index=False)["tickets"].sum().rename(columns={"tickets":"wk_sum"})
df_wd = df_wd.merge(wk_totals, on="week", how="left")
df_wd["share"] = np.where(df_wd["wk_sum"] > 0, df_wd["tickets"] / df_wd["wk_sum"], 0.0)

# Average weekday shares over recent K weeks (exclude zero weeks)
K_WEEKS = 12
recent_weeks = (df_wd["week"].unique())[-K_WEEKS:] if len(df_wd["week"].unique()) >= K_WEEKS else df_wd["week"].unique()
wd_profile = (df_wd[df_wd["week"].isin(recent_weeks)]
              .groupby("weekday")["share"].mean().reindex(range(7)).fillna(0.0))

# Ensure the weights sum to 1.0 (handle all-zero edge case)
if wd_profile.sum() <= 0:
    wd_profile = pd.Series([1/7.0]*7, index=range(7))
else:
    wd_profile = wd_profile / wd_profile.sum()

