## 1. Imports & setups

In [18]:
from pathlib import Path
import warnings
import math
import numpy as np
import pandas as pd

try:
    from prophet import Prophet
except Exception:
    try:
        from fbprophet import Prophet  # legacy fallback
    except Exception:
        Prophet = None  # not strictly required for the pipeline

warnings.filterwarnings("ignore")

## 2. Configuration

In [19]:

CONFIG = {
    "language_shares": {
        "English": 0.6435,
        "French": 0.0741,
        "German": 0.0860,
        "Italian": 0.0667,
        "Portuguese": 0.0162,
        "Spanish": 0.1135
    },
    "work_hours_effective": 6.5,   # effective productive hours/day
    "occupancy_target": 0.85,      # target occupancy
    "shrinkage": 0.30,             # total shrinkage (breaks, coaching, PTO, etc.)
    "export_dir": "./_export"      # <-- added to avoid RuntimeError
}

# Local paths (adjust to your environment)
INCOMING_PATH = r"C:\Users\pt3canro\Desktop\CAPACITY\Incoming_new.xlsx"
CALL_PATH     = r"C:\Users\pt3canro\Desktop\CAPACITY\call_performance.xlsx"

Path(CONFIG["export_dir"]).mkdir(parents=True, exist_ok=True)

## 3. Helpers (allocation, staffing formulas)

In [20]:
def allocate_by_language(total: int, shares: dict) -> dict:
    """
    Deterministic apportionment (Hamilton method) ensuring exact total.
    """
    total = int(total) if pd.notna(total) else 0
    if total <= 0:
        return {k: 0 for k in shares}
    ssum = float(sum(shares.values()))
    use_shares = {k: (v/ssum) for k, v in shares.items()} if not (0.99 <= ssum <= 1.01) else shares
    raw  = {k: total * float(v) for k, v in use_shares.items()}
    base = {k: int(np.floor(v)) for k, v in raw.items()}
    remainder = int(total - sum(base.values()))
    if remainder > 0:
        fracs = sorted(((k, raw[k] - base[k]) for k in use_shares), key=lambda x: x[1], reverse=True)
        for k, _ in fracs[:remainder]:
            base[k] += 1
    return base

def agents_required(tickets: int, aht_sec: float,
                    work_hours_effective: float,
                    occupancy: float,
                    shrinkage: float) -> float:
    """
    Return required FTE (float, BEFORE ceiling) accounting for shrinkage consistently.
    Formula:
      base_FTE = (tickets * aht_sec) / (work_seconds * occupancy)
      FTE_with_shrink = base_FTE / (1 - shrinkage)
    """
    work_sec = float(work_hours_effective) * 3600.0
    if work_sec <= 0 or occupancy <= 0:
        return 0.0
    shrinkage = min(max(float(shrinkage), 0.0), 0.95)  # clamp to avoid division by zero
    base = (float(tickets) * float(aht_sec)) / (work_sec * float(occupancy))
    return base / (1.0 - shrinkage)

def ceil_agents(x: float) -> int:
    """Ceil positive FTE to next integer; never negative."""
    return int(math.ceil(max(0.0, float(x))))

## 4. Load Incoming.xlsx and build daily series

In [21]:
def build_daily_from_df(df_in: pd.DataFrame) -> pd.DataFrame:
    """
    Build a daily incoming series per vertical (tickets). 
    Missing calendar dates are filled with zeros per vertical.
    """
    expected = {"Date", "vertical", "total_incoming"}
    missing = expected - set(df_in.columns)
    if missing:
        raise ValueError(f"Missing columns in Incoming_new.xlsx: {missing}")

    g = (
        df_in
        .assign(Date=pd.to_datetime(df_in["Date"], errors="coerce"))
        .dropna(subset=["Date"])
        .groupby(["vertical", pd.Grouper(key="Date", freq="D")])["total_incoming"]
        .sum()
        .rename("tickets")
        .reset_index()
    )

    g = (
        g.set_index("Date")
         .groupby("vertical", group_keys=False)
         .apply(lambda x: x.asfreq("D").fillna({"tickets": 0}))
         .reset_index()
    )

    g["tickets"]  = g["tickets"].fillna(0).clip(lower=0).round().astype(int)
    g["vertical"] = g["vertical"].astype(str).str.strip()
    return g.sort_values(["vertical", "Date"]).reset_index(drop=True)

# Load Incoming
df_incoming_raw = pd.read_excel(INCOMING_PATH)
daily = build_daily_from_df(df_incoming_raw)
display(daily.tail())

Unnamed: 0,Date,vertical,tickets
3204,2026-01-17,,0
3205,2026-01-17,,0
3206,2026-01-17,,0
3207,2026-01-18,,0
3208,2026-01-18,,0


## 5. Call performance -> ticket AHT mapping per language

In [22]:
cp = pd.read_excel(CALL_PATH)
cp["call_date"] = pd.to_datetime(cp["call_date"], errors="coerce")
cp = cp.dropna(subset=["call_date"])

rename_map = {
    "lang": "language",
    "Language": "language",
    "language_name": "language",
    "call_date": "Date",
    "AHT_sec": "aht_seconds",
    "aht": "aht_seconds",
}
cp.rename(columns={k: v for k, v in rename_map.items() if k in cp.columns}, inplace=True)

cp = cp[["Date", "language", "aht_seconds"]].dropna()
cp["aht_seconds"] = cp["aht_seconds"].clip(10, 3600)  # sanity bounds

# 14-day rolling median smoothing
cp = cp.sort_values(["language", "Date"])
cp["aht_sm"] = cp.groupby("language")["aht_seconds"].transform(lambda s: s.rolling(14, min_periods=3).median())

# Calls -> Tickets AHT uplift
TICKET_OVER_CALL = 1.35
call_aht = cp.groupby("language")["aht_sm"].median().dropna().to_dict()
ticket_aht_from_calls = {lang: float(aht) * TICKET_OVER_CALL for lang, aht in call_aht.items()}

# Default fallback AHT if language not in mapping
DEFAULT_AHT_SEC = 900.0

## 6. Human demand & staffing by language from daily_with_bot (or fallback from daily)

In [23]:
LANG_SHARES = CONFIG["language_shares"]
WH  = float(CONFIG["work_hours_effective"])
OCC = float(CONFIG["occupancy_target"])
SHR = float(CONFIG["shrinkage"])

# Use daily_with_bot if provided externally; else fallback: assume all human tickets
if "daily_with_bot" in globals():
    df_staff_src = daily_with_bot.copy()
    needed_cols = {"Date", "vertical", "tickets_human"}
    missing = needed_cols - set(df_staff_src.columns)
    if missing:
        raise KeyError(f"daily_with_bot is missing required columns: {missing}")
else:
    # Fallback: derive from 'daily' (no bot deflection known)
    df_staff_src = (
        daily.rename(columns={"tickets": "tickets_human"})
             [["Date", "vertical", "tickets_human"]]
             .copy()
    )

# Integer tickets for allocation
df_staff_src["tickets_total"] = df_staff_src["tickets_human"].fillna(0).astype(int)

# Deterministic allocation by language
alloc_rows = []
for _, row in df_staff_src.iterrows():
    total = int(row["tickets_total"])
    if total <= 0:
        continue
    alloc = allocate_by_language(total, LANG_SHARES)
    for lang, t in alloc.items():
        if int(t) <= 0:
            continue
        alloc_rows.append({
            "Date": row["Date"],
            "vertical": row["vertical"],
            "language": lang,
            "tickets": int(t)
        })

df_lang_staff = pd.DataFrame(alloc_rows, columns=["Date", "vertical", "language", "tickets"])
if df_lang_staff.empty:
    df_lang_staff = pd.DataFrame([], columns=["Date", "vertical", "language", "tickets"])

# Attach AHT from calls (fallback to default)
df_lang_staff["aht_sec"] = df_lang_staff["language"].map(ticket_aht_from_calls).fillna(DEFAULT_AHT_SEC).astype(float)

# Compute FTE (float) and integer agents (ceil)
df_lang_staff["fte_required"] = df_lang_staff.apply(
    lambda r: agents_required(
        tickets=int(r["tickets"]),
        aht_sec=float(r["aht_sec"]),
        work_hours_effective=WH,
        occupancy=OCC,
        shrinkage=SHR
    ),
    axis=1
)

# Aggregate before ceiling to avoid rounding bias
df_lang_staff = (
    df_lang_staff
    .groupby(["Date", "vertical", "language"], as_index=False)
    .agg({
        "tickets": "sum",
        "aht_sec": "mean",      # safe average if duplicated rows
        "fte_required": "sum"   # sum continuous capacity BEFORE ceiling
    })
)

df_lang_staff["agents"] = df_lang_staff["fte_required"].apply(ceil_agents)
display(df_lang_staff.head())


Unnamed: 0,Date,vertical,language,tickets,aht_sec,fte_required,agents
0,2025-01-01,Finance,English,2,631.07775,0.090653,1
1,2025-01-01,Finance,Spanish,1,686.718,0.049323,1
2,2025-01-01,Hospitality,English,56,631.07775,2.538271,3
3,2025-01-01,Hospitality,French,6,654.50025,0.282051,1
4,2025-01-01,Hospitality,German,8,576.898875,0.33148,1


## 7. Dashboard by language (forecast + Einstein + staffing + SLA gap)

In [24]:
OUTDIR = Path(CONFIG["export_dir"])
LANG_SHARES = CONFIG["language_shares"]
WH  = float(CONFIG["work_hours_effective"])
OCC = float(CONFIG["occupancy_target"])
SHR = float(CONFIG["shrinkage"])

# 1) Incoming forecast by language
if "forecast_daily_lang" in globals():
    incoming_lang = (
        forecast_daily_lang
        .groupby(["Date", "language"], as_index=False)["tickets"]
        .sum()
        .rename(columns={"tickets": "incoming_forecast"})
    )
elif "forecast_daily" in globals():
    tmp = forecast_daily[["Date", "tickets_total"]].copy()
    tmp["tickets_total"] = tmp["tickets_total"].fillna(0).clip(lower=0).round().astype(int)
    alloc_rows = []
    for _, row in tmp.iterrows():
        alloc = allocate_by_language(int(row["tickets_total"]), LANG_SHARES)
        for lang, t in alloc.items():
            alloc_rows.append({"Date": row["Date"], "language": lang, "incoming_forecast": int(t)})
    incoming_lang = (
        pd.DataFrame(alloc_rows)
          .groupby(["Date", "language"], as_index=False)["incoming_forecast"]
          .sum()
    )
else:
    # Fallback: use 'daily' totals as a proxy forecast (copy of history)
    tmp = daily[["Date", "tickets"]].rename(columns={"tickets": "tickets_total"}).copy()
    alloc_rows = []
    for _, row in tmp.iterrows():
        alloc = allocate_by_language(int(row["tickets_total"]), LANG_SHARES)
        for lang, t in alloc.items():
            alloc_rows.append({"Date": row["Date"], "language": lang, "incoming_forecast": int(t)})
    incoming_lang = pd.DataFrame(alloc_rows)

# 2) Einstein baseline resolved by language (project to horizon)
def project_einstein_series(series: pd.Series, horizon_index: pd.DatetimeIndex) -> pd.Series:
    """
    Seasonal-naive (7-day) projection; if <7 obs -> rolling median fallback; else zeros.
    """
    series = series.sort_values().asfreq("D").fillna(0)
    if len(series) >= 7:
        last_week = series.iloc[-7:].values
        reps = int(np.ceil(len(horizon_index)/7))
        fc = np.tile(last_week, reps)[:len(horizon_index)]
        return pd.Series(fc, index=horizon_index)
    elif len(series) >= 3:
        med = float(series.rolling(7, min_periods=3).median().iloc[-1])
        return pd.Series(np.full(len(horizon_index), med), index=horizon_index)
    else:
        return pd.Series(np.zeros(len(horizon_index)), index=horizon_index)

if "ein_daily" in globals():
    ein_df = ein_daily.copy()
    if "language" in ein_df.columns:
        ein_hist = (
            ein_df.groupby(["Date", "language"]).size()
                  .rename("einstein_resolved")
                  .reset_index()
        )
    else:
        ein_hist_total = (
            ein_df.groupby("Date").size()
                  .rename("einstein_resolved")
                  .reset_index()
        )
        # Allocate per language using shares
        alloc_rows_e = []
        for _, row in ein_hist_total.iterrows():
            alloc = allocate_by_language(int(row["einstein_resolved"]), LANG_SHARES)
            for lang, t in alloc.items():
                alloc_rows_e.append({"Date": row["Date"], "language": lang, "einstein_resolved": int(t)})
        ein_hist = pd.DataFrame(alloc_rows_e)

    ein_hist["Date"] = pd.to_datetime(ein_hist["Date"])
    horizon = pd.to_datetime(incoming_lang["Date"].unique())
    min_h, max_h = horizon.min(), horizon.max()

    ein_fc_rows = []
    for lang in incoming_lang["language"].unique():
        hist_lang = (
            ein_hist[ein_hist["language"] == lang]
            .set_index("Date")["einstein_resolved"]
        )
        fc = project_einstein_series(hist_lang, pd.date_range(min_h, max_h, freq="D"))
        ein_fc_rows.append(pd.DataFrame({"Date": fc.index, "language": lang, "einstein_resolved_forecast": fc.values}))
    ein_fc = pd.concat(ein_fc_rows, ignore_index=True)
else:
    ein_fc = incoming_lang.copy()
    ein_fc["einstein_resolved_forecast"] = 0.0
    ein_fc = ein_fc[["Date", "language", "einstein_resolved_forecast"]]

# 3) Merge & compute human tickets
dash = incoming_lang.merge(ein_fc, on=["Date", "language"], how="left")
dash["einstein_resolved_forecast"] = dash["einstein_resolved_forecast"].fillna(0).clip(lower=0)
dash["tickets_human"] = (dash["incoming_forecast"] - dash["einstein_resolved_forecast"]).clip(lower=0).astype(int)

# 4) Attach AHT per language (prefer call-based mapping; fallback to default)
dash["aht_sec"] = dash["language"].map(ticket_aht_from_calls).fillna(DEFAULT_AHT_SEC).astype(float)

# 5) Compute FTE & agents
dash["fte_required"] = dash.apply(
    lambda r: agents_required(
        tickets=int(r["tickets_human"]),
        aht_sec=float(r["aht_sec"]),
        work_hours_effective=WH,
        occupancy=OCC,
        shrinkage=SHR
    ),
    axis=1
)
dash["agents_needed"] = dash["fte_required"].apply(ceil_agents)

# 6) SLA gap if planning available
if "df_planned_fte" in globals():
    plan = df_planned_fte.copy()
    plan["Date"] = pd.to_datetime(plan["Date"], errors="coerce")
    plan = plan.dropna(subset=["Date", "language", "fte_planned"])
    dash = dash.merge(plan[["Date", "language", "fte_planned"]], on=["Date", "language"], how="left")
    dash["fte_planned"] = dash["fte_planned"].fillna(0.0)

    work_sec = WH * 3600.0
    dash["closable_tickets"] = ((dash["fte_planned"] * work_sec * OCC) / dash["aht_sec"]) * (1.0 - SHR)
    dash["closable_tickets"] = dash["closable_tickets"].fillna(0).round().astype(int)

    dash["gap_tickets"] = (dash["tickets_human"] - dash["closable_tickets"]).clip(lower=0).astype(int)
    dash["gap_agents"]  = (dash["agents_needed"] - dash["fte_planned"]).clip(lower=0).round(0).astype(int)
else:
    dash["fte_planned"] = np.nan
    dash["closable_tickets"] = np.nan
    dash["gap_tickets"] = np.nan
    dash["gap_agents"] = np.nan

# 7) Order, export, preview
dash = dash.sort_values(["Date", "language"]).reset_index(drop=True)
cols = [
    "Date", "language", "incoming_forecast", "einstein_resolved_forecast",
    "tickets_human", "aht_sec", "fte_required", "agents_needed",
    "fte_planned", "closable_tickets", "gap_tickets", "gap_agents"
]
dash = dash[cols]

out_csv = OUTDIR / "dashboard_daily_language.csv"
dash.to_csv(out_csv, index=False)
print("Saved dashboard:", out_csv)

display(dash.head(20))


Saved dashboard: _export\dashboard_daily_language.csv


Unnamed: 0,Date,language,incoming_forecast,einstein_resolved_forecast,tickets_human,aht_sec,fte_required,agents_needed,fte_planned,closable_tickets,gap_tickets,gap_agents
0,2025-01-01,English,2,0.0,2,631.07775,0.090653,1,,,,
1,2025-01-01,English,2,0.0,2,631.07775,0.090653,1,,,,
2,2025-01-01,English,2,0.0,2,631.07775,0.090653,1,,,,
3,2025-01-01,English,2,0.0,2,631.07775,0.090653,1,,,,
4,2025-01-01,English,2,0.0,2,631.07775,0.090653,1,,,,
5,2025-01-01,English,2,0.0,2,631.07775,0.090653,1,,,,
6,2025-01-01,English,2,0.0,2,631.07775,0.090653,1,,,,
7,2025-01-01,English,56,0.0,56,631.07775,2.538271,3,,,,
8,2025-01-01,English,56,0.0,56,631.07775,2.538271,3,,,,
9,2025-01-01,English,56,0.0,56,631.07775,2.538271,3,,,,


## 8. Plots (stacked human tickets + total agents)

In [25]:
try:
    import plotly.express as px
    import plotly.graph_objects as go

    fig1 = px.area(
        dash, x="Date", y="tickets_human", color="language",
        title="Human tickets per day by language (stacked)"
    )
    fig1.show()

    agents_daily = dash.groupby("Date")["agents_needed"].sum().reset_index()
    fig2 = go.Figure()
    fig2.add_trace(go.Scatter(
        x=agents_daily["Date"], y=agents_daily["agents_needed"],
        mode="lines+markers", name="Agents needed (total)"
    ))
    fig2.update_layout(title="Total agents needed per day", xaxis_title="Date", yaxis_title="Agents")
    fig2.show()

except Exception as e:
    print("Plotly not available or rendering blocked. Skipping plots. Reason:", e)
