# In [1]:
import numpy as np
import pandas as pd
import json
from datetime import datetime, timedelta

# -----------------------------
# 0) CONFIG
# -----------------------------
# Fixed seed so repeated runs of this generator draw the same random numbers.
SEED = 42
np.random.seed(SEED)

# Reference date of the demo, length of the generated history and forecast horizon (in days).
AS_OF = pd.Timestamp("2026-01-15", tz="UTC")
HISTORY_DAYS = 180
HORIZON_DAYS = 7

# Plant master data: one dict per plant with its id, display name and coordinates.
plants = [
    dict(zip(("plant_id", "name", "lat", "lon"), record))
    for record in (
        ("MX-TOL", "Toluca Plant", 19.2826, -99.6557),
        ("MX-IRA", "Irapuato Plant", 20.6767, -101.3469),
        ("MX-VMX", "Valley of Mexico Plant", 19.4326, -99.1332),
        ("MX-GDL", "Guadalajara Plant", 20.6597, -103.3496),
        ("MX-SLP", "San Luis Potosi Plant", 22.1565, -100.9855),
    )
]

# Your real products (the ones you listed).
# Display names only; the SKU catalog further down derives one "SKU-..." id from each entry.
sku_names = [
    "Danone Bebible Natural Deslactosado",
    "Danone Bebible de Fresa Deslactosado",
    "Danone Bebible de Frutos Verdes Deslactosado",
    "Danone Bebible de Mango y Durazno Deslactosado",
    "Danone Natural Familiar sin azúcar",
    "Danone Natural con fermentos",
    "Danone Natural con miel",
    "Danone Familiar con trozos de durazno",
    "Danone Familiar con trozos de fresa y moras",
    "Danone Familiar con trozos de manzana",
    "Danone Familiar con trozos de piña y coco",
    "Danone Familiar con trozos de mango",
    "Danone Familiar miel y granola",
    "Danone Familiar coco y nuez con cereal",
    "Danone Familiar sabor durazno (sin trozos)",
    "Danone Familiar sabor manzana (sin trozos)",
    "Danone Familiar sabor fresa (sin trozos)",
    "Danone Griego sin azúcar",
    "Danone Griego endulzado",
    "Yoghurt con cereal de fresa con chocoarroz",
    "Yoghurt con cereal de vainilla con hojuelas de maíz",
    "Yoghurt con cereal de fresa con azucaradas",
    "Yoghurt con cereal de manzana con frutiaros",
    "Licuado de fresa y plátano con cereales",
    "Licuado de nuez con cereales",
]

# Build simple, stable IDs
def slug(s):
    """Return a lowercase, dash-separated identifier derived from ``s``.

    The text is lowercased; spaces and ``/`` become ``-``; acute-accented
    vowels and ``ñ`` are replaced by their plain letters; parentheses and
    commas are dropped. Any other character is left untouched.
    """
    # Single-pass character table; a value of None deletes the character.
    char_table = str.maketrans({
        " ": "-",
        "/": "-",
        "á": "a",
        "é": "e",
        "í": "i",
        "ó": "o",
        "ú": "u",
        "ñ": "n",
        "(": None,
        ")": None,
        ",": None,
    })
    return s.lower().translate(char_table)

# SKU catalog: one record per display name; the id is "SKU-" plus the
# first 40 characters of the name's slug.
sku_catalog = [
    {
        "sku_id": f"SKU-{slug(display_name)[:40]}",
        "display_name": display_name,
        "brand": "Danone",
    }
    for display_name in sku_names
]

# -----------------------------
# 1) GENERATE SALES HISTORY
# -----------------------------
# Daily calendar of HISTORY_DAYS days ending on AS_OF (inclusive).
dates = pd.date_range(end=AS_OF, periods=HISTORY_DAYS, freq="D", tz="UTC")

# Everything in this section depends only on the calendar, so it is computed
# once here instead of once per plant x SKU (or once per output row).
n_days = len(dates)
date_labels = [d.date().isoformat() for d in dates]

# Week: per-day-of-week pattern (+/-10% sinusoid)
dow = np.array([d.dayofweek for d in dates])  # 0=Mon
weekly = 1 + 0.10 * np.sin(2*np.pi * dow/7)

# Day index used by the linear trend
t = np.arange(n_days)

rows = []
for p in plants:
    plant_multiplier = np.random.uniform(0.7, 1.3)  # plant size
    for sku in sku_catalog:
        # NOTE: the order of the np.random calls below determines the data
        # generated for a given seed; do not reorder them.
        base = np.random.uniform(800, 12000) * plant_multiplier

        # Gentle trend: a small random slope per series
        trend = 1 + np.random.uniform(-0.0008, 0.0012) * t

        # Promo: each day is flagged with probability 0.10; a single uplift
        # factor is drawn per series and applied to all of its promo days
        promo_flag = (np.random.rand(n_days) < 0.10).astype(int)
        promo_uplift = 1 + promo_flag * np.random.uniform(0.10, 0.35)

        noise = np.random.normal(1.0, 0.08, size=n_days)  # variability

        sales = np.maximum(0, base * weekly * trend * promo_uplift * noise).round().astype(int)

        # Demo price
        price = np.random.uniform(8, 45)  # MXN per unit (demo)
        net_sales = (sales * price).round(2)
        price_rounded = float(round(price, 2))  # constant for the whole series

        for i, date_label in enumerate(date_labels):
            rows.append({
                "date": date_label,
                "plant_id": p["plant_id"],
                "sku_id": sku["sku_id"],
                "sales_units": int(sales[i]),
                "net_sales_mxn": float(net_sales[i]),
                "promo_flag": int(promo_flag[i]),
                "price_mxn": price_rounded
            })

sales_history = pd.DataFrame(rows)

# -----------------------------
# 2) GENERATE INVENTORY HISTORY (simple stock flow)
# closing = opening + production + inbound - outbound - sales - scrap
# -----------------------------
inv_rows = []
for (plant_id, sku_id), grp in sales_history.groupby(["plant_id", "sku_id"]):
    grp = grp.sort_values("date").reset_index(drop=True)

    # Initial stock: 10-20 days' worth of the series' average daily sales
    avg_sales = grp["sales_units"].mean()
    opening = int(avg_sales * np.random.uniform(10, 20))

    for day, units_sold in zip(grp["date"], grp["sales_units"]):
        sales = int(units_sold)
        # The four draws below must stay in this order to keep seeded output stable.
        # Production aims to cover a share of the day's sales, with variation
        production = int(max(0, sales * np.random.uniform(0.7, 1.15)))
        inbound = int(max(0, avg_sales * np.random.uniform(0.0, 0.25)))
        outbound = int(max(0, avg_sales * np.random.uniform(0.0, 0.15)))
        scrap = int(max(0, sales * np.random.uniform(0.0, 0.01)))

        # Unclamped balance of the day; a non-positive balance counts as a
        # stockout and the stored closing stock is floored at zero.
        balance = opening + production + inbound - outbound - sales - scrap
        closing = max(0, balance)

        inv_rows.append({
            "date": day,
            "plant_id": plant_id,
            "sku_id": sku_id,
            "opening_stock_units": opening,
            "production_units": production,
            "inbound_transfers_units": inbound,
            "outbound_transfers_units": outbound,
            "sales_units": sales,
            "scrap_units": scrap,
            "closing_stock_units": closing,
            "stockout_flag": int(balance <= 0)
        })

        # Today's closing stock is tomorrow's opening stock
        opening = closing

inventory_history = pd.DataFrame(inv_rows)

# -----------------------------
# 3) FORECAST (baseline: moving average 14d) + STOCK PROJECTION
# -----------------------------
def moving_avg_forecast(series, window=14, horizon=7):
    """Return a flat forecast: the trailing-window mean repeated ``horizon`` times.

    ``series`` is sliced positionally with ``.iloc`` (so a pandas Series is
    expected); when it holds fewer than ``window`` values, all of them are
    averaged. The result is a 1-D NumPy array of length ``horizon``.
    """
    trailing = series.iloc[-window:]
    level = trailing.mean()
    return np.full(horizon, level)

forecast_rows = []
future_dates = pd.date_range(start=AS_OF + pd.Timedelta(days=1), periods=HORIZON_DAYS, freq="D", tz="UTC")
# The ISO date strings are identical for every plant/SKU, so format them once.
future_date_labels = [d.date().isoformat() for d in future_dates]

# Most recent closing stock per (plant, SKU), resolved in one grouped pass
# instead of re-filtering the whole inventory table for every group.
last_closing_stock = (
    inventory_history
    .sort_values("date")
    .groupby(["plant_id", "sku_id"])["closing_stock_units"]
    .last()
)

for (plant_id, sku_id), sales_grp in sales_history.groupby(["plant_id", "sku_id"]):
    hist_sales = sales_grp.sort_values("date")["sales_units"]
    fc = moving_avg_forecast(hist_sales, window=14, horizon=HORIZON_DAYS)

    # stock projection using last closing + assume planned production covers 90% of forecast
    proj_stock = int(last_closing_stock.loc[(plant_id, sku_id)])

    for date_label, fc_value in zip(future_date_labels, fc):
        forecast_sales = int(round(fc_value))
        planned_prod = int(round(forecast_sales * 0.90))
        # simplistic: no transfers; projected stock is floored at zero
        proj_stock = max(0, proj_stock + planned_prod - forecast_sales)

        # days of coverage = current projected stock / forecast daily sales
        # (denominator floored at 1 to avoid dividing by zero)
        doc = float(proj_stock / max(1, forecast_sales))
        # demo: <5 days -> higher prob; linear in coverage, clipped to [0.05, 0.95]
        stockout_prob = float(min(0.95, max(0.05, 1 - (doc/5))))

        forecast_rows.append({
            "date": date_label,
            "plant_id": plant_id,
            "sku_id": sku_id,
            "model_id": "baseline_moving_avg_14d",
            "forecast_sales_units": forecast_sales,
            "predicted_closing_stock_units": int(proj_stock),
            "predicted_days_of_coverage": round(doc, 2),
            "stockout_probability": round(stockout_prob, 2)
        })

forecast_output = pd.DataFrame(forecast_rows)

# -----------------------------
# 4) LIVE RISK SCORING (today snapshot)
# -----------------------------
# Risk per (plant, SKU) is derived from the lowest predicted days of coverage
# and the highest stockout probability found in the forecast horizon.
live_rows = []
for (plant_id, sku_id), fgrp in forecast_output.groupby(["plant_id","sku_id"]):
    worst_doc = fgrp["predicted_days_of_coverage"].min()
    peak_prob = fgrp["stockout_probability"].max()

    # Demo score: 65% peak probability + 35% coverage shortfall relative to
    # 5 days (shortfall capped at 1.0), scaled to 0-100 and clipped.
    shortfall = min(1.0, (5 - worst_doc)/5)
    raw_score = 100 * (0.65 * peak_prob + 0.35 * shortfall)
    risk = int(round(np.clip(raw_score, 0, 100)))

    # Date of the first forecast row whose predicted stock is 0, if any
    depleted_dates = fgrp.loc[fgrp["predicted_closing_stock_units"] == 0, "date"]
    next_stockout = depleted_dates.iloc[0] if len(depleted_dates) > 0 else None

    live_rows.append({
        "plant_id": plant_id,
        "sku_id": sku_id,
        "risk_score": risk,
        "min_predicted_days_of_coverage": float(worst_doc),
        "max_stockout_probability": float(peak_prob),
        "next_stockout_date": next_stockout
    })

live_sku_risk = pd.DataFrame(live_rows)

def band(score):
    """Map a numeric risk score to its band label.

    Scores of 90 and above are "Critical", 70 and above "High", 40 and above
    "Medium"; anything else is "Low".
    """
    # Thresholds are checked from highest to lowest; the first match wins.
    for cutoff, label in ((90, "Critical"), (70, "High"), (40, "Medium")):
        if score >= cutoff:
            return label
    return "Low"

# Label every SKU-level score with its risk band
live_sku_risk["risk_band"] = live_sku_risk["risk_score"].map(band)

# Plant-level aggregation: worst SKU score per plant, plus how many SKUs
# score at least 40 (at risk) and at least 90 (critical).
risk_by_plant = live_sku_risk.groupby("plant_id")
plant_summary = risk_by_plant.agg(
    global_risk_score=("risk_score", "max"),
    skus_at_risk=("risk_score", lambda scores: int(scores.ge(40).sum())),
    critical_skus=("risk_score", lambda scores: int(scores.ge(90).sum())),
).reset_index()

# -----------------------------
# 5) BUILD JSON PAYLOAD
# -----------------------------
# Id -> record lookups for the catalog entries
sku_lookup = {entry["sku_id"]: entry for entry in sku_catalog}
plant_lookup = {plant["plant_id"]: plant for plant in plants}

plants_payload = []
for summary in plant_summary.itertuples(index=False):
    pid = summary.plant_id
    pinfo = plant_lookup[pid]
    plant_risk = int(summary.global_risk_score)

    # Top 10 SKUs of this plant, highest risk first (ties broken by stockout probability)
    plant_skus = live_sku_risk.loc[live_sku_risk["plant_id"] == pid]
    top = plant_skus.sort_values(
        ["risk_score", "max_stockout_probability"], ascending=False
    ).head(10)

    # Values are cast to plain Python types so the payload is JSON-serializable
    top_risks = [
        {
            "rank": rank,
            "sku_id": sku_row.sku_id,
            "sku_name": sku_lookup[sku_row.sku_id]["display_name"],
            "risk_score": int(sku_row.risk_score),
            "risk_band": sku_row.risk_band,
            "predicted_min_days_of_coverage": round(float(sku_row.min_predicted_days_of_coverage), 2),
            "max_stockout_probability": round(float(sku_row.max_stockout_probability), 2),
            "expected_stockout_date": sku_row.next_stockout_date,
        }
        for rank, sku_row in enumerate(top.itertuples(index=False), start=1)
    ]

    plants_payload.append({
        "plant_id": pid,
        "name": pinfo["name"],
        "location": {"lat": pinfo["lat"], "lon": pinfo["lon"]},
        "global_risk_score": plant_risk,
        "risk_band": band(plant_risk),
        "skus_at_risk_count": int(summary.skus_at_risk),
        "critical_skus_count": int(summary.critical_skus),
        "top_risks": top_risks,
    })

# Run metadata: product/pilot labels plus the parameters this run was generated with
payload_meta = {
    "product": "Risk Intelligence",
    "pilot": "MX Pilot",
    "language": "en",
    "horizon_days": HORIZON_DAYS,
    "as_of_utc": AS_OF.isoformat(),
    "currency": "MXN",
    "seed": SEED,
}

# Tab ids match the top-level payload keys that hold each tab's data
payload_tabs = [
    {"id": "live_risk", "label": "Live Risk"},
    {"id": "historical_forecast", "label": "Historical & Forecast"},
]

# Catalog: full SKU records, and plants reduced to id + name
payload_catalog = {
    "skus": sku_catalog,
    "plants": [{"plant_id": plant["plant_id"], "name": plant["name"]} for plant in plants],
}

# Historical & forecast tab: the three generated tables as lists of records,
# plus a description of the forecasting model that produced them
payload_history = {
    "granularity": "daily",
    "default_history_days": HISTORY_DAYS,
    "datasets": {
        "sales_history": sales_history.to_dict(orient="records"),
        "inventory_history": inventory_history.to_dict(orient="records"),
        "forecast_output": forecast_output.to_dict(orient="records"),
    },
    "models": [
        {
            "model_id": "baseline_moving_avg_14d",
            "type": "baseline",
            "description": "14-day moving average sales forecast + stock projection",
        }
    ],
}

payload = {
    "meta": payload_meta,
    "tabs": payload_tabs,
    "catalog": payload_catalog,
    "live_risk": {"plants": plants_payload},
    "historical_forecast": payload_history,
}

# Write the JSON payload as UTF-8, keeping non-ASCII characters unescaped
with open("demo_payload.json", "w", encoding="utf-8") as payload_file:
    json.dump(payload, payload_file, ensure_ascii=False, indent=2)

# Export each generated table to its own CSV, without the index column
for frame, csv_name in (
    (sales_history, "sales_history_demo.csv"),
    (inventory_history, "inventory_history_demo.csv"),
    (forecast_output, "forecast_output_demo.csv"),
):
    frame.to_csv(csv_name, index=False)

print("Saved: demo_payload.json, sales_history_demo.csv, inventory_history_demo.csv, forecast_output_demo.csv")


# Output:
# Saved: demo_payload.json, sales_history_demo.csv, inventory_history_demo.csv, forecast_output_demo.csv
