In [36]:
import pandas as pd
import numpy as np
from pathlib import Path

M5_DIR = Path("../data/raw/m5")   # change if needed

def build_df_base(store_ids, max_ids=3000, data_dir=None):
    """
    Builds the base dataframe used by all agents.

    Reads the three M5 CSV files, keeps the requested stores, reshapes the
    wide ``d_*`` sales columns into one row per (series, day) and attaches
    the calendar date / week key and the weekly sell price.

    Parameters
    ----------
    store_ids : str or list-like of str
        Store id(s) to keep, matched against the ``store_id`` column.
        A single string is treated as a one-element list.
    max_ids : int or None, default 3000
        Maximum number of distinct series ids kept *per store*, taken in
        file order. ``None`` keeps every series of the selected stores.
    data_dir : str, Path or None, default None
        Directory holding ``sales_train_validation.csv``, ``calendar.csv``
        and ``sell_prices.csv``. ``None`` falls back to the notebook-level
        ``M5_DIR``.

    Returns
    -------
    pandas.DataFrame
        Long-format frame sorted by ``id`` then ``date`` with the columns
        ``id, item_id, dept_id, cat_id, store_id, state_id, d, demand,
        date, wm_yr_wk, price``. ``demand`` and ``price`` are float32;
        ``price`` is NaN where no price row matches the left join.
    """
    data_dir = M5_DIR if data_dir is None else Path(data_dir)

    # Series.isin rejects a bare string, so accept "CA_1" as ["CA_1"].
    if isinstance(store_ids, str):
        store_ids = [store_ids]

    sales = pd.read_csv(data_dir / "sales_train_validation.csv")
    calendar = pd.read_csv(data_dir / "calendar.csv")
    prices = pd.read_csv(data_dir / "sell_prices.csv")

    # Filter to selected stores (regions)
    sales = sales[sales["store_id"].isin(store_ids)].copy()

    # Limit number of SKU-store series per store (for stability)
    if max_ids is not None:
        # First `max_ids` distinct ids of every store, in order of appearance.
        ids_per_store = sales.groupby("store_id")["id"].unique()
        keep_ids = [
            series_id
            for store_series in ids_per_store
            for series_id in store_series[:max_ids]
        ]
        sales = sales[sales["id"].isin(keep_ids)].copy()

    # Convert wide -> long
    d_cols = [c for c in sales.columns if c.startswith("d_")]
    id_cols = ["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]

    df = sales[id_cols + d_cols].melt(
        id_vars=id_cols,
        var_name="d",
        value_name="demand"
    )

    # Merge calendar (date + week); parsing here means `date` is already
    # datetime64 after the merge, so no second conversion is needed.
    calendar["date"] = pd.to_datetime(calendar["date"])
    df = df.merge(
        calendar[["d", "date", "wm_yr_wk"]],
        on="d",
        how="left"
    )

    # Merge prices (weekly price per store/item; left join keeps unpriced days)
    prices = prices.rename(columns={"sell_price": "price"})
    df = df.merge(
        prices[["store_id", "item_id", "wm_yr_wk", "price"]],
        on=["store_id", "item_id", "wm_yr_wk"],
        how="left"
    )

    # Clean types (float32 halves the memory of the two numeric columns)
    df["demand"] = df["demand"].astype(np.float32)
    df["price"] = df["price"].astype(np.float32)

    # Sort properly
    df = df.sort_values(["id", "date"]).reset_index(drop=True)

    return df

In [13]:
# Scratch load of the raw wide-format sales table (the same file build_df_base reads).
sales = pd.read_csv(M5_DIR / "sales_train_validation.csv")

In [17]:
# Restrict the scratch sales table to the three stores under study.
store_ids = ["CA_1", "TX_1", "WI_1"]
sales = sales.loc[sales["store_id"].isin(store_ids)].copy()


In [45]:
# Build the shared long-format base frame for the three selected stores.
df_base = build_df_base(
    store_ids=["CA_1", "TX_1", "WI_1"],  # three stores: CA_1, TX_1 and WI_1
    max_ids=3000                         # cap on the number of series kept per store
)

# Sanity check on the size of the resulting frame (rows, columns).
print(df_base.shape)

(17217000, 11)


In [46]:
# Row count per store in the base frame.
df_base['store_id'].value_counts()

store_id
CA_1    5739000
TX_1    5739000
WI_1    5739000
Name: count, dtype: int64

In [38]:
# Mean sell price of FOODS_3_090 at store CA_1 across every row of df_base
# (Series.mean skips NaN prices by default).
avg_price = df_base.loc[
    (df_base["item_id"] == "FOODS_3_090") & (df_base["store_id"] == "CA_1"),
    "price",
].mean()

In [47]:
import sys
from pathlib import Path
# Project root: the parent directory when the kernel runs from a folder named
# "notebooks", otherwise the current working directory itself.
project_root = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
# Prepend the root to sys.path; this must happen before the `backend` import below.
sys.path.insert(0, str(project_root))
from backend.forecast_agent import ForecastAgent


In [49]:
from google import genai
import os
# Set env var first in Colab:
# os.environ["GEMINI_API_KEY"] = "YOUR_KEY"


# Forecasting agent built on the base frame; presumably loads its trained
# models from ../models — TODO confirm in backend.forecast_agent.
agent = ForecastAgent(
    model_dir="../models",
    df_base=df_base
)

# Daily forecasts for two items at store CA_1 between 2016-03-28 and 2016-04-24.
out = agent.forecast(
    item_ids=["FOODS_3_090", "HOBBIES_1_001"],
    store_id="CA_1",
    start_date="2016-03-28",
    end_date="2016-04-24"
)

# NOTE(review): printing the whole dict dumps every per-item forecast frame as plain text.
print(out)

{'start_date': '2016-03-28', 'end_date': '2016-04-24', 'results': [{'item_id': 'FOODS_3_090', 'store_id': 'CA_1', 'series_id': 'FOODS_3_090_CA_1_validation', 'daily_forecast':            date  forecast
1885 2016-03-28      25.0
1886 2016-03-29      47.0
1887 2016-03-30      23.0
1888 2016-03-31      21.0
1889 2016-04-01      72.0
1890 2016-04-02      58.0
1891 2016-04-03      54.0
1892 2016-04-04      42.0
1893 2016-04-05      29.0
1894 2016-04-06      53.0
1895 2016-04-07      39.0
1896 2016-04-08      42.0
1897 2016-04-09      83.0
1898 2016-04-10      83.0
1899 2016-04-11      48.0
1900 2016-04-12      26.0
1901 2016-04-13      41.0
1902 2016-04-14      44.0
1903 2016-04-15      47.0
1904 2016-04-16      82.0
1905 2016-04-17      83.0
1906 2016-04-18      30.0
1907 2016-04-19      45.0
1908 2016-04-20      29.0
1909 2016-04-21      53.0
1910 2016-04-22      87.0
1911 2016-04-23      95.0
1912 2016-04-24      42.0, 'total_units': 1423.0}, {'item_id': 'HOBBIES_1_001', 'store_id': 'CA_

In [54]:
from backend.simulation_agent import SimulationAgent

In [52]:
# Daily forecast frame of the first result in `out` (FOODS_3_090 at CA_1 in the saved output).
forecast_df = out['results'][0]["daily_forecast"]

In [8]:
# Simulation agent configured for 500 simulations and a 2-day lead time.
# demand_cv is presumably the coefficient of variation applied to the forecast
# demand — TODO confirm in backend.simulation_agent.
sim_agent = SimulationAgent(
    n_simulations=500,
    demand_cv=0.25,
    lead_time_days=2
)

# Evaluate one fixed policy for FOODS_3_090 at CA_1 on the forecast window.
# s / Q are presumably the reorder point and the order quantity — TODO confirm.
sim_result = sim_agent.simulate(
    forecast_df=forecast_df,
    item_id="FOODS_3_090",
    store_id="CA_1",
    s=80,
    Q=120
)

# Display the returned summary dict as the cell output.
sim_result

{'item_id': 'FOODS_3_090',
 'store_id': 'CA_1',
 'policy': {'s': 80, 'Q': 120},
 'lead_time_days': 2,
 'initial_inventory': 80,
 'results': {'expected_fill_rate': 0.8617199243804855,
  'stockout_probability': 1.0,
  'avg_stockout_days': 5.592,
  'expected_lost_units': 198.29348497883825,
  'avg_inventory': 48.05153724327553,
  'avg_orders': 10.708},
 'scenario_summary': {'mean_total_demand': 1423.6870711196088,
  'p95_total_demand': 1539.8132012016497}}

In [53]:
from backend.optimization_agent import OptimizationAgent

In [15]:
# Same simulation set-up as the earlier simulation cell: 500 simulations,
# demand_cv of 0.25 and a 2-day lead time.
sim_agent = SimulationAgent(n_simulations=500, demand_cv=0.25, lead_time_days=2)

# Evaluate the fixed (s=80, Q=120) policy on the FOODS_3_090 / CA_1 forecast;
# `forecast_df` is the same frame as out['results'][0]["daily_forecast"].
sim_result = sim_agent.simulate(
    forecast_df=forecast_df,
    item_id="FOODS_3_090",
    store_id="CA_1",
    s=80,
    Q=120,
)

sim_result

{'item_id': 'FOODS_3_090',
 'store_id': 'CA_1',
 'policy': {'s': 80, 'Q': 120},
 'lead_time_days': 2,
 'initial_inventory': 80,
 'results': {'expected_fill_rate': 0.8617199243804855,
  'stockout_probability': 1.0,
  'avg_stockout_days': 5.592,
  'expected_lost_units': 198.29348497883825,
  'avg_inventory': 48.05153724327553,
  'avg_orders': 10.708},
 'scenario_summary': {'mean_total_demand': 1423.6870711196088,
  'p95_total_demand': 1539.8132012016497}}

In [16]:
# Policy optimiser that evaluates candidates through the simulation agent above.
opt_agent = OptimizationAgent(
    simulation_agent=sim_agent,
    holding_cost_rate=0.25,        # 25% annually
    order_cost=20.0,               # $20 per order
    stockout_cost_multiplier=3.0,  # lost sales penalty
    target_fill_rate=0.95
)

# Candidate policies to try
# s takes 50, 75, ..., 200 and Q takes 100, 150, ..., 400 (range stops are exclusive).
# NOTE(review): the best policy in the saved output (s=200, Q=400) sits on the
# upper edge of both grids, so the search may be truncated — consider widening
# these ranges and re-running.
s_candidates = range(50, 201, 25)
Q_candidates = range(100, 401, 50)

# Search the grid for FOODS_3_090 at CA_1; avg_price comes from the earlier
# cell that averages this item's price in df_base.
opt_result = opt_agent.optimize(
    forecast_df=out['results'][0]["daily_forecast"],
    item_id="FOODS_3_090",
    store_id="CA_1",
    avg_price=avg_price,
    s_candidates=s_candidates,
    Q_candidates=Q_candidates
)

In [17]:
# Show the selected policy together with its fill rate and cost breakdown.
opt_result["best_policy"]

{'item_id': 'FOODS_3_090',
 'store_id': 'CA_1',
 's': 200,
 'Q': 400,
 'fill_rate': 0.999617803249471,
 'total_cost': 90.07352447509766,
 'holding_cost': 7.709774971008301,
 'ordering_cost': 80.03999999999999,
 'stockout_cost': 2.323746681213379}

In [13]:
# 3. Build payload for LLM
# The store id and the date window are read back from the objects they
# describe (the best policy and the forecast output `out`) instead of being
# re-typed, so the payload cannot drift from the forecast that was optimised.
llm_payload = {
    "store_id": opt_result["best_policy"]["store_id"],
    "start_date": out["start_date"],
    "end_date": out["end_date"],
    "service_level_target": 95,  # percent
    "mode": "auto_optimized",
    "products": [{
        "item_id": opt_result["best_policy"]["item_id"],
        "total_units": int(forecast_df["forecast"].sum()),
        # float(...) keeps numpy scalars out of the payload, mirroring the
        # int(...) used for total_units.
        "avg_daily_units": float(forecast_df["forecast"].mean()),
        "s": opt_result["best_policy"]["s"],
        "Q": opt_result["best_policy"]["Q"],
        "fill_rate": opt_result["best_policy"]["fill_rate"] * 100,  # fraction -> percent
        "total_cost": opt_result["best_policy"]["total_cost"],
        "holding_cost": opt_result["best_policy"]["holding_cost"],
        "ordering_cost": opt_result["best_policy"]["ordering_cost"],
        "stockout_cost": opt_result["best_policy"]["stockout_cost"]
    }]
}


In [55]:
# Never commit API keys: read the Gemini key from the environment instead.
# `os` and `genai` are imported in the earlier cell that builds the ForecastAgent.
# A missing variable raises KeyError("GEMINI_API_KEY"), which names what to set.
# NOTE(review): the key that used to be hard-coded here is exposed in the
# notebook history and should be revoked.
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])


In [None]:
# 4. Generate final explanation
# Needs `opt_agent`, `client` and `llm_payload` from earlier cells; the saved
# NameError shows `opt_agent` was not defined in the kernel when this cell last ran.
final_summary = opt_agent.generate_llm_summary(
    llm_client=client,
    llm_model="gemini-3-flash-preview",
    payload=llm_payload
)

print(final_summary)

NameError: name 'opt_agent' is not defined

In [57]:
from backend.agent_pipeline import run_inventory_planner

In [58]:
# Forecast agent handed to the pipeline run below; built with the same
# arguments as the ForecastAgent created earlier in the notebook.
agent = ForecastAgent(
    model_dir="../models",
    df_base=df_base
)

In [59]:
# Single-call pipeline run for FOODS_3_090 at CA_1 over the same date window
# as the manual steps above.
# NOTE(review): service_level_target is a fraction here (0.95) while the
# hand-built llm_payload uses a percentage (95) — confirm the unit each
# consumer expects.
results = run_inventory_planner(
    store_id="CA_1",
    item_ids=["FOODS_3_090"],
    start_date="2016-03-28",
    end_date="2016-04-24",
    service_level_target=0.95,
    mode="auto",
    df_base=df_base,
    forecast_agent=agent,
    llm_client=client
)

# Print the per-item LLM summaries, then show the chosen policies as the cell output.
print(results["llm_summaries"])
results["policies"]

{'FOODS_3_090': 'For the upcoming month, we expect strong sales for FOODS_3_090, with demand reaching over 1,400 units. To maintain consistent availability, we recommend ordering 500 units whenever your stock levels dip below 200. This strategy ensures the product is on the shelf nearly 100% of the time while keeping your storage and ordering costs balanced at a low total of approximately $73. By ordering in these specific quantities, we minimize the frequency of deliveries and the associated labor costs without overstocking the store. To get started, please update your inventory alerts to the 200-unit threshold and ensure your backroom has the capacity for 500-unit shipments. Finally, monitor daily sales closely to confirm that the high-volume demand stays on track with our projections.'}


[{'item_id': 'FOODS_3_090',
  'store_id': 'CA_1',
  's': 200,
  'Q': 500,
  'fill_rate': 0.9998816637256905,
  'total_cost': 72.58704376220703,
  'holding_cost': 9.355648040771484,
  'ordering_cost': 62.519999999999996,
  'stockout_cost': 0.7113962173461914}]