
# Scan Strategy Demo — Noon-only (OOS Count), Aisle vs Shelf vs RL

This notebook simulates a 30‑aisle grocery store where each aisle has **exactly 20 shelves**. Inventory evolves independently of scanning; **scans are observational** and do **not** affect stockouts or restocking.

**One scan time per day:** noon (12:00).  
**Budget:** 200 shelves per day.

**Reward (coverage):** the number of **out-of-stock (OOS) shelves observed at noon** among the scanned set.

We compare four strategies under identical budget:
1. **Random Aisle** – pick 10 aisles and scan **all 20 shelves** in each (= 200 shelves).  
2. **Bandit (Aisle TS)** – Thompson Sampling over aisles; pick 10 aisles, scan all shelves in each.  
3. **Bandit (Shelf TS)** – Thompson Sampling over **shelves**; pick **any 200 shelves** across the store.  
4. **RL (Q-learning, Shelf)** – per‑shelf Q values (optimistic initialization); pick **any 200 shelves** by Q.

At the end, you’ll get:
- **Cumulative OOS captured** plots (overlay + per‑strategy),
- A **totals** bar chart,
- Optional **MP4 animations** and a side-by-side **HTML page**.


In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation, writers

# Global matplotlib defaults: every figure created below uses this
# (width, height) unless a cell overrides it.
plt.rcParams['figure.figsize'] = (7.2, 4.2)

# --- Simulation horizon ---
STEPS_PER_DAY = 48  # 48 steps over 24 hours -> each step is 30 minutes
DAYS = 30
# Step index of the daily scan (12:00). Derived from STEPS_PER_DAY so the scan
# stays at midday -- and inside range(STEPS_PER_DAY) -- if the resolution changes.
NOON_STEP = STEPS_PER_DAY // 2

# --- Store ---
N_AISLES = 30
SHELVES_PER_AISLE = 20  # fixed for clarity; total shelves = 600

# --- Categories ---
# "perish" selects which time-of-day demand/restock profile a shelf follows;
# "base_oos" is the baseline per-step probability of an in-stock shelf going OOS.
CATS = {
    "produce":   {"perish": True,  "base_oos": 0.020},
    "meat":      {"perish": True,  "base_oos": 0.018},
    "dairy":     {"perish": True,  "base_oos": 0.015},
    "frozen":    {"perish": True,  "base_oos": 0.012},
    "bakery":    {"perish": True,  "base_oos": 0.016},
    "snacks":    {"perish": False, "base_oos": 0.006},
    "beverages": {"perish": False, "base_oos": 0.006},
    "household": {"perish": False, "base_oos": 0.004},
    "pantry":    {"perish": False, "base_oos": 0.005},
}
CAT_LIST = list(CATS)

# --- Aisle hotness multipliers (for heterogeneity) ---
# Spread signal beyond 10 aisles so shelf-level targeting wins clearly.
HOTNESS = np.array([3.0]*8 + [2.0]*10 + [1.2]*8 + [0.7]*4)  # one entry per aisle
if HOTNESS.shape != (N_AISLES,):
    # Fail here rather than with an opaque shape error deep in the simulation.
    raise ValueError(
        f"HOTNESS must have exactly N_AISLES={N_AISLES} entries, got {HOTNESS.shape[0]}"
    )


In [None]:

def base_tod_mult(step_mod):
    """Return the store-wide time-of-day demand multiplier for a step of the day.

    ``step_mod`` is a step index within one day; it is converted to a clock
    hour using ``STEPS_PER_DAY``.
    """
    steps_per_hour = STEPS_PER_DAY / 24.0
    hour = step_mod / steps_per_hour

    # Overnight (22:00-06:00) is the quietest period.
    if hour < 6 or hour >= 22:
        return 0.6
    # From here on hour is in [6, 22); walk the upper bounds in order.
    if hour < 11:
        return 1.0
    if hour < 14:
        return 1.7  # noon surge
    if hour < 18:
        return 1.6
    return 1.4

def cat_time_mult(is_perish, step_mod):
    """Return the category-specific demand multiplier for a step of the day.

    Perishable and non-perishable shelves each have two elevated/adjusted
    hour bands; outside those bands both fall back to 0.8.
    """
    hour = step_mod / (STEPS_PER_DAY / 24.0)

    # (start_hour inclusive, end_hour exclusive, multiplier)
    if is_perish:
        bands = ((9, 13, 2.5), (13, 16, 1.3))
    else:
        bands = ((7, 12, 0.9), (12, 16, 1.1))

    for start, end, mult in bands:
        if start <= hour < end:
            return mult
    return 0.8

def restock_hazard(is_perish, step_mod):
    """Return the per-step probability that an OOS shelf gets restocked.

    Perishable shelves are restocked mostly in the afternoon/evening;
    non-perishable shelves mostly overnight.
    """
    hour = step_mod / (STEPS_PER_DAY / 24.0)

    if not is_perish:
        overnight = hour >= 22 or hour < 6
        return 0.28 if overnight else 0.03

    if 14 <= hour < 18:
        return 0.40
    if 18 <= hour < 22:
        return 0.18
    # Every other hour (morning and overnight alike) shares the same low rate.
    return 0.05


### Build a synthetic store

In [None]:

def build_store(seed=2026):
    """Build the synthetic store as a dict of per-shelf arrays.

    Each aisle gets one category (perishable or not with probability 0.5) and
    one hotness multiplier (a shuffled copy of ``HOTNESS``); every shelf in the
    aisle inherits both. Per-shelf base OOS rates are drawn around the
    category's ``base_oos`` and clipped to [0.4x, 1.8x] of it.

    Returns a dict with keys ``shelf_to_aisle``, ``shelf_is_perish``,
    ``shelf_base_oos``, ``shelf_hot_mult`` and ``shelf_cat``, each of length
    ``N_AISLES * SHELVES_PER_AISLE``.
    """
    rng = np.random.default_rng(seed)

    # Aisle-level category assignment (roughly 50/50 perish vs non-perish).
    perish_flags = rng.random(N_AISLES) < 0.5
    aisle_cats = np.empty(N_AISLES, dtype=object)
    for aisle, flag in enumerate(perish_flags):
        wanted = bool(flag)
        candidates = [name for name in CAT_LIST if CATS[name]["perish"] == wanted]
        aisle_cats[aisle] = rng.choice(candidates)

    # Randomise which aisles are "hot".
    aisle_hotness = HOTNESS.copy()
    rng.shuffle(aisle_hotness)

    # Per-shelf base rates: one normal draw per shelf, aisle by aisle.
    n_shelves = N_AISLES * SHELVES_PER_AISLE
    base_rates = np.zeros(n_shelves, dtype=float)
    for aisle in range(N_AISLES):
        mean = CATS[aisle_cats[aisle]]["base_oos"]
        start = aisle * SHELVES_PER_AISLE
        stop = start + SHELVES_PER_AISLE
        draws = rng.normal(mean, mean * 0.30, size=SHELVES_PER_AISLE)
        base_rates[start:stop] = np.clip(draws, mean * 0.4, mean * 1.8)

    # Broadcast aisle attributes down to shelves.
    aisle_perish = [CATS[aisle_cats[aisle]]["perish"] for aisle in range(N_AISLES)]
    return {
        "shelf_to_aisle": np.repeat(np.arange(N_AISLES), SHELVES_PER_AISLE),
        "shelf_is_perish": np.repeat(aisle_perish, SHELVES_PER_AISLE),
        "shelf_base_oos": base_rates,
        "shelf_hot_mult": np.repeat(aisle_hotness, SHELVES_PER_AISLE),
        "shelf_cat": np.repeat(aisle_cats, SHELVES_PER_AISLE),
    }

# Build the store once; every later cell reads this module-level `store`.
store = build_store(seed=2026)
# S = total number of shelves (one entry per shelf in each store array).
S = len(store["shelf_to_aisle"])
print(f"Total shelves: {S}  |  Aisles: {N_AISLES}  |  Shelves/aisle: {SHELVES_PER_AISLE}")


### Simulate 30 days of inventory (scans are observational)

In [None]:

def simulate_month(store, seed=5001):
    """Simulate ``DAYS`` days of shelf stock state and snapshot OOS at noon.

    Every step, each in-stock shelf may go out of stock and each OOS shelf
    (including one that just sold out this step) may be restocked. The function
    takes no scan input, so scanning cannot influence the inventory.

    Parameters
    ----------
    store : dict
        Per-shelf arrays as returned by ``build_store``.
    seed : int
        Seed for the simulation's own random generator.

    Returns
    -------
    numpy.ndarray
        Boolean array of shape ``(DAYS, n_shelves)``; ``True`` where the shelf
        was OOS at the end of step ``NOON_STEP`` on that day.

    Raises
    ------
    ValueError
        If ``NOON_STEP`` is not a valid step index, which would otherwise
        silently produce an all-False snapshot.
    """
    if not 0 <= NOON_STEP < STEPS_PER_DAY:
        raise ValueError(
            f"NOON_STEP={NOON_STEP} must lie in [0, {STEPS_PER_DAY}) to be snapshotted"
        )

    rs = np.random.default_rng(seed)
    n_shelves = len(store["shelf_to_aisle"])
    is_perish = np.asarray(store["shelf_is_perish"], dtype=bool)
    # Time-independent part of the stock-out probability.
    static_rate = np.asarray(store["shelf_base_oos"]) * np.asarray(store["shelf_hot_mult"])

    # The time-of-day multipliers depend only on (step, perishable?), so
    # tabulate them once instead of calling Python functions per shelf per step.
    steps = range(STEPS_PER_DAY)
    tod = [base_tod_mult(s) for s in steps]
    sell_perish = [cat_time_mult(True, s) for s in steps]
    sell_stable = [cat_time_mult(False, s) for s in steps]
    restock_perish = [restock_hazard(True, s) for s in steps]
    restock_stable = [restock_hazard(False, s) for s in steps]

    instock = np.ones(n_shelves, dtype=bool)
    noon_oos = np.zeros((DAYS, n_shelves), dtype=bool)

    for day in range(DAYS):
        for step in steps:
            # In-stock -> OOS
            in_idx = np.flatnonzero(instock)
            if in_idx.size:
                p = static_rate[in_idx] * tod[step]
                p *= np.where(is_perish[in_idx], sell_perish[step], sell_stable[step])
                p = np.clip(p, 0.0, 0.7)
                sold_out = rs.random(in_idx.size) < p
                instock[in_idx[sold_out]] = False

            # OOS -> restocked (independent of scans). Computed after the
            # stock-out pass, so a shelf that just emptied is already eligible.
            out_idx = np.flatnonzero(~instock)
            if out_idx.size:
                hazard = np.where(is_perish[out_idx], restock_perish[step], restock_stable[step])
                restocked = rs.random(out_idx.size) < hazard
                instock[out_idx[restocked]] = True

            # noon snapshot (OOS boolean), taken after this step's transitions
            if step == NOON_STEP:
                noon_oos[day, :] = ~instock

    return noon_oos

# Ground-truth noon OOS matrix (days x shelves) shared by every strategy.
noon_oos = simulate_month(store, seed=5001)
# Notebook display: matrix shape and the overall fraction of OOS shelf-days.
noon_oos.shape, noon_oos.mean()


### Policies

In [None]:

# Shared random generator for all policy decisions (random aisle picks and
# Thompson-sampling draws); separate from the simulation's own generator.
RS = np.random.default_rng(777)

def random_aisle_select(day):
    """Return 10 distinct aisle indices chosen uniformly at random.

    ``day`` is accepted for signature parity with the other selectors and is
    not used.
    """
    return RS.choice(N_AISLES, size=10, replace=False)

# Aisle TS: Beta-Binomial model of each aisle's noon OOS rate (one pooled
# per-shelf rate per aisle). Beta(1, 1) is the uniform prior.
aisle_alpha = np.ones(N_AISLES)
aisle_beta  = np.ones(N_AISLES)

def ts_aisle_select(day, n_select=10):
    """Pick aisles by Thompson Sampling over the per-aisle Beta posteriors.

    Parameters
    ----------
    day : int
        Unused; kept for signature parity with the other selectors.
    n_select : int
        Number of aisles to return (default 10, i.e. 200 shelves at 20 per
        aisle). Values at or above the number of aisles return every aisle;
        values <= 0 return an empty selection.

    Returns
    -------
    numpy.ndarray
        Indices of the ``n_select`` aisles with the highest sampled score,
        in no particular order.
    """
    theta = RS.beta(aisle_alpha, aisle_beta)  # sampled OOS rate per shelf at noon
    score = theta * SHELVES_PER_AISLE        # sampled aisle OOS count
    n_aisles = score.shape[0]
    if n_select <= 0:
        return np.empty(0, dtype=np.intp)
    if n_select >= n_aisles:
        # np.argpartition raises when kth is out of bounds.
        return np.arange(n_aisles)
    return np.argpartition(-score, n_select)[:n_select]

def ts_aisle_update(day, aisles, noon_oos):
    """Fold one day's observations into the aisle Beta posteriors.

    For every scanned aisle, the number of OOS shelves seen at noon is added
    to ``aisle_alpha`` and the number of in-stock shelves to ``aisle_beta``.
    The module-level arrays are modified in place.
    """
    todays_row = noon_oos[day]
    for aisle in aisles:
        first = aisle * SHELVES_PER_AISLE
        n_oos = int(todays_row[first:first + SHELVES_PER_AISLE].sum())
        aisle_alpha[aisle] += n_oos
        aisle_beta[aisle] += SHELVES_PER_AISLE - n_oos

# Shelf TS with informative priors (noon prior)
def noon_prior_prob(store):
    """Return a per-shelf prior OOS probability evaluated at the noon step.

    Multiplies each shelf's base rate and hotness by the time-of-day and
    category multipliers for ``NOON_STEP``, then clips to [0.01, 0.6].
    """
    tod_mult = base_tod_mult(NOON_STEP)
    # Only two distinct category multipliers exist; pick per shelf.
    cat_mult = np.where(
        np.asarray(store["shelf_is_perish"], dtype=bool),
        cat_time_mult(True, NOON_STEP),
        cat_time_mult(False, NOON_STEP),
    )
    raw = store["shelf_base_oos"] * store["shelf_hot_mult"] * tod_mult * cat_mult
    return np.clip(raw, 0.01, 0.6)

def init_shelf_ts_priors(store, prior_strength=20):
    """Return ``(alpha0, beta0)`` Beta prior parameters, one pair per shelf.

    The noon prior probability is treated as ``prior_strength`` pseudo
    observations added on top of a uniform Beta(1, 1).
    """
    prior_p = noon_prior_prob(store)
    pseudo_oos = prior_strength * prior_p
    pseudo_instock = prior_strength * (1.0 - prior_p)
    return 1.0 + pseudo_oos, 1.0 + pseudo_instock

# Module-level shelf posteriors read by ts_shelf_select and updated in place
# by ts_shelf_update; run_experiment re-initialises them.
shelf_alpha, shelf_beta = init_shelf_ts_priors(store, prior_strength=20)

def ts_shelf_select(day, K=200):
    """Pick ``K`` shelves by Thompson Sampling over the per-shelf Beta posteriors.

    Parameters
    ----------
    day : int
        Unused; kept for signature parity with the other selectors.
    K : int
        Scan budget. Clamped to the number of shelves: a budget at or above
        the store size returns every shelf, and ``K <= 0`` returns none.

    Returns
    -------
    numpy.ndarray
        Indices of the selected shelves, in no particular order.
    """
    # Always draw, so the shared generator advances the same way regardless of K.
    theta = RS.beta(shelf_alpha, shelf_beta)
    n_shelves = theta.shape[0]
    if K <= 0:
        return np.empty(0, dtype=np.intp)
    if K >= n_shelves:
        # np.argpartition raises when kth is out of bounds.
        return np.arange(n_shelves)
    return np.argpartition(-theta, K)[:K]

def ts_shelf_update(day, shelves, noon_oos):
    """Fold one day's scanned-shelf observations into the shelf posteriors.

    An OOS observation adds 1 to ``shelf_alpha`` for that shelf; an in-stock
    observation adds 1 to ``shelf_beta``. The module-level arrays are
    modified in place.
    """
    seen_oos = noon_oos[day, shelves].astype(int)
    seen_instock = 1 - seen_oos
    shelf_alpha[shelves] += seen_oos
    shelf_beta[shelves] += seen_instock

# RL per-shelf Q (optimistic init from prior p0), TD(0) update
def init_Q_per_shelf(store):
    """Return initial per-shelf Q values: a fresh copy of the noon prior."""
    prior = noon_prior_prob(store)
    return prior.copy()

def select_topK_by_Q(Qs, K=200):
    """Return the indices of the ``K`` largest Q values (unordered).

    ``K`` is clamped to the number of shelves: ``K >= len(Qs)`` returns every
    index (``np.argpartition`` would otherwise raise on an out-of-bounds kth),
    and ``K <= 0`` returns an empty index array.
    """
    q_values = np.asarray(Qs)
    n_shelves = q_values.shape[0]
    if K <= 0:
        return np.empty(0, dtype=np.intp)
    if K >= n_shelves:
        return np.arange(n_shelves)
    return np.argpartition(-q_values, K)[:K]

def update_Q(Qs, shelves, noon_oos, day, alpha_q=0.35):
    """Move the Q values of the scanned shelves toward today's observation.

    Each scanned shelf's Q becomes ``(1 - alpha_q) * Q + alpha_q * observed``,
    where ``observed`` is 1.0 if the shelf was OOS at noon and 0.0 otherwise.
    ``Qs`` is modified in place and also returned.
    """
    observed = noon_oos[day, shelves].astype(float)
    retain = 1 - alpha_q
    blended = retain * Qs[shelves] + alpha_q * observed
    Qs[shelves] = blended
    return Qs


### Run the 30‑day experiment

In [None]:

def run_experiment(store, noon_oos, prior_strength_shelf_ts=20, alpha_q=0.35, seed=777):
    """Run all four scan strategies over the same noon OOS matrix.

    Each day every strategy picks its shelves, is credited with the number of
    OOS shelves among them, and (for the learning strategies) updates its
    state from what it observed.

    Parameters
    ----------
    store : dict
        Per-shelf arrays as returned by ``build_store``.
    noon_oos : numpy.ndarray
        Boolean ``(DAYS, n_shelves)`` matrix from ``simulate_month``.
    prior_strength_shelf_ts : float
        Pseudo-observation weight of the shelf-TS prior.
    alpha_q : float
        Learning rate of the per-shelf Q update.
    seed : int or None
        Seed used to re-create the shared policy generator ``RS`` so repeated
        runs give identical results. ``None`` keeps the generator's current
        state.

    Returns
    -------
    dict
        Strategy name -> list of daily OOS counts captured.
    """
    global RS, aisle_alpha, aisle_beta, shelf_alpha, shelf_beta

    # Reset ALL learning state, including the policy RNG; resetting only the
    # priors would make a second run in the same session differ from the first.
    if seed is not None:
        RS = np.random.default_rng(seed)
    aisle_alpha = np.ones(N_AISLES)
    aisle_beta = np.ones(N_AISLES)
    shelf_alpha, shelf_beta = init_shelf_ts_priors(store, prior_strength=prior_strength_shelf_ts)
    Qs = init_Q_per_shelf(store)

    shelf_offsets = np.arange(SHELVES_PER_AISLE)

    def shelves_in(aisles):
        # Expand aisle indices to the indices of every shelf in those aisles.
        starts = np.asarray(aisles, dtype=np.intp) * SHELVES_PER_AISLE
        return (starts[:, None] + shelf_offsets).ravel()

    def captured(day, shelves):
        # Reward: number of OOS shelves among the scanned set.
        return int(noon_oos[day, shelves].sum())

    results = {"Random Aisle": [], "Bandit (Aisle TS)": [], "Bandit (Shelf TS)": [], "RL (Q-learning, Shelf)": []}

    # The per-day order of the three RS-consuming selectors is kept fixed so
    # results are comparable across runs.
    for day in range(DAYS):
        # Random Aisle (scan ALL shelves in 10 aisles)
        ais_rand = random_aisle_select(day)
        results["Random Aisle"].append(captured(day, shelves_in(ais_rand)))

        # Aisle TS
        ais_ts = ts_aisle_select(day)
        results["Bandit (Aisle TS)"].append(captured(day, shelves_in(ais_ts)))
        ts_aisle_update(day, ais_ts, noon_oos)

        # Shelf TS
        shelves_ts = ts_shelf_select(day, K=200)
        results["Bandit (Shelf TS)"].append(captured(day, shelves_ts))
        ts_shelf_update(day, shelves_ts, noon_oos)

        # RL (per-shelf Q)
        shelves_q = select_topK_by_Q(Qs, K=200)
        results["RL (Q-learning, Shelf)"].append(captured(day, shelves_q))
        Qs = update_Q(Qs, shelves_q, noon_oos, day, alpha_q=alpha_q)

    return results

# Run the experiment and rank the strategies by total OOS captured.
daily = run_experiment(store, noon_oos, prior_strength_shelf_ts=20, alpha_q=0.35)
summary = (
    pd.DataFrame(
        [(name, int(np.sum(counts))) for name, counts in daily.items()],
        columns=["strategy", "total_OOS_captured"],
    )
    .sort_values("total_OOS_captured", ascending=False)
    .reset_index(drop=True)
)
summary


### Visualize results (overlay + totals bar)

In [None]:

# Overlay: one cumulative curve per strategy on shared axes
fig, ax = plt.subplots()
for k in daily:
    ax.plot(np.cumsum(daily[k]), label=k)
ax.set_xlabel("Day")
ax.set_ylabel("Cumulative OOS captured")
ax.set_title("Noon-only — Equal budget (200 shelves/day)")
ax.legend(loc="lower right")
plt.show()

# Totals: one bar per strategy, best first
fig, ax = plt.subplots()
summary2 = pd.DataFrame(
    [(name, int(np.sum(counts))) for name, counts in daily.items()],
    columns=["strategy", "total_OOS_captured"],
).sort_values("total_OOS_captured", ascending=False)
ax.bar(summary2["strategy"], summary2["total_OOS_captured"])
ax.set_ylabel("Total OOS captured at noon (30 days)")
ax.set_title("Higher is better")
plt.xticks(rotation=20, ha='right')
plt.tight_layout()
plt.show()



### (Optional) Generate MP4 animations and a side-by-side HTML page

> Requires `ffmpeg` for MP4. If unavailable, you can skip this section or adapt to GIF.


In [None]:

def animate_series(daily_values, title, outfile_base):
    """Render a growing cumulative-sum line for one strategy to an MP4 file.

    The file is written to ``anim_oos_{outfile_base}.mp4``. Rendering is
    best-effort: if ffmpeg is missing or the save fails, a message is printed
    and the function returns normally.

    Parameters
    ----------
    daily_values : sequence of numbers
        Daily OOS counts; the animation shows their running total.
    title : str
        Axes title.
    outfile_base : str
        Stem used in the output file name.
    """
    n_days = len(daily_values)
    if n_days == 0:
        print("no data; skipping MP4 render for", outfile_base)
        return

    cum = np.cumsum(daily_values)
    days = np.arange(1, n_days + 1)
    fig, ax = plt.subplots()
    try:
        line, = ax.plot([], [])
        # Avoid degenerate (identical) axis limits for a 1-day or all-zero series.
        ax.set_xlim(1, max(n_days, 2))
        top = float(cum.max()) * 1.05
        ax.set_ylim(0, top if top > 0 else 1.0)
        ax.set_xlabel("Day")
        ax.set_ylabel("Cumulative OOS captured")
        ax.set_title(title)

        def init():
            line.set_data([], [])
            return (line,)

        def update(frame):
            line.set_data(days[:frame + 1], cum[:frame + 1])
            return (line,)

        anim = FuncAnimation(fig, update, frames=n_days, init_func=init, blit=True, interval=200)
        mp4_path = f"anim_oos_{outfile_base}.mp4"

        # Distinguish "ffmpeg is missing" from a genuine render failure so the
        # message does not mislead.
        if not writers.is_available('ffmpeg'):
            print("ffmpeg not available; skipping MP4 render for", outfile_base)
            return
        writer = writers['ffmpeg'](fps=6, metadata=dict(artist='scan-strategy-demo'), bitrate=2200)
        try:
            anim.save(mp4_path, writer=writer)
        except Exception as exc:  # best-effort optional output: report, don't crash
            print(f"MP4 render failed for {outfile_base}: {exc!r}")
    finally:
        # Always release the figure, even if setup raised.
        plt.close(fig)

# One MP4 per strategy: results key -> output file stem
for key, base in {
    "Random Aisle": "random",
    "Bandit (Aisle TS)": "ts_aisle",
    "Bandit (Shelf TS)": "ts_shelf",
    "RL (Q-learning, Shelf)": "q_shelf",
}.items():
    animate_series(daily[key], f"{key} — Cumulative OOS captured", base)

# Overlay animation
def animate_overlay(daily_dict, title, outfile_base):
    """Render all strategies' cumulative curves on shared axes to an MP4 file.

    The file is written to ``anim_oos_{outfile_base}.mp4``. Rendering is
    best-effort: if ffmpeg is missing or the save fails, a message is printed
    and the function returns normally.

    Parameters
    ----------
    daily_dict : dict
        Strategy name -> sequence of daily OOS counts.
    title : str
        Axes title.
    outfile_base : str
        Stem used in the output file name.
    """
    keys = list(daily_dict.keys())
    # Animate over the horizon actually present in the data (the shortest
    # series), rather than assuming it equals the global DAYS.
    n_days = min((len(daily_dict[k]) for k in keys), default=0)
    if n_days == 0:
        print("no data; skipping MP4 render for", outfile_base)
        return

    days = np.arange(1, n_days + 1)
    cum = {k: np.cumsum(daily_dict[k])[:n_days] for k in keys}
    fig, ax = plt.subplots()
    try:
        lines = [ax.plot([], [])[0] for _ in keys]
        # Avoid degenerate (identical) axis limits for a 1-day or all-zero input.
        ax.set_xlim(1, max(n_days, 2))
        ymax = max(float(v.max()) for v in cum.values()) * 1.05
        ax.set_ylim(0, ymax if ymax > 0 else 1.0)
        ax.set_xlabel("Day")
        ax.set_ylabel("Cumulative OOS captured")
        ax.set_title(title)
        ax.legend(keys, loc="lower right")

        def init():
            for ln in lines:
                ln.set_data([], [])
            return tuple(lines)

        def update(frame):
            x = days[:frame + 1]
            for ln, k in zip(lines, keys):
                ln.set_data(x, cum[k][:frame + 1])
            return tuple(lines)

        anim = FuncAnimation(fig, update, frames=n_days, init_func=init, blit=True, interval=200)
        # Honour the caller-supplied stem (it was previously ignored).
        mp4_path = f"anim_oos_{outfile_base}.mp4"

        # Distinguish "ffmpeg is missing" from a genuine render failure.
        if not writers.is_available('ffmpeg'):
            print("ffmpeg not available; skipping MP4 render for", outfile_base)
            return
        writer = writers['ffmpeg'](fps=6, metadata=dict(artist='scan-strategy-demo'), bitrate=2200)
        try:
            anim.save(mp4_path, writer=writer)
        except Exception as exc:  # best-effort optional output: report, don't crash
            print(f"MP4 render failed for {outfile_base}: {exc!r}")
    finally:
        # Always release the figure, even if setup raised.
        plt.close(fig)

# Render the combined comparison animation for all four strategies.
animate_overlay(daily, "Comparison (Noon-only; OOS captured)", "overlay")


### Export daily results to CSV (for slides/dashboards)

In [None]:

# One row per day, one column per strategy; the index is written as "day".
csv_path = "daily_oos_captured.csv"
df_daily = pd.DataFrame(daily).rename_axis("day")
df_daily.to_csv(csv_path, index=True)
print("Saved:", csv_path)
df_daily.head()



### Knobs to tweak
- **Heterogeneity:** edit `HOTNESS` composition or per‑category base rates.
- **Learning behavior:** change shelf‑TS `prior_strength` or RL `alpha_q`.
- **Budget:** keep equal across strategies (always 200 shelves/day) for fair comparison.
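- **Scan time:** change `NOON_STEP` if you want a different daily scan time. It is a step index within the day (with `STEPS_PER_DAY = 48`, each step is 30 minutes, so 24 = 12:00) and must be less than `STEPS_PER_DAY`.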
