# Prop 47 (CA) — State-level Synthetic Control (2010–2024)

This notebook is a **clean replication** of the project:
- Builds / loads a **state-month covered-population panel**
- Runs **Synthetic Control** for CA with **in-space placebos** and **RMSPE filtering**
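- Fits weights on each spec's **pre-treatment window** (`pre_start` up to, but not including, `t0`), keeps only donors with complete data through the **pre-COVID fit window** (ending 2019-12), and evaluates:
  - **Post1:** `t0`..2019-12 (`t0` is spec-defined; 2014-11 in the baseline spec)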
  - **COVID:** 2020-03..2021-12 (descriptive only)
  - **Post2:** 2022-01..2024-12 (descriptive only)

> Recommended workflow: keep `data/processed/state_month_covered.parquet` in the repo and set `REBUILD_PANEL=False`.


In [None]:
# ============================================================
# 0) Setup
# ============================================================
from __future__ import annotations

from pathlib import Path
from typing import List, Dict, Any, Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cvxpy as cp

# Raise pandas' display limits (up to 200 columns, 160 characters wide) for
# the wide tables shown later in the notebook.
pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 160)


In [None]:
# ============================================================
# 1) Config
# ============================================================
# --- Paths -------------------------------------------------------------
# All paths are relative to the directory the notebook is run from.
REPO_ROOT = Path(".")
DATA_DIR = REPO_ROOT / "data"
RAW_DIR = DATA_DIR / "raw"   # optional: only used if REBUILD_PANEL=True
PROCESSED_PATH = DATA_DIR / "processed" / "state_month_covered.parquet"

# Output folders are created eagerly so later cells can write without checks.
OUT_DIR = REPO_ROOT / "outputs"
FIG_DIR = OUT_DIR / "figures"
TAB_DIR = OUT_DIR / "tables"
FIG_DIR.mkdir(parents=True, exist_ok=True)
TAB_DIR.mkdir(parents=True, exist_ok=True)

# Toggle: rebuild processed panel from raw yearly parquet files
REBUILD_PANEL = False

# Expected raw pattern if rebuild:
# data/raw/offenses_known_monthly_{year}.parquet
RAW_FILE_PATTERN = "offenses_known_monthly_{year}.parquet"
YEARS = range(2010, 2025)  # 2010 through 2024 inclusive

# Columns (raw parquet) read by build_state_month_covered_from_raw
RAW_COLUMNS = [
    "state_abb", "ori", "year", "month", "number_of_months_reported",
    "population", "actual_theft_total", "actual_index_violent"
]

# State code and date column names (processed)
STATE_COL = "state_abb"
DATE_COL = "date"

# Outcomes (processed)
OUTCOME_THEFT = "theft_per_100k_coveredpop"
OUTCOME_VIOL  = "violent_per_100k_coveredpop"
COVERAGE_COL  = "coverage_rate"

# Treated unit
TREATED = "CA"

# Donor exclusions (DQ + non-states / territories).
# Applied both when building the panel from raw files and again after loading.
DQ_EXCLUDED = set(["AR","HI","IN","MI","MS","MT","NE","NH","NY","OH","PA","SD","UT","WV","OR","CZ","PR","GU"])

# Time windows
DATE_MIN = pd.Timestamp("2010-01-01")
# End of the fit window passed to fit_one: donors must have complete data
# through this month and the Post1 segment ends here. The weights themselves
# are solved on the pre-treatment months [pre_start, t0) only.
FIT_END  = pd.Timestamp("2019-12-01")
FULL_END = pd.Timestamp("2024-12-01")   # evaluate through 2024

# COVID segmentation (descriptive)
COVID_START = pd.Timestamp("2020-03-01")
COVID_END   = pd.Timestamp("2021-12-01")
POST2_START = pd.Timestamp("2022-01-01")

# Specs: (spec_id, outcome_col, t0, pre_start)
# t0 is the first treated month; the pre window is pre_start <= date < t0.
SPECS = [
    ("S0", OUTCOME_THEFT, pd.Timestamp("2014-11-01"), pd.Timestamp("2010-01-01")),
    ("S1", OUTCOME_THEFT, pd.Timestamp("2015-01-01"), pd.Timestamp("2010-01-01")),
    ("S2", OUTCOME_THEFT, pd.Timestamp("2014-11-01"), pd.Timestamp("2012-01-01")),
    ("N0", OUTCOME_VIOL,  pd.Timestamp("2014-11-01"), pd.Timestamp("2010-01-01")),
]

# Placebo filter multipliers (tighter = stricter):
# a placebo is kept when its pre-RMSPE <= multiplier * treated pre-RMSPE.
PRE_RMSPE_MULTS = [2.0, 1.5]

# Minimum number of donors with complete data that fit_one requires.
MIN_DONORS = 5


In [None]:
# ============================================================
# 2) Helpers (panel build + validation)
# ============================================================
# Lower-cased state-code strings treated as missing when the panel is built from raw files.
MISSING_TOKENS = {"", "none", "nan", "null"}

def mstart(x) -> pd.Timestamp:
    """Return the first day of the calendar month that contains ``x``.

    ``x`` is anything ``pd.to_datetime`` accepts (string, Timestamp, ...).
    """
    stamp = pd.to_datetime(x)
    month = stamp.to_period("M")
    return month.to_timestamp()

def normalize_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with tidy column names and month-start dates.

    Column labels are converted to strings and stripped of surrounding
    whitespace; the ``DATE_COL`` column is parsed as datetimes and snapped to
    the first day of its month. The input frame is not modified.
    """
    out = df.copy()

    labels = out.columns.astype(str)
    out.columns = labels.str.strip()

    as_month = pd.to_datetime(out[DATE_COL]).dt.to_period("M")
    out[DATE_COL] = as_month.dt.to_timestamp()
    return out

def require_cols(df: pd.DataFrame, cols: List[str]) -> None:
    """Fail fast when ``df`` lacks any of the columns named in ``cols``.

    Raises:
        KeyError: listing the absent columns, in the order they appear in ``cols``.
    """
    missing = []
    for name in cols:
        if name in df.columns:
            continue
        missing.append(name)

    if not missing:
        return
    raise KeyError(f"Missing required columns: {missing}")

def flag_missing(group: pd.DataFrame) -> pd.DataFrame:
    """Flag the lowest-count months of one ORI-year as missing.

    The number of months to flag is ``12 - number_of_months_reported`` (read
    from the first row), clamped to ``[0, len(group)]``. Rows are ordered by
    theft count, then violent count, ascending, and the first rows in that
    order receive ``month_missing = True``. The sorted copy is returned.
    """
    first_reported = group["number_of_months_reported"].iloc[0]
    upper = len(group)

    n_flag = int(12 - first_reported)
    if n_flag > upper:
        n_flag = upper
    if n_flag < 0:
        n_flag = 0

    ordered = group.sort_values(
        by=["actual_theft_total", "actual_index_violent"], ascending=True
    ).copy()
    # Positions 0..n_flag-1 in sorted order are the months treated as unreported.
    ordered["month_missing"] = np.arange(len(ordered)) < n_flag
    return ordered

def build_state_month_covered_from_raw(raw_dir: Path, years=range(2010, 2025)) -> pd.DataFrame:
    """Build the state-month covered-population panel from yearly raw parquet files.

    For each year, reads ``raw_dir / RAW_FILE_PATTERN.format(year=year)`` (only
    ``RAW_COLUMNS``), drops rows whose state code is null, a missing token, or
    in ``DQ_EXCLUDED``, and flags missing months per (ori, year) with
    ``flag_missing``. The combined rows are then dated, and aggregated by
    (state_abb, date).

    Args:
        raw_dir: Directory containing the yearly raw parquet files.
        years: Iterable of years to load.

    Returns:
        DataFrame with one row per (state_abb, date) and the columns
        ``total_pop``, ``covered_pop``, ``theft``, ``violent``,
        ``coverage_rate``, ``theft_per_100k_coveredpop`` and
        ``violent_per_100k_coveredpop``.

    Raises:
        FileNotFoundError: if the raw file for any requested year is absent.
    """
    frames = []
    for year in years:
        fp = raw_dir / RAW_FILE_PATTERN.format(year=year)
        if not fp.exists():
            raise FileNotFoundError(f"Missing raw file: {fp}")
        df = pd.read_parquet(fp, columns=RAW_COLUMNS)

        # Drop bad state codes / non-states / nulls
        # NOTE: the stripped codes are used only to build the filter mask; the
        # "state_abb" column itself keeps its original (unstripped) values.
        s = df["state_abb"].astype("string").str.strip()
        s_lower = s.str.lower()
        bad = s.isna() | s_lower.isin(MISSING_TOKENS) | s.isin(DQ_EXCLUDED)
        df = df.loc[~bad].copy()

        # Flag missing months within ORI-year
        df = df.groupby(["ori", "year"], group_keys=False).apply(flag_missing)
        frames.append(df)

    raw = pd.concat(frames, ignore_index=True)

    # Date
    # (Raw files store month as full month name; if numeric, this still works via fallback)
    dt = pd.to_datetime(
        raw["year"].astype(int).astype(str) + " " + raw["month"].astype(str),
        format="%Y %B",
        errors="coerce",
    )
    if dt.isna().any():
        # fallback: try month number
        # Only rows the month-name parse could not handle are filled from this second parse.
        dt2 = pd.to_datetime(
            raw["year"].astype(int).astype(str) + "-" + raw["month"].astype(str).str.zfill(2) + "-01",
            errors="coerce",
        )
        dt = dt.fillna(dt2)
    # Snap every date to the first day of its month.
    raw["date"] = dt.dt.to_period("M").dt.to_timestamp()

    # Covered population
    # An agency's population counts as covered only in months not flagged missing.
    raw["pop_covered"] = raw["population"] * (~raw["month_missing"]).astype(int)

    # Aggregate to state-month
    # Offence counts are summed over all rows, including months flagged as missing.
    sm = (
        raw.groupby(["state_abb", "date"], as_index=False)
           .agg(
               total_pop=("population", "sum"),
               covered_pop=("pop_covered", "sum"),
               theft=("actual_theft_total", "sum"),
               violent=("actual_index_violent", "sum"),
           )
    )
    # Share of the state's summed agency population that reported in the month.
    sm["coverage_rate"] = sm["covered_pop"] / sm["total_pop"]

    # Rates (avoid Inf)
    # State-months with no covered population get NaN instead of a division by zero.
    sm["theft_per_100k_coveredpop"] = np.where(
        sm["covered_pop"] > 0, (sm["theft"] / sm["covered_pop"]) * 100000.0, np.nan
    )
    sm["violent_per_100k_coveredpop"] = np.where(
        sm["covered_pop"] > 0, (sm["violent"] / sm["covered_pop"]) * 100000.0, np.nan
    )
    return sm

def clean_nonfinite(series: pd.Series) -> pd.Series:
    """Coerce ``series`` to numeric and turn every non-finite entry into NaN.

    Values that cannot be parsed as numbers become NaN, and so do +inf / -inf.
    """
    numeric = pd.to_numeric(series, errors="coerce")
    return numeric.replace([np.inf, -np.inf], np.nan)


In [None]:
# ============================================================
# 3) Load (or build) processed panel
# ============================================================
# Obtain the state-month panel: read the cached parquet by default, or
# rebuild it from the raw yearly files (and cache it) when REBUILD_PANEL is set.
if not REBUILD_PANEL:
    print("[Load] Loading processed panel ...")
    if not PROCESSED_PATH.exists():
        raise FileNotFoundError(
            f"Processed panel not found at {PROCESSED_PATH}. "
            "Either add it to the repo, or set REBUILD_PANEL=True and point RAW_DIR to your raw files."
        )
    df_sm = pd.read_parquet(PROCESSED_PATH)
else:
    print("[Build] Rebuilding processed panel from raw ...")
    df_sm = build_state_month_covered_from_raw(RAW_DIR, years=YEARS)
    (DATA_DIR / "processed").mkdir(parents=True, exist_ok=True)
    df_sm.to_parquet(PROCESSED_PATH, index=False)
    print(f"[Build] Wrote {PROCESSED_PATH}  rows={len(df_sm):,}")

# Tidy column names and snap dates to month starts, then verify the schema.
df_sm = normalize_df(df_sm)
require_cols(df_sm, [STATE_COL, DATE_COL, OUTCOME_THEFT, OUTCOME_VIOL, COVERAGE_COL])

# Safety net: a cached panel may still contain DQ-excluded states.
keep_rows = ~df_sm[STATE_COL].isin(DQ_EXCLUDED)
df_sm = df_sm[keep_rows].copy()

# Replace non-numeric / infinite entries with NaN (should be rare at state level).
for numeric_col in (OUTCOME_THEFT, OUTCOME_VIOL, COVERAGE_COL):
    df_sm[numeric_col] = clean_nonfinite(df_sm[numeric_col])

print("States:", df_sm[STATE_COL].nunique(), "Date range:", df_sm[DATE_COL].min(), "→", df_sm[DATE_COL].max())
df_sm.head()


## Donor pool diagnostics (quick)
This is a lightweight sanity check. Your more detailed QC lives in `docs/`.

In [None]:
# ============================================================
# 4) Quick donor pool QC (optional but recommended)
# ============================================================
# Coverage statistics per state over the baseline pre-treatment months
# (2010-01 up to, but not including, 2014-11).
qc_start = pd.Timestamp("2010-01-01")
qc_stop = pd.Timestamp("2014-11-01")
pre_period = (df_sm[DATE_COL] >= qc_start) & (df_sm[DATE_COL] < qc_stop)

pre_rows = df_sm.loc[pre_period]
coverage_stats = pre_rows.groupby(STATE_COL)[COVERAGE_COL].agg(["mean", "std", "min", "max", "count"])
# Ascending by mean coverage, so the best-covered states sit at the bottom.
coverage_by_state = coverage_stats.sort_values("mean")

coverage_by_state.tail(10)


In [None]:
# Baseline donor rule: mean pre-period coverage of at least 0.95; the treated
# state is never its own donor.
meets_rule = coverage_by_state["mean"] >= 0.95
eligible_states = sorted(
    state for state in coverage_by_state.loc[meets_rule].index if state != TREATED
)
print("Eligible donors (>=0.95 mean pre coverage):", len(eligible_states))
print("Example donors:", eligible_states[:10])


## SCM implementation
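Weights are fit on each spec's pre-treatment months (`pre_start` ≤ date < `t0`), using only donors with complete data over `[DATE_MIN..FIT_END]`, and are then projected through `FULL_END`.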

Inference uses **in-space placebos** filtered by `pre_rmspe_placebo <= m × pre_rmspe_treated`.

In [None]:
# ============================================================
# 5) SCM core (stable solver + fit + placebos)
# ============================================================
def build_panel(df: pd.DataFrame, states: List[str], outcome: str,
                date_min: pd.Timestamp, date_max: pd.Timestamp) -> pd.DataFrame:
    """Pivot the long panel into a wide date-by-state matrix of ``outcome``.

    Only rows whose state is in ``states`` are used. Duplicate (date, state)
    cells are averaged. The result is sorted by date and sliced to the months
    from ``date_min`` through ``date_max`` (both snapped to month start,
    both inclusive).
    """
    in_scope = df[STATE_COL].isin(states)
    wide = df.loc[in_scope].pivot_table(
        index=DATE_COL, columns=STATE_COL, values=outcome, aggfunc="mean"
    )
    wide = wide.sort_index()

    first_month = mstart(date_min)
    last_month = mstart(date_max)
    return wide.loc[first_month:last_month]

def solve_scm_weights(y_pre: np.ndarray, X_pre: np.ndarray) -> Tuple[np.ndarray, str]:
    """Solve for synthetic-control weights on the unit simplex.

    Minimises ``sum_squares(y_pre - X_pre @ w)`` subject to ``w >= 0`` and
    ``sum(w) == 1``. OSQP is tried first; SCS is used as a fallback whenever
    OSQP raises *or* returns without a solution.

    Args:
        y_pre: Treated-unit outcomes over the pre window (flattened to 1-D).
        X_pre: Donor outcomes over the same rows, one column per donor.

    Returns:
        ``(weights, status)`` where ``weights`` is a 1-D array with one entry
        per column of ``X_pre`` (non-negative, summing to 1) and ``status`` is
        the cvxpy status string of the solve that produced it.

    Raises:
        ValueError: if ``X_pre`` is not 2-D, either input is empty, the row
            counts differ, any value is NaN/Inf, no solver produced a
            solution, or the clipped weights sum to zero.
    """
    y_pre = np.asarray(y_pre).reshape(-1)
    X_pre = np.asarray(X_pre)

    if X_pre.ndim != 2:
        raise ValueError(f"X_pre must be 2D, got {X_pre.shape}")
    T0, J = X_pre.shape
    if T0 == 0 or J == 0:
        raise ValueError(f"Empty pre matrices: T0={T0}, J={J}")
    if y_pre.shape[0] != T0:
        raise ValueError(
            f"Row mismatch: y_pre has {y_pre.shape[0]} rows but X_pre has {T0}"
        )

    if not np.all(np.isfinite(y_pre)) or not np.all(np.isfinite(X_pre)):
        raise ValueError("Non-finite values in pre matrices (NaN/Inf).")

    # Scale both sides by the treated series' std for numerical stability;
    # a common positive factor does not change the minimiser.
    scale = float(np.std(y_pre))
    if not np.isfinite(scale) or scale <= 0:
        scale = 1.0
    y = y_pre / scale
    X = X_pre / scale

    w = cp.Variable(J)
    prob = cp.Problem(cp.Minimize(cp.sum_squares(y - X @ w)), [w >= 0, cp.sum(w) == 1])

    # Solvers in order of preference, each with its own option names.
    attempts = (
        (cp.OSQP, {"max_iter": 200000, "eps_abs": 1e-8, "eps_rel": 1e-8}),
        (cp.SCS, {"max_iters": 200000, "eps": 1e-6}),
    )

    status = "unknown"
    last_err = None
    for solver, options in attempts:
        try:
            prob.solve(solver=solver, verbose=False, **options)
        except Exception as err:  # deliberate: any solver failure moves on to the next solver
            last_err = err
            continue
        status = prob.status
        if w.value is not None:
            break

    if w.value is None:
        raise ValueError(f"SCM optimization failed: status={status}") from last_err

    # Clip tiny negative values left by solver tolerance, then renormalise.
    wv = np.array(w.value).reshape(-1)
    wv[wv < 0] = 0.0
    sm = float(wv.sum())
    if sm <= 0:
        raise ValueError("Degenerate weights.")
    return wv / sm, status

def segment_metrics(dates: pd.DatetimeIndex, gap: np.ndarray,
                    start: pd.Timestamp, end: pd.Timestamp) -> Tuple[float, float, int]:
    """Summarise the gap series over the closed date window ``[start, end]``.

    Non-finite gaps are ignored.

    Returns:
        ``(rmspe, mean_gap, n)`` where ``n`` is the number of finite gaps in
        the window; ``(nan, nan, 0)`` when there are none.
    """
    in_window = (dates >= start) & (dates <= end)
    usable = gap[in_window & np.isfinite(gap)]

    count = int(usable.size)
    if count == 0:
        return np.nan, np.nan, 0

    root_mean_sq = float(np.sqrt(np.mean(usable**2)))
    mean_gap = float(np.mean(usable))
    return root_mean_sq, mean_gap, count

def fit_one(df: pd.DataFrame, treated: str, outcome: str, donors: List[str],
            pre_start, t0, date_min, fit_end, full_end,
            min_donors: int = 5) -> Dict[str, Any]:
    """Fit one synthetic control for ``treated`` and evaluate it by segment.

    Weights are solved on the pre-treatment rows (``pre_start <= date < t0``)
    of the fit window ``[date_min, fit_end]``, using only donors whose outcome
    is finite over the whole fit window. Donors with weight above 1e-6 are
    kept as "active", their weights renormalised, and the synthetic series is
    projected over ``[date_min, full_end]``.

    Args:
        df: Long state-month panel.
        treated: State code of the unit being fitted.
        outcome: Name of the outcome column.
        donors: Candidate donor state codes.
        pre_start: First month of the pre-treatment window.
        t0: First treated month (excluded from the pre window).
        date_min: First month of the fit and evaluation windows.
        fit_end: Last month of the fit window; also the end of the Post1 segment.
        full_end: Last month of the evaluation window.
        min_donors: Minimum number of complete donors required.

    Returns:
        Dict with the inputs, donor lists, active weights, solver status, the
        evaluated series (``dates``, ``y``, ``y_synth``, ``gap``) and per-segment
        RMSPE / average gap / month counts.

    Raises:
        ValueError: if the treated unit is absent or non-finite in the pre
            window, the pre window is empty, or too few donors are complete.
    """

    # Snap every date argument to its month start so comparisons line up with the panel index.
    t0 = mstart(t0); pre_start=mstart(pre_start)
    date_min=mstart(date_min); fit_end=mstart(fit_end); full_end=mstart(full_end)

    # Fit window (pre-COVID)
    Y_fit = build_panel(df, [treated] + donors, outcome, date_min, fit_end)
    if treated not in Y_fit.columns:
        raise ValueError(f"Treated '{treated}' missing after pivot.")

    # Rows used to solve for the weights: pre_start <= date < t0.
    pre_mask = (Y_fit.index >= pre_start) & (Y_fit.index < t0)
    if pre_mask.sum() == 0:
        raise ValueError("No pre rows in fit window.")

    # treated must be finite in pre
    if not np.all(np.isfinite(Y_fit.loc[pre_mask, treated].to_numpy())):
        raise ValueError("Treated has non-finite values in pre.")

    # donors complete in pre and fit window
    # (the pre rows are a subset of the fit window, so the second test is the binding one)
    donors_complete = []
    for d in donors:
        if d not in Y_fit.columns:
            continue
        col = Y_fit[d].to_numpy()
        if np.all(np.isfinite(Y_fit.loc[pre_mask, d].to_numpy())) and np.all(np.isfinite(col)):
            donors_complete.append(d)

    if len(donors_complete) < min_donors:
        raise ValueError(f"Too few complete donors: {len(donors_complete)} (<{min_donors})")

    y_pre = Y_fit.loc[pre_mask, treated].to_numpy()
    X_pre = Y_fit.loc[pre_mask, donors_complete].to_numpy()

    w, status = solve_scm_weights(y_pre, X_pre)
    w_ser = pd.Series(w, index=donors_complete).sort_values(ascending=False)

    # Keep only donors with non-negligible weight; fall back to all complete
    # donors if none exceeds the threshold.
    active = w_ser[w_ser > 1e-6].index.tolist()
    if len(active) == 0:
        active = donors_complete

    # Full window evaluation (drop rows with any missing among treated+active)
    Y_full = build_panel(df, [treated] + active, outcome, date_min, full_end).dropna(axis=0, how="any")
    dates = Y_full.index

    y = Y_full[treated].to_numpy()
    X = Y_full[active].to_numpy()

    # Renormalise after discarding the negligible weights; the epsilon guards
    # against a zero denominator.
    w_active = w_ser.reindex(active).fillna(0.0).to_numpy()
    w_active = w_active / (w_active.sum() + 1e-12)

    y_synth = X @ w_active
    gap = y - y_synth

    # Segment windows
    # segment_metrics uses closed intervals, so the pre window ends one month before t0.
    pre_end = (t0 - pd.offsets.MonthBegin(1))
    pre_rmspe, _, n_pre = segment_metrics(dates, gap, pre_start, pre_end)
    post1_rmspe, avg_post1, n_post1 = segment_metrics(dates, gap, t0, fit_end)
    covid_rmspe, avg_covid, n_covid = segment_metrics(dates, gap, COVID_START, COVID_END)
    post2_rmspe, avg_post2, n_post2 = segment_metrics(dates, gap, POST2_START, full_end)

    # Post/pre RMSPE ratios; NaN when either side could not be computed.
    ratio_post1 = post1_rmspe / (pre_rmspe + 1e-12) if np.isfinite(post1_rmspe) and np.isfinite(pre_rmspe) else np.nan
    ratio_post2 = post2_rmspe / (pre_rmspe + 1e-12) if np.isfinite(post2_rmspe) and np.isfinite(pre_rmspe) else np.nan

    return {
        "treated": treated,
        "outcome": outcome,
        "t0": t0,
        "pre_start": pre_start,
        "date_min": date_min,
        "fit_end": fit_end,
        "full_end": full_end,

        "donors_requested": donors,
        "donors_complete_pre": donors_complete,
        "donors_active": active,

        # Renormalised weights of the active donors only.
        "weights": pd.Series(w_active, index=active).sort_values(ascending=False),
        "solver_status": status,

        "dates": dates,
        "y": y,
        "y_synth": y_synth,
        "gap": gap,

        "pre_rmspe": float(pre_rmspe),
        "post1_rmspe": float(post1_rmspe),
        "post2_rmspe": float(post2_rmspe),
        "ratio_post1": float(ratio_post1),
        "ratio_post2": float(ratio_post2),

        "avg_gap_post1": float(avg_post1) if np.isfinite(avg_post1) else np.nan,
        "avg_gap_covid": float(avg_covid) if np.isfinite(avg_covid) else np.nan,
        "avg_gap_post2": float(avg_post2) if np.isfinite(avg_post2) else np.nan,

        "n_pre": int(n_pre),
        "n_post1": int(n_post1),
        "n_covid": int(n_covid),
        "n_post2": int(n_post2),
    }

def placebo_loop(df: pd.DataFrame, treated_res: Dict[str, Any], donors_base: List[str],
                 pre_rmspe_mult: float = 2.0, min_donors: int = 5, verbose: bool=False) -> Tuple[pd.DataFrame, pd.DataFrame, float, float]:
    """Run in-space placebos and compute RMSPE-ratio p-values for the treated fit.

    Each state in ``donors_base`` is fitted in turn as if it were treated,
    with the remaining states as donors and the same windows as
    ``treated_res``. Placebos whose fit fails are skipped. Placebos with a
    NaN in any metric are dropped, so they count towards neither p-value.

    Args:
        df: Long state-month panel.
        treated_res: Result dict of the treated unit's ``fit_one`` call.
        donors_base: States to use as placebo units.
        pre_rmspe_mult: Keep placebos with
            ``pre_rmspe <= pre_rmspe_mult * treated pre_rmspe``.
        min_donors: Passed through to ``fit_one``.
        verbose: Print the reason whenever a placebo is skipped.

    Returns:
        ``(all_df, filt, p1, p2)``: all usable placebos, the filtered subset,
        and the p-values for ``ratio_post1`` and ``ratio_post2``. Both frames
        always have the columns ``state``, ``pre_rmspe``, ``ratio_post1`` and
        ``ratio_post2``, even when empty; a p-value is NaN when no filtered
        placebo is available or the treated ratio is not finite.
    """
    metric_cols = ["pre_rmspe", "ratio_post1", "ratio_post2"]

    rows = []
    for s in donors_base:
        donors_s = [d for d in donors_base if d != s]
        try:
            r = fit_one(
                df, treated=s, outcome=treated_res["outcome"], donors=donors_s,
                pre_start=treated_res["pre_start"], t0=treated_res["t0"],
                date_min=treated_res["date_min"], fit_end=treated_res["fit_end"], full_end=treated_res["full_end"],
                min_donors=min_donors,
            )
        except Exception as e:  # deliberate best-effort: a failed placebo is skipped
            if verbose:
                print("Skipping", s, ":", e)
            continue
        else:
            rows.append({"state": s, **{c: r[c] for c in metric_cols}})

    # Explicit columns keep the schema stable when no placebo succeeded;
    # without them an empty frame has no "pre_rmspe" column to filter on.
    all_df = pd.DataFrame(rows, columns=["state"] + metric_cols)
    all_df = all_df.astype({c: float for c in metric_cols}).dropna()

    thr = float(pre_rmspe_mult) * float(treated_res["pre_rmspe"])
    filt = all_df[all_df["pre_rmspe"] <= thr].copy()

    def pval(col: str, treated_val: float) -> float:
        # Share of placebos at least as extreme as the treated unit, counting
        # the treated unit itself in both numerator and denominator.
        vals = filt[col].to_numpy(dtype=float)
        vals = vals[np.isfinite(vals)]
        if vals.size == 0 or not np.isfinite(treated_val):
            return np.nan
        return float((1 + np.sum(vals >= treated_val)) / (1 + vals.size))

    p1 = pval("ratio_post1", treated_res["ratio_post1"])
    p2 = pval("ratio_post2", treated_res["ratio_post2"])
    return all_df, filt, p1, p2


In [None]:
# ============================================================
# 6) Plot helpers
# ============================================================
def plot_treated_vs_synth(res: Dict[str, Any], title: str, outpath: Path) -> None:
    """Save a line chart of the treated series against its synthetic control.

    Reads ``dates``, ``y``, ``y_synth``, ``treated`` and ``t0`` from ``res``,
    marks ``t0`` with a dashed vertical line and writes the figure to
    ``outpath`` at 200 dpi, creating parent folders as needed.
    """
    fig, ax = plt.subplots()
    ax.plot(res["dates"], res["y"], label=res["treated"])
    ax.plot(res["dates"], res["y_synth"], label="Synthetic")
    ax.axvline(res["t0"], linestyle="--")
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()

    outpath.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(outpath, dpi=200)
    plt.close(fig)

def plot_gap(res: Dict[str, Any], title: str, outpath: Path) -> None:
    """Save a line chart of the gap series stored in ``res``.

    Draws ``res["gap"]`` over ``res["dates"]`` with a zero reference line and
    a dashed vertical line at ``res["t0"]``, then writes the figure to
    ``outpath`` at 200 dpi, creating parent folders as needed.
    """
    fig, ax = plt.subplots()
    ax.plot(res["dates"], res["gap"])
    ax.axhline(0, linewidth=1)
    ax.axvline(res["t0"], linestyle="--")
    ax.set_title(title)
    fig.tight_layout()

    outpath.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(outpath, dpi=200)
    plt.close(fig)

def plot_placebo_hist(filt: pd.DataFrame, treated_val: float, col: str, title: str, outpath: Path) -> None:
    """Save a 20-bin histogram of the finite values in ``filt[col]``.

    A dashed vertical line marks ``treated_val`` when it is finite. The figure
    is written to ``outpath`` at 200 dpi, creating parent folders as needed.
    """
    column_values = filt[col].to_numpy()
    finite_values = column_values[np.isfinite(column_values)]

    fig, ax = plt.subplots()
    ax.hist(finite_values, bins=20)
    if np.isfinite(treated_val):
        ax.axvline(treated_val, linestyle="--")
    ax.set_title(title)
    fig.tight_layout()

    outpath.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(outpath, dpi=200)
    plt.close(fig)


## Run all specs (state-level)

In [None]:
# ============================================================
# 7) Run specs
# ============================================================
# One summary row is collected per (spec, placebo multiplier) pair.
summary_rows = []

for spec_id, outcome, t0, pre_start in SPECS:
    print("Running", spec_id, outcome, "t0=", t0.date(), "pre_start=", pre_start.date())

    # donors: eligible by coverage rule (>=0.95 mean pre coverage)
    donors = eligible_states.copy()

    # Fit treated
    tr = fit_one(
        df_sm, treated=TREATED, outcome=outcome, donors=donors,
        pre_start=pre_start, t0=t0,
        date_min=DATE_MIN, fit_end=FIT_END, full_end=FULL_END,
        min_donors=MIN_DONORS
    )

    # Save weights + plots
    tr["weights"].to_csv(TAB_DIR / f"{spec_id}_weights.csv", header=["weight"])
    plot_treated_vs_synth(tr, f"{spec_id}: {TREATED} vs Synthetic ({outcome})", FIG_DIR / f"{spec_id}_treated_vs_synth.png")
    plot_gap(tr, f"{spec_id}: Gap (Treated - Synth) ({outcome})", FIG_DIR / f"{spec_id}_gap.png")

    # Placebos for each multiplier
    # placebo_loop refits every placebo on each call; the multiplier only
    # affects which placebos end up in the filtered set.
    for m in PRE_RMSPE_MULTS:
        pl_all, pl_filt, p1, p2 = placebo_loop(df_sm, tr, tr["donors_complete_pre"], pre_rmspe_mult=m, min_donors=MIN_DONORS)
        pl_all.to_csv(TAB_DIR / f"{spec_id}_placebos_all_m{m}.csv", index=False)
        pl_filt.to_csv(TAB_DIR / f"{spec_id}_placebos_filt_m{m}.csv", index=False)

        # Histograms are only drawn when at least one placebo survives the filter.
        if len(pl_filt) > 0:
            plot_placebo_hist(pl_filt, tr["ratio_post1"], "ratio_post1",
                              f"{spec_id}: Placebo ratios post1 (m={m}) p={p1:.3f}", FIG_DIR / f"{spec_id}_hist_ratio_post1_m{m}.png")
            plot_placebo_hist(pl_filt, tr["ratio_post2"], "ratio_post2",
                              f"{spec_id}: Placebo ratios post2 (m={m}) p={p2:.3f}", FIG_DIR / f"{spec_id}_hist_ratio_post2_m{m}.png")

        # Treated-fit metrics are repeated on every multiplier row; only the
        # placebo counts and p-values differ between multipliers.
        summary_rows.append({
            "spec_id": f"{spec_id}_m{m}",
            "outcome": outcome,
            "t0": str(tr["t0"].date()),
            "pre_start": str(tr["pre_start"].date()),
            "date_min": str(tr["date_min"].date()),
            "fit_end": str(tr["fit_end"].date()),
            "full_end": str(tr["full_end"].date()),
            "n_donors_requested": len(donors),
            "n_donors_complete_pre": len(tr["donors_complete_pre"]),
            "n_donors_active": len(tr["donors_active"]),
            "pre_rmspe": tr["pre_rmspe"],
            "post1_rmspe": tr["post1_rmspe"],
            "post2_rmspe": tr["post2_rmspe"],
            "ratio_post1": tr["ratio_post1"],
            "ratio_post2": tr["ratio_post2"],
            "avg_gap_post1": tr["avg_gap_post1"],
            "avg_gap_covid": tr["avg_gap_covid"],
            "avg_gap_post2": tr["avg_gap_post2"],
            "n_months_pre": tr["n_pre"],
            "n_months_post1": tr["n_post1"],
            "n_months_covid": tr["n_covid"],
            "n_months_post2": tr["n_post2"],
            "n_placebos": len(pl_all),
            "n_placebos_filtered": len(pl_filt),
            "pre_rmspe_mult": float(m),
            "pval_ratio_post1": p1,
            "pval_ratio_post2": p2,
            "solver_status": tr["solver_status"],
        })

# Persist the combined summary and show it as the cell output.
summary = pd.DataFrame(summary_rows).sort_values(["spec_id"]).reset_index(drop=True)
summary.to_csv(TAB_DIR / "all_specs_summary.csv", index=False)
summary


## Quick view: top donor weights (S0 theft)

In [None]:
# ============================================================
# 8) Inspect key artifact
# ============================================================
# Show the 15 largest donor weights of spec S0, if the run has produced them.
# The table is printed explicitly: a bare expression inside ``try`` is not a
# cell's trailing expression, so the notebook would not display it.
try:
    w = pd.read_csv(TAB_DIR / "S0_weights.csv", index_col=0)["weight"].sort_values(ascending=False)
except (OSError, KeyError, ValueError) as e:
    # Missing file, missing "weight" column, or an empty / unparsable CSV.
    print("No S0_weights.csv found yet:", e)
else:
    print(w.head(15).to_string())


## Notes
- If you want to include county-level work later, do it as a *separate notebook* to avoid polluting the replication artifact.
- If you extend the donor QC beyond coverage, add it in section 4 and keep it documented in `docs/data_qc_report.md`.