In [6]:
from pathlib import Path
import pandas as pd

# Repository root: one level above the current working directory
# (the notebook is expected to run from /notebooks).
ROOT = Path.cwd().resolve().parent
# Processed KPI tables for project 01.
PROCESSED = ROOT.joinpath("data", "01_caldashboard", "processed")
# Exported figures for project 01.
ASSETS = ROOT.joinpath("projects", "01_california-dashboard-replica", "assets")

# Running verdict: stays True only while every recorded check has passed.
ok = True


def status(msg, cond):
    """Print one pass/fail line for a check and fold it into ``ok``.

    Args:
        msg: Human-readable description of the check.
        cond: Truthy when the check passed, falsy otherwise.
    """
    global ok
    passed = bool(cond)
    marker = "✅ " if passed else "❌ "
    print(marker + msg)
    # A single failed check makes the overall verdict False for good.
    ok = ok and passed

# --- files exist ---
# Locate the two processed KPI tables (wide and long layouts) and record
# whether each one is on disk.
wide_fp = PROCESSED.joinpath("kpi_chronic_wide_all_levels.csv")
long_fp = PROCESSED.joinpath("kpi_chronic_long_all_levels.csv")
wide_present = wide_fp.exists()
status("Processed (wide) exists", wide_present)
long_present = long_fp.exists()
status("Processed (long) exists", long_present)

# Collect the PNG files sitting directly in the assets folder; the check
# passes once at least three are present.
pngs = sorted(ASSETS.glob("*.png"))
enough_figures = len(pngs) >= 3
status("Figures exported to assets/", enough_figures)

# --- load & schema checks ---
# Lower-cased subgroup labels treated as the "All Students" aggregate.
all_labels = ["all", "all students"]

# The checks below run only when the wide table is on disk; a missing file has
# already been reported by the existence check.
if wide_fp.exists():
    wide = pd.read_csv(wide_fp)
    need_cols = {"geo_level","year","subgroup","cohort","chronic_absent_count","chronic_absent_rate"}
    status("Wide has required columns", need_cols.issubset(wide.columns))

    # rates are fractions, not percent points
    if "chronic_absent_rate" in wide.columns:
        rates = pd.to_numeric(wide["chronic_absent_rate"], errors="coerce")
        # Series.max() skips NaN and returns NaN (never None) when no numeric
        # rate is left, so test with pd.notna rather than `is not None`.
        mx = rates.max()
        status("Rates look like fractions (<=1.05)", pd.notna(mx) and mx <= 1.05)

    # small-N suppression respected
    if {"cohort","chronic_absent_rate"} <= set(wide.columns):
        # Coerce cohort like the rate above: a text marker in the column would
        # otherwise make `< 10` raise TypeError. NaN < 10 is False, so rows
        # without a numeric cohort are never flagged.
        cohort = pd.to_numeric(wide["cohort"], errors="coerce")
        bad = wide[(cohort < 10) & wide["chronic_absent_rate"].notna()]
        status("Small-N rows suppressed (no rate when cohort<10)", bad.empty)

    # “All Students” coverage (district level)
    if {"geo_level","year","subgroup"} <= set(wide.columns):
        dist = wide[wide["geo_level"].astype(str).str.lower().eq("district")]
        dist_is_all = dist["subgroup"].astype(str).str.lower().isin(all_labels)
        # Group the boolean flag by year instead of DataFrame.groupby.apply:
        # this always yields one bool per year and avoids the deprecated
        # apply-on-grouping-columns path.
        by = dist_is_all.groupby(dist["year"]).any()
        # With no district rows there is nothing to confirm, so report a
        # failure rather than passing vacuously (or raising on an ambiguous
        # truth value).
        status("All Students present for district/year groups", (not by.empty) and bool(by.all()))

    # equity gap plausibility (gap ~ 0 for All Students if column exists)
    # "subgroup" is required too, because it is used to select the rows.
    if {"gap_vs_all","subgroup"} <= set(wide.columns):
        all_rows = wide[wide["subgroup"].astype(str).str.lower().isin(all_labels)]
        if not all_rows.empty:
            gaps = pd.to_numeric(all_rows["gap_vs_all"], errors="coerce")
            # Missing gaps are counted as zero, so they do not fail the check.
            near_zero = (gaps.fillna(0).abs() <= 0.001).all()
            status("gap_vs_all ≈ 0 for All Students", near_zero)

# --- final verdict ---
# Summarize the accumulated result of every status() call above.
verdict = "READY ✅" if ok else "NEEDS FIXES ❗"
print("\n🎯 Project 01 status:", verdict)
if not ok:
    # Point the reader back at the failed checks.
    print("Open the ❌ items above and rerun 02/03 where relevant.")


✅ Processed (wide) exists
✅ Processed (long) exists
✅ Figures exported to assets/
✅ Wide has required columns
✅ Rates look like fractions (<=1.05)
✅ Small-N rows suppressed (no rate when cohort<10)
✅ All Students present for district/year groups
❌ gap_vs_all ≈ 0 for All Students

🎯 Project 01 status: NEEDS FIXES ❗
Open the ❌ items above and rerun 02/03 where relevant.
