In [35]:
from pathlib import Path
import pandas as pd

DISTRICT = "San Francisco County Office of Education"  # set yours
YEAR = 2024

# Resolve the data file relative to the repository root instead of a
# machine-specific absolute path (assumes this notebook lives in /notebooks,
# the same convention the later cells use).
ROOT = Path.cwd().resolve().parent
WIDE_FP = ROOT / "data" / "01_caldashboard" / "processed" / "kpi_chronic_wide_all_levels.csv"

wide = pd.read_csv(WIDE_FP)

# District-level "All Students" row for the chosen district and year.
# astype(str) keeps the .str accessor usable even if a column was not parsed as text.
row = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT) &
    (wide["year"] == YEAR) &
    (wide["subgroup"].astype(str).str.lower().isin(["all", "all students"]))
]

if row.empty:
    print("No All Students row found—check district name/year or the label for 'All'.")
else:
    rate_raw = row["chronic_absent_rate"].iloc[0]
    cohort_raw = row["cohort"].iloc[0]
    if pd.isna(rate_raw) or pd.isna(cohort_raw):
        # Report missing values explicitly instead of failing inside float()/int().
        print("All Students row found, but chronic_absent_rate or cohort is missing.")
    else:
        all_rate = float(rate_raw)             # fraction, e.g., 0.173
        all_cohort = int(float(cohort_raw))    # student count; float() first tolerates values like 321.0
        print(f"ALL_RATE_{YEAR}:", f"{all_rate:.1%}")  # label follows YEAR instead of a fixed "2024"
        print("ALL_COHORT:", f"{all_cohort:,}")


ALL_RATE_2024: 61.4%
ALL_COHORT: 321


In [42]:
from pathlib import Path
import pandas as pd

# ------------------ CONFIG ------------------
DISTRICT = "San Francisco County Office of Education"
YEAR = 2024
ROOT = Path.cwd().resolve().parent  # if this notebook is in /notebooks
PROCESSED = ROOT / "data" /"01_caldashboard" / "processed"
WIDE_FP = PROCESSED / "kpi_chronic_wide_all_levels.csv"

# 90-day goal heuristics (tweak if you want different targets)
ANNUAL_REL_REDUCTION_ALL = 0.15      # aim to lower All Students by 15% over a year
ANNUAL_GAP_REDUCTION_PP = 0.02       # aim to shrink subgroup gaps by 2.0 percentage points over a year
REL_90D = ANNUAL_REL_REDUCTION_ALL * 0.25     # quarter of annual target
GAP_90D_PP = ANNUAL_GAP_REDUCTION_PP * 0.25   # quarter of annual gap reduction

def pct(x):
    """Render a fractional rate as a one-decimal percentage; an em dash when the value is missing."""
    if pd.notna(x):
        return f"{x*100:.1f}%"
    return "—"


def pp(x):
    """Render a fractional difference as one-decimal percentage points; an em dash when missing."""
    if pd.notna(x):
        return f"{x*100:.1f} pp"
    return "—"

# ------------------ LOAD & FILTER ------------------
wide = pd.read_csv(WIDE_FP)


def _all_students_mask(frame):
    """Return a boolean mask selecting the All Students row(s) of `frame`.

    Rows whose subgroup label is "all" / "all students" (case-insensitive) are
    selected. If none match and a reporting_category column exists, rows with
    reporting_category == "TA" are selected instead.
    """
    mask = frame["subgroup"].astype(str).str.lower().isin(["all", "all students"])
    if not mask.any() and "reporting_category" in frame.columns:
        mask = frame["reporting_category"].astype(str).str.upper().eq("TA")
    return mask


# District-level rows for this district across all years; the prior-year lookup reuses them.
district_rows = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT)
]
df = district_rows[district_rows["year"] == YEAR].copy()

if df.empty:
    raise ValueError(f"No rows for {DISTRICT} in {YEAR}. Check names/casing and rerun 02_kpi_calculations.")

# All Students row. The same mask is reused below to exclude that row from the
# subgroup rankings, so a row found through the 'TA' fallback is excluded too.
is_all = _all_students_mask(df)
all_row = df.loc[is_all]
if all_row.empty:
    raise ValueError("All Students row not found; verify 'subgroup' labeling or 'reporting_category' == 'TA' exists.")

all_rate_raw = all_row["chronic_absent_rate"].iloc[0]
all_cohort_raw = all_row["cohort"].iloc[0]
if pd.isna(all_rate_raw) or pd.isna(all_cohort_raw):
    raise ValueError("All Students row has a missing chronic_absent_rate or cohort; cannot build the summary.")
ALL_RATE = float(all_rate_raw)
ALL_COHORT = int(float(all_cohort_raw))

# Prior year delta (fraction difference). Stays None when there is no usable prior-year value.
ALL_DELTA_PP = None
prior = district_rows[district_rows["year"] == YEAR - 1]
if not prior.empty:
    prior_all = prior.loc[_all_students_mask(prior)]
    if not prior_all.empty:
        prior_rate = prior_all["chronic_absent_rate"].iloc[0]
        if pd.notna(prior_rate):
            ALL_DELTA_PP = ALL_RATE - float(prior_rate)

# Top 3 subgroups (highest rates), excluding All
sub = df[~is_all].dropna(subset=["chronic_absent_rate"]).copy()
sub = sub.sort_values("chronic_absent_rate", ascending=False)
top3 = sub[["subgroup", "chronic_absent_rate"]].head(3).reset_index(drop=True)

# Largest 3 gaps (vs All Students). Use the precomputed gap if available; compute it if not.
if "gap_vs_all" not in sub.columns:
    sub["gap_vs_all"] = sub["chronic_absent_rate"] - ALL_RATE
gaps3 = (
    sub.dropna(subset=["gap_vs_all"])
    .sort_values("gap_vs_all", ascending=False)[["subgroup", "gap_vs_all"]]
    .head(3)
    .reset_index(drop=True)
)

# 90-day targets (relative decrease for All; pp decrease for gaps)
target_all_90d_rate = max(ALL_RATE * (1 - REL_90D), 0.0)
TARGET_PP_90D = (ALL_RATE - target_all_90d_rate)  # fraction diff
GAP_TARGET_PP_90D = GAP_90D_PP

# ------------------ BUILD EXEC SUMMARY TEXT ------------------
if ALL_DELTA_PP is None:
    headline = (
        f"In {YEAR}, the All Students chronic absenteeism rate is {pct(ALL_RATE)} "
        f"(cohort {ALL_COHORT:,})."
    )
elif abs(ALL_DELTA_PP) < 0.0005:
    # A change that rounds to 0.0 pp is neither an increase nor a decrease.
    headline = (
        f"From {YEAR-1} to {YEAR}, All Students chronic absenteeism held steady "
        f"at {pct(ALL_RATE)} (cohort {ALL_COHORT:,})."
    )
else:
    headline = (
        f"From {YEAR-1} to {YEAR}, All Students chronic absenteeism {'increased' if ALL_DELTA_PP>0 else 'decreased'} "
        f"by {pp(abs(ALL_DELTA_PP))} to {pct(ALL_RATE)} (cohort {ALL_COHORT:,})."
    )

# Top impacted groups
sg_lines = [
    f"{rank}) {name}: {pct(float(rate))}"
    for rank, (name, rate) in enumerate(zip(top3["subgroup"], top3["chronic_absent_rate"]), start=1)
]
impacted = " | ".join(sg_lines) if sg_lines else "No subgroup rows found."

# Largest gaps. The sign comes from the value itself, so a subgroup below
# All Students prints "-x.x pp" rather than "+-x.x pp".
gap_lines = [
    f"{name}: {float(gap)*100:+.1f} pp"
    for name, gap in zip(gaps3["subgroup"], gaps3["gap_vs_all"])
]
gaps_txt = " | ".join(gap_lines) if gap_lines else "No gaps available."

# Recommended levers (standard, edit if needed)
levers = (
    "1) Tier-1 family outreach & texting (broad, low-lift nudges); "
    "2) 3/6/9 absence check-ins + case management (time-bound Tier-2); "
    "3) Targeted supports for focus groups (e.g., transportation for SED, EL tutoring, SPED case-manager cadence)."
)

# Monitoring & 90-day success
monitor = (
    f"Monthly dashboard (All + focus subgroups); biweekly attendance-team reviews of 3/6/9 thresholds. "
    f"**90-day success:** All Students decreases by {pp(TARGET_PP_90D)} from baseline; "
    f"priority subgroup gaps shrink by {pp(GAP_TARGET_PP_90D)}; "
    f"process metrics: ≥90% on-time 3/6/9 check-ins and ≥85% family contact coverage."
)

exec_summary = f"""## 1) Executive Summary

- **Headline.** {headline}
- **Who is most impacted.** {impacted}
- **Largest equity gaps (vs All Students).** {gaps_txt}
- **Top recommended levers (why these).** {levers}
- **Monthly monitoring & 90-day success.** {monitor}
"""

print(exec_summary)

# Also save as a Markdown snippet to Project 02
OUT_MD = ROOT / "projects" / "02_equity-data-story" / "assets" / f"exec_summary_{YEAR}_{DISTRICT.lower().replace(' ','_')}.md"
OUT_MD.parent.mkdir(parents=True, exist_ok=True)
OUT_MD.write_text(exec_summary, encoding="utf-8")
print("Saved:", OUT_MD.as_posix())


## 1) Executive Summary

- **Headline.** In 2024, the All Students chronic absenteeism rate is 61.4% (cohort 321).
- **Who is most impacted.** 1) GR78: 95.0% | 2) Black/African American: 94.3% | 3) GRTK8: 91.3%
- **Largest equity gaps (vs All Students).** GR78: +33.6 pp | Black/African American: +33.0 pp | GRTK8: +29.9 pp
- **Top recommended levers (why these).** 1) Tier-1 family outreach & texting (broad, low-lift nudges); 2) 3/6/9 absence check-ins + case management (time-bound Tier-2); 3) Targeted supports for focus groups (e.g., transportation for SED, EL tutoring, SPED case-manager cadence).
- **Monthly monitoring & 90-day success.** Monthly dashboard (All + focus subgroups); biweekly attendance-team reviews of 3/6/9 thresholds. **90-day success:** All Students decreases by 2.3 pp from baseline; priority subgroup gaps shrink by 0.5 pp; process metrics: ≥90% on-time 3/6/9 check-ins and ≥85% family contact coverage.

Saved: /Users/ms/Projects/acoe-edu-data-portfolio/projects/02_equity-data-story/assets/exec_summary_2024_san_francisco_county_office_of_education.md

In [48]:
from pathlib import Path
import pandas as pd

# --------- CONFIG ---------
DISTRICT = "San Francisco County Office of Education"
YEAR = 2024          # set to None to auto-pick latest available year

# paths (assumes this notebook is in /notebooks)
ROOT = Path.cwd().resolve().parent
PROCESSED = ROOT / "data" / "01_caldashboard" / "processed"
WIDE_FP = PROCESSED / "kpi_chronic_wide_all_levels.csv"

def pct(x):
    """Render a fractional rate as a one-decimal percentage string."""
    return f"{x*100:.1f}%"


def pp_change(x):
    """Describe a year-over-year change (a fraction) in percentage points.

    None means there is no prior year; a magnitude below 1e-9 is reported as no change.
    """
    if x is None:
        return "n/a (baseline)"
    if abs(x) < 1e-9:
        return "0.0 pp (no change)"
    magnitude = f"{abs(x)*100:.1f} pp"
    if x > 0:
        return f"{magnitude} increase"
    return f"{magnitude} decrease"

# --------- LOAD & FILTER ---------
wide = pd.read_csv(WIDE_FP)


def _all_students_mask(frame):
    """Return a boolean mask selecting the All Students row(s) of `frame`.

    Rows whose subgroup label is "all" / "all students" (case-insensitive) are
    selected. If none match and a reporting_category column exists, rows with
    reporting_category == "TA" are selected instead.
    """
    mask = frame["subgroup"].astype(str).str.lower().isin(["all", "all students"])
    if not mask.any() and "reporting_category" in frame.columns:
        mask = frame["reporting_category"].astype(str).str.upper().eq("TA")
    return mask


# District-level rows for this district, all years (used for year selection and prior-year lookup).
subset = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT)
]

# If YEAR=None, pick the latest year available for this district
if YEAR is None:
    latest_year = pd.to_numeric(subset["year"], errors="coerce").max()
    if pd.isna(latest_year):
        # Without this guard int(NaN) fails with an unhelpful message.
        raise ValueError(f"No district-level rows with a usable year for {DISTRICT}; cannot auto-pick YEAR.")
    YEAR = int(latest_year)

df = subset[subset["year"] == YEAR].copy()
if df.empty:
    raise ValueError(f"No rows for {DISTRICT} in {YEAR}. Check names/casing and rerun 02_kpi_calculations.")

# All Students row. The mask is reused below so a row found through the 'TA'
# fallback is also kept out of the subgroup comparison.
is_all = _all_students_mask(df)
all_row = df.loc[is_all]
if all_row.empty:
    raise ValueError("All Students row not found (try checking 'subgroup' or 'reporting_category' == 'TA').")

ALL_RATE = float(all_row["chronic_absent_rate"].iloc[0])
ALL_COHORT = int(float(all_row["cohort"].iloc[0]))

# Prior-year change (fraction). None when there is no usable prior-year value → prints "baseline".
ALL_DELTA = None
prev = subset[subset["year"] == YEAR - 1]
if not prev.empty:
    prev_all = prev.loc[_all_students_mask(prev)]
    if not prev_all.empty:
        prev_rate = prev_all["chronic_absent_rate"].iloc[0]
        if pd.notna(prev_rate):
            ALL_DELTA = ALL_RATE - float(prev_rate)

# Largest subgroup gap (vs All Students)
subs = df[~is_all].copy()
if "gap_vs_all" not in subs.columns:
    subs["gap_vs_all"] = subs["chronic_absent_rate"] - ALL_RATE
subs = subs.dropna(subset=["gap_vs_all"]).sort_values("gap_vs_all", ascending=False)

if not subs.empty:
    TOP_GAP_GROUP = str(subs.iloc[0]["subgroup"])
    TOP_GAP_VALUE = float(subs.iloc[0]["gap_vs_all"])
    # A gap is a difference between two rates, so it is reported in signed
    # percentage points rather than as a percentage.
    top_gap_line = f"Largest subgroup gap: {TOP_GAP_GROUP} ({TOP_GAP_VALUE*100:+.1f} pp)"
else:
    top_gap_line = "Largest subgroup gap: n/a"

# --------- PRINT READY-TO-PASTE LINES ---------
print(f"District/Agency: {DISTRICT}")
print(f"Year: {YEAR}")
print(f"All Students rate: {pct(ALL_RATE)}")
print(f"Change vs prior year: {pp_change(ALL_DELTA)}")
print(f"Cohort (students included): {ALL_COHORT:,}")
print(top_gap_line)


District/Agency: San Francisco County Office of Education
Year: 2024
All Students rate: 61.4%
Change vs prior year: n/a (baseline)
Cohort (students included): 321
Largest subgroup gap: GR78 (33.6%)


If your file labels “All Students” as “All”, the code already handles it. If your district name is spelled slightly differently, run the following to list the matching names:
[d for d in wide.loc[wide["geo_level"].str.lower()=="district","district_name"].unique() if "san francisco" in str(d).lower()]

After adding prior years (for a 2–4 year view), use Option B below:

from pathlib import Path
import pandas as pd

DISTRICT = "San Francisco County Office of Education"
ROOT = Path.cwd().resolve().parent          # if notebook lives in /notebooks
# The processed KPI file lives under data/01_caldashboard/processed, the location
# every other cell in this notebook reads it from.
WIDE = ROOT / "data" / "01_caldashboard" / "processed" / "kpi_chronic_wide_all_levels.csv"

wide = pd.read_csv(WIDE)

# --- All Students rows for the district ---
df = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT) &
    (wide["subgroup"].astype(str).str.lower().isin(["all", "all students"]))
].dropna(subset=["year", "chronic_absent_rate"])

if df.empty:
    raise ValueError("No All Students rows found—check district name or run 02_kpi again.")

# One row per year, oldest first. The stable sort plus keep="first" retains the
# first row in file order when a year appears more than once.
df = df.sort_values("year", kind="mergesort").drop_duplicates(subset="year", keep="first")

# Latest year values
row_latest = df.iloc[-1]
ALL_RATE_2024 = f"{float(row_latest['chronic_absent_rate']):.1%}"
cohort_raw = row_latest["cohort"] if "cohort" in df.columns else None
# A missing cohort prints as an em dash instead of crashing int(float(NaN)).
ALL_COHORT = f"{int(float(cohort_raw)):,}" if pd.notna(cohort_raw) else "—"

print("{{ALL_RATE_2024}} =", ALL_RATE_2024)
print("{{ALL_COHORT}} =", ALL_COHORT)

# If you have multiple years, compute 2–4 year narrative pieces
if len(df) >= 2:
    years = [int(y) for y in df["year"]]
    rates = [float(r) for r in df["chronic_absent_rate"]]

    START_YEAR, END_YEAR = years[0], years[-1]
    START_RATE, END_RATE = rates[0], rates[-1]
    DELTA = END_RATE - START_RATE
    DIRECTION = "increased" if DELTA > 0 else ("decreased" if DELTA < 0 else "held steady")
    DELTA_PP = f"{abs(DELTA)*100:.1f} pp"
    START_RATE_TXT = f"{START_RATE:.1%}"
    END_RATE_TXT = f"{END_RATE:.1%}"

    # Largest jump between consecutive *available* years. y1 is the year that
    # actually precedes y2 in the data, which need not be y2 - 1 when a year is missing.
    jumps = [abs(later - earlier) for earlier, later in zip(rates, rates[1:])]
    biggest = max(range(len(jumps)), key=jumps.__getitem__)  # first index of the largest jump
    y1, y2 = years[biggest], years[biggest + 1]
    MAX_PA_PP = f"{jumps[biggest]*100:.1f} pp"

    print("{{START_YEAR}} =", START_YEAR)
    print("{{END_YEAR}} =", END_YEAR)
    print("{{START_RATE}} =", START_RATE_TXT)
    print("{{END_RATE}} =", END_RATE_TXT)
    print("{{DELTA_PP}} =", DELTA_PP)
    print("{{DIRECTION}} =", DIRECTION)
    print("{{JUMP_Y1}} =", y1)
    print("{{JUMP_Y2}} =", y2)
    print("{{MAX_PA_PP}} =", MAX_PA_PP)
else:
    print("Only one year present — use the baseline-only wording (Option A).")


In [57]:
from pathlib import Path
import pandas as pd
import re

# ---------- CONFIG ----------
DISTRICT = "San Francisco County Office of Education"  # change if needed
YEAR = 2024  # or set to None to use latest

# Input CSV and output folder, both resolved from the repo root
# (the parent of the working directory, i.e. the notebook is expected in /notebooks).
ROOT = Path.cwd().resolve().parent
WIDE = ROOT.joinpath("data", "01_caldashboard", "processed", "kpi_chronic_wide_all_levels.csv")
OUT_DIR = ROOT.joinpath("projects", "02_equity-data-story", "assets")
OUT_DIR.mkdir(parents=True, exist_ok=True)  # create the assets folder on first run

def pct(x):
    """Render a fractional rate as a one-decimal percentage string."""
    return f"{x*100:.1f}%"


def slugify(s):
    """Lower-case `s` and collapse every run of non-alphanumeric characters into one hyphen.

    Leading and trailing hyphens are stripped from the result.
    """
    lowered = str(s).lower()
    return re.sub(r"[^a-z0-9]+", "-", lowered).strip("-")

# ---------- LOAD & FILTER ----------
wide = pd.read_csv(WIDE)

# District-level rows for this district, all years.
district_rows = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT)
]

# Choose year
if YEAR is None:
    yopts = pd.to_numeric(district_rows["year"], errors="coerce").dropna()
    if yopts.empty:
        # Without this guard int(NaN) fails with an unhelpful message.
        raise ValueError(f"No district-level rows with a usable year for {DISTRICT}; cannot auto-pick YEAR.")
    YEAR = int(yopts.max())

df = district_rows[district_rows["year"] == YEAR].copy()

if df.empty:
    raise ValueError(f"No rows for {DISTRICT} in {YEAR}. Check names/casing or re-run 02_kpi.")

# exclude All Students; drop suppressed/NaN rates
mask_all = df["subgroup"].astype(str).str.lower().isin(["all", "all students"])
sub = df[~mask_all].dropna(subset=["chronic_absent_rate"]).copy()

# Highest three first; the lowest three are then drawn only from the remaining
# subgroups, so with fewer than 6 subgroups no subgroup is listed both as
# "highest" and as a "bright spot". Whatever is available is shown.
top3 = sub.sort_values("chronic_absent_rate", ascending=False).head(3)
low3 = sub.drop(index=top3.index).sort_values("chronic_absent_rate", ascending=True).head(3)
top3 = top3.reset_index(drop=True)
low3 = low3.reset_index(drop=True)


def _ranked(frame):
    """Return [(rank, subgroup, formatted_rate), ...] for an already-ordered subgroup frame."""
    return [
        (rank, name, pct(float(rate)))
        for rank, (name, rate) in enumerate(zip(frame["subgroup"], frame["chronic_absent_rate"]), start=1)
    ]


top_items = _ranked(top3)
low_items = _ranked(low3)
chart_path = f"projects/01_california-dashboard-replica/assets/subgroup_profile_{slugify(DISTRICT)}_{YEAR}.png"

# ---------- PRINT READY BULLETS ----------
print(f"Insert chart: {chart_path}\n")

print("Top 3 highest subgroup rates:")
for rank, name, rate_txt in top_items:
    print(f"- {rank}) {name}: {rate_txt}")

print("\nTop 3 lowest subgroup rates (bright spots):")
for rank, name, rate_txt in low_items:
    print(f"- {rank}) {name}: {rate_txt}")

# ---------- SAVE A SMALL MARKDOWN SNIPPET ----------
lines = [
    f"**Insert chart:** `{chart_path}`  \n",
    "**Top 3 highest subgroup rates**\n",
]
lines.extend(f"- {rank}) **{name}** — {rate_txt}\n" for rank, name, rate_txt in top_items)
lines.append("\n**Top 3 lowest subgroup rates (bright spots)**\n")
lines.extend(f"- {rank}) **{name}** — {rate_txt}\n" for rank, name, rate_txt in low_items)

out_md = OUT_DIR / f"section3_subgroups_{YEAR}_{slugify(DISTRICT)}.md"
out_md.write_text("".join(lines), encoding="utf-8")
print("\nSaved snippet:", out_md.as_posix())


Insert chart: projects/01_california-dashboard-replica/assets/subgroup_profile_san-francisco-county-office-of-education_2024.png

Top 3 highest subgroup rates:
- 1) GR78: 95.0%
- 2) Black/African American: 94.3%
- 3) GRTK8: 91.3%

Top 3 lowest subgroup rates (bright spots):
- 1) Asian: 26.7%
- 2) Male: 50.0%
- 3) English Learners: 50.9%

Saved snippet: /Users/ms/Projects/acoe-edu-data-portfolio/projects/02_equity-data-story/assets/section3_subgroups_2024_san-francisco-county-office-of-education.md


In [59]:
# ---- Section 3: Who Is Most Impacted (finished bullets) ----
from pathlib import Path
import pandas as pd
import re

DISTRICT = "San Francisco County Office of Education"  # change if needed
YEAR = 2024  # or set to None for latest

# Repo root is the parent of the working directory (notebook in /notebooks).
ROOT = Path.cwd().resolve().parent
WIDE = ROOT.joinpath("data", "01_caldashboard", "processed", "kpi_chronic_wide_all_levels.csv")

def pct(x):
    """Render a fractional rate as a one-decimal percentage string.

    Returns an em dash when `x` cannot be converted to a float or is NaN.
    """
    try:
        value = float(x)
    except (TypeError, ValueError):
        # Only conversion failures are treated as "missing"; a bare except
        # would also swallow KeyboardInterrupt and genuine bugs.
        return "—"
    if pd.isna(value):
        return "—"  # NaN would otherwise print as "nan%"
    return f"{value*100:.1f}%"


def slugify(s):
    """Lower-case `s` and collapse every run of non-alphanumeric characters into one hyphen."""
    return re.sub(r"[^a-z0-9]+", "-", str(s).lower()).strip("-")

wide = pd.read_csv(WIDE)

# District-level rows for this district, all years.
district_rows = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT)
]

# choose year
if YEAR is None:
    yopts = pd.to_numeric(district_rows["year"], errors="coerce").dropna()
    if yopts.empty:
        # Without this guard int(NaN) fails with an unhelpful message.
        raise ValueError(f"No district-level rows with a usable year for {DISTRICT}; cannot auto-pick YEAR.")
    YEAR = int(yopts.max())

df = district_rows[district_rows["year"] == YEAR].copy()
if df.empty:
    raise ValueError(f"No rows for {DISTRICT} in {YEAR}.")

# Exclude All Students and rows without a rate.
mask_all = df["subgroup"].astype(str).str.lower().isin(["all", "all students"])
sub = df[~mask_all].dropna(subset=["chronic_absent_rate"]).copy()

# Highest three first; the lowest three come only from the remaining subgroups,
# so with fewer than 6 subgroups nothing is listed in both groups.
top3 = sub.sort_values("chronic_absent_rate", ascending=False).head(3)
low3 = sub.drop(index=top3.index).sort_values("chronic_absent_rate", ascending=True).head(3)
top3 = top3.reset_index(drop=True)
low3 = low3.reset_index(drop=True)

# ----- PRINT FINISHED BULLETS -----
print(f"Insert chart: projects/01_california-dashboard-replica/assets/subgroup_profile_{slugify(DISTRICT)}_{YEAR}.png\n")

print("Top 3 highest subgroup rates:")
for rank, (name, rate) in enumerate(zip(top3["subgroup"], top3["chronic_absent_rate"]), start=1):
    print(f"• {rank}) {name}: {pct(rate)}")

print("\nTop 3 lowest subgroup rates (bright spots):")
for rank, (name, rate) in enumerate(zip(low3["subgroup"], low3["chronic_absent_rate"]), start=1):
    print(f"• {rank}) {name}: {pct(rate)}")


Insert chart: projects/01_california-dashboard-replica/assets/subgroup_profile_san-francisco-county-office-of-education_2024.png

Top 3 highest subgroup rates:
• 1) GR78: 95.0%
• 2) Black/African American: 94.3%
• 3) GRTK8: 91.3%

Top 3 lowest subgroup rates (bright spots):
• 1) Asian: 26.7%
• 2) Male: 50.0%
• 3) English Learners: 50.9%


In [63]:
from pathlib import Path
import pandas as pd
import re

# -------- CONFIG --------
DISTRICT = "San Francisco County Office of Education"   # change if needed
YEAR = 2024                                             # or set to None for latest
TOP_N = 5                                               # how many largest gaps to show

def find_root(start=None):
    """Locate the repository root by walking upward from `start`.

    The root is the first directory (checking `start` itself, then each parent)
    that contains data/01_caldashboard/processed.

    Args:
        start: Directory to begin from. Defaults to the current working
            directory, looked up at call time rather than once when the
            function is defined.

    Returns:
        The matching directory as a Path, or `start` itself when no ancestor matches.
    """
    start = Path.cwd() if start is None else Path(start)
    for candidate in (start, *start.parents):
        if (candidate / "data" / "01_caldashboard" / "processed").exists():
            return candidate
    return start

ROOT = find_root()
WIDE = ROOT / "data" / "01_caldashboard" / "processed" / "kpi_chronic_wide_all_levels.csv"
OUT_DIR = ROOT / "projects" / "02_equity-data-story" / "assets"
OUT_DIR.mkdir(parents=True, exist_ok=True)

def pct(x):
    """Render a fractional rate (anything float() accepts) as a one-decimal percentage."""
    as_percent = float(x) * 100
    return f"{as_percent:.1f}%"


def pp(x):
    """Render a fractional difference (anything float() accepts) as one-decimal percentage points."""
    as_points = float(x) * 100
    return f"{as_points:.1f} pp"


def slugify(s):
    """Lower-case `s` and collapse every run of non-alphanumeric characters into one hyphen."""
    lowered = str(s).lower()
    return re.sub(r"[^a-z0-9]+", "-", lowered).strip("-")

# -------- LOAD --------
wide = pd.read_csv(WIDE)


def _all_students_mask(frame):
    """Return a boolean mask selecting the All Students row(s) of `frame`.

    Rows whose subgroup label is "all" / "all students" (case-insensitive) are
    selected. If none match and a reporting_category column exists, rows with
    reporting_category == "TA" are selected instead.
    """
    mask = frame["subgroup"].astype(str).str.lower().isin(["all", "all students"])
    if not mask.any() and "reporting_category" in frame.columns:
        mask = frame["reporting_category"].astype(str).str.upper().eq("TA")
    return mask


def _signed_pp(x):
    """Format a fractional gap as signed percentage points, e.g. 0.336 -> '+33.6 pp'."""
    return f"{float(x)*100:+.1f} pp"


# District-level rows for this district, all years.
district_rows = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT)
]

# pick year (if None)
if YEAR is None:
    latest_year = pd.to_numeric(district_rows["year"], errors="coerce").max()
    if pd.isna(latest_year):
        # Without this guard int(NaN) fails with an unhelpful message.
        raise ValueError(f"No district-level rows with a usable year for {DISTRICT}; cannot auto-pick YEAR.")
    YEAR = int(latest_year)

# district-year filter
df = district_rows[district_rows["year"] == YEAR].copy()
if df.empty:
    raise ValueError(f"No rows for {DISTRICT} in {YEAR}.")

# All Students rate. The mask is reused below so a row found through the 'TA'
# fallback is also excluded from the subgroup comparison.
is_all = _all_students_mask(df)
all_row = df.loc[is_all]
if all_row.empty:
    raise ValueError("Could not find 'All Students' row.")

ALL_RATE = float(all_row.iloc[0]["chronic_absent_rate"])

# compute gap_vs_all if missing
sub = df[~is_all].copy()
if "gap_vs_all" not in sub.columns:
    sub["gap_vs_all"] = sub["chronic_absent_rate"] - ALL_RATE

# largest gaps first (positive = higher than All Students)
gaps = sub.dropna(subset=["gap_vs_all"]).sort_values("gap_vs_all", ascending=False)
top_gaps = gaps.head(TOP_N).reset_index(drop=True)

# also (optional) bright spots with negative gaps
bright = gaps[gaps["gap_vs_all"] < 0].sort_values("gap_vs_all").head(3).reset_index(drop=True)


def _gap_items(frame):
    """Return [(subgroup, signed_gap_text, rate_text), ...] for each row of `frame`."""
    return [
        (name, _signed_pp(gap), pct(rate))
        for name, gap, rate in zip(frame["subgroup"], frame["gap_vs_all"], frame["chronic_absent_rate"])
    ]


# The sign is taken from the value, so a top-N entry that sits below
# All Students prints "-x.x pp" rather than "+-x.x pp".
top_items = _gap_items(top_gaps)
bright_items = _gap_items(bright)

# -------- PRINT FINISHED BULLETS --------
print(f"Insert chart: projects/01_california-dashboard-replica/assets/largest_equity_gaps_{YEAR}.png\n")

print("Largest equity gaps (vs All Students):")
for name, gap_txt, rate_txt in top_items:
    print(f"• {name}: {gap_txt} (rate {rate_txt})")

if bright_items:
    print("\nBright spots (below All Students):")
    for name, gap_txt, rate_txt in bright_items:
        print(f"• {name}: {gap_txt} (rate {rate_txt})")

# Interpretation suggestions (lightly tailored to common subgroup names)
sg_lower = " ".join(str(s).lower() for s in top_gaps["subgroup"].astype(str).tolist())


def _mentions(*keywords):
    """True if any keyword occurs in the top-gap subgroup names as a whole word or phrase.

    Whole-word matching (with an optional plural "s") prevents short
    abbreviations from matching inside other words, e.g. "el" inside "homeless".
    """
    return any(re.search(rf"\b{re.escape(k)}s?\b", sg_lower) for k in keywords)


hint_rules = [
    (("students with disabilities", "sped"),
     "Targeted SPED case-manager cadence; proactive 3/6/9 absence outreach."),
    (("english learner", "el"),
     "EL family outreach in home languages; after-school EL tutoring with attendance incentives."),
    (("homeless",),
     "McKinney-Vento coordination: transportation support, flexible check-in options."),
    (("foster",),
     "Foster youth liaisons: cross-agency coordination; predictable check-ins."),
    (("socioeconomically disadvantaged", "sed"),
     "Tier-1 messaging + transportation passes; early referral pathways to supports."),
]
hints = [text for keywords, text in hint_rules if _mentions(*keywords)]
if not hints:
    hints.append("Focus Tier-1 communication + Tier-2 3/6/9 check-ins for the top-gap subgroups.")

print("\nInterpretation (use 1–2 bullets):")
for h in hints[:2]:
    print(f"• {h}")

# -------- SAVE MARKDOWN SNIPPET --------
lines = [
    f"**Insert chart:** `projects/01_california-dashboard-replica/assets/largest_equity_gaps_{YEAR}.png`  \n",
    "**Largest equity gaps (vs All Students)**\n",
]
lines.extend(f"- **{name}** — {gap_txt} (rate {rate_txt})\n" for name, gap_txt, rate_txt in top_items)
if bright_items:
    lines.append("\n**Bright spots (below All Students)**\n")
    lines.extend(f"- **{name}** — {gap_txt} (rate {rate_txt})\n" for name, gap_txt, rate_txt in bright_items)
lines.append("\n**Interpretation (pick 1–2):**\n")
lines.extend(f"- {h}\n" for h in hints[:2])

out_md = OUT_DIR / f"section4_gaps_{YEAR}_{slugify(DISTRICT)}.md"
out_md.write_text("".join(lines), encoding="utf-8")
print("\nSaved snippet:", out_md.as_posix())


Insert chart: projects/01_california-dashboard-replica/assets/largest_equity_gaps_2024.png

Largest equity gaps (vs All Students):
• GR78: +33.6 pp (rate 95.0%)
• Black/African American: +33.0 pp (rate 94.3%)
• GRTK8: +29.9 pp (rate 91.3%)
• Two or More Races: +27.5 pp (rate 88.9%)
• Female: +14.0 pp (rate 75.3%)

Bright spots (below All Students):
• Asian: -34.7 pp (rate 26.7%)
• Male: -11.4 pp (rate 50.0%)
• English Learners: -10.5 pp (rate 50.9%)

Interpretation (use 1–2 bullets):
• Focus Tier-1 communication + Tier-2 3/6/9 check-ins for the top-gap subgroups.

Saved snippet: /Users/ms/Projects/acoe-edu-data-portfolio/projects/02_equity-data-story/assets/section4_gaps_2024_san-francisco-county-office-of-education.md


In [65]:
from pathlib import Path
import pandas as pd
import numpy as np
import re

# ---------------- CONFIG ----------------
DISTRICT = "San Francisco County Office of Education"  # change if needed
YEAR = 2024  # or set to None for latest available

# Target knobs (tweak if your org prefers pp targets).
# Annual goals are fractions; the ≈90-day goals are one quarter of the annual ones.
ANNUAL_REL_REDUCTION_ALL = 0.15  # 15% relative reduction over a year for All Students
ANNUAL_GAP_REDUCTION_PP = 0.02   # 2.0 percentage-point gap reduction over a year
REL_90D = 0.25 * ANNUAL_REL_REDUCTION_ALL
GAP_90DPP = 0.25 * ANNUAL_GAP_REDUCTION_PP  # = 0.5 pp by default

# Which subgroups the mini-dashboard tracks, and how many of them.
FOCUS_MODE = "top_gaps"  # "top_gaps" or "top_rates"
FOCUS_N = 3

# ---------------- PATHS ----------------
def find_root(start=None):
    """Locate the repository root by walking upward from `start`.

    The root is the first directory (checking `start` itself, then each parent)
    that contains data/01_caldashboard/processed.

    Args:
        start: Directory to begin from. Defaults to the current working
            directory, looked up at call time rather than once when the
            function is defined.

    Returns:
        The matching directory as a Path, or `start` itself when no ancestor matches.
    """
    start = Path.cwd() if start is None else Path(start)
    for candidate in (start, *start.parents):
        if (candidate / "data" / "01_caldashboard" / "processed").exists():
            return candidate
    return start

ROOT = find_root()
WIDE = ROOT / "data" / "01_caldashboard" / "processed" / "kpi_chronic_wide_all_levels.csv"
OUT = ROOT / "projects" / "02_equity-data-story" / "assets"
OUT.mkdir(parents=True, exist_ok=True)

def pct(x):
    """Render a fractional rate (anything float() accepts) as a one-decimal percentage."""
    as_percent = float(x) * 100
    return f"{as_percent:.1f}%"


def pp(x):
    """Render a fractional difference (anything float() accepts) as one-decimal percentage points."""
    as_points = float(x) * 100
    return f"{as_points:.1f} pp"


def slug(s):
    """Lower-case `s` and collapse every run of non-alphanumeric characters into one hyphen."""
    lowered = str(s).lower()
    return re.sub(r"[^a-z0-9]+", "-", lowered).strip("-")

# ---------------- LOAD ----------------
wide = pd.read_csv(WIDE)


def _all_students_mask(frame):
    """Return a boolean mask selecting the All Students row(s) of `frame`.

    Rows whose subgroup label is "all" / "all students" (case-insensitive) are
    selected. If none match and a reporting_category column exists, rows with
    reporting_category == "TA" are selected instead.
    """
    mask = frame["subgroup"].astype(str).str.lower().isin(["all", "all students"])
    if not mask.any() and "reporting_category" in frame.columns:
        mask = frame["reporting_category"].astype(str).str.upper().eq("TA")
    return mask


# District-level rows for this district, all years.
district_rows = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT)
]

# Pick year if None
if YEAR is None:
    latest_year = pd.to_numeric(district_rows["year"], errors="coerce").max()
    if pd.isna(latest_year):
        # Without this guard int(NaN) fails with an unhelpful message.
        raise ValueError(f"No district-level rows with a usable year for {DISTRICT}; cannot auto-pick YEAR.")
    YEAR = int(latest_year)

df = district_rows[district_rows["year"] == YEAR].copy()
if df.empty:
    raise ValueError(f"No rows for {DISTRICT} in {YEAR}.")

# All Students baseline. The mask is reused below so a row found through the
# 'TA' fallback is also excluded from the focus subgroups.
is_all = _all_students_mask(df)
all_row = df[is_all].copy()
if all_row.empty:
    raise ValueError("Missing All Students row.")

ALL_RATE = float(all_row["chronic_absent_rate"].iloc[0])
if pd.isna(ALL_RATE):
    raise ValueError("All Students chronic_absent_rate is missing; cannot compute targets.")

# Subgroups (exclude All). Use the precomputed gap where present and fill any
# missing gap from the rate, so every focus row gets a numeric target.
sub = df[~is_all].dropna(subset=["chronic_absent_rate"]).copy()
computed_gap = sub["chronic_absent_rate"] - ALL_RATE
if "gap_vs_all" in sub.columns:
    sub["gap_vs_all"] = sub["gap_vs_all"].fillna(computed_gap)
else:
    sub["gap_vs_all"] = computed_gap

# Focus selection. An unrecognised FOCUS_MODE is rejected instead of silently
# behaving like "top_gaps".
focus_sort_column = {"top_rates": "chronic_absent_rate", "top_gaps": "gap_vs_all"}
if FOCUS_MODE not in focus_sort_column:
    raise ValueError(f"FOCUS_MODE must be 'top_gaps' or 'top_rates', got {FOCUS_MODE!r}.")
focus_df = sub.sort_values(focus_sort_column[FOCUS_MODE], ascending=False).head(FOCUS_N).copy()

focus_df = focus_df[["subgroup", "chronic_absent_rate", "gap_vs_all"]].reset_index(drop=True)

# 90-day targets: All Students gets a relative reduction; each focus subgroup's
# target is the All Students target plus its gap reduced by GAP_90DPP (never
# below zero), clipped to the [0, 1] range.
target_all_90d = max(ALL_RATE * (1 - REL_90D), 0.0)
focus_df["target_rate_90d"] = np.clip(
    target_all_90d + np.maximum(focus_df["gap_vs_all"] - GAP_90DPP, 0.0),
    0, 1
)

# Text shared by the printed bullets and the saved Markdown snippet.
cadence = [
    "Monthly: update chronic absenteeism KPIs; share with principals.",
    "Biweekly: attendance team reviews students crossing 3/6/9 thresholds.",
    "Quarterly: subgroup gap review with recommended pivots.",
]
dashboard = [f"All Students rate: {pct(ALL_RATE)} → {pct(target_all_90d)} (target)"]
dashboard.extend(
    f"{name}: {pct(rate)} → {pct(target)}"
    for name, rate, target in zip(
        focus_df["subgroup"], focus_df["chronic_absent_rate"], focus_df["target_rate_90d"]
    )
)

# ---------------- PRINT BULLETS ----------------
print("Implementation & Monitoring (30/60/90)")
for item in cadence:
    print(f"• {item}")
print()

print("Mini-dashboard (tracked monthly):")
for entry in dashboard:
    print(f"– {entry}")

# ---------------- SAVE SNIPPET ----------------
lines = ["**Implementation & Monitoring (30/60/90)**\n"]
lines.extend(f"- {item}\n" for item in cadence)
lines.append("\n")
lines.append("**Mini-dashboard (tracked monthly)**\n")
lines.extend(f"- {entry}\n" for entry in dashboard)

out_md = OUT / f"section7_implementation_{YEAR}_{slug(DISTRICT)}.md"
out_md.write_text("".join(lines), encoding="utf-8")
print("\nSaved snippet:", out_md.as_posix())


Implementation & Monitoring (30/60/90)
• Monthly: update chronic absenteeism KPIs; share with principals.
• Biweekly: attendance team reviews students crossing 3/6/9 thresholds.
• Quarterly: subgroup gap review with recommended pivots.

Mini-dashboard (tracked monthly):
– All Students rate: 61.4% → 59.1% (target)
– GR78: 95.0% → 92.2%
– Black/African American: 94.3% → 91.5%
– GRTK8: 91.3% → 88.5%

Saved snippet: /Users/ms/Projects/acoe-edu-data-portfolio/projects/02_equity-data-story/assets/section7_implementation_2024_san-francisco-county-office-of-education.md
