In [35]:
import pandas as pd

from pathlib import Path  # local import: this cell's import line only brings in pandas

# ------------------ CONFIG ------------------
DISTRICT = "San Francisco County Office of Education"  # set yours
YEAR = 2024

# Resolve the processed KPI file relative to the repo root instead of a
# machine-specific absolute path. Assumes this notebook lives in /notebooks,
# which is the same convention the later cells in this notebook rely on.
ROOT = Path.cwd().resolve().parent
WIDE_FP = ROOT / "data" / "01_caldashboard" / "processed" / "kpi_chronic_wide_all_levels.csv"

wide = pd.read_csv(WIDE_FP)

# District-level "All Students" row for the configured district and year.
# astype(str) keeps the .str accessor usable even if a column was not parsed
# as strings.
row = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT) &
    (wide["year"] == YEAR) &
    (wide["subgroup"].astype(str).str.lower().isin(["all", "all students"]))
]

if row.empty:
    print("No All Students row found—check district name/year or the label for 'All'.")
else:
    all_rate = float(row["chronic_absent_rate"].iloc[0])         # fraction, e.g., 0.173
    cohort_raw = row["cohort"].iloc[0]                           # student count (may be missing)
    # int(float(...)) also accepts values stored as "321.0"; a missing cohort
    # is reported as "—" instead of raising.
    all_cohort = None if pd.isna(cohort_raw) else int(float(cohort_raw))
    # The label follows YEAR so it stays truthful when YEAR is changed.
    print(f"ALL_RATE_{YEAR}:", f"{all_rate:.1%}")
    print("ALL_COHORT:", "—" if all_cohort is None else f"{all_cohort:,}")


ALL_RATE_2024: 61.4%
ALL_COHORT: 321


In [42]:
from pathlib import Path
import pandas as pd

# ------------------ CONFIG ------------------
# Agency and reporting year the executive summary is built for.
DISTRICT = "San Francisco County Office of Education"
YEAR = 2024

# Project layout: the repo root is taken to be the parent of the current
# working directory (i.e. the notebook is run from /notebooks).
ROOT = Path.cwd().resolve().parents[0]
PROCESSED = ROOT.joinpath("data", "01_caldashboard", "processed")
WIDE_FP = PROCESSED.joinpath("kpi_chronic_wide_all_levels.csv")

# 90-day goal heuristics (tweak if you want different targets).
# Annual targets are expressed as fractions: 0.15 = 15% relative reduction,
# 0.02 = 2.0 percentage points.
ANNUAL_REL_REDUCTION_ALL = 0.15
ANNUAL_GAP_REDUCTION_PP = 0.02

# A 90-day window is treated as one quarter of the annual target.
REL_90D = 0.25 * ANNUAL_REL_REDUCTION_ALL
GAP_90D_PP = 0.25 * ANNUAL_GAP_REDUCTION_PP

def pct(x):
    """Format a fraction as a one-decimal percentage (0.173 -> "17.3%").

    Missing values (None / NaN) are rendered as an em dash.
    """
    if pd.isna(x):
        return "—"
    return f"{x*100:.1f}%"
def pp(x):
    """Format a fraction difference as percentage points (0.023 -> "2.3 pp").

    Missing values (None / NaN) are rendered as an em dash.
    """
    if pd.isna(x):
        return "—"
    return f"{x*100:.1f} pp"

# ------------------ LOAD & FILTER ------------------
wide = pd.read_csv(WIDE_FP)

# Narrow the wide KPI table to district-level rows for the configured
# district and year; .copy() gives later cells an independent frame.
at_district_level = wide["geo_level"].astype(str).str.lower().eq("district")
for_district = wide["district_name"].eq(DISTRICT)
for_year = wide["year"].eq(YEAR)
df = wide.loc[at_district_level & for_district & for_year].copy()

# Fail fast with an actionable message when nothing matched.
if df.empty:
    raise ValueError(f"No rows for {DISTRICT} in {YEAR}. Check names/casing and rerun 02_kpi_calculations.")

# All Students row
# `is_all` is reused further down (df[~is_all]) to exclude the All Students
# row from the subgroup rankings, so it must always mark the row(s) that were
# actually picked here, including when the fallback below is what matched.
is_all = df["subgroup"].astype(str).str.lower().isin(["all", "all students"])
if not is_all.any() and "reporting_category" in df.columns:
    # Fallback: identify the row by reporting_category == "TA". Rebinding the
    # mask (rather than only all_row) keeps that row out of the subgroup set.
    is_all = df["reporting_category"].astype(str).str.upper().eq("TA")
all_row = df.loc[is_all]

if all_row.empty:
    raise ValueError("All Students row not found; verify 'subgroup' labeling or 'reporting_category' == 'TA' exists.")

# First matching row supplies the headline numbers.
ALL_RATE = float(all_row["chronic_absent_rate"].iloc[0])   # fraction
ALL_COHORT = int(float(all_row["cohort"].iloc[0]))         # student count

# Prior year delta (if prior exists)
# ALL_DELTA_PP is the change in the All Students rate versus YEAR - 1 as a
# fraction difference, or None when it cannot be computed (no prior-year
# rows, no prior All Students row, or a missing rate on either side).
prior = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT) &
    (wide["year"] == YEAR - 1)
]

ALL_DELTA_PP = None
if not prior.empty:
    prior_all = prior[prior["subgroup"].astype(str).str.lower().isin(["all", "all students"])]
    if not prior_all.empty:
        delta = ALL_RATE - float(prior_all["chronic_absent_rate"].iloc[0])  # fraction diff
        # A NaN rate would otherwise flow into the headline as a bogus
        # "decreased by —" sentence; treat it the same as "no prior year".
        ALL_DELTA_PP = None if pd.isna(delta) else delta

# Top 3 subgroups (highest rates), excluding All
# Subgroup rows with a usable rate, ordered from highest to lowest rate.
sub = (
    df.loc[~is_all]
    .dropna(subset=["chronic_absent_rate"])
    .copy()
    .sort_values("chronic_absent_rate", ascending=False)
)
top3 = sub.loc[:, ["subgroup", "chronic_absent_rate"]].head(3).reset_index(drop=True)

# Largest 3 gaps (vs All Students). A precomputed 'gap_vs_all' column is used
# as-is when the data has one; otherwise the gap is derived from the rates.
if "gap_vs_all" not in df.columns:
    sub["gap_vs_all"] = sub["chronic_absent_rate"] - ALL_RATE
gaps_ranked = sub.dropna(subset=["gap_vs_all"]).sort_values("gap_vs_all", ascending=False)
gaps3 = gaps_ranked[["subgroup", "gap_vs_all"]].head(3).reset_index(drop=True)

# 90-day targets: a relative decrease for All Students (floored at zero) and
# a fixed percentage-point decrease for subgroup gaps.
target_all_90d_rate = max(ALL_RATE * (1 - REL_90D), 0.0)
TARGET_PP_90D = ALL_RATE - target_all_90d_rate  # fraction diff
GAP_TARGET_PP_90D = GAP_90D_PP

# ------------------ BUILD EXEC SUMMARY TEXT ------------------
# Headline sentence. Three cases:
#   * no usable prior-year delta (None or NaN) -> baseline wording
#   * delta that displays as 0.0 pp            -> "held steady" wording
#   * otherwise                                -> increased / decreased by N pp
if ALL_DELTA_PP is None or pd.isna(ALL_DELTA_PP):
    headline = (
        f"In {YEAR}, the All Students chronic absenteeism rate is {pct(ALL_RATE)} "
        f"(cohort {ALL_COHORT:,})."
    )
elif f"{abs(ALL_DELTA_PP)*100:.1f}" == "0.0":
    # Avoid the misleading "decreased by 0.0 pp" when nothing visibly changed.
    headline = (
        f"From {YEAR-1} to {YEAR}, All Students chronic absenteeism held steady "
        f"at {pct(ALL_RATE)} (cohort {ALL_COHORT:,})."
    )
else:
    headline = (
        f"From {YEAR-1} to {YEAR}, All Students chronic absenteeism {'increased' if ALL_DELTA_PP>0 else 'decreased'} "
        f"by {pp(abs(ALL_DELTA_PP))} to {pct(ALL_RATE)} (cohort {ALL_COHORT:,})."
    )

# Top impacted groups
# One "rank) subgroup: rate" entry per row of top3, joined with " | ".
sg_lines = [
    f"{rank}) {name}: {pct(float(rate))}"
    for rank, (name, rate) in enumerate(
        zip(top3["subgroup"], top3["chronic_absent_rate"]), start=1
    )
]
impacted = " | ".join(sg_lines) if sg_lines else "No subgroup rows found."

# Largest gaps
# The sign comes from the value itself ("+33.6 pp" / "-1.2 pp"). A hard-coded
# "+" prefix would print "+-1.2 pp" when a listed subgroup sits below the
# All Students rate.
gap_lines = [
    f"{name}: {float(gap)*100:+.1f} pp"
    for name, gap in zip(gaps3["subgroup"], gaps3["gap_vs_all"])
]
gaps_txt = " | ".join(gap_lines) if gap_lines else "No gaps available."

# Recommended levers (standard, edit if needed)
# Kept as separate items so a single lever is easy to edit; joined with "; ".
_lever_items = (
    "1) Tier-1 family outreach & texting (broad, low-lift nudges)",
    "2) 3/6/9 absence check-ins + case management (time-bound Tier-2)",
    "3) Targeted supports for focus groups (e.g., transportation for SED, EL tutoring, SPED case-manager cadence).",
)
levers = "; ".join(_lever_items)

# Monitoring & 90-day success
# The two numeric targets are formatted once, then spliced into the sentence.
_all_drop_txt = pp(TARGET_PP_90D)
_gap_drop_txt = pp(GAP_TARGET_PP_90D)
monitor = "".join([
    "Monthly dashboard (All + focus subgroups); biweekly attendance-team reviews of 3/6/9 thresholds. ",
    f"**90-day success:** All Students decreases by {_all_drop_txt} from baseline; ",
    f"priority subgroup gaps shrink by {_gap_drop_txt}; ",
    "process metrics: ≥90% on-time 3/6/9 check-ins and ≥85% family contact coverage.",
])

# Assemble the Markdown section: a heading, a blank line, then one bullet per
# narrative piece. The text ends with a single trailing newline.
_summary_lines = [
    "## 1) Executive Summary",
    "",
    f"- **Headline.** {headline}",
    f"- **Who is most impacted.** {impacted}",
    f"- **Largest equity gaps (vs All Students).** {gaps_txt}",
    f"- **Top recommended levers (why these).** {levers}",
    f"- **Monthly monitoring & 90-day success.** {monitor}",
]
exec_summary = "\n".join(_summary_lines) + "\n"

print(exec_summary)

# Also save as a Markdown snippet to Project 02
# File name carries the year and the district name (lower-cased, spaces -> "_").
_district_slug = DISTRICT.lower().replace(' ', '_')
OUT_MD = ROOT.joinpath(
    "projects", "02_equity-data-story", "assets",
    f"exec_summary_{YEAR}_{_district_slug}.md",
)
OUT_MD.parent.mkdir(parents=True, exist_ok=True)  # create the assets folder on first run
OUT_MD.write_text(exec_summary, encoding="utf-8")
print("Saved:", OUT_MD.as_posix())


## 1) Executive Summary

- **Headline.** In 2024, the All Students chronic absenteeism rate is 61.4% (cohort 321).
- **Who is most impacted.** 1) GR78: 95.0% | 2) Black/African American: 94.3% | 3) GRTK8: 91.3%
- **Largest equity gaps (vs All Students).** GR78: +33.6 pp | Black/African American: +33.0 pp | GRTK8: +29.9 pp
- **Top recommended levers (why these).** 1) Tier-1 family outreach & texting (broad, low-lift nudges); 2) 3/6/9 absence check-ins + case management (time-bound Tier-2); 3) Targeted supports for focus groups (e.g., transportation for SED, EL tutoring, SPED case-manager cadence).
- **Monthly monitoring & 90-day success.** Monthly dashboard (All + focus subgroups); biweekly attendance-team reviews of 3/6/9 thresholds. **90-day success:** All Students decreases by 2.3 pp from baseline; priority subgroup gaps shrink by 0.5 pp; process metrics: ≥90% on-time 3/6/9 check-ins and ≥85% family contact coverage.

Saved: /Users/ms/Projects/acoe-edu-data-portfolio/projects/02_equi

In [48]:
from pathlib import Path
import pandas as pd

# --------- CONFIG ---------
# Agency to report on, and the reporting year.
DISTRICT = "San Francisco County Office of Education"
YEAR = 2024          # set to None to auto-pick latest available year

# paths (assumes this notebook is in /notebooks): the repo root is the parent
# of the current working directory.
ROOT = Path.cwd().resolve().parents[0]
PROCESSED = ROOT.joinpath("data", "01_caldashboard", "processed")
WIDE_FP = PROCESSED.joinpath("kpi_chronic_wide_all_levels.csv")

def pct(x):
    """Format a fraction as a one-decimal percentage (0.173 -> "17.3%").

    Missing values (None / NaN) are rendered as "—", matching the `pct`
    helper defined earlier in this notebook. Without the guard a NaN rate
    prints as "nan%" and None raises a TypeError.
    """
    if pd.isna(x):
        return "—"
    return f"{x*100:.1f}%"
def pp_change(x):
    """Describe a year-over-year change given as a fraction difference.

    Returns:
        "n/a (baseline)"          when x is None (no prior year to compare to);
        "n/a (data unavailable)"  when x is NaN (a rate was missing);
        "0.0 pp (no change)"      when the change displays as 0.0 pp;
        "<N.N> pp increase" / "<N.N> pp decrease" otherwise.
    """
    if x is None:
        return "n/a (baseline)"
    if pd.isna(x):
        # NaN compares False to everything and would otherwise fall through
        # to "nan pp decrease".
        return "n/a (data unavailable)"
    magnitude = f"{abs(x)*100:.1f}"
    if magnitude == "0.0":
        # Decide on the displayed value so we never print "0.0 pp increase".
        return "0.0 pp (no change)"
    direction = "increase" if x > 0 else "decrease"
    return f"{magnitude} pp {direction}"

# --------- LOAD & FILTER ---------
wide = pd.read_csv(WIDE_FP)

# District-level rows for the configured district, across all years.
# astype(str) keeps the .str accessor usable even if geo_level was not parsed
# as strings (the year filter below already relied on it).
district_rows = wide[
    (wide["geo_level"].astype(str).str.lower() == "district") &
    (wide["district_name"] == DISTRICT)
]

# If YEAR=None, pick the latest year available for this district
if YEAR is None:
    latest_year = district_rows["year"].max()
    if pd.isna(latest_year):
        # max() of an empty / all-missing column is NaN; int(NaN) would raise
        # an unhelpful "cannot convert float NaN to integer".
        raise ValueError(f"No district-level rows with a year for {DISTRICT}. Check names/casing and rerun 02_kpi_calculations.")
    YEAR = int(latest_year)

df = district_rows[district_rows["year"] == YEAR].copy()
if df.empty:
    raise ValueError(f"No rows for {DISTRICT} in {YEAR}. Check names/casing and rerun 02_kpi_calculations.")

# All Students row
# `is_all` is reused below (df[~is_all]) to exclude the All Students row when
# looking for the largest subgroup gap, so it must mark the row(s) actually
# selected here, including when the fallback is what matched.
is_all = df["subgroup"].astype(str).str.lower().isin(["all", "all students"])
if not is_all.any() and "reporting_category" in df.columns:
    # Fallback: identify the row by reporting_category == "TA" and rebind the
    # mask so that row is not later treated as a subgroup.
    is_all = df["reporting_category"].astype(str).str.upper().eq("TA")
all_row = df.loc[is_all]
if all_row.empty:
    raise ValueError("All Students row not found (try checking 'subgroup' or 'reporting_category' == 'TA').")

# First matching row supplies the headline numbers.
ALL_RATE = float(all_row["chronic_absent_rate"].iloc[0])   # fraction
ALL_COHORT = int(float(all_row["cohort"].iloc[0]))         # student count

# Prior-year change (pp). If no prior year, returns None → prints "baseline".
# District-level rows for the same district in YEAR - 1.
prev = wide[
    wide["geo_level"].astype(str).str.lower().eq("district")
    & wide["district_name"].eq(DISTRICT)
    & wide["year"].eq(YEAR - 1)
]

# Default to "no comparison"; only overwritten when a prior-year
# All Students row is present.
ALL_DELTA = None
if not prev.empty:
    prev_all = prev[prev["subgroup"].astype(str).str.lower().isin(["all","all students"])]
    if not prev_all.empty:
        ALL_DELTA = ALL_RATE - float(prev_all["chronic_absent_rate"].iloc[0])

# Largest subgroup gap (vs All Students)
# Uses a precomputed 'gap_vs_all' column when present; otherwise derives the
# gap as subgroup rate minus the All Students rate (a fraction difference).
subs = df[~is_all].copy()
if "gap_vs_all" not in subs.columns:
    subs["gap_vs_all"] = subs["chronic_absent_rate"] - ALL_RATE
subs = subs.dropna(subset=["gap_vs_all"]).sort_values("gap_vs_all", ascending=False)

if not subs.empty:
    TOP_GAP_GROUP = str(subs.iloc[0]["subgroup"])
    TOP_GAP_VALUE = float(subs.iloc[0]["gap_vs_all"])
    # A gap is a difference between two rates, so it is reported in
    # percentage points with an explicit sign (e.g. "+33.6 pp"), not as "%".
    top_gap_line = f"Largest subgroup gap: {TOP_GAP_GROUP} ({TOP_GAP_VALUE*100:+.1f} pp)"
else:
    # Keep the names defined so later cells can test for "no gap available".
    TOP_GAP_GROUP = None
    TOP_GAP_VALUE = None
    top_gap_line = "Largest subgroup gap: n/a"

# --------- PRINT READY-TO-PASTE LINES ---------
# Collect the lines first, then emit them in one call (one line each).
_paste_lines = [
    f"District/Agency: {DISTRICT}",
    f"Year: {YEAR}",
    f"All Students rate: {pct(ALL_RATE)}",
    f"Change vs prior year: {pp_change(ALL_DELTA)}",
    f"Cohort (students included): {ALL_COHORT:,}",
    top_gap_line,
]
print("\n".join(_paste_lines))


District/Agency: San Francisco County Office of Education
Year: 2024
All Students rate: 61.4%
Change vs prior year: n/a (baseline)
Cohort (students included): 321
Largest subgroup gap: GR78 (33.6%)


If your file labels “All Students” as `All`, the code already handles it. If your district name is spelled slightly differently, run the following to list the matching district names:
# District-level names containing "san francisco" (case-insensitive); use the
# exact spelling shown here for DISTRICT.
_district_names = wide.loc[wide["geo_level"].str.lower().eq("district"), "district_name"].unique()
[name for name in _district_names if "san francisco" in str(name).lower()]

After adding prior years (2–4 year view), refer to Option B:

from pathlib import Path
import pandas as pd

DISTRICT = "San Francisco County Office of Education"
ROOT = Path.cwd().resolve().parent          # if notebook lives in /notebooks
# Same processed-data folder the earlier cells read this file from
# (data/01_caldashboard/processed); "data/processed" pointed somewhere else.
WIDE = ROOT / "data" / "01_caldashboard" / "processed" / "kpi_chronic_wide_all_levels.csv"

wide = pd.read_csv(WIDE)

# --- All Students rows for the district ---
# One mask per condition, combined below; rows missing a year or a rate are
# dropped because the multi-year comparison needs both.
_district_level = wide["geo_level"].str.lower().eq("district")
_named_district = wide["district_name"].eq(DISTRICT)
_all_students = wide["subgroup"].astype(str).str.lower().isin(["all","all students"])
df = (
    wide.loc[_district_level & _named_district & _all_students]
    .dropna(subset=["year","chronic_absent_rate"])
)

if df.empty:
    raise ValueError("No All Students rows found—check district name or run 02_kpi again.")

# Latest year values
# Row with the greatest 'year'; its rate and cohort fill the template
# placeholders printed below.
row_latest = df.loc[df["year"].idxmax()]
ALL_RATE_2024 = f"{float(row_latest['chronic_absent_rate']):.1%}"

# Cohort is optional: fall back to "—" when the column is absent OR the value
# is missing (int(float(NaN)) would otherwise raise).
_cohort_raw = row_latest["cohort"] if "cohort" in df.columns else None
if _cohort_raw is None or pd.isna(_cohort_raw):
    ALL_COHORT = "—"
else:
    ALL_COHORT = f"{int(float(_cohort_raw)):,}"

print("{{ALL_RATE_2024}} =", ALL_RATE_2024)
print("{{ALL_COHORT}} =", ALL_COHORT)

# If you have multiple years, compute 2–4 year narrative pieces
if df["year"].nunique() >= 2:
    # Chronological order; a stable sort keeps the file order among rows that
    # share a year.
    df = df.sort_values("year", kind="stable")
    START_YEAR = int(df["year"].iloc[0])
    END_YEAR = int(df["year"].iloc[-1])
    START_RATE = float(df["chronic_absent_rate"].iloc[0])
    END_RATE = float(df["chronic_absent_rate"].iloc[-1])
    DELTA = END_RATE - START_RATE
    DIRECTION = "increased" if DELTA > 0 else ("decreased" if DELTA < 0 else "held steady")
    DELTA_PP = f"{abs(DELTA)*100:.1f} pp"
    START_RATE_TXT = f"{START_RATE:.1%}"
    END_RATE_TXT = f"{END_RATE:.1%}"

    # Largest change between consecutive *available* years.
    # One rate per year (first row wins if a year is duplicated) so that the
    # label lookups below return scalars.
    rates = (
        df.drop_duplicates(subset="year", keep="first")
        .set_index("year")["chronic_absent_rate"]
    )
    dd = rates.diff().dropna()
    y2_label = dd.abs().idxmax()
    y2 = int(y2_label)
    # The comparison year is the previous year present in the data, which is
    # not necessarily y2 - 1 when a year is missing from the file.
    y1 = int(rates.index[rates.index.get_loc(y2_label) - 1])
    MAX_PA_PP = f"{abs(float(dd.loc[y2_label]))*100:.1f} pp"

    print("{{START_YEAR}} =", START_YEAR)
    print("{{END_YEAR}} =", END_YEAR)
    print("{{START_RATE}} =", START_RATE_TXT)
    print("{{END_RATE}} =", END_RATE_TXT)
    print("{{DELTA_PP}} =", DELTA_PP)
    print("{{DIRECTION}} =", DIRECTION)
    print("{{JUMP_Y1}} =", y1)
    print("{{JUMP_Y2}} =", y2)
    print("{{MAX_PA_PP}} =", MAX_PA_PP)
else:
    print("Only one year present — use the baseline-only wording (Option A).")
