In [2]:
import pandas as pd

# ---- Load data ----
# "None" is a real milestone_stage label that the classification rules below
# compare against. pandas >= 2.0 lists the literal "None" among read_csv's
# default NA markers, which would turn those cells into NaN and make every
# `stage == "None"` rule silently miss (everyone would look "On Track").
# Restricting NA parsing to empty cells keeps the label as text on every
# pandas version; genuinely blank cells are still read as NaN.
df = pd.read_csv(
    "../data/synthetic_students_v1.csv",
    keep_default_na=False,
    na_values=[""],
)

# ---- Basic validation (defensive, professional) ----
# Columns the rules in this notebook read; fail fast if the CSV lacks any.
expected_cols = [
    "student_id",
    "program_type",
    "career_path",
    "week_in_program",
    "milestone_stage",
    "observable_engagement"
]

missing = set(expected_cols) - set(df.columns)
if missing:
    raise ValueError(f"Missing columns in CSV: {missing}")

# Force integer dtypes so the week thresholds and the `== 1` engagement check
# compare numbers; astype(int) raises if a value cannot be converted.
df["week_in_program"] = df["week_in_program"].astype(int)
df["observable_engagement"] = df["observable_engagement"].astype(int)

# ---- Status classification logic (V1) ----
def classify_status(row):
    """Label one student row as "On Track", "Behind", or "At Risk".

    Reads ``career_path``, ``week_in_program``, ``milestone_stage`` and
    ``observable_engagement`` from ``row`` (anything indexable by those keys).

    Each known career path has two week cut-offs:

    * At Risk - week is at or past the later cut-off, the student is not
      engaged, and the stage is "None" or "Applying" (only "None" counts for
      the "Undecided" path).
    * Behind  - week is at or past the earlier cut-off and the stage is
      "None"; for "Undecided" the student must also be not engaged.

    Rows matching neither rule, and rows with an unlisted career path, are
    "On Track".
    """
    # career_path -> (behind_week, at_risk_week)
    week_cutoffs = {
        "Consulting": (7, 9),
        "Tech": (11, 13),
        "Healthcare": (13, 15),
        "Finance": (9, 11),
        "Undecided": (7, 9),
    }

    track = row["career_path"]
    week = row["week_in_program"]
    milestone = row["milestone_stage"]
    disengaged = not (row["observable_engagement"] == 1)

    cutoffs = week_cutoffs.get(track)
    if cutoffs is None:
        # Unlisted path: no rule applies, keep the default label.
        return "On Track"

    behind_week, at_risk_week = cutoffs
    undecided = track == "Undecided"

    # "Undecided" students are only flagged at-risk when they have no milestone at all.
    at_risk_stages = ("None",) if undecided else ("None", "Applying")
    if week >= at_risk_week and milestone in at_risk_stages and disengaged:
        return "At Risk"

    # Engagement only matters for the "Behind" rule on the "Undecided" path.
    if week >= behind_week and milestone == "None" and (disengaged or not undecided):
        return "Behind"

    return "On Track"

# ---- Recommended action logic ----
def recommend_action(row):
    """Return the outreach message for one student row.

    Reads ``career_path``, ``status``, ``milestone_stage`` and
    ``observable_engagement`` from ``row``. Precedence:

    1. Stage "Offer" or "Interviewing" gets a fixed message, whatever the status.
    2. Status "On Track": recruiting tips if the stage is "Applying",
       otherwise a light-touch roundup.
    3. Status "Behind": a "Send:" reminder when not engaged, otherwise a
       "Recommend:" checklist.
    4. Any other status is handled as At Risk: a "Send:" nudge when not
       engaged, otherwise a "Recommend:" support bundle.
    """
    track = row["career_path"]
    label = row["status"]
    milestone = row["milestone_stage"]
    is_engaged = row["observable_engagement"] == 1

    # Late-stage students get the same message regardless of status or path.
    late_stage_messages = {
        "Offer": "No action: Celebrate + optional offer evaluation resources",
        "Interviewing": "Recommend: Interview prep / mock interview resources",
    }
    if milestone in late_stage_messages:
        return late_stage_messages[milestone]

    if label == "On Track":
        return (
            f"Recommend: {track} recruiting tips + next relevant workshop"
            if milestone == "Applying"
            else "Recommend: Light-touch resource roundup"
        )

    if label == "Behind":
        return (
            f"Recommend: Next-step checklist for {track}"
            if is_engaged
            else f"Send: {track} timeline reminder + top 2 workshops to attend"
        )

    # Fall-through: every remaining status is treated as At Risk.
    return (
        f"Recommend: Targeted support bundle for {track}"
        if is_engaged
        else f"Send: High-urgency nudge + 'start here' resource path for {track}"
    )

# ---- Apply rules ----
# Row-wise application. "status" must be written to the frame first because
# recommend_action reads row["status"].
df["status"] = df.apply(classify_status, axis="columns")
df["recommended_action"] = df.apply(recommend_action, axis="columns")

# ---- Display results ----
# Bare last expression so the notebook renders the sorted view; the result is
# not assigned, so df keeps its original row order.
df.sort_values(by=["career_path", "week_in_program", "student_id"])


Unnamed: 0,student_id,program_type,career_path,week_in_program,milestone_stage,observable_engagement,status,recommended_action
0,S001,Specialized_MS,Consulting,5,Applying,1,On Track,Recommend: Consulting recruiting tips + next r...
21,S022,Specialized_MS,Consulting,6,Applying,1,On Track,Recommend: Consulting recruiting tips + next r...
1,S002,Specialized_MS,Consulting,8,,0,Behind,Send: Consulting timeline reminder + top 2 wor...
2,S003,Specialized_MS,Consulting,9,Applying,0,At Risk,Send: High-urgency nudge + 'start here' resour...
3,S004,Specialized_MS,Consulting,10,Interviewing,1,On Track,Recommend: Interview prep / mock interview res...
12,S013,Specialized_MS,Finance,6,Applying,1,On Track,Recommend: Finance recruiting tips + next rele...
22,S023,Specialized_MS,Finance,8,Interviewing,1,On Track,Recommend: Interview prep / mock interview res...
13,S014,Specialized_MS,Finance,9,,0,Behind,Send: Finance timeline reminder + top 2 worksh...
14,S015,Specialized_MS,Finance,11,Applying,0,At Risk,Send: High-urgency nudge + 'start here' resour...
15,S016,Specialized_MS,Finance,12,Interviewing,1,On Track,Recommend: Interview prep / mock interview res...


In [3]:
# ---- Export outputs ----
output_path = "../outputs/intervention_recommendations_v1.csv"

# Sort a copy for export so df itself keeps its original row order;
# index=False leaves the row index out of the CSV.
df_out = df.sort_values(by=["career_path", "week_in_program", "student_id"]).copy()
df_out.to_csv(path_or_buf=output_path, index=False)

# Echo the destination as the cell output.
output_path

'../outputs/intervention_recommendations_v1.csv'

In [4]:
# ---- Executive summary: counts by career path and status ----
# One row per (career_path, status) pair that occurs in df, with the number
# of students in that pair as "student_count".
group_keys = ["career_path", "status"]
status_summary = (
    df.groupby(group_keys)
    .size()
    .reset_index(name="student_count")
    .sort_values(by=group_keys)
)

status_summary

Unnamed: 0,career_path,status,student_count
0,Consulting,At Risk,1
1,Consulting,Behind,1
2,Consulting,On Track,3
3,Finance,At Risk,1
4,Finance,Behind,1
5,Finance,On Track,3
6,Healthcare,At Risk,1
7,Healthcare,Behind,1
8,Healthcare,On Track,3
9,Tech,Behind,2


In [5]:
# ---- Export executive summary ----
summary_path = "../outputs/status_summary_v1.csv"

# index=False leaves the row index out of the CSV.
status_summary.to_csv(path_or_buf=summary_path, index=False)

# Echo the destination as the cell output.
summary_path

'../outputs/status_summary_v1.csv'