<a href="https://colab.research.google.com/github/Zhan5ik/ai-student-coach/blob/main/notebooks/student_risk_decision_support.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import zipfile
import urllib.request

from pathlib import Path

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00320/student.zip"
zip_name = "student.zip"
csv_name = "student-mat.csv"

# Fetch and unpack the archive only when the CSV is not already on disk, so
# re-running this cell (or the whole notebook) does not hit the network again.
if not Path(csv_name).exists():
    urllib.request.urlretrieve(url, zip_name)
    with zipfile.ZipFile(zip_name, "r") as zip_ref:
        zip_ref.extractall()

# The file is read with ';' as the field separator.
df = pd.read_csv(csv_name, sep=";")
df.head()


In [None]:
# Row and column counts of the loaded frame.
df.shape

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the frame's columns.
df.describe()

In [None]:
# Binary target: 1 when G3 is below 10, otherwise 0.
below_threshold = df['G3'].lt(10)
df['at_risk'] = below_threshold.astype(int)

# Class balance of the new target.
df['at_risk'].value_counts()

In [None]:
import pandas as pd
import zipfile
import urllib.request

from pathlib import Path

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00320/student.zip"
zip_name = "student.zip"
csv_name = "student-mat.csv"

# Fetch and unpack the archive only when the CSV is not already on disk, so
# re-running this cell (or the whole notebook) does not hit the network again.
if not Path(csv_name).exists():
    urllib.request.urlretrieve(url, zip_name)
    with zipfile.ZipFile(zip_name, "r") as zip_ref:
        zip_ref.extractall()

# Re-reading the CSV resets `df` to the raw columns (any derived columns
# added to the previous `df` are gone until they are recomputed).
df = pd.read_csv(csv_name, sep=";")

df.head()


In [None]:
# Recreate the binary target on the freshly loaded frame:
# 1 when G3 is below 10, otherwise 0.
is_below_ten = df['G3'].lt(10)
df['at_risk'] = is_below_ten.astype(int)

# Class balance of the target.
df['at_risk'].value_counts()


In [None]:
# Predictor columns handed to the model (order is preserved in X).
features = [
    'studytime', 'failures', 'absences',
    'schoolsup', 'famsup', 'internet',
    'health', 'Dalc', 'Walc',
    'freetime', 'goout',
]

# Design matrix and binary target.
X = df.loc[:, features]
y = df['at_risk']


In [None]:
# One-hot encode the non-numeric columns, dropping the first level of each.
# dtype=int keeps every column numeric: recent pandas versions emit bool
# dummies by default, which turns the mixed int/bool frame into an object
# array when converted with `.values` and can trip up SHAP explainers/plots.
X = pd.get_dummies(X, drop_first=True, dtype=int)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier with the iteration cap raised to 1000.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train, y=y_train)


In [None]:
from sklearn.metrics import classification_report

# Score the held-out rows and print the per-class metrics.
y_pred = model.predict(X_test)
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)


In [None]:
# Refit with class 1 weighted twice as heavily as class 0.
# This replaces the `model` fitted earlier.
class_weights = {0: 1, 1: 2}
model = LogisticRegression(max_iter=1000, class_weight=class_weights)
model.fit(X_train, y_train)

# Re-evaluate on the same held-out rows.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


In [None]:
!pip install shap


In [None]:
import shap

# Build a linear-model explainer with X_train as background data and compute
# SHAP values for the test rows.
# NOTE(review): `explainer` and `shap_values` are both reassigned by the
# KernelExplainer cell that follows, so these results are not used afterwards.
explainer = shap.LinearExplainer(model, X_train)
shap_values = explainer.shap_values(X_test)


In [None]:
import shap
import numpy as np

# A 50-row sample of the training data serves as the background set;
# the seed keeps the sample reproducible.
background = X_train.sample(n=50, random_state=42)

# Explain the model's class probabilities rather than its hard labels.
explainer = shap.KernelExplainer(model.predict_proba, background)

# Only the first 50 test rows are explained.
rows_to_explain = X_test.iloc[:50]
shap_values = explainer.shap_values(rows_to_explain)


In [None]:
# Inspect the container type and length of the SHAP output.
type(shap_values), len(shap_values)


In [None]:
# Explaining predict_proba yields one set of SHAP values per class. Depending
# on the installed SHAP version this is either a list of per-class arrays or a
# single (rows, features, classes) array. Select class 1 ("at risk")
# explicitly so the plot shows that class alone in either case.
if isinstance(shap_values, list):
    shap_values_at_risk = shap_values[1]
elif getattr(shap_values, "ndim", 2) == 3:
    shap_values_at_risk = shap_values[:, :, 1]
else:
    shap_values_at_risk = shap_values

shap.summary_plot(
    shap_values_at_risk,
    X_test.iloc[:50],
    feature_names=X_test.columns
)


In [None]:
# First test row, kept as a one-row DataFrame (double brackets) so it can be
# passed straight to the model.
student = X_test.iloc[[0]]
student

In [None]:
# Hard label and class-1 probability for the selected student.
proba_row = model.predict_proba(student)[0]
pred = model.predict(student)[0]
prob = proba_row[1]

print("At risk:", pred)
print("Risk probability:", round(prob, 2))

In [None]:
# Explain the single student with the current `explainer`; later cells index
# the result as [row, feature, class].
shap_values_student = explainer(student)

In [None]:
# Waterfall of per-feature contributions for row 0, class index 1.
shap.plots.waterfall(shap_values_student[0, :, 1])

In [None]:
def generate_recommendations(shap_row, feature_names):
    """Turn positive SHAP contributions into advice strings.

    Args:
        shap_row: Per-feature SHAP values, aligned with ``feature_names``.
        feature_names: Feature names in the same order as ``shap_row``.

    Returns:
        A list of recommendation strings, in feature order, for each known
        feature ('failures', 'studytime') whose SHAP value is greater than 0.
    """
    advice_by_feature = {
        'failures': "High number of past failures — academic counseling recommended.",
        'studytime': "Low study time — suggest structured study schedule.",
    }
    return [
        advice_by_feature[name]
        for contribution, name in zip(shap_row, feature_names)
        if name in advice_by_feature and contribution > 0
    ]


In [None]:
# Class-1 contributions for the single explained row.
student_contributions = shap_values_student.values[0, :, 1]
recs = generate_recommendations(student_contributions, student.columns)

recs


In [None]:
# Independent copy of the selected student, used as the baseline for the
# what-if scenarios.
base_student = student.copy()

In [None]:
def predict_risk(df_row):
    """Return the class-1 probability for a one-row frame, rounded to 3 places.

    Uses the notebook-level ``model``; only the first row of ``df_row`` is
    scored.
    """
    class_probabilities = model.predict_proba(df_row)
    at_risk_probability = class_probabilities[0, 1]
    return round(at_risk_probability, 3)

In [None]:
# Risk probability of the unmodified student; the reference point for the
# what-if comparisons.
base_risk = predict_risk(base_student)

In [None]:
# Each scenario maps a label to the feature overrides applied to the baseline
# student. Every override is clamped to the feature's valid range; studytime
# is an ordinal code whose maximum is 4 in the UCI attribute description, so
# it is capped like the others instead of being allowed to reach 5 or 6.
what_if_scenarios = {
    "Increase study time": {"studytime": min(4, base_student["studytime"].values[0] + 2)},
    "Reduce failures": {"failures": max(0, base_student["failures"].values[0] - 1)},
    "Less going out": {"goout": max(1, base_student["goout"].values[0] - 2)},
    "Improve health": {"health": min(5, base_student["health"].values[0] + 2)}
}

results = []

In [None]:
# Start from an empty list in this cell so that re-running it does not append
# a second copy of every scenario to rows left over from a previous run.
results = []

for scenario, changes in what_if_scenarios.items():
    # Work on a copy so the baseline student is never altered.
    modified = base_student.copy()

    for feature, new_value in changes.items():
        modified[feature] = new_value

    new_risk = predict_risk(modified)

    results.append({
        "Scenario": scenario,
        "Original risk": base_risk,
        "New risk": new_risk,
        "Risk change": round(new_risk - base_risk, 3)
    })

what_if_df = pd.DataFrame(results)
what_if_df

In [None]:
# Replace the earlier explainer with one built by shap.Explainer, using
# X_test as the background data. Later cells reuse this `explainer`.
explainer = shap.Explainer(model, X_test)

# Build the modified student explicitly instead of relying on whatever
# `modified` happened to hold after the last iteration of an earlier loop.
scenario_to_explain = "Improve health"
modified = base_student.copy()
for feature, new_value in what_if_scenarios[scenario_to_explain].items():
    modified[feature] = new_value

shap_base = explainer(base_student)
shap_modified = explainer(modified)

# Side-by-side explanation: baseline first, then the modified student.
shap.plots.waterfall(shap_base[0])
shap.plots.waterfall(shap_modified[0])

In [None]:
def run_batch_what_if(
    model,
    base_student,
    scenarios,
    feature_columns
):
    """Score a set of what-if scenarios against a baseline student.

    Args:
        model: Object exposing ``predict_proba``; column 1 of row 0 of its
            output is taken as the risk.
        base_student: One-row DataFrame holding the baseline feature values.
        scenarios: Mapping of scenario name -> {feature: new value}.
        feature_columns: Columns (in order) passed to the model.

    Returns:
        DataFrame with one row per scenario and the columns "Scenario",
        "Original risk", "New risk" and "Risk change", each risk rounded to
        3 decimals.
    """

    def _risk_of(frame):
        # Class-1 probability of the first row, restricted to model columns.
        return model.predict_proba(frame[feature_columns])[0, 1]

    original_risk = _risk_of(base_student)
    rows = []

    for scenario_name, changes in scenarios.items():
        # Apply the overrides to a copy so the baseline stays untouched.
        modified_student = base_student.copy()
        for feature in changes:
            modified_student[feature] = changes[feature]

        new_risk = _risk_of(modified_student)
        delta = new_risk - original_risk

        rows.append({
            "Scenario": scenario_name,
            "Original risk": round(original_risk, 3),
            "New risk": round(new_risk, 3),
            "Risk change": round(delta, 3)
        })

    return pd.DataFrame(rows)

In [None]:
# Run every what-if scenario for the baseline student through the helper.
# NOTE(review): the name `batch_results` is reused further down for a
# different table (per-student scores), which overwrites this result.
batch_results = run_batch_what_if(
    model=model,
    base_student=base_student,
    scenarios=what_if_scenarios,
    feature_columns=X.columns
)

batch_results

In [None]:
# Class-1 probability for every row of X.
probs = model.predict_proba(X)[:, 1]

# Copy of the features plus the probability and a 0/1 flag for
# probabilities above 0.5. Note this reuses the name `batch_results`.
batch_results = X.assign(
    risk_probability=probs,
    at_risk=(probs > 0.5).astype(int),
)


In [None]:
# Explain every row of X with the current `explainer` (this overwrites the
# earlier `shap_values`).
# NOTE(review): despite its name, `shap_at_risk` holds the values for all
# rows of X, not only the at-risk ones.
shap_values = explainer(X)
shap_at_risk = shap_values.values

In [None]:
def decision_logic(shap_row, features, prob):
    """Map a student's risk probability and SHAP contributions to actions.

    Args:
        shap_row: Per-feature SHAP values, aligned with ``features``.
        features: Feature names in the same order as ``shap_row``.
        prob: Predicted risk probability for the student.

    Returns:
        A list of unique action strings in a stable order: the urgent
        intervention first (when ``prob`` is above 0.75), then one action per
        recognised feature with a positive SHAP value, in feature order.
    """
    feature_actions = {
        "failures": "Academic counseling",
        "studytime": "Structured study plan",
        "absences": "Attendance monitoring",
        "goout": "Lifestyle balance coaching",
    }

    actions = []

    if prob > 0.75:
        actions.append("URGENT academic intervention")

    for value, feature in zip(shap_row, features):
        if value > 0 and feature in feature_actions:
            actions.append(feature_actions[feature])

    # De-duplicate while keeping insertion order. A set would drop duplicates
    # too, but its iteration order varies between interpreter runs, which
    # makes the displayed recommendations non-reproducible.
    return list(dict.fromkeys(actions))

In [None]:
# One action list per student: pair each row of SHAP values with that
# student's risk probability and run the decision rules.
batch_results["recommended_actions"] = [
    decision_logic(student_shap, X.columns, student_prob)
    for student_shap, student_prob in zip(
        shap_at_risk, batch_results["risk_probability"]
    )
]

In [None]:
# The 20 students with the highest predicted risk.
ranked_by_risk = batch_results.sort_values("risk_probability", ascending=False)
priority_students = ranked_by_risk.head(20)

priority_students[["risk_probability", "recommended_actions"]]

In [None]:
def decision_logic(shap_row, features, prob):
    """Assign a priority tier and a list of actions to one student.

    NOTE(review): this redefines the earlier ``decision_logic`` in this
    notebook and returns a dict instead of a list.

    Args:
        shap_row: Per-feature SHAP values, aligned with ``features``.
        features: Feature names in the same order as ``shap_row``.
        prob: Predicted risk probability for the student.

    Returns:
        ``{"priority": ..., "actions": [...]}`` where priority is "CRITICAL"
        (prob >= 0.85), "HIGH" (>= 0.7), "MEDIUM" (>= 0.55) or "LOW", and
        actions are unique strings in a stable order: one per recognised
        feature with a positive SHAP value (in feature order), followed by
        the urgent intervention for CRITICAL students.
    """
    feature_actions = {
        "failures": "Academic counseling",
        "studytime": "Structured study plan",
        "absences": "Attendance monitoring",
        "goout": "Lifestyle balance coaching",
    }

    if prob >= 0.85:
        priority = "CRITICAL"
    elif prob >= 0.7:
        priority = "HIGH"
    elif prob >= 0.55:
        priority = "MEDIUM"
    else:
        priority = "LOW"

    actions = []
    for value, feature in zip(shap_row, features):
        # Only features that push the prediction towards risk trigger actions.
        if value <= 0:
            continue
        if feature in feature_actions:
            actions.append(feature_actions[feature])

    if priority == "CRITICAL":
        actions.append("URGENT academic intervention")

    return {
        "priority": priority,
        # De-duplicate in insertion order; a set would make the order vary
        # between interpreter runs.
        "actions": list(dict.fromkeys(actions))
    }

In [None]:
# One decision per student: pair each row of SHAP values with that student's
# risk probability and run the decision rules.
batch_results["decision"] = [
    decision_logic(student_shap, X.columns, student_prob)
    for student_shap, student_prob in zip(
        shap_at_risk, batch_results["risk_probability"]
    )
]

In [None]:
# Spot-check one student by position.
i = 19
student_row = batch_results.iloc[i]
print("Risk probability:", student_row["risk_probability"])
print("At risk:", student_row["at_risk"])
print("Recommended actions:", student_row["recommended_actions"])


In [None]:
# Spot-check one student by position.
i = 59
student_row = batch_results.iloc[i]
print("Risk probability:", student_row["risk_probability"])
print("At risk:", student_row["at_risk"])
print("Recommended actions:", student_row["recommended_actions"])


In [None]:
# Spot-check one student by position.
i = 0
student_row = batch_results.iloc[i]
print("Risk probability:", student_row["risk_probability"])
print("At risk:", student_row["at_risk"])
print("Recommended actions:", student_row["recommended_actions"])


In [None]:
# Uses the `i` set by the preceding cell.
shap_row = shap_at_risk[i]

# List only the features whose contribution exceeds 0.05 in magnitude.
for feature, value in zip(X.columns, shap_row):
    if not abs(value) > 0.05:
        continue
    print(feature, round(value, 3))


In [None]:
# Spot-check one student by position.
i = 23
student_row = batch_results.iloc[i]
print("Risk probability:", student_row["risk_probability"])
print("At risk:", student_row["at_risk"])
print("Recommended actions:", student_row["recommended_actions"])


In [None]:
# Spot-check one student by position.
i = 21
student_row = batch_results.iloc[i]
print("Risk probability:", student_row["risk_probability"])
print("At risk:", student_row["at_risk"])
print("Recommended actions:", student_row["recommended_actions"])


In [None]:
# Spot-check one student by position.
i = 22
student_row = batch_results.iloc[i]
print("Risk probability:", student_row["risk_probability"])
print("At risk:", student_row["at_risk"])
print("Recommended actions:", student_row["recommended_actions"])


In [None]:
# Spot-check one student by position.
i = 25
student_row = batch_results.iloc[i]
print("Risk probability:", student_row["risk_probability"])
print("At risk:", student_row["at_risk"])
print("Recommended actions:", student_row["recommended_actions"])


In [None]:
# Uses the `i` set by the preceding cell.
shap_row = shap_at_risk[i]

# List only the features whose contribution exceeds 0.05 in magnitude.
for feature, value in zip(X.columns, shap_row):
    if not abs(value) > 0.05:
        continue
    print(feature, round(value, 3))