In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

from ipywidgets import interact_manual, widgets
from IPython.display import display, Markdown

# Notebook-wide plotting defaults: seaborn "whitegrid" theme with the "muted"
# palette first, then the default figure size (applied after the theme call,
# same order as before).
sns.set_theme(palette="muted", style="whitegrid")
plt.rcParams.update({"figure.figsize": (10, 5)})

def find_project_root(start: Path | None = None) -> Path:
    start = (start or Path.cwd()).resolve()
    for p in [start] + list(start.parents):
        if (p / "data" / "processed" / "stroke_clean_v1.csv").exists():
            return p
        if (p / "data" / "raw" / "healthcare-dataset-stroke-data.csv").exists():
            return p
    raise FileNotFoundError(
        "Could not find project root. Expected to find:\n"
        "  data/processed/stroke_clean_v1.csv  or  data/raw/healthcare-dataset-stroke-data.csv\n"
        f"Starting from: {start}"
    )

# Locate the project and load the cleaned stroke table.
# NOTE: find_project_root() also accepts a directory that only contains the
# raw CSV; in that case the processed file read below will not exist.
PROJECT_ROOT = find_project_root()
PROCESSED_PATH = PROJECT_ROOT.joinpath("data", "processed", "stroke_clean_v1.csv")
df = pd.read_csv(PROCESSED_PATH)

# Column roles: the label, then the three groups of model inputs.
target = "stroke"
num_features = ["age", "avg_glucose_level", "bmi"]
bin_features = ["hypertension", "heart_disease"]
cat_features = ["gender", "ever_married", "Residence_type", "work_type", "smoking_status"]
features = [*num_features, *bin_features, *cat_features]

# Independent copy of the inputs, and the label cast to int.
X = df.loc[:, features].copy()
y = df[target].astype(int)

# Numeric and binary columns go through untouched; categorical columns are
# one-hot encoded, with categories unseen at fit time ignored at predict time.
# Any column not listed is dropped.
preprocess = ColumnTransformer(
    [
        ("num", "passthrough", [*num_features, *bin_features]),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features),
    ],
    remainder="drop",
)

# Hold out a quarter of the rows, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.25,
    random_state=42,
)

# Preprocessing followed by a class-weight-balanced logistic regression.
model = Pipeline(
    [
        ("prep", preprocess),
        ("clf", LogisticRegression(class_weight="balanced", max_iter=3000)),
    ]
)

model.fit(X_train, y_train)

# Caveat text displayed by risk_demo_form alongside each predicted score.
DISCLAIMER = " ".join(
    [
        "Demo only: This score is generated from a simple statistical model trained on this dataset.",
        "It is not a medical tool and should not be used for diagnosis or clinical decisions.",
    ]
)

def predict_score(user_row: pd.DataFrame) -> float:
    """Score one row with the fitted pipeline on a 0-100 scale.

    Args:
        user_row: Frame to score; only its first row is used. It needs the
            feature columns the module-level ``model`` was fitted on.

    Returns:
        The value in column 1 of ``model.predict_proba`` for the first row
        (the ``stroke == 1`` class when labels are 0/1), multiplied by 100
        and converted to a plain ``float``.
    """
    probabilities = model.predict_proba(user_row)
    positive = probabilities[0, 1]
    return float(positive * 100)

# Sorted, NaN-free distinct values of each categorical column, used as the
# dropdown choices in the form below.
gender_opts, married_opts, res_opts, work_opts, smoke_opts = (
    sorted(df[column].dropna().unique().tolist())
    for column in (
        "gender",
        "ever_married",
        "Residence_type",
        "work_type",
        "smoking_status",
    )
)

# Translates the Yes/No dropdown labels into the 0/1 values the model expects.
yes_no_map = dict(No=0, Yes=1)

@interact_manual(
    age=widgets.BoundedIntText(value=50, min=0, max=100, description="Age"),
    avg_glucose_level=widgets.BoundedFloatText(
        value=100, min=0, max=500, description="Glucose"
    ),
    bmi=widgets.BoundedFloatText(value=25, min=0, max=80, description="BMI"),
    hypertension=widgets.Dropdown(options=["No", "Yes"], description="Hypertension"),
    heart_disease=widgets.Dropdown(options=["No", "Yes"], description="Heart Disease"),
    gender=widgets.Dropdown(options=gender_opts, description="Gender"),
    ever_married=widgets.Dropdown(options=married_opts, description="Married"),
    Residence_type=widgets.Dropdown(options=res_opts, description="Residence"),
    work_type=widgets.Dropdown(options=work_opts, description="Work Type"),
    smoking_status=widgets.Dropdown(options=smoke_opts, description="Smoking"),
)
def risk_demo_form(age, avg_glucose_level, bmi, hypertension, heart_disease,
                   gender, ever_married, Residence_type, work_type, smoking_status):
    """Score the values entered in the form and display the result.

    The widget values are assembled into a one-row DataFrame, scored with
    ``predict_score``, and rendered as Markdown: the disclaimer, the score
    out of 100, and a coloured risk level.

    Risk levels: below 5 is "Low" (green), below 15 is "Moderate" (orange),
    anything else is "High" (red).

    Args:
        age, avg_glucose_level, bmi: Numeric inputs passed through as entered.
        hypertension, heart_disease: "No" or "Yes"; mapped to 0/1 through
            ``yes_no_map``.
        gender, ever_married, Residence_type, work_type, smoking_status:
            Categorical values chosen from the dropdowns.
    """
    record = {
        "age": age,
        "avg_glucose_level": avg_glucose_level,
        "bmi": bmi,
        "hypertension": yes_no_map[hypertension],
        "heart_disease": yes_no_map[heart_disease],
        "gender": gender,
        "ever_married": ever_married,
        "Residence_type": Residence_type,
        "work_type": work_type,
        "smoking_status": smoking_status
    }
    user = pd.DataFrame([record])

    score = predict_score(user)

    display(Markdown(f"**{DISCLAIMER}**"))

    # Start from the top band and step down to the first ceiling the score
    # falls under.
    level, color = "High", "red"
    for ceiling, band, hue in ((5, "Low", "green"), (15, "Moderate", "orange")):
        if score < ceiling:
            level, color = band, hue
            break

    headline = f"### Predicted Stroke Risk Score: **{score:.1f} / 100**"
    verdict = f"**Risk Level:** <span style='color:{color}'>{level}</span>"
    display(Markdown(headline))
    display(Markdown(verdict))

FileNotFoundError: Could not find project root. Expected to find:
  data/processed/stroke_clean_v1.csv  or  data/raw/healthcare-dataset-stroke-data.csv
Starting from: C:\Users\USER\AppData\Local\Packages\5319275A.WhatsAppDesktop_cv1g1gvanyjgm\LocalState\sessions\770D9287370B556BB1AC7E7813FEA0E8AECA0921\transfers\2026-04