In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv('stroke_balanced_sample.csv')


target = df['stroke']
features = df.drop(columns=['stroke'])


In [3]:
features_sample = features.head(100)
target_sample = target.head(100)

In [4]:
from langchain_ollama import ChatOllama

llm = ChatOllama(model = "llama3.1:latest")

def predict_stroke_llm(row, llm):
    prompt = f"""
You are a medical AI assistant helping assess stroke risk. You will analyze patient data, assign a numerical risk score to each feature, sum the total risk, and then classify stroke likelihood as 0 (no stroke) or 1 (likely stroke). Think step-by-step and explain your reasoning.

### Instructions:
Step 1: Assign risk scores (0 to 2) for each of these:
- Age (0 = under 50, 1 = 50–65, 2 = over 65)
- Hypertension (0 = no, 2 = yes)
- Heart Disease (0 = no, 2 = yes)
- Average Glucose Level (0 = < 140, 1 = 140–200, 2 = > 200)
- BMI (0 = under 25, 1 = 25–30, 2 = over 30)
- Smoking Status (0 = never smoked, 1 = formerly smoked, 2 = smokes)

Step 2: Sum total risk score (0–12).
Step 3: If total score ≥ 5 → Prediction = 1 (stroke likely). Else → Prediction = 0.
Step 4: Format your final answer: "Prediction: [0 or 1] - [reason]"

Patient data:
- Gender: {row['gender']}
- Age: {row['age']}
- Hypertension: {row['hypertension']}
- Heart Disease: {row['heart_disease']}
- Ever Married: {row['ever_married']}
- Work Type: {row['work_type']}
- Residence Type: {row['Residence_type']}
- Average Glucose Level: {row['avg_glucose_level']}
- BMI: {row['bmi']}
- Smoking Status: {row['smoking_status']}

Now perform the feature-based scoring, sum the risk, and provide your prediction.
"""

    response = llm.invoke([
        {"role": "system", "content": "You are a medical AI expert in stroke risk assessment using structured reasoning."},
        {"role": "user", "content": prompt}
    ])

    output = response.content.strip()

    # Extract the final prediction
    if output.startswith("Prediction:"):
        label_line = output.splitlines()[0]
    else:
        label_line = next((line for line in output.splitlines() if "Prediction" in line), "")

    return int('1' in label_line.split()[1]) if 'Prediction' in label_line else -1


In [5]:
preds = features_sample.apply(lambda row: predict_stroke_llm(row, llm), axis=1)

In [7]:
acc = accuracy_score(target_sample, preds)
print(f"Accuracy: {acc:.2f}")

Accuracy: 0.55
