In [6]:
import re

import pandas as pd
from sklearn.metrics import accuracy_score

In [7]:
# Load the stroke sample from the CSV file.
df = pd.read_csv('stroke_balanced_sample.csv')

# Split the frame: every column except 'stroke' is a predictor,
# and the 'stroke' column itself is the label to predict.
features = df.drop('stroke', axis=1)
target = df['stroke']


In [8]:
# Number of leading rows to evaluate. Defined once so the feature rows and
# their labels are always sliced to the same length and stay aligned.
N_SAMPLES = 100
features_sample = features.head(N_SAMPLES)
target_sample = target.head(N_SAMPLES)

In [9]:
from langchain_ollama import ChatOllama

# temperature=0 asks the model for (near-)greedy decoding so that re-running
# the notebook yields the same predictions; without it the client samples
# with the backend's default temperature and the accuracy below is not
# reproducible from run to run.
llm = ChatOllama(model="llama3.1:latest", temperature=0)

# Finds the 0/1 label that follows the word "Prediction", tolerating any
# punctuation or markdown between the two (e.g. "Prediction: 1",
# "**Prediction:** 0", "Prediction:1"). The trailing lookahead rejects an
# echo of the format placeholder "Prediction: [0 or 1]".
_PREDICTION_RE = re.compile(r"prediction[^\w\n]*([01])\b(?!\s*or\b)", re.IGNORECASE)


def _parse_prediction(output):
    """Return the last 0/1 label announced in ``output``, or -1 if there is none."""
    labels = _PREDICTION_RE.findall(output)
    # The prompt ends with "Scoring:", so the model writes its scoring first
    # and its verdict last; the final match is therefore the one to trust.
    return int(labels[-1]) if labels else -1


def predict_stroke_llm(row, llm):
    """Ask the LLM for a stroke prediction for one patient and parse its answer.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the keys
            'gender', 'age', 'hypertension', 'heart_disease', 'ever_married',
            'work_type', 'Residence_type', 'avg_glucose_level', 'bmi' and
            'smoking_status'.
        llm: Chat model exposing ``invoke(messages)`` whose result has a
            string ``content`` attribute.

    Returns:
        1 if the model predicts a stroke, 0 if it does not, and -1 when no
        explicit "Prediction: 0/1" label can be found in the model output.
    """
    prompt = f"""
You are a medical AI expert. You must predict stroke risk based on patient features. For each patient, follow this process:

### Instructions:
Step 1: Assign a risk score (0 to 2) for each of the following:
- Age: 0 = under 50, 1 = 50–65, 2 = over 65
- Hypertension: 0 = no, 2 = yes
- Heart Disease: 0 = no, 2 = yes
- Average Glucose Level: 0 = <140, 1 = 140–200, 2 = >200
- BMI: 0 = <25, 1 = 25–30, 2 = >30
- Smoking: 0 = never smoked, 1 = formerly smoked, 2 = smokes

Step 2: Sum all risk scores. If total score is 5 or more → stroke likely (1), otherwise not likely (0).

Step 3: Output your prediction in this format:
"Prediction: [0 or 1] - [explanation of reasoning]"

---

### Example Patient:
- Gender: Female
- Age: 67
- Hypertension: 0
- Heart Disease: 1
- Ever Married: Yes
- Work Type: Private
- Residence Type: Urban
- Average Glucose Level: 228.69
- BMI: 36.6
- Smoking Status: formerly smoked

Scoring:
- Age: 2 (over 65)
- Hypertension: 0
- Heart Disease: 2
- Glucose: 2 (>200)
- BMI: 2 (>30)
- Smoking: 1 (formerly smoked)

Total Risk Score: 9  
Prediction: 1 - Age, heart disease, and high glucose/BMI suggest high stroke risk.

---

### Now analyze this patient:
- Gender: {row['gender']}
- Age: {row['age']}
- Hypertension: {row['hypertension']}
- Heart Disease: {row['heart_disease']}
- Ever Married: {row['ever_married']}
- Work Type: {row['work_type']}
- Residence Type: {row['Residence_type']}
- Average Glucose Level: {row['avg_glucose_level']}
- BMI: {row['bmi']}
- Smoking Status: {row['smoking_status']}

Scoring:
"""

    response = llm.invoke([
        {"role": "system", "content": "You are a clinical decision support expert. Follow the risk scoring method exactly and provide your prediction based on the instructions."},
        {"role": "user", "content": prompt}
    ])

    output = response.content.strip()

    # Extract the final prediction. A regex is used instead of whitespace
    # token positions so that prefixes ("Final Prediction: 1"), missing
    # spaces ("Prediction:1") and markdown emphasis are all handled, and an
    # answer without a clear 0/1 label yields -1 rather than a silent 0.
    return _parse_prediction(output)


In [None]:
# Score every sampled patient: each row is handed to predict_stroke_llm as its
# first argument, with the shared chat model forwarded as the second.
preds = features_sample.apply(predict_stroke_llm, axis=1, args=(llm,))

In [None]:
# predict_stroke_llm returns -1 when it cannot find a label in the model's
# answer. Those rows can never equal a true label, so they lower the accuracy;
# count them so parse failures are not mistaken for wrong predictions.
n_unparsed = int((preds == -1).sum())

acc = accuracy_score(target_sample, preds)
print(f"Accuracy: {acc:.2f}")
if n_unparsed:
    print(f"Unparseable LLM outputs counted as errors: {n_unparsed}/{len(preds)}")

Accuracy: 0.61
