In [5]:
import pandas as pd
from sklearn.metrics import accuracy_score

In [6]:
df = pd.read_csv('stroke_balanced_sample.csv')


target = df['stroke']
features = df.drop(columns=['stroke'])


In [7]:
features_sample = features.head(100)
target_sample = target.head(100)

In [8]:
from langchain_ollama import ChatOllama

llm = ChatOllama(model = "llama3.1:latest")

def predict_stroke_llm(row, llm):
    one_shot_prompt = f"""
You are a medical AI assistant. Your task is to assess a patient's risk of having a stroke based on their health and lifestyle features.

Please follow these steps:
Step 1: Evaluate individual risk factors like age, hypertension, heart disease, glucose level, BMI, and smoking.
Step 2: Consider how combinations of these increase stroke likelihood.
Step 3: Based on your analysis, give a prediction: 0 (low risk, no stroke) or 1 (high risk, stroke likely).
Step 4: Return your answer in the format: "Prediction: [0 or 1] - [reasoning]"

### Example Patient:
- Gender: Female
- Age: 67
- Hypertension: 0
- Heart Disease: 1
- Ever Married: Yes
- Work Type: Private
- Residence Type: Urban
- Average Glucose Level: 228.69
- BMI: 36.6
- Smoking Status: formerly smoked

Reasoning:
Step 1: Patient is elderly (67), has heart disease, and a very high glucose level (228.69).
Step 2: The combination of age, heart condition, and metabolic risk suggests high stroke risk.
Prediction: 1 - Multiple major risk factors indicate high likelihood of stroke.

### Now evaluate this patient:
- Gender: {row['gender']}
- Age: {row['age']}
- Hypertension: {row['hypertension']}
- Heart Disease: {row['heart_disease']}
- Ever Married: {row['ever_married']}
- Work Type: {row['work_type']}
- Residence Type: {row['Residence_type']}
- Average Glucose Level: {row['avg_glucose_level']}
- BMI: {row['bmi']}
- Smoking Status: {row['smoking_status']}

Reasoning:
"""

    response = llm.invoke([
        {"role": "system", "content": "You are a medical expert specialized in stroke risk prediction. Think step by step and explain your reasoning before the final answer."},
        {"role": "user", "content": one_shot_prompt}
    ])

    output = response.content.strip()

    # Naive binary extraction (adjust based on your LLM's style)
    return int('1' in output.split('\n')[0])


In [9]:
preds = features_sample.apply(lambda row: predict_stroke_llm(row, llm), axis=1)

In [10]:
acc = accuracy_score(target_sample, preds)
print(f"Accuracy: {acc:.2f}")

Accuracy: 0.53
