In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv('stroke_balanced_sample.csv')


target = df['stroke']
features = df.drop(columns=['stroke'])


In [3]:
features_sample = features.head(100)
target_sample = target.head(100)

In [4]:
from langchain_ollama import ChatOllama

llm = ChatOllama(model = "mistral:latest")

def predict_stroke_llm(row, llm):
    prompt = f"""
You are a medical AI assistant. Your task is to assess whether a patient is at risk of stroke using clinical and lifestyle data. 
Follow these reasoning steps for each patient:
Step 1: Assess each risk factor: age, hypertension, heart disease, glucose, BMI, smoking.
Step 2: Identify combinations of risk factors that increase stroke risk.
Step 3: Make a prediction: 1 = high risk (stroke likely), 0 = low risk (stroke unlikely).
Step 4: Return prediction in this format: "Prediction: [0 or 1] - [reasoning]"

### Example 1:
- Gender: Female
- Age: 67
- Hypertension: 0
- Heart Disease: 1
- Ever Married: Yes
- Work Type: Private
- Residence Type: Urban
- Average Glucose Level: 228.69
- BMI: 36.6
- Smoking Status: formerly smoked

Reasoning:
Step 1: Elderly (67), heart disease, very high glucose (228.69).
Step 2: Combination of age + heart disease + glucose strongly increases stroke risk.
Prediction: 1 - Multiple high-risk factors.

### Example 2:
- Gender: Male
- Age: 45
- Hypertension: 0
- Heart Disease: 0
- Ever Married: Yes
- Work Type: Self-employed
- Residence Type: Rural
- Average Glucose Level: 85.6
- BMI: 24.0
- Smoking Status: never smoked

Reasoning:
Step 1: No major risk factors, normal glucose and BMI.
Step 2: Healthy profile overall.
Prediction: 0 - Low risk profile.

### Example 3:
- Gender: Female
- Age: 61
- Hypertension: 1
- Heart Disease: 0
- Ever Married: Yes
- Work Type: Govt_job
- Residence Type: Urban
- Average Glucose Level: 105.0
- BMI: 30.2
- Smoking Status: smokes

Reasoning:
Step 1: Age above 60, smoker, hypertensive.
Step 2: Smoking + hypertension + age moderately increase stroke risk.
Prediction: 1 - Combination of moderate risks justifies positive classification.

### Now evaluate this patient:
- Gender: {row['gender']}
- Age: {row['age']}
- Hypertension: {row['hypertension']}
- Heart Disease: {row['heart_disease']}
- Ever Married: {row['ever_married']}
- Work Type: {row['work_type']}
- Residence Type: {row['Residence_type']}
- Average Glucose Level: {row['avg_glucose_level']}
- BMI: {row['bmi']}
- Smoking Status: {row['smoking_status']}

Reasoning:
"""

    response = llm.invoke([
        {"role": "system", "content": "You are a medical expert specialized in stroke risk prediction. Think step by step and explain your reasoning before providing a final answer."},
        {"role": "user", "content": prompt}
    ])

    output = response.content.strip()

    # Naive binary extraction (adjust based on your LLM's style)
    return int('1' in output.split('\n')[0])


In [5]:
preds = features_sample.apply(lambda row: predict_stroke_llm(row, llm), axis=1)

In [6]:
acc = accuracy_score(target_sample, preds)
print(f"Accuracy: {acc:.2f}")

Accuracy: 0.50
