In [6]:
import re

import pandas as pd
from sklearn.metrics import accuracy_score

In [7]:
# Read the stroke sample CSV, then separate the 'stroke' label column
# from the remaining predictor columns.
df = pd.read_csv('stroke_balanced_sample.csv')

features, target = df.drop(columns='stroke'), df['stroke']


In [8]:
# Keep only the first 100 rows; predictors and labels are sliced the same way
# so they stay row-aligned.
features_sample, target_sample = features.iloc[:100], target.iloc[:100]

In [9]:
from langchain_ollama import ChatOllama

# temperature=0 requests greedy decoding so repeated runs of the evaluation
# produce the same predictions (and therefore a comparable accuracy figure).
llm = ChatOllama(model="llama3.1:latest", temperature=0)

def _parse_prediction(output):
    """Extract the 0/1 label from the model's reply text.

    Looks for the first occurrence of "Prediction" (case-insensitive) that is
    followed, on the same line, by a standalone ``0`` or ``1``. Markdown
    decoration, brackets, a missing space after the colon, or a prefix such
    as "Final" are all tolerated.

    Returns:
        1 or 0 when a label is found, -1 when the reply has no parsable
        prediction.
    """
    # [^\d\n]*? : skip punctuation/words between "Prediction" and the label,
    #             but never cross a line break or jump over another number.
    # \b([01])\b: the label must be a standalone digit (rejects e.g. "10").
    # (?!\.\d)  : reject decimals such as "0.5".
    match = re.search(r"Prediction[^\d\n]*?\b([01])\b(?!\.\d)", output, re.IGNORECASE)
    return int(match.group(1)) if match else -1


def predict_stroke_llm(row, llm):
    """Ask the LLM for a rule-based stroke prediction for one patient.

    Builds a few-shot prompt describing a point-based scoring method, fills
    in the patient's values, sends it to the model and parses the label out
    of the reply.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexable by the keys
            'gender', 'age', 'hypertension', 'heart_disease', 'ever_married',
            'work_type', 'Residence_type', 'avg_glucose_level', 'bmi' and
            'smoking_status'.
        llm: Chat model exposing ``invoke(messages)`` and returning an object
            whose ``content`` attribute is the reply text.

    Returns:
        1 if the model predicts stroke, 0 if it predicts no stroke, and -1
        if no "Prediction: <0|1>" statement can be found in the reply.
    """
    prompt = f"""
You are a medical AI expert. You must predict whether a patient is at risk of stroke. 
Use the following method for every patient:

### Instructions:
Step 1: Assign a risk score (0 to 2) for each feature:
- Age: 0 if <50, 1 if 50–65, 2 if >65
- Hypertension: 0 = no, 2 = yes
- Heart Disease: 0 = no, 2 = yes
- Average Glucose Level: 0 = <140, 1 = 140–200, 2 = >200
- BMI: 0 = <25, 1 = 25–30, 2 = >30
- Smoking Status: 0 = never, 1 = formerly, 2 = smokes

Step 2: Sum the total risk score (0–12).  
Step 3: If score ≥ 5 → stroke likely (1), else unlikely (0).  
Step 4: Format your output:  
"Prediction: [0 or 1] - [brief reasoning]"

---

### Example 1:
- Gender: Female
- Age: 67
- Hypertension: 0
- Heart Disease: 1
- Ever Married: Yes
- Work Type: Private
- Residence Type: Urban
- Average Glucose Level: 228.69
- BMI: 36.6
- Smoking Status: formerly smoked

Scoring:
- Age: 2
- Hypertension: 0
- Heart Disease: 2
- Glucose: 2
- BMI: 2
- Smoking: 1  
Total Risk Score: 9  
Prediction: 1 - Multiple critical risk factors present.

---

### Example 2:
- Gender: Male
- Age: 45
- Hypertension: 0
- Heart Disease: 0
- Ever Married: Yes
- Work Type: Self-employed
- Residence Type: Rural
- Average Glucose Level: 85.6
- BMI: 24.0
- Smoking Status: never smoked

Scoring:
- Age: 0
- Hypertension: 0
- Heart Disease: 0
- Glucose: 0
- BMI: 0
- Smoking: 0  
Total Risk Score: 0  
Prediction: 0 - No significant risk factors.

---

### Example 3:
- Gender: Female
- Age: 61
- Hypertension: 1
- Heart Disease: 0
- Ever Married: Yes
- Work Type: Govt_job
- Residence Type: Urban
- Average Glucose Level: 150.0
- BMI: 29.0
- Smoking Status: smokes

Scoring:
- Age: 1
- Hypertension: 2
- Heart Disease: 0
- Glucose: 1
- BMI: 1
- Smoking: 2  
Total Risk Score: 7  
Prediction: 1 - Risk is elevated due to hypertension, smoking, and moderate glucose/BMI.

---

### Now evaluate this patient:
- Gender: {row['gender']}
- Age: {row['age']}
- Hypertension: {row['hypertension']}
- Heart Disease: {row['heart_disease']}
- Ever Married: {row['ever_married']}
- Work Type: {row['work_type']}
- Residence Type: {row['Residence_type']}
- Average Glucose Level: {row['avg_glucose_level']}
- BMI: {row['bmi']}
- Smoking Status: {row['smoking_status']}

Scoring:
"""

    response = llm.invoke([
        {"role": "system", "content": "You are a medical AI trained for clinical reasoning and structured stroke risk prediction."},
        {"role": "user", "content": prompt}
    ])

    output = response.content.strip()

    # The prompt ends with "Scoring:", so the reply normally lists the scores
    # first and the "Prediction:" line last; the parser scans the whole reply
    # instead of indexing whitespace-separated tokens, which used to raise
    # IndexError on a bare "Prediction:" and misread "Final Prediction: 1".
    return _parse_prediction(output)


In [10]:
# Classify each sampled patient row with the LLM; `llm` is forwarded to
# predict_stroke_llm as its second positional argument.
preds = features_sample.apply(predict_stroke_llm, axis=1, args=(llm,))

KeyboardInterrupt: 

In [None]:
# Fraction of sampled patients whose LLM label equals the ground-truth label.
acc = accuracy_score(y_true=target_sample, y_pred=preds)
print(f"Accuracy: {acc:.2f}")

Accuracy: 0.61
