In [1]:
import re

import pandas as pd
from sklearn.metrics import accuracy_score

In [2]:
# Read the balanced stroke sample, then separate the predictor columns from
# the 'stroke' label column.
df = pd.read_csv('stroke_balanced_sample.csv')

features = df.drop('stroke', axis=1)
target = df.loc[:, 'stroke']


In [3]:
# Restrict the evaluation to the first 100 rows; slicing both objects the same
# way keeps predictors and labels aligned.
features_sample, target_sample = features.iloc[:100], target.iloc[:100]

In [4]:
from langchain_ollama import ChatOllama

# temperature=0 requests greedy decoding so that repeated runs of the notebook
# produce the same predictions (and therefore a reproducible accuracy figure)
# instead of depending on the server's default sampling temperature.
llm = ChatOllama(model="llama3.1:latest", temperature=0)

# Few-shot prompt: three worked examples followed by placeholders that
# predict_stroke_llm() fills in from a single patient record.
_FEW_SHOT_TEMPLATE = """
You are a medical AI assistant. Given patient data, predict the likelihood of a stroke. 
Respond with either 0 (no stroke) or 1 (stroke), and explain your reasoning clearly.

### Example 1:
- Gender: Female
- Age: 67
- Hypertension: 0
- Heart Disease: 1
- Ever Married: Yes
- Work Type: Private
- Residence Type: Urban
- Average Glucose Level: 228.69
- BMI: 36.6
- Smoking Status: formerly smoked

Prediction: 1 - Advanced age, high glucose, and heart disease are strong risk factors.

### Example 2:
- Gender: Male
- Age: 45
- Hypertension: 0
- Heart Disease: 0
- Ever Married: Yes
- Work Type: Self-employed
- Residence Type: Rural
- Average Glucose Level: 85.6
- BMI: 24.0
- Smoking Status: never smoked

Prediction: 0 - No major risk factors present, glucose and BMI are normal.

### Example 3:
- Gender: Female
- Age: 61
- Hypertension: 1
- Heart Disease: 0
- Ever Married: Yes
- Work Type: Govt_job
- Residence Type: Urban
- Average Glucose Level: 105.0
- BMI: 30.2
- Smoking Status: smokes

Prediction: 1 - Smoking, hypertension, and age increase stroke risk.

### Now evaluate this patient:
- Gender: {gender}
- Age: {age}
- Hypertension: {hypertension}
- Heart Disease: {heart_disease}
- Ever Married: {ever_married}
- Work Type: {work_type}
- Residence Type: {residence_type}
- Average Glucose Level: {glucose}
- BMI: {bmi}
- Smoking Status: {smoking}

Prediction:
"""

# An explicit "Prediction: <0|1>" statement (case-insensitive). Non-word
# decoration such as spaces or markdown asterisks may precede the label.
_EXPLICIT_LABEL_RE = re.compile(r"prediction\s*:\W*?([01])(?!\w|\.\d)", re.IGNORECASE)

# A 0 or 1 that stands on its own, i.e. is not a digit inside a larger token
# such as "61", "105.1" or "0.5".
_STANDALONE_LABEL_RE = re.compile(r"(?<![\w.])([01])(?!\w|\.\d)")


def _extract_binary_label(text):
    """Return the 0/1 label stated in ``text``, or ``None`` if none is found.

    An explicit "Prediction: <label>" statement wins; otherwise the first
    standalone 0 or 1 anywhere in the text is used.
    """
    match = _EXPLICIT_LABEL_RE.search(text) or _STANDALONE_LABEL_RE.search(text)
    return int(match.group(1)) if match else None


def predict_stroke_llm(row, llm):
    """Ask a chat model for a binary stroke prediction for one patient.

    Parameters
    ----------
    row : mapping
        One patient record, indexable by the keys 'gender', 'age',
        'hypertension', 'heart_disease', 'ever_married', 'work_type',
        'Residence_type', 'avg_glucose_level', 'bmi' and 'smoking_status'.
    llm : object
        Chat client exposing ``invoke(messages)`` and returning an object
        with a string ``content`` attribute.

    Returns
    -------
    int
        1 if the reply states label 1, otherwise 0. A reply that contains no
        recognisable label also yields 0.
    """
    few_shot_prompt = _FEW_SHOT_TEMPLATE.format(
        gender=row['gender'],
        age=row['age'],
        hypertension=row['hypertension'],
        heart_disease=row['heart_disease'],
        ever_married=row['ever_married'],
        work_type=row['work_type'],
        residence_type=row['Residence_type'],
        glucose=row['avg_glucose_level'],
        bmi=row['bmi'],
        smoking=row['smoking_status'],
    )

    response = llm.invoke([
        {"role": "system", "content": "You are a medical expert specialized in stroke prediction."},
        {"role": "user", "content": few_shot_prompt},
    ])

    output = response.content.strip()

    # Checking for the character '1' anywhere in the first line misreads
    # replies such as "0 - glucose of 105.1", so parse the label explicitly.
    label = _extract_binary_label(output)
    return label if label is not None else 0

In [5]:
# Query the model once per patient row; `args` forwards the shared chat client
# as the second positional argument of predict_stroke_llm.
preds = features_sample.apply(predict_stroke_llm, axis=1, args=(llm,))

In [6]:
# Fraction of sampled patients whose predicted label equals the true label.
acc = accuracy_score(y_true=target_sample, y_pred=preds)
print(f"Accuracy: {acc:.2f}")

Accuracy: 0.48
