In [1]:
import pandas as pd
from openai import OpenAI
from sklearn.metrics import accuracy_score

In [None]:
client = OpenAI(api_key = 'sk-proj-')

In [3]:
df = pd.read_csv('cardio_train.csv')
print(df.columns)

Index(['id;age;gender;height;weight;ap_hi;ap_lo;cholesterol;gluc;smoke;alco;active;cardio'], dtype='object')


In [4]:
df = pd.read_csv('cardio_train.csv', delimiter=';')


target = df['cardio']
features = df.drop(columns=['cardio'])


In [5]:
features_sample = features.sample(n=60, random_state=42)
target_sample = target.loc[features_sample.index]

In [6]:
def llm_predict_cardiovascular(row):
    prompt = f"""You are given clinical data of a patient. Predict if they have cardiovascular disease.

Risk factors to consider:
- Advanced age (>55 years)
- Elevated blood pressure (≥140/90)
- High cholesterol and glucose levels
- Obesity (BMI ≥30)
- Smoking, alcohol intake, and physical inactivity

Analyze the clinical profile holistically and consider how multiple risk factors may compound.

Format your output as a single number: 0 (no cardiovascular disease) or 1 (cardiovascular disease)."""

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a medical assistant specialized in analyzing cardiovascular health data to predict cardiovascular disease."},
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    output = response.choices[0].message.content.strip()
    
    # Extract prediction using a more reliable method
    import re
    match = re.search(r"FINAL_PREDICTION:\s*([01])", output)
    if match:
        return int(match.group(1))
    else:
        # Fallback: analyze text for prediction evidence
        cardio_indicators = ["cardiovascular disease", "high risk", "predict: 1", "prediction: 1"]
        for indicator in cardio_indicators:
            if indicator in output.lower():
                return 1
        return 0

In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

def evaluate_model(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred)
    rec = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    cm = confusion_matrix(y_true, y_pred)
    
    print(f"Accuracy: {acc:.2f}")
    print(f"Precision: {prec:.2f}")
    print(f"Recall: {rec:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print(f"Confusion Matrix:")
    print(cm)
    
    return acc, prec, rec, f1, cm

In [8]:
# Step 1: Generate predictions using the improved function
preds_improved = features_sample.apply(llm_predict_cardiovascular, axis=1)

# Step 2: Evaluate the model with the new predictions
evaluate_model(target_sample, preds_improved)

Accuracy: 0.53
Precision: 0.60
Recall: 0.10
F1 Score: 0.18
Confusion Matrix:
[[29  2]
 [26  3]]


(0.5333333333333333,
 0.6,
 0.10344827586206896,
 0.17647058823529413,
 array([[29,  2],
        [26,  3]], dtype=int64))