In [3]:
# 04_supervised_learning.ipynb

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import os

# ------------------ Load dataset ------------------
# Read the dataset from CSV, then separate the "target" label column from
# the remaining columns, which are used as predictors.
df = pd.read_csv("../data/heart_selected.csv")

y = df["target"]
X = df.drop(columns="target")

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# split repeatable, and stratifying on y keeps the class proportions of the
# train and test splits close to those of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# ------------------ Define models ------------------
# Candidate classifiers keyed by the display name used in the printed and
# saved reports. Insertion order is the order in which they are evaluated.
# Every estimator receives the same fixed random_state so reruns are repeatable.
models = dict(
    [
        (
            "Logistic Regression",
            LogisticRegression(random_state=42, max_iter=1000),
        ),
        (
            "Decision Tree",
            DecisionTreeClassifier(random_state=42),
        ),
        (
            "Random Forest",
            RandomForestClassifier(random_state=42, n_estimators=200),
        ),
        (
            # probability=True makes predict_proba available on the SVC.
            "SVM",
            SVC(random_state=42, probability=True),
        ),
    ]
)

# ------------------ Train, evaluate, and save results ------------------
# Fit every model in `models` on the training split, score it on the held-out
# split, echo the metrics to the console, and persist the same metrics to a
# plain-text report.
results_path = "../results/evaluation_metrics.txt"
# Create the output directory up front so an unwritable location fails before
# any time is spent on training.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

# Report sections are buffered in memory and written only after every model
# has been trained and scored. Opening the file in "w" mode truncates it, so
# writing at the end means a failure part-way through the loop cannot replace
# a previous complete report with a partial one.
report_sections = []

for name, model in models.items():
    # Train
    model.fit(X_train, y_train)

    # Predict
    y_pred = model.predict(X_test)
    # Column 1 of predict_proba is used as the positive-class score; the
    # [:, 1] indexing assumes a binary target.
    y_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None

    # ROC AUC only needs a ranking score, so estimators that expose
    # decision_function but not predict_proba can still be scored.
    # NOTE(review): like the [:, 1] indexing above, this fallback assumes a
    # binary target (1-D decision values).
    if y_proba is not None:
        y_score = y_proba
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X_test)
    else:
        y_score = None

    # Metrics
    acc = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    auc = roc_auc_score(y_test, y_score) if y_score is not None else "N/A"
    # Use the same 3-decimal precision as accuracy instead of the raw float repr.
    auc_text = f"{auc:.3f}" if y_score is not None else auc

    # Print to console
    print(f"\n--- {name} ---")
    print(report)
    print("Accuracy:", round(acc, 3))
    print("ROC AUC:", auc_text)

    # Buffer this model's section of the report file
    report_sections.append(
        f"{name}\n"
        + "-" * 30 + "\n"
        + f"Accuracy: {acc:.3f}\n"
        + report + "\n"
        + f"ROC AUC: {auc_text}\n\n"
    )

# Save to file. The explicit encoding keeps the report identical across
# platforms instead of depending on the locale's default encoding.
with open(results_path, "w", encoding="utf-8") as f:
    f.write("Heart Disease Project - Model Evaluation Metrics\n")
    f.write("=" * 47 + "\n\n")
    f.writelines(report_sections)

print(f"\n✅ Evaluation metrics saved to {results_path}")



--- Logistic Regression ---
              precision    recall  f1-score   support

           0       0.83      0.91      0.87        32
           1       0.88      0.79      0.83        28

    accuracy                           0.85        60
   macro avg       0.85      0.85      0.85        60
weighted avg       0.85      0.85      0.85        60

Accuracy: 0.85
ROC AUC: 0.9542410714285714

--- Decision Tree ---
              precision    recall  f1-score   support

           0       0.68      0.72      0.70        32
           1       0.65      0.61      0.63        28

    accuracy                           0.67        60
   macro avg       0.67      0.66      0.66        60
weighted avg       0.67      0.67      0.67        60

Accuracy: 0.667
ROC AUC: 0.6875

--- Random Forest ---
              precision    recall  f1-score   support

           0       0.72      0.72      0.72        32
           1       0.68      0.68      0.68        28

    accuracy                    