In [49]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [50]:
# Load the raw dataset into a DataFrame. The path is relative, so the CSV
# has to be present in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='Heart_Disease_Prediction.csv')

In [51]:
# Overwrite the column labels read from the CSV with short lowercase names.
# Assignment is positional: the i-th existing column receives the i-th name,
# and the last column becomes the 'target' label used by later cells.
feature_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal',
]
df.columns = [*feature_names, 'target']

In [52]:
# Encode the label column as integers: 'Presence' -> 1, 'Absence' -> 0.
# The mapping also accepts values that are already 0/1, so re-running this
# cell leaves the column unchanged (a plain two-key .map() would turn every
# value into NaN on a second run, because 0 and 1 are not among its keys).
target_codes = {'Presence': 1, 'Absence': 0, 1: 1, 0: 0}
encoded_target = df['target'].map(target_codes)

# Series.map yields NaN for any value that is missing from the mapping.
# Fail here with the offending labels instead of letting NaN targets reach
# train_test_split(stratify=...) or np.bincount further down.
unmapped_labels = df.loc[encoded_target.isna(), 'target'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unexpected values in 'target' column: {unmapped_labels.tolist()}"
    )

# With no NaN left the values are exactly 0/1; pin the dtype explicitly.
df['target'] = encoded_target.astype('int64')

In [53]:
# Separate the label from the predictors: y is the 'target' column,
# X is every remaining column of df.
y = df['target']
X = df.drop(columns='target')

In [54]:
# Hold out 20% of the rows for testing. stratify=y asks for the class
# proportions of y to be preserved in both partitions, and the fixed
# random_state makes the split repeatable across runs.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Partition sizes (number of rows in each feature frame).
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")

# Per-class row counts; position i of the bincount array is the count of label i.
train_class_counts = np.bincount(y_train)
print(f"Target distribution in train: {train_class_counts}")
test_class_counts = np.bincount(y_test)
print(f"Target distribution in test: {test_class_counts}\n")

Training samples: 216
Test samples: 54
Target distribution in train: [120  96]
Target distribution in test: [30 24]



In [45]:
# X_train / X_test are produced by the stratified train_test_split call in the
# preceding cell. This cell used to repeat that identical call (same inputs,
# test_size and random_state), which rebound all four split variables for no
# benefit and left two copies of the split settings to keep in sync. It now
# only reports the shapes of the existing partitions.
print(f"\nTraining set size: {X_train.shape}")
print(f"Test set size: {X_test.shape}")


Training set size: (216, 13)
Test set size: (54, 13)


In [55]:
# Section banner for the Random Forest results.
rf_rule = "=" * 50
print(rf_rule)
print("RANDOM FOREST MODEL")
print(rf_rule)

# 100-tree forest with a fixed seed. fit() returns the estimator itself,
# so construction and training are chained into one expression.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train
)

# Predict on the held-out partition and score against the true labels.
y_pred_rf = rf_model.predict(X_test)
rf_acc = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
print(f"Random Forest Accuracy: {rf_acc:.4f}")

# Per-class precision / recall / F1 table.
rf_report = classification_report(y_true=y_test, y_pred=y_pred_rf)
print("\nClassification Report (Random Forest):")
print(rf_report)

# Confusion matrix of true labels (first argument) vs. predictions (second).
rf_confusion = confusion_matrix(y_true=y_test, y_pred=y_pred_rf)
print("\nConfusion Matrix (Random Forest):")
print(rf_confusion)

RANDOM FOREST MODEL
Random Forest Accuracy: 0.8148

Classification Report (Random Forest):
              precision    recall  f1-score   support

           0       0.86      0.80      0.83        30
           1       0.77      0.83      0.80        24

    accuracy                           0.81        54
   macro avg       0.81      0.82      0.81        54
weighted avg       0.82      0.81      0.82        54


Confusion Matrix (Random Forest):
[[24  6]
 [ 4 20]]


In [56]:
# Section banner for the XGBoost results.
print("\n" + "="*50)
print("XGBOOST MODEL")
print("="*50)

# Gradient-boosted trees with a fixed seed, evaluated with log-loss.
# `use_label_encoder` is deliberately not passed: it is a deprecated argument
# that newer xgboost releases no longer accept as a real parameter (they warn
# that it is "not used"). The labels are already encoded as 0/1 upstream, so
# no label encoding is needed here.
xgb_model = XGBClassifier(
    random_state=42,
    eval_metric='logloss'
)
xgb_model.fit(X_train, y_train)

# Predict on the held-out partition and score against the true labels.
y_pred_xgb = xgb_model.predict(X_test)
xgb_acc = accuracy_score(y_test, y_pred_xgb)

print(f"XGBoost Accuracy: {xgb_acc:.4f}")
print("\nClassification Report (XGBoost):")
print(classification_report(y_test, y_pred_xgb))

print("\nConfusion Matrix (XGBoost):")
print(confusion_matrix(y_test, y_pred_xgb))


XGBOOST MODEL
XGBoost Accuracy: 0.8148

Classification Report (XGBoost):
              precision    recall  f1-score   support

           0       0.86      0.80      0.83        30
           1       0.77      0.83      0.80        24

    accuracy                           0.81        54
   macro avg       0.81      0.82      0.81        54
weighted avg       0.82      0.81      0.82        54


Confusion Matrix (XGBoost):
[[24  6]
 [ 4 20]]


In [57]:
# Section banner for the side-by-side comparison.
comparison_rule = "=" * 50
print("\n" + comparison_rule)
print("MODEL COMPARISON")
print(comparison_rule)

# Test-set accuracies computed in the two model cells.
print(f"Random Forest Accuracy: {rf_acc:.4f}")
print(f"XGBoost Accuracy:       {xgb_acc:.4f}")

# Optional: Random Forest feature importances, one row per column of X,
# ordered from most to least important.
print("\nRandom Forest Feature Importances:")
importance_table = pd.DataFrame(
    {
        'Feature': X.columns,
        'Importance': rf_model.feature_importances_,
    }
)
importance_df = importance_table.sort_values(by='Importance', ascending=False)
print(importance_df)


MODEL COMPARISON
Random Forest Accuracy: 0.8148
XGBoost Accuracy:       0.8148

Random Forest Feature Importances:
     Feature  Importance
2         cp    0.129436
7    thalach    0.118366
9    oldpeak    0.109802
11        ca    0.108405
12      thal    0.100866
4       chol    0.094452
0        age    0.083341
3   trestbps    0.071727
10     slope    0.062165
8      exang    0.050438
1        sex    0.045102
6    restecg    0.016691
5        fbs    0.009210
