In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the processed Cleveland heart-disease file straight from the UCI
# repository. Column names are supplied explicitly, and "?" entries are
# parsed as missing values (NaN).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
column_names = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal",
    "target",
]
data = pd.read_csv(
    url,
    names=column_names,
    na_values="?",
)

In [3]:
# Clean the data in one non-mutating chain:
#   1. drop every row that contains a missing value;
#   2. binarise the label: 1 where the original target is > 0, otherwise 0.
# The comparison is vectorised (no per-row Python lambda) and `inplace=True`
# is avoided, so the frame loaded above is never modified behind the scenes;
# `data` is simply rebound to the cleaned result. Re-running the cell is safe.
data = (
    data
    .dropna()
    .assign(target=lambda frame: (frame["target"] > 0).astype("int64"))
)

In [4]:
# Separate the predictors from the label: X is every column except
# "target", y is the "target" column on its own.
X = data.drop(columns="target")
y = data["target"]

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits, so nothing
# from the test rows influences the fitted parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Build a 100-tree random forest with a fixed seed and fit it on the
# scaled training features.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
rf_classifier.fit(X_train_scaled, y_train)

In [8]:
# Predict a class label for every row of the scaled test split.
y_pred = rf_classifier.predict(X=X_test_scaled)

In [9]:
# Fraction of test rows whose predicted label matches the true label,
# reported to two decimal places.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.88


In [10]:
# Emit a heading followed by the per-class precision / recall / F1 table
# for the test predictions (heading and table on separate lines).
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)


Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.89      0.90        36
           1       0.84      0.88      0.86        24

    accuracy                           0.88        60
   macro avg       0.88      0.88      0.88        60
weighted avg       0.88      0.88      0.88        60



In [11]:
# Pair each feature name with the importance the fitted forest assigned to
# it, then order the table from most to least important. X.columns is in the
# same column order as the matrix the forest was trained on.
importances = rf_classifier.feature_importances_
feature_importances = (
    pd.DataFrame({'feature': X.columns, 'importance': importances})
    .sort_values(by='importance', ascending=False)
)

In [12]:
# Emit a heading followed by the sorted importance table as plain text.
print("\nFeature Importances:", feature_importances, sep="\n")


Feature Importances:
     feature  importance
7    thalach    0.128595
11        ca    0.128311
12      thal    0.104479
9    oldpeak    0.103659
2         cp    0.103653
4       chol    0.097724
0        age    0.094957
3   trestbps    0.089756
8      exang    0.052210
10     slope    0.033971
1        sex    0.032460
6    restecg    0.016737
5        fbs    0.013488
