In [1]:
# Train a Random Forest classifier on the preprocessed heart dataset, report
# hold-out metrics, and persist the fitted model to disk.

# --- Imports -----------------------------------------------------------------
# All dependencies are declared up front so that a missing package fails
# immediately, not after the model has already been trained.
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune lives here instead of being scattered
# through the cell as magic values.
DATA_PATH = "preprocessed_heart.csv"    # input CSV, relative to the working directory
MODEL_PATH = "heart_disease_model.pkl"  # where the fitted model is written
TARGET_COLUMN = "target"                # label column; every other column is a feature
TEST_SIZE = 0.2                         # fraction of rows held out for evaluation
N_ESTIMATORS = 100                      # number of trees in the forest
SEED = 42                               # single seed shared by the split and the model

# --- Load the preprocessed data ----------------------------------------------
data = pd.read_csv(DATA_PATH)

# Fail with an actionable message instead of an opaque pandas error if the
# label column is absent from the file.
if TARGET_COLUMN not in data.columns:
    raise KeyError(
        f"Column '{TARGET_COLUMN}' not found in {DATA_PATH}; "
        f"available columns: {list(data.columns)}"
    )

# --- Step 1: Split the data into features (X) and target (y) -----------------
X = data.drop(columns=[TARGET_COLUMN])  # all columns except the label
y = data[TARGET_COLUMN]                 # label column

# --- Step 2: Split the dataset into training and testing sets ----------------
# NOTE(review): the split is not stratified on y. Consider passing stratify=y
# so both partitions keep the same class balance -- doing so would change the
# metrics recorded in this cell's output, so it is left as-is here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)
print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")

# --- Step 3: Train a Random Forest Classifier --------------------------------
model = RandomForestClassifier(random_state=SEED, n_estimators=N_ESTIMATORS)
model.fit(X_train, y_train)
print("\nModel training complete.")

# --- Step 4: Make predictions on the held-out set ----------------------------
y_pred = model.predict(X_test)

# --- Step 5: Evaluate the model ----------------------------------------------
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy * 100:.2f}%")

# --- Save the trained model (optional, for deployment) -----------------------
# joblib files are pickle-based: only load this artifact from a trusted source.
joblib.dump(model, MODEL_PATH)
print(f"\nTrained model saved as '{MODEL_PATH}'")

Training samples: 241, Testing samples: 61

Model training complete.

Confusion Matrix:
[[26  3]
 [ 5 27]]

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.90      0.87        29
           1       0.90      0.84      0.87        32

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61


Accuracy: 86.89%

Trained model saved as 'heart_disease_model.pkl'
