In [64]:
# 💓 Heart Disease Prediction - All-in-One Script

# Step 1: Install required libraries (run only once in Jupyter)
!pip install pandas scikit-learn joblib

# Step 2: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Step 3: Load dataset from UCI repository (processed Cleveland file)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"

# The raw file has no header row, so the column names are supplied explicitly
column_names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']

df = (
    # Missing entries are written as '?' in the file. Declaring that marker at
    # read time makes pandas treat it as NaN in *every* column, so no column is
    # left holding strings and no per-column numeric conversion is needed.
    pd.read_csv(url, header=None, names=column_names, na_values='?')
    # Discard every row that has at least one missing value
    .dropna()
    # Collapse the target to binary (0 = no disease, any non-zero value = disease)
    .assign(target=lambda frame: frame['target'].ne(0).astype(int))
)

# Step 4: Separate the label column from the predictor columns
y = df["target"]
X = df.drop(columns=["target"])

# Step 5: Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Build and fit the random forest; fit() returns the fitted estimator,
# so construction and training can be written as a single expression
model = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
).fit(X_train, y_train)

# Step 7: Score the held-out rows and report the results
y_pred = model.predict(X_test)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("📊 Confusion Matrix:\n", test_confusion)
print("\n📄 Classification Report:\n", test_report)

# Step 8: Persist the fitted model to disk so it can be reloaded later
joblib.dump(model, "heart_disease_model.pkl")
print("✅ Model saved as 'heart_disease_model.pkl'")

# Step 9: Predict custom patient input
# One single-row column per feature, in the same order as the training columns.
# Categorical values must use the codes found in the Cleveland file; per the
# UCI attribute documentation (heart-disease.names) these are:
#   cp    1-4  (1 = typical angina ... 4 = asymptomatic)
#   slope 1-3  (1 = upsloping, 2 = flat, 3 = downsloping)
#   thal  3 = normal, 6 = fixed defect, 7 = reversible defect
# NOTE(review): thal was previously 1, which is not one of the codes above and
# looks like a leftover from a differently-encoded copy of the dataset; 3
# (normal) is assumed to be the intended value - confirm with the author.
input_dict = {
    'age': [45],
    'sex': [0],
    'cp': [1],
    'trestbps': [130],
    'chol': [220],
    'fbs': [0],
    'restecg': [0],
    'thalach': [180],
    'exang': [0],
    'oldpeak': [0.0],
    'slope': [2],
    'ca': [0],
    'thal': [3],
}
input_df = pd.DataFrame(input_dict)

# Step 10: Reload the persisted model and compute class probabilities for the patient
model = joblib.load("heart_disease_model.pkl")
proba = model.predict_proba(input_df)
print(f"\n🔍 Prediction Confidence (0 = Low Risk, 1 = High Risk): {proba}")

# Step 11: Turn the probability of class 1 into a three-level risk message
# (below 0.3 -> low, above 0.7 -> high, anything in between -> moderate)
disease_probability = proba[0][1]
if disease_probability < 0.3:
    print("✅ Low Risk of Heart Disease")
elif disease_probability > 0.7:
    print("❗ High Risk of Heart Disease")
else:
    print("⚠️ Moderate Risk of Heart Disease")

Defaulting to user installation because normal site-packages is not writeable
📊 Confusion Matrix:
 [[32  4]
 [ 3 21]]

📄 Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.89      0.90        36
           1       0.84      0.88      0.86        24

    accuracy                           0.88        60
   macro avg       0.88      0.88      0.88        60
weighted avg       0.88      0.88      0.88        60

✅ Model saved as 'heart_disease_model.pkl'

🔍 Prediction Confidence (0 = Low Risk, 1 = High Risk): [[0.9 0.1]]
✅ Low Risk of Heart Disease
