# Heart Disease Prediction using Supervised Machine Learning

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import os
import joblib

In [4]:
# Load dataset
df=pd.read_csv('heart-disease.csv')

In [9]:
df.isnull().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [11]:
df.duplicated().sum()

np.int64(0)

In [5]:
# Separate features and target
X = df.drop("HeartDisease", axis=1)
y = df["HeartDisease"]

In [6]:
# Identify categorical and numerical columns
cat_cols = X.select_dtypes(include="object").columns.tolist()
num_cols = X.select_dtypes(exclude="object").columns.tolist()

In [7]:
cat_cols

['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

In [8]:
num_cols

['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']

In [13]:
# One-hot encode categorical columns

ohe = OneHotEncoder(drop="first", sparse_output=False)
X_cat = ohe.fit_transform(X[cat_cols])

In [14]:
# Scale numerical columns

scaler = StandardScaler()
X_num = scaler.fit_transform(X[num_cols])

In [15]:
# Combine processed features
X_processed = np.hstack([X_cat, X_num])

In [17]:
# Train test split

X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42, stratify=y
)

In [18]:
# models

models = {
    "Random Forest": RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=50, max_depth=3, random_state=42),
    "SVC (RBF Kernel)": SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42),
    "Linear SVC": LinearSVC(C=1.0, max_iter=10000, random_state=42),
    "Logistic Regression": LogisticRegression(C=1.0, max_iter=5000, random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=3),
    "Decision Tree": DecisionTreeClassifier(max_depth=5, random_state=42)
}


In [19]:
import dagshub
dagshub.init(repo_owner='chandrasekharcse522',
             repo_name='Heart-Disease-Prediction-Machine-Learning',
             mlflow=True)

import mlflow
with mlflow.start_run():
  mlflow.log_param('parameter name', 'value')
  mlflow.log_metric('metric name', 1)

üèÉ View run rebellious-tern-298 at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/0/runs/b5e0474fbaf5443cb4fd44309d9880e9
üß™ View experiment at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/0


In [20]:
mlflow.set_experiment("Heart_Disease_Prediction")

2025/11/07 13:46:32 INFO mlflow.tracking.fluent: Experiment with name 'Heart_Disease_Prediction' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/427d1ff204544c51ac656aefbd494b03', creation_time=1762582927680, experiment_id='1', last_update_time=1762582927680, lifecycle_stage='active', name='Heart_Disease_Prediction', tags={}>

In [22]:
best_model_name = None
best_accuracy = 0
best_model = None

for name, model in models.items():
    with mlflow.start_run(run_name=name):
        # Train
        model.fit(X_train, y_train)
        
        # Predict and evaluate
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        
        # Log parameters & metrics only
        mlflow.log_param("model_name", name)
        mlflow.log_metric("accuracy", acc)
        
        print(f"‚úÖ {name} Accuracy: {acc:.3f}")
        # Track best model
        if acc > best_accuracy:
            best_accuracy = acc
            best_model_name = name
            best_model = model

print("\n Best Model:", best_model_name)
print(" Best Accuracy:", best_accuracy)

‚úÖ Random Forest Accuracy: 0.837
üèÉ View run Random Forest at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/1/runs/be460ba0e1a04495bd04db764f09c1d4
üß™ View experiment at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/1
‚úÖ Gradient Boosting Accuracy: 0.891
üèÉ View run Gradient Boosting at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/1/runs/657c87740c6843d9ad0109d1acf5ef0a
üß™ View experiment at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/1
‚úÖ SVC (RBF Kernel) Accuracy: 0.886
üèÉ View run SVC (RBF Kernel) at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/1/runs/0638f63eb742459289609967d5160e8c
üß™ View experiment at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-L

In [26]:
os.makedirs("models",exist_ok=True)

joblib.dump(best_model, "models/best_heart_model.pkl")
print("Best model saved successfully")

Best model saved successfully


## Predict single value

In [25]:
# Example single patient
new_patient = pd.DataFrame({
    "Age": [50],
    "Sex": ["M"],
    "ChestPainType": ["ATA"],
    "RestingBP": [140],
    "Cholesterol": [250],
    "FastingBS": [0],
    "RestingECG": ["Normal"],
    "MaxHR": [150],
    "ExerciseAngina": ["N"],
    "Oldpeak": [1.0],
    "ST_Slope": ["Up"]
})

In [27]:
# --- Preprocess ---
# 1Ô∏è‚É£ One-hot encode categorical features
X_cat_new = ohe.transform(new_patient[cat_cols])

# 2Ô∏è‚É£ Scale numeric features
X_num_new = scaler.transform(new_patient[num_cols])

# 3Ô∏è‚É£ Combine processed features
X_new_processed = np.hstack([X_cat_new, X_num_new])

# --- Predict ---
prediction = best_model.predict(X_new_processed)

# --- Output ---
if prediction[0] == 1:
    print("Predicted Heart Disease: Yes")
else:
    print("Predicted Heart Disease: No")

Predicted Heart Disease: No


In [28]:
# log to mlflow

import mlflow

# Example single patient (same as before)
new_patient = pd.DataFrame({
    "Age": [50],
    "Sex": ["M"],
    "ChestPainType": ["ATA"],
    "RestingBP": [140],
    "Cholesterol": [250],
    "FastingBS": [0],
    "RestingECG": ["Normal"],
    "MaxHR": [150],
    "ExerciseAngina": ["N"],
    "Oldpeak": [1.0],
    "ST_Slope": ["Up"]
})

# Preprocess
X_cat_new = ohe.transform(new_patient[cat_cols])
X_num_new = scaler.transform(new_patient[num_cols])
X_new_processed = np.hstack([X_cat_new, X_num_new])

# Predict
prediction = best_model.predict(X_new_processed)
pred_text = "Yes" if prediction[0] == 1 else "No"

print("Predicted Heart Disease:", pred_text)

# --- Log prediction to MLflow ---
mlflow.set_experiment("Heart_Disease_Prediction")

with mlflow.start_run(run_name="Single_Patient_Prediction"):
    # Log input features
    for col in new_patient.columns:
        mlflow.log_param(col, new_patient[col][0])
    
    # Log prediction
    mlflow.log_metric("prediction", prediction[0])
    
    mlflow.log_param("predicted_label", pred_text)

print("‚úÖ Prediction logged to MLflow successfully!")

Predicted Heart Disease: No
üèÉ View run Single_Patient_Prediction at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/1/runs/c9282deaf06e4a0b830a8afed9c3617c
üß™ View experiment at: https://dagshub.com/chandrasekharcse522/Heart-Disease-Prediction-Machine-Learning.mlflow/#/experiments/1
‚úÖ Prediction logged to MLflow successfully!
