In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

# --- Data ---------------------------------------------------------------
# Read the table from disk; the "target" column is the label and every
# remaining column is used as a feature.
df = pd.read_csv("heart.csv")
y = df["target"]
X = df.drop(columns=["target"])

# Columns listed here are one-hot encoded; all other feature columns are
# handled as numeric (order follows X.columns).
categorical_cols = ["cp", "restecg", "slope", "thal"]
numerical_cols = [name for name in X.columns if name not in categorical_cols]

# --- Hold-out split -----------------------------------------------------
# 80/20 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Preprocessing ------------------------------------------------------
# Numeric branch: fill gaps with the column median, then standardize.
numeric_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; handle_unknown="ignore" keeps unseen categories from raising at
# prediction time.
categorical_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

# Route each column group to its branch.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numerical_cols),
    ("cat", categorical_transformer, categorical_cols),
])

# --- Model --------------------------------------------------------------
# Preprocessing and the classifier live in a single pipeline, so the same
# transformations are applied at fit and predict time.
clf = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(random_state=42)),
])
clf.fit(X_train, y_train)

# --- Evaluation on the held-out rows --------------------------------------
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

# --- Persistence --------------------------------------------------------
# Dump the whole fitted pipeline (preprocessing + classifier) to one file.
joblib.dump(clf, "heart_disease_model.sav")
print("✅ Model saved as 'heart_disease_model.sav'")


Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.86      0.88        29
           1       0.88      0.91      0.89        32

    accuracy                           0.89        61
   macro avg       0.89      0.88      0.88        61
weighted avg       0.89      0.89      0.89        61

✅ Model saved as 'heart_disease_model.sav'


In [3]:
import joblib
import pandas as pd

# Restore the fitted pipeline from disk.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
model = joblib.load("heart_disease_model.sav")

# A single demo record, written as feature -> value pairs so every value
# sits next to the column it belongs to. The author intended this record to
# look like a healthy patient (expected prediction: 0).
sample_patient = {
    "age": 35,
    "sex": 0,
    "cp": 3,
    "trestbps": 120,
    "chol": 180,
    "fbs": 0,
    "restecg": 0,
    "thalach": 170,
    "exang": 0,
    "oldpeak": 0.0,
    "slope": 2,
    "ca": 0,
    "thal": 1,
}

# Column names and the row of values, both in the dict's insertion order.
feature_names = list(sample_patient)
demo_input = [list(sample_patient.values())]

# One-row DataFrame with named columns, as the pipeline selects by name.
input_df = pd.DataFrame(demo_input, columns=feature_names)

# Label meaning as stated by the original author: 0 = Healthy, 1 = Heart Disease.
# NOTE(review): the recorded run printed 1 for this "healthy" sample; confirm
# how `target` is encoded in heart.csv before relying on that interpretation.
prediction = model.predict(input_df)
print("Prediction:", prediction[0])


Prediction: 1
