# 1. Importing dependencies

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
# Preprocessing pipeline (reused)
preprocessor = Pipeline([
    ('scaler', StandardScaler()),
    ('select', SelectKBest(score_func=f_classif, k=10)),
    ('pca', PCA(n_components=8))
])


In [3]:
df = pd.read_csv(r"C:\CodeClause_Internship_Heart_Disease_Risk_Assessment\heart_train.csv")  # Replace with your filename

df.drop(columns="id", axis=1, inplace=True)  # Drop the 'id' column
# 🎯 3. Separate features and label
X = df.drop("cardio", axis=1)
y = df["cardio"]

# 2. Split your cleaned data (after outlier handling, etc.)

In [4]:
X = df.drop(columns='cardio')
y = df['cardio']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


# 3. Define all model pipelines

In [5]:
# XGBoost
xgb_pipe = Pipeline([
    ('pre', preprocessor),
    ('model', XGBClassifier(use_label_encoder=False, eval_metric='logloss'))
])

# Random Forest
rf_pipe = Pipeline([
    ('pre', preprocessor),
    ('model', RandomForestClassifier())
])

# Logistic Regression
logreg_pipe = Pipeline([
    ('pre', preprocessor),
    ('model', LogisticRegression(max_iter=1000))
])


# 4. Fit and Evaluate

In [6]:
def evaluate_model(name, pipeline):
    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)
    acc = accuracy_score(y_test, preds)
    print(f"🔍 {name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds))
    return pipeline

xgb_model = evaluate_model("XGBoost", xgb_pipe)
rf_model = evaluate_model("Random Forest", rf_pipe)
logreg_model = evaluate_model("Logistic Regression", logreg_pipe)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🔍 XGBoost Accuracy: 0.7194
              precision    recall  f1-score   support

           0       0.71      0.74      0.72      7004
           1       0.73      0.70      0.71      6996

    accuracy                           0.72     14000
   macro avg       0.72      0.72      0.72     14000
weighted avg       0.72      0.72      0.72     14000

🔍 Random Forest Accuracy: 0.6989
              precision    recall  f1-score   support

           0       0.70      0.69      0.70      7004
           1       0.70      0.70      0.70      6996

    accuracy                           0.70     14000
   macro avg       0.70      0.70      0.70     14000
weighted avg       0.70      0.70      0.70     14000

🔍 Logistic Regression Accuracy: 0.7105
              precision    recall  f1-score   support

           0       0.70      0.74      0.72      7004
           1       0.73      0.68      0.70      6996

    accuracy                           0.71     14000
   macro avg       0.71      

# 5. Voting classifier

In [7]:
voting_pipe = Pipeline([
    ('pre', preprocessor),
    ('voting', VotingClassifier(estimators=[
        ('xgb', xgb_model.named_steps['model']),
        ('rf', rf_model.named_steps['model']),
        ('logreg', logreg_model.named_steps['model'])
    ], voting='soft'))  # soft = uses probability
])

# Evaluate Ensemble
voting_model = evaluate_model("Voting Classifier (Ensemble)", voting_pipe)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🔍 Voting Classifier (Ensemble) Accuracy: 0.7175
              precision    recall  f1-score   support

           0       0.71      0.73      0.72      7004
           1       0.72      0.71      0.71      6996

    accuracy                           0.72     14000
   macro avg       0.72      0.72      0.72     14000
weighted avg       0.72      0.72      0.72     14000

