In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pickle

# Load the dataset (a CSV that must contain a "target" column)
dataset_path = "heart.csv"
heart_data = pd.read_csv(dataset_path)

# Separate features and target
X = heart_data.drop(columns="target")
Y = heart_data["target"]

# Split the dataset into training and testing sets BEFORE scaling, so that
# the scaler's mean/variance are estimated from training rows only.
# Fitting the scaler on the full dataset would leak test-set statistics
# into training and make the reported test accuracy optimistic.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=42
)

# Normalize features: fit on the training split, then apply the same
# transformation to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Kept so that any later cell relying on `X_scaled` still finds it: the full
# feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

# Build the five base learners. Every estimator is seeded with 42 so that
# repeated runs give the same fitted models.
log_reg = LogisticRegression(random_state=42, max_iter=2000)
decision_tree = DecisionTreeClassifier(random_state=42, max_depth=5)
random_forest = RandomForestClassifier(random_state=42, n_estimators=200)
# probability=True enables predict_proba, which soft voting relies on.
svc = SVC(kernel="rbf", probability=True, random_state=42)
gradient_boost = GradientBoostingClassifier(
    random_state=42, learning_rate=0.1, n_estimators=100
)

# (short name, estimator) pairs, shared by the fitting loop and the ensemble.
base_estimators = [
    ("lr", log_reg),
    ("dt", decision_tree),
    ("rf", random_forest),
    ("svc", svc),
    ("gb", gradient_boost),
]

# Fit each base learner on the scaled training split so it can be scored
# on its own later.
for short_name, base_estimator in base_estimators:
    base_estimator.fit(X_train_scaled, Y_train)

# Soft-voting ensemble over the same five learners, trained on the same data.
voting_clf = VotingClassifier(estimators=base_estimators, voting="soft")
voting_clf.fit(X_train_scaled, Y_train)

# Collect every fitted model under a human-readable label.
models = {
    "Logistic Regression": log_reg,
    "Decision Tree": decision_tree,
    "Random Forest": random_forest,
    "Support Vector Classifier": svc,
    "Gradient Boosting": gradient_boost,
    "Voting Classifier": voting_clf,
}

# Accuracy of each model on the held-out test split, keyed by label.
accuracy_results = {
    label: accuracy_score(Y_test, fitted_model.predict(X_test_scaled))
    for label, fitted_model in models.items()
}

# Tabulate the scores (one row per model) and print them.
accuracy_df = pd.DataFrame(
    {
        "Model": list(accuracy_results.keys()),
        "Accuracy": list(accuracy_results.values()),
    }
)
print("Model Accuracy Results:")
print(accuracy_df)

# Save the scaler and the Voting Classifier.
# Each file is opened in a `with` block so the handle is flushed and closed
# even if pickling raises; a bare open() passed to pickle.dump is never
# closed explicitly.
# NOTE: these are pickle files — only load them back from trusted locations.
scaler_filename = "heart_scaler.sav"
with open(scaler_filename, "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

model_filename = "voting_classifier_heart.sav"
with open(model_filename, "wb") as model_file:
    pickle.dump(voting_clf, model_file)

print(f"Scaler saved as {scaler_filename}.")
print(f"Voting Classifier saved as {model_filename}.")


Model Accuracy Results:
                       Model  Accuracy
0        Logistic Regression  0.803279
1              Decision Tree  0.786885
2              Random Forest  0.819672
3  Support Vector Classifier  0.836066
4          Gradient Boosting  0.819672
5          Voting Classifier  0.819672
Scaler saved as heart_scaler.sav.
Voting Classifier saved as voting_classifier_heart.sav.
