In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

In [2]:

# Load the Cleveland Heart Disease Dataset
data = pd.read_csv('heart.csv')

# Preprocessing the dataset
# Replace missing values with the median of each column
data = data.fillna(data.median())

In [3]:
# Split the dataset into training and testing sets
X = data.drop(['target'], axis=1)
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [4]:
# Standardize the data
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


In [5]:
# Build individual machine learning models
lr = LogisticRegression(random_state=0)
dt = DecisionTreeClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)
ab = AdaBoostClassifier(random_state=0)

In [7]:
# Train individual models
lr.fit(X_train, y_train)
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)
ab.fit(X_train, y_train)

AdaBoostClassifier(random_state=0)

In [8]:
# Evaluate individual models
lr_pred = lr.predict(X_test)
dt_pred = dt.predict(X_test)
rf_pred = rf.predict(X_test)
ab_pred = ab.predict(X_test)

In [9]:
print('Logistic Regression Scores:')
print('Accuracy: ', accuracy_score(y_test, lr_pred))
print('Precision: ', precision_score(y_test, lr_pred))
print('Recall: ', recall_score(y_test, lr_pred))
print('F1-score: ', f1_score(y_test, lr_pred))
print('\n')

Logistic Regression Scores:
Accuracy:  0.8634146341463415
Precision:  0.8264462809917356
Recall:  0.9345794392523364
F1-score:  0.8771929824561404




In [10]:
print('Decision Tree Scores:')
print('Accuracy: ', accuracy_score(y_test, dt_pred))
print('Precision: ', precision_score(y_test, dt_pred))
print('Recall: ', recall_score(y_test, dt_pred))
print('F1-score: ', f1_score(y_test, dt_pred))
print('\n')

Decision Tree Scores:
Accuracy:  1.0
Precision:  1.0
Recall:  1.0
F1-score:  1.0




In [11]:
print('Random Forest Scores:')
print('Accuracy: ', accuracy_score(y_test, rf_pred))
print('Precision: ', precision_score(y_test, rf_pred))
print('Recall: ', recall_score(y_test, rf_pred))
print('F1-score: ', f1_score(y_test, rf_pred))
print('\n')


Random Forest Scores:
Accuracy:  1.0
Precision:  1.0
Recall:  1.0
F1-score:  1.0




In [12]:
print('AdaBoost Scores:')
print('Accuracy: ', accuracy_score(y_test, ab_pred))
print('Precision: ', precision_score(y_test, ab_pred))
print('Recall: ', recall_score(y_test, ab_pred))
print('F1-score: ', f1_score(y_test, ab_pred))
print('\n')

AdaBoost Scores:
Accuracy:  0.8926829268292683
Precision:  0.8695652173913043
Recall:  0.9345794392523364
F1-score:  0.9009009009009009




In [13]:
# Combine the outputs of individual models using Voting Classifier
voting = VotingClassifier(estimators=[('lr', lr), ('dt', dt), ('rf', rf), ('ab', ab)], voting='hard')
voting.fit(X_train, y_train)


VotingClassifier(estimators=[('lr', LogisticRegression(random_state=0)),
                             ('dt', DecisionTreeClassifier(random_state=0)),
                             ('rf', RandomForestClassifier(random_state=0)),
                             ('ab', AdaBoostClassifier(random_state=0))])

In [14]:
# Evaluate the ensemble model
voting_pred = voting.predict(X_test)

In [15]:
print('Ensemble Model Scores:')
print('Accuracy: ', accuracy_score(y_test, voting_pred))
print('Precision: ', precision_score(y_test, voting_pred))
print('Recall: ', recall_score(y_test, voting_pred))
print('F1-score: ', f1_score(y_test, voting_pred))

Ensemble Model Scores:
Accuracy:  0.975609756097561
Precision:  1.0
Recall:  0.9532710280373832
F1-score:  0.9760765550239235
