In [None]:

# DESIGN AND IMPLEMENTATION OF HEART DISEASE PREDICTION USING MACHINE LEARNING ALGORITHMS

In [None]:
# Core libraries: numerical arrays, tabular data and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Silence every library warning for the rest of the session
warnings.filterwarnings('ignore')

# Read heart1.csv from the working directory into a DataFrame
dataset = pd.read_csv("heart1.csv")

# Sanity checks: the object's type, then its (rows, columns) shape
print(type(dataset))
print(dataset.shape)

# Peek at the data: the first five rows, then five randomly drawn rows
print(dataset.head())
print(dataset.sample(n=5))

In [None]:
# Description: summary statistics for every column
print(dataset.describe())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
dataset.info()

# Checking for missing values (count of nulls per column)
print(dataset.isnull().sum())

# Analyzing the 'target' variable: summary statistics and distinct values
print(dataset["target"].describe())
print(dataset["target"].unique())

# Clearly, this is a classification problem, with the target variable having values '0' and '1'
# Rank every column by the absolute value of its correlation with 'target'.
# 'target' itself appears in the listing, since it is one of the columns.
print(dataset.corr()["target"].abs().sort_values(ascending=False))


In [None]:
# Exploratory Data Analysis (EDA)
# Analyzing the target variable: how many rows fall into each class
y = dataset["target"]
# Pass the series by keyword so it is unambiguously used as the x variable;
# a bare positional argument is not reliably interpreted as x by seaborn.
sns.countplot(x=y)
plt.show()

target_temp = dataset.target.value_counts()
print(target_temp)

# Class shares are computed against the actual number of rows rather than a
# hard-coded 303, so they stay correct if the CSV changes size.
total_patients = len(y)
print("Percentage of patients with heart problems: " + str((y == 1).sum() * 100 / total_patients))
print("Percentage of patients without heart problems: " + str((y == 0).sum() * 100 / total_patients))

# Analyzing various features: for each listed column, show its distinct
# values and a bar plot of the target against that column.
features = ["sex", "cp", "fbs", "restecg", "exang", "slope", "ca", "thal"]
for feature in features:
    print(f"Unique values in {feature}: {dataset[feature].unique()}")
    sns.barplot(x=dataset[feature], y=y)
    plt.show()


In [None]:
# Train-Test Split
from sklearn.model_selection import train_test_split

# Separate the input columns from the label column
target = dataset["target"]
predictors = dataset.drop(columns="target")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, Y_train, Y_test = train_test_split(
    predictors,
    target,
    test_size=0.2,
    random_state=0,
)

# Report the shape of each of the four resulting pieces
for split_part in (X_train, X_test, Y_train, Y_test):
    print(split_part.shape)


In [None]:
# Model Fitting
from sklearn.metrics import accuracy_score

# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# Try 200 different random seeds and remember the first one that gives the
# highest test accuracy.
# NOTE(review): the seed is chosen by looking at accuracy on X_test/Y_test, so
# the reported score is optimistic; a validation split or cross-validation
# would give an unbiased estimate.
max_accuracy = 0
best_x = 0  # bound up front so the refit below works even if no seed beats 0 %
for seed in range(200):
    candidate = DecisionTreeClassifier(random_state=seed)
    candidate.fit(X_train, Y_train)
    candidate_pred = candidate.predict(X_test)
    # accuracy_score is documented as (y_true, y_pred)
    current_accuracy = round(accuracy_score(Y_test, candidate_pred) * 100, 2)
    if current_accuracy > max_accuracy:
        max_accuracy = current_accuracy
        best_x = seed

# Refit with the selected seed and record the final score
dt = DecisionTreeClassifier(random_state=best_x)
dt.fit(X_train, Y_train)
Y_pred_dt = dt.predict(X_test)
score_dt = round(accuracy_score(Y_test, Y_pred_dt) * 100, 2)
print(f"The accuracy score achieved using Decision Tree is: {score_dt} %")


In [None]:
# XGBoost
import xgboost as xgb

# Gradient-boosted classifier with a logistic objective and a fixed seed
xgb_model = xgb.XGBClassifier(
    objective="binary:logistic",
    random_state=42,
)
xgb_model.fit(X_train, Y_train)

# Predict on the held-out rows and express accuracy as a percentage (2 decimals)
Y_pred_xgb = xgb_model.predict(X_test)
xgb_accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred_xgb)
score_xgb = round(xgb_accuracy * 100, 2)
print(f"The accuracy score achieved using XGBoost is: {score_xgb} %")


In [None]:
# Neural Network
from keras.models import Sequential
from keras.layers import Dense
# Imported here as well so this cell does not rely on an earlier cell's import
from sklearn.metrics import accuracy_score

# One hidden ReLU layer of 11 units feeding a single sigmoid output unit.
# The input width is read from the training data instead of a hard-coded 13,
# so the model still builds if the number of predictor columns changes.
model = Sequential()
model.add(Dense(11, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=300)

# Threshold the sigmoid outputs at 0.5 to obtain 0/1 labels and score them on
# the held-out test set. The result is stored in score_nn, which the final
# score summary cell reads.
Y_pred_nn = (model.predict(X_test) > 0.5).astype(int).ravel()
score_nn = round(accuracy_score(Y_test, Y_pred_nn) * 100, 2)
print(f"The accuracy score achieved using Neural Network is: {score_nn} %")

In [None]:
# ## VI. Output final score:
# Each display name is paired with the notebook variable expected to hold its
# accuracy score. Scores are looked up by name so that a model whose score was
# never computed is reported and skipped instead of aborting the cell with a
# NameError. `algorithms` and `scores` stay index-aligned lists.
score_variables = [
    ("Decision Tree", "score_dt"),
    ("Random Forest", "score_rf"),
    ("XGBoost", "score_xgb"),
    ("Neural Network", "score_nn"),
]

algorithms = []
scores = []
for algorithm_name, variable_name in score_variables:
    if variable_name not in globals():
        print(f"No accuracy score available for {algorithm_name} ('{variable_name}' is not defined); skipping")
        continue
    algorithms.append(algorithm_name)
    scores.append(globals()[variable_name])

for algorithm_name, algorithm_score in zip(algorithms, scores):
    print(f"The accuracy score achieved using {algorithm_name} is: {algorithm_score} %")

In [None]:
# Plotting the accuracy scores
sns.set(rc={'figure.figsize': (15, 8)})

# Draw the bars once. Calling both plt.bar and sns.barplot would put two
# overlapping sets of bars for the same data on the same axes.
ax = sns.barplot(x=algorithms, y=scores)

# Label the axes on the object seaborn returned so the labels belong to this plot
ax.set_xlabel("Algorithms")
ax.set_ylabel("Accuracy score")
plt.show()