# Heart Disease Prediction system using Machine Learning with Python

In [1]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Read the raw heart-disease table from disk into a pandas DataFrame
csv_path = 'heart.csv'
heart_data = pd.read_csv(csv_path)

In [15]:
# Add a human-readable 'Condition' label derived from the numeric target,
# export the labelled table in three formats, then display it.

# Label text for each target value (0 -> no disease, 1 -> disease). The text
# matches the strings printed by the prediction cells (no trailing space).
CONDITION_LABELS = {0: 'No Heart Disease', 1: 'Heart Disease'}

# Series.map is vectorised and does not depend on the index being 0..n-1.
# A target outside {0, 1} becomes NaN instead of producing a list that is
# shorter than the frame.
Condition = heart_data['target'].map(CONDITION_LABELS).tolist()
heart_data['Condition'] = Condition

# Note: the first export keeps the default index column; the other two omit it.
heart_data.to_csv('new.csv')
heart_data.to_excel('new.xlsx', index = False)
heart_data.to_csv('new.txt', index = False, sep = "\t")
heart_data

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target,Condition
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0,No Heart Disease
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0,No Heart Disease
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0,No Heart Disease
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0,No Heart Disease
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0,No Heart Disease
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1,Heart Disease
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0,No Heart Disease
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0,No Heart Disease
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1,Heart Disease


In [None]:
# summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
heart_data.describe()

In [None]:
# Count the missing (NaN) entries in every column
heart_data.isna().sum()

In [None]:
# checking the distribution of the target variable (number of rows per class)
heart_data['target'].value_counts()

1 --> Defective Heart

0 --> Healthy Heart

In [None]:
# Split the table into features (X) and the label to predict (Y).
# 'Condition' is a text copy of 'target': keeping it in X would leak the label
# and make the numeric model fail on strings, so it is dropped with 'target'.
# errors='ignore' keeps this cell working if 'Condition' was never added.
X = heart_data.drop(columns=['target', 'Condition'], errors='ignore')
Y = heart_data['target']
X.head(5)

In [None]:
# Correlation heatmap of the numeric columns.
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0, and the text
# 'Condition' column may be present, so restrict to numeric dtypes first.
numeric_data = heart_data.select_dtypes(include='number')

plt.figure(figsize = (10,10))
sns.heatmap(numeric_data.corr(), annot=True, fmt=".0%")

In [None]:
# Histogram of patient ages, drawn through the explicit Axes interface
fig, ax = plt.subplots()
ax.hist(heart_data["age"], bins=20, rwidth=0.8)
ax.set_xlabel('Age')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Hold out 30% of the rows for testing; stratify on Y so both splits keep the
# same class balance, and fix the seed so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    stratify=Y,
    random_state=2,
)
print(*(frame.shape for frame in (X, X_train, X_test)))
X_train.dtypes

In [None]:
# Logistic-regression classifier with scikit-learn's default settings.
# NOTE(review): the features are not scaled in this notebook and the default
# max_iter is 100 -- if fit() emits a ConvergenceWarning, raise max_iter or
# standardise the features. TODO confirm on a fresh run.
model = LogisticRegression()

In [None]:
# Fit the logistic-regression classifier on the training split
model.fit(X_train, Y_train)

# Persist the fitted model so it can be reloaded without retraining
with open("heart_disease.pickle", "wb") as model_file:
    pickle.dump(model, model_file)
        

In [None]:
# Accuracy on the training data.
# accuracy_score's signature is (y_true, y_pred): pass the ground truth first.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

print('Accuracy on Training data : ', training_data_accuracy)

In [None]:
# Accuracy on the held-out test data.
# accuracy_score's signature is (y_true, y_pred): pass the ground truth first.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

print('Accuracy on Test data : ', test_data_accuracy)

# Building a Predictive System

In [None]:
# One patient's feature values, in the same column order as the training data
input_data = (35,0,0,138,183,0,1,182,0,1.4,2,0,2)

# The model expects a 2-D array, so turn the single instance into shape (1, n_features)
input_data_reshaped = np.asarray(input_data).reshape(1, -1)

prediction = model.predict(input_data_reshaped)
print(prediction)

# Class 0 is reported as no disease; any other class as disease
print('🔔***Heart Status***🔔')
print('No Heart Disease' if prediction[0] == 0 else 'Heart Disease')
    
    

Dataset columns:

age: The person’s age in years
sex: The person’s sex (1 = male, 0 = female)
cp: chest pain type
— Value 0: asymptomatic
— Value 1: atypical angina
— Value 2: non-anginal pain
— Value 3: typical angina
trestbps: The person’s resting blood pressure (mm Hg on admission to the hospital)
chol: The person’s cholesterol measurement in mg/dl
fbs: The person’s fasting blood sugar (> 120 mg/dl, 1 = true; 0 = false)
restecg: resting electrocardiographic results
— Value 0: showing probable or definite left ventricular hypertrophy by Estes’ criteria
— Value 1: normal
— Value 2: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
thalach: The person’s maximum heart rate achieved
exang: Exercise induced angina (1 = yes; 0 = no)
oldpeak: ST depression induced by exercise relative to rest (‘ST’ relates to positions on the ECG plot. See more here)
slope: the slope of the peak exercise ST segment — 0: downsloping; 1: flat; 2: upsloping
ca: The number of major vessels (0–3)
thal: A blood disorder called thalassemia
Value 0: NULL (dropped from the dataset previously)
Value 1: fixed defect (no blood flow in some part of the heart)
Value 2: normal blood flow
Value 3: reversible defect (a blood flow is observed but it is not normal)
target: Heart disease (1 = yes, 0 = no) — the encoding this notebook's labels and predictions use

In [5]:
import numpy as np
import pandas as pd

# Small demo: wrap a 2x3 integer ndarray in a DataFrame with named columns
rows = [[11, 22, 33], [44, 55, 66]]
my_array = np.array(rows)

print(my_array)
print(type(my_array))

column_names = ['Column_A', 'Column_B', 'Column_C']
df = pd.DataFrame(data=my_array, columns=column_names)

print(df)
print(type(df))


[[11 22 33]
 [44 55 66]]
<class 'numpy.ndarray'>
   Column_A  Column_B  Column_C
0        11        22        33
1        44        55        66
<class 'pandas.core.frame.DataFrame'>


In [4]:
# Mixed text/number records need dtype=object so NumPy does not coerce
# every value to a string
people = [
    ['Jon', 25, 1995, 2016],
    ['Maria', 47, 1973, 2000],
    ['Bill', 38, 1982, 2005],
]
my_array = np.array(people, dtype=object)

column_names = ['Name', 'Age', 'Birth Year', 'Graduation Year']
df = pd.DataFrame(data=my_array, columns=column_names)

print(df)
print(type(df))

    Name Age Birth Year Graduation Year
0    Jon  25       1995            2016
1  Maria  47       1973            2000
2   Bill  38       1982            2005
<class 'pandas.core.frame.DataFrame'>


# testing trained model

In [None]:
# Reload the pickled classifier from disk and classify one patient with it.
input_data = (35,0,0,138,183,0,1,182,0,1.4,2,0,2)
input_data_reshaped = np.asarray(input_data).reshape(1, -1)

# Check for the same file that is loaded below; fail with a clear message
# instead of leaving `regressor` undefined when the file is missing.
model_path = "heart_disease.pickle"
if not os.path.exists(model_path):
    raise FileNotFoundError(
        f"{model_path} not found - run the training cell that saves the model first"
    )

print("model is loading now!!!!!!")
# pickle can execute arbitrary code on load: only open files this notebook wrote.
with open(model_path, 'rb') as model_file:
    regressor = pickle.load(model_file)

result = regressor.predict(input_data_reshaped)
print(result)

# Branch on the reloaded model's own output, not on a variable left over
# from an earlier cell.
print('🔔***Heart Status***🔔')
if result[0] == 0:
    print('No Heart Disease')
else:
    print('Heart Disease')
    