# Heart Disease Prediction

Dataset link: https://www.dropbox.com/s/ltksjtb54wrov5q/HeartDisease.csv?dl=1

## Importing Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

## Loading the dataset

In [2]:
# Read the heart-disease CSV from the notebook's working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="./HeartDisease.csv")

## Exploring the data

In [3]:
# Preview the first five rows of the dataset.
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [4]:
df.info()  # Print each column's name, dtype and non-null count, plus the frame's memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   int64  
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   int64  
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB


In [5]:
# Count the missing values in every column.
df.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [6]:
# Separate the label column (y) from the predictor columns (X) for modelling.
y = df["target"]
X = df.drop(columns=["target"])

In [7]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=14,
)

## Importing the Classifier Libraries

In [8]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [9]:
# Instantiate one estimator per algorithm under a short alias.
# The tree-based models draw random numbers while fitting, so they are seeded
# (with the same seed value used for the train/test split) to make reruns of
# this notebook produce the same fitted models and scores.
sv = SVC()
rfc = RandomForestClassifier(random_state=14)
dtc = DecisionTreeClassifier(random_state=14)

## Predicting with SVC

In [10]:
# Train the support-vector classifier on the training split.
sv.fit(X=X_train, y=y_train)

In [11]:
# Predict labels for the held-out test rows with the fitted SVC.
y_pred = sv.predict(X=X_test)

In [12]:
# Score the SVC predictions once, keep the value for the summary table, and print it
# (the metric was previously computed twice: once to store it and once to print it).
acc_svc = accuracy_score(y_test, y_pred)
print(acc_svc)

0.7219512195121951


## Predicting with Random Forest Classifier

In [13]:
# Train the random forest on the training split and predict the held-out test rows.
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
# Compute the accuracy once and reuse it for both the printout and the summary table.
acc_rfc = accuracy_score(y_test, y_pred)
# NOTE(review): the recorded run printed 1.0 here. A perfect test score deserves a
# sanity check, e.g. that no duplicated rows sit on both sides of the split
# (df.duplicated().sum()) -- TODO confirm.
print(acc_rfc)

1.0


## Predicting with Decision Tree Classifier

In [14]:
# Train the decision tree on the training split and predict the held-out test rows.
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
# Compute the accuracy once and reuse it for both the printout and the summary table.
acc_dtc = accuracy_score(y_test, y_pred)
# NOTE(review): the recorded run printed 1.0 here. A perfect test score deserves a
# sanity check, e.g. that no duplicated rows sit on both sides of the split
# (df.duplicated().sum()) -- TODO confirm.
print(acc_dtc)

1.0


In [16]:
# Collect every model's test accuracy in one table for side-by-side comparison.
# The frame is built with a single constructor call instead of being grown through
# repeated pd.concat, and the row labels are an ordered list rather than set literals.
# Columns are already in ('Method', 'accuracy') order, so no reselection is needed.
results_model = pd.DataFrame(
    {
        'Method': ['SVC', 'Random Forest', 'Decision Tree'],
        'accuracy': [acc_svc, acc_rfc, acc_dtc],
    },
    index=['1', '2', '3'],
)
results_model

Unnamed: 0,Method,accuracy
1,SVC,0.721951
2,Random Forest,1.0
3,Decision Tree,1.0
