In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Getting our data ready to be used with ML
Three main things to be done:
    
    1. Split data into features and labels (X and y)
    
    2. Fill in missing values
 
    3. Feature encoding (conv non numerical values to numerical values)

In [2]:
heart_disease = pd.read_csv('Heart_Disease_Prediction.csv')

In [3]:
heart_disease.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence


In [5]:
heart_disease = heart_disease.rename(columns = {'Heart Disease': 'Target'})
heart_disease

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Target
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,Absence
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,Absence
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,Absence
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,Absence


In [6]:
heart_disease['Target'] = heart_disease['Target'].map({"Presence": 1, "Absence": 0})

In [7]:
heart_disease

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Target
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,1
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,0
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,1
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,0
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,0
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,0
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,0
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,0


In [8]:
X = heart_disease.drop('Target', axis=1)

y = heart_disease['Target']

In [9]:
X

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6


In [10]:
y

0      1
1      0
2      1
3      0
4      0
      ..
265    0
266    0
267    0
268    0
269    1
Name: Target, Length: 270, dtype: int64

## Split data into training and testing set

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [15]:
X_train

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium
153,64,0,4,130,303,0,0,122,0,2.0,2,2,3
7,59,1,4,110,239,0,2,142,1,1.2,2,1,7
167,53,1,3,130,197,1,2,152,0,1.2,3,0,3
10,59,1,4,135,234,0,0,161,0,0.5,2,0,7
243,62,0,4,140,268,0,2,160,0,3.6,3,2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
178,67,1,4,125,254,1,0,163,0,0.2,2,2,7
22,43,1,4,115,303,0,0,181,0,1.2,2,0,3
140,64,1,4,145,212,0,2,132,0,2.0,2,2,6
40,40,1,4,152,223,0,0,181,0,0.0,1,0,7


In [16]:
y_train

153    0
7      1
167    0
10     0
243    1
      ..
178    1
22     0
140    1
40     1
120    1
Name: Target, Length: 189, dtype: int64

In [17]:
X_test

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium
197,54,0,3,110,214,0,0,158,0,1.6,2,0,3
26,46,0,4,138,243,0,2,152,1,0.0,2,0,3
151,50,0,3,120,219,0,0,158,0,1.6,2,0,3
189,70,1,3,160,269,0,0,112,1,2.9,2,1,7
55,41,0,2,105,198,0,0,168,0,0.0,1,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,59,1,4,138,271,0,2,182,0,0.0,1,0,3
198,69,0,1,140,239,0,0,151,0,1.8,1,2,3
184,53,1,3,130,246,1,2,173,0,0.0,1,3,3
247,65,0,3,155,269,0,0,148,0,0.8,1,0,3


In [18]:
y_test

197    0
26     0
151    0
189    1
55     0
      ..
141    0
198    0
184    0
247    0
67     1
Name: Target, Length: 81, dtype: int64

In [20]:
X_train.shape

(189, 13)

In [21]:
y_train.shape

(189,)

In [22]:
X_test.shape , y_test.shape

((81, 13), (81,))

In [24]:
#to check the no of cols, if any null values etc
heart_disease.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270 entries, 0 to 269
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Age                      270 non-null    int64  
 1   Sex                      270 non-null    int64  
 2   Chest pain type          270 non-null    int64  
 3   BP                       270 non-null    int64  
 4   Cholesterol              270 non-null    int64  
 5   FBS over 120             270 non-null    int64  
 6   EKG results              270 non-null    int64  
 7   Max HR                   270 non-null    int64  
 8   Exercise angina          270 non-null    int64  
 9   ST depression            270 non-null    float64
 10  Slope of ST              270 non-null    int64  
 11  Number of vessels fluro  270 non-null    int64  
 12  Thallium                 270 non-null    int64  
 13  Target                   270 non-null    int64  
dtypes: float64(1), int64(13)
m

In [23]:
#to check if any null values
heart_disease.isnull()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Target
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,False,False,False,False,False,False,False,False,False,False,False,False,False,False
266,False,False,False,False,False,False,False,False,False,False,False,False,False,False
267,False,False,False,False,False,False,False,False,False,False,False,False,False,False
268,False,False,False,False,False,False,False,False,False,False,False,False,False,False
