In [5]:
# Dependencies
import pandas as pd

In [6]:
# Create Pandas DataFrame with data
heart_df = pd.read_csv("Resources/Heart_Disease_Prediction.csv")
heart_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence


In [7]:
# Convert sex from numerical to string: 1 = male; 0 = female
sex = {1: 'Male', 0: 'Female'}
heart_df['Sex (String)'] = heart_df['Sex'].map(sex)

# Convert chest pain type from numerical to string
# Value 1: typical angina, Value 2: atypical angina, Value 3: non-anginal pain, Value 4: asymptomatic
chest_pain = {1: 'Typical Angina', 2: 'Atypical Angina', 3: 'Non-Anginal Pain', 4: 'Asymptomatic'}
heart_df['Chest pain type (String)'] = heart_df['Chest pain type'].map(chest_pain)

# Convert FBS over 120 from numerical to boolean
# 1 = true; 0 = false
fbs = {1: True, 0: False}
heart_df['FBS over 120 (Boolean)'] = heart_df['FBS over 120'].map(fbs)

# Convert EKG results from numerical to string
# Value 0: normal
# Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
# Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria  
ekg = {0: 'Normal', 1: 'ST-T Abnormality', 2: 'LVH Indication'}
heart_df['EKG results (String)'] = heart_df['EKG results'].map(ekg)

# Convert Exercise Angina from numerical to boolean
# 1 = yes; 0 = no
exercise = {1: True, 0: False}
heart_df['Exercise angina (Boolean)'] = heart_df['Exercise angina'].map(exercise)

# Convert Slope of ST from numerical to string
# Value 1: upsloping, Value 2: flat, Value 3: downsloping
slope = {1: 'Unsloping', 2: 'Flat', 3: 'Downsloping'}
heart_df['Slope of ST (String)'] = heart_df['Slope of ST'].map(slope)

# Convert Thallium from numerical to string
# Thallium: 3 = normal; 6 = fixed defect; 7 = reversable defect
thallium = {3: 'Normal', 6: 'Fixed Defect', 7: 'Reversable Defect'}
heart_df['Thallium (String)'] = heart_df['Thallium'].map(thallium)

# Convert Heart Disease from string to numerical
# Absence = 0, Presence = 1
heart = {'Absence': 0, 'Presence': 1}
heart_df['Heart Disease (Numerical)'] = heart_df['Heart Disease'].map(heart)

heart_df.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,...,Thallium,Heart Disease,Sex (String),Chest pain type (String),FBS over 120 (Boolean),EKG results (String),Exercise angina (Boolean),Slope of ST (String),Thallium (String),Heart Disease (Numerical)
0,70,1,4,130,322,0,2,109,0,2.4,...,3,Presence,Male,Asymptomatic,False,LVH Indication,False,Flat,Normal,1
1,67,0,3,115,564,0,2,160,0,1.6,...,7,Absence,Female,Non-Anginal Pain,False,LVH Indication,False,Flat,Reversable Defect,0
2,57,1,2,124,261,0,0,141,0,0.3,...,7,Presence,Male,Atypical Angina,False,Normal,False,Unsloping,Reversable Defect,1
3,64,1,4,128,263,0,0,105,1,0.2,...,7,Absence,Male,Asymptomatic,False,Normal,True,Flat,Reversable Defect,0
4,74,0,2,120,269,0,2,121,1,0.2,...,3,Absence,Female,Atypical Angina,False,LVH Indication,True,Unsloping,Normal,0


In [8]:
heart_df.columns

Index(['Age', 'Sex', 'Chest pain type', 'BP', 'Cholesterol', 'FBS over 120',
       'EKG results', 'Max HR', 'Exercise angina', 'ST depression',
       'Slope of ST', 'Number of vessels fluro', 'Thallium', 'Heart Disease',
       'Sex (String)', 'Chest pain type (String)', 'FBS over 120 (Boolean)',
       'EKG results (String)', 'Exercise angina (Boolean)',
       'Slope of ST (String)', 'Thallium (String)',
       'Heart Disease (Numerical)'],
      dtype='object')

In [10]:
heart_df = heart_df[['Age', 'Sex', 'Sex (String)', 'Chest pain type', 'Chest pain type (String)', 'BP',
                     'Cholesterol', 'FBS over 120', 'FBS over 120 (Boolean)', 'EKG results',
                     'EKG results (String)', 'Max HR', 'Exercise angina', 'Exercise angina (Boolean)',
                     'ST depression', 'Slope of ST', 'Slope of ST (String)', 'Number of vessels fluro',
                     'Thallium', 'Thallium (String)', 'Heart Disease', 'Heart Disease (Numerical)']]
heart_df.head()

Unnamed: 0,Age,Sex,Sex (String),Chest pain type,Chest pain type (String),BP,Cholesterol,FBS over 120,FBS over 120 (Boolean),EKG results,...,Exercise angina,Exercise angina (Boolean),ST depression,Slope of ST,Slope of ST (String),Number of vessels fluro,Thallium,Thallium (String),Heart Disease,Heart Disease (Numerical)
0,70,1,Male,4,Asymptomatic,130,322,0,False,2,...,0,False,2.4,2,Flat,3,3,Normal,Presence,1
1,67,0,Female,3,Non-Anginal Pain,115,564,0,False,2,...,0,False,1.6,2,Flat,0,7,Reversable Defect,Absence,0
2,57,1,Male,2,Atypical Angina,124,261,0,False,0,...,0,False,0.3,1,Unsloping,0,7,Reversable Defect,Presence,1
3,64,1,Male,4,Asymptomatic,128,263,0,False,0,...,1,True,0.2,2,Flat,1,7,Reversable Defect,Absence,0
4,74,0,Female,2,Atypical Angina,120,269,0,False,2,...,1,True,0.2,1,Unsloping,1,3,Normal,Absence,0


In [11]:
# Save edited DataFrame to .csv
heart_df.to_csv("Resources/Heart_Disease_WithText.csv", encoding='utf8', index=False)