## Dependencies

In [3]:
import numpy as np
import pandas as pd

## Dataset

**Source:** https://archive.ics.uci.edu/dataset/45/heart+disease

In [4]:
# hospitals whose processed files are combined, and the 14 column names they share
dataset_names = ['cleveland', 'hungarian', 'va']
feature_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num',
]

# one frame per hospital: '?' is read as missing, and every row is tagged with its source name
datasets = [
    pd.read_csv(
        f'../data/processed.{hospital}.data',
        names=feature_names,
        na_values='?',
    ).assign(location=hospital)
    for hospital in dataset_names
]

# stack the hospitals, cap 'num' at 1 (any value above 1 becomes 1), then swap the
# numeric codes for readable labels (code meanings come from the heart-disease.names file)
df = (
    pd.concat(datasets, ignore_index=True)
    .assign(num=lambda frame: frame['num'].clip(upper=1))
    .replace({
        # a 0 in these measurement columns is replaced with NaN
        **{column: {0: np.nan} for column in ('age', 'trestbps', 'chol', 'thalach')},
        # coded categorical columns -> descriptive labels
        'sex': {0: 'Female', 1: 'Male'},
        'cp': {
            1: 'Typical Angina',
            2: 'Atypical Angina',
            3: 'Nonanginal Pain',
            4: 'Asymptomatic',
        },
        'fbs': {0: 'Below 120', 1: 'Above 120'},
        'restecg': {0: 'Normal', 1: 'ST-T Abnormality', 2: 'LV Hypertrophy'},
        'exang': {0: 'Exercise Angina Absent', 1: 'Exercise Angina Present'},
        'slope': {1: 'Up', 2: 'Flat', 3: 'Down'},
        'ca': {
            0: '0 Vessel(s)',
            1: '1 Vessel(s)',
            2: '2 Vessel(s)',
            3: '3 Vessel(s)',
        },
        'thal': {3: 'Normal', 6: 'Fixed Defect', 7: 'Reversable Defect'},
        # file-name tags -> display names for each site
        'location': {
            'cleveland': 'Cleveland',
            'hungarian': 'Budapest',
            'va': 'Long Beach',
        },
    })
)

# persist the combined, relabelled table (row index is not written)
df.to_csv('../data/heart_disease_dataset.csv', index=False)

display(df)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num,location
0,63.0,Male,Typical Angina,145.0,233.0,Above 120,LV Hypertrophy,150.0,Exercise Angina Absent,2.3,Down,0 Vessel(s),Fixed Defect,0,Cleveland
1,67.0,Male,Asymptomatic,160.0,286.0,Below 120,LV Hypertrophy,108.0,Exercise Angina Present,1.5,Flat,3 Vessel(s),Normal,1,Cleveland
2,67.0,Male,Asymptomatic,120.0,229.0,Below 120,LV Hypertrophy,129.0,Exercise Angina Present,2.6,Flat,2 Vessel(s),Reversable Defect,1,Cleveland
3,37.0,Male,Nonanginal Pain,130.0,250.0,Below 120,Normal,187.0,Exercise Angina Absent,3.5,Down,0 Vessel(s),Normal,0,Cleveland
4,41.0,Female,Atypical Angina,130.0,204.0,Below 120,LV Hypertrophy,172.0,Exercise Angina Absent,1.4,Up,0 Vessel(s),Normal,0,Cleveland
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
792,54.0,Female,Asymptomatic,127.0,333.0,Above 120,ST-T Abnormality,154.0,Exercise Angina Absent,0.0,,,,1,Long Beach
793,62.0,Male,Typical Angina,,139.0,Below 120,ST-T Abnormality,,,,,,,0,Long Beach
794,55.0,Male,Asymptomatic,122.0,223.0,Above 120,ST-T Abnormality,100.0,Exercise Angina Absent,0.0,,,Fixed Defect,1,Long Beach
795,58.0,Male,Asymptomatic,,385.0,Above 120,LV Hypertrophy,,,,,,,0,Long Beach
