In [13]:
import pandas as pd

heart = pd.read_csv('heart.csv')

clean_heart = heart.dropna()

clean_heart.head()


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [37]:
from sklearn import tree
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

heart_tree = tree.DecisionTreeClassifier()

categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

x = clean_heart.drop('HeartDisease', axis=1)
y = clean_heart['HeartDisease']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2, random_state=42)

encoder = OneHotEncoder(sparse_output=False)
encoded_x_train = encoder.fit_transform(x_train[categorical_cols])
encoded_x_test = encoder.transform(x_test[categorical_cols])

encoded_x_train = pd.DataFrame(encoded_x_train, index=x_train.index)
encoded_x_test = pd.DataFrame(encoded_x_test, index=x_test.index)

x_train = x_train.drop(categorical_cols, axis=1)
x_test = x_test.drop(categorical_cols, axis=1)

x_train = pd.concat([x_train, encoded_x_train], axis=1)
x_test = pd.concat([x_test, encoded_x_test], axis=1)

x_train.columns = x_train.columns.astype(str)
x_test.columns = x_test.columns.astype(str)

heart_tree.fit(x_train, y_train)

tree_text = tree.export_text(heart_tree, feature_names=list(x_train.columns))

print(tree_text)


|--- 13 <= 0.50
|   |--- 2 <= 0.50
|   |   |--- 1 <= 0.50
|   |   |   |--- RestingBP <= 155.00
|   |   |   |   |--- Age <= 53.50
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- Age >  53.50
|   |   |   |   |   |--- Oldpeak <= 1.25
|   |   |   |   |   |   |--- Age <= 64.00
|   |   |   |   |   |   |   |--- Cholesterol <= 301.00
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- Cholesterol >  301.00
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- Age >  64.00
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- Oldpeak >  1.25
|   |   |   |   |   |   |--- class: 0
|   |   |   |--- RestingBP >  155.00
|   |   |   |   |--- class: 1
|   |   |--- 1 >  0.50
|   |   |   |--- MaxHR <= 136.50
|   |   |   |   |--- 9 <= 0.50
|   |   |   |   |   |--- Cholesterol <= 317.00
|   |   |   |   |   |   |--- RestingBP <= 112.50
|   |   |   |   |   |   |   |--- Oldpeak <= 1.15
|   |   |   |   |   |   |   |   |--- class: 1
|   |   | 