In [22]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from scipy import stats

# Load heart.csv (relative path) into the working DataFrame used by the cells below.
df = pd.read_csv(filepath_or_buffer='../assets/jobsheet-7/heart.csv')

In [23]:
# Data preparation: one-hot encoding
# Each listed categorical column is expanded into one 0/1 indicator column per category.
categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
encoder = OneHotEncoder()
# The encoder output is converted to a dense array before building the frame.
encoded_values = encoder.fit_transform(df[categorical_cols]).toarray()
encoder_df = pd.DataFrame(
    encoded_values,
    columns=encoder.get_feature_names_out(categorical_cols),
    # Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
    # RangeIndex would misalign rows (and introduce NaNs) whenever df does not
    # carry the default 0..n-1 index, e.g. after filtering.
    index=df.index,
)
# The original categorical columns are kept alongside the new indicator columns.
df_encoded = pd.concat([df, encoder_df], axis=1)
df_encoded

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,...,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,40,M,ATA,140,289,0,Normal,172,N,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
2,37,M,ATA,130,283,0,ST,98,N,0.0,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
4,54,M,NAP,150,195,0,Normal,122,N,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,...,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
914,68,M,ASY,144,193,1,Normal,141,N,3.4,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
916,57,F,ATA,130,236,0,LVH,174,N,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


In [24]:
# Data preparation: outlier removal
# Keep only the rows whose absolute z-score is below 3 in every checked column.
outlier_cols = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS']
z_scores = np.abs(stats.zscore(df[outlier_cols]))
within_threshold = (z_scores < 3).all(axis=1)
df_clean = df[within_threshold]
df_clean

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [25]:
# Data preparation: normalization (min-max scaling)
# The scaler is fitted on, and applied to, the unfiltered `df` (not `df_clean`).
minmax_cols = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS']
scaler = MinMaxScaler()
minmax_values = scaler.fit_transform(df[minmax_cols])
df_scaled = pd.DataFrame(minmax_values, columns=minmax_cols)
df_scaled

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS
0,0.244898,0.70,0.479270,0.0
1,0.428571,0.80,0.298507,0.0
2,0.183673,0.65,0.469320,0.0
3,0.408163,0.69,0.354892,0.0
4,0.530612,0.75,0.323383,0.0
...,...,...,...,...
913,0.346939,0.55,0.437811,0.0
914,0.816327,0.72,0.320066,1.0
915,0.591837,0.65,0.217247,0.0
916,0.591837,0.65,0.391376,0.0


In [26]:
# Data preparation: standardization
# Rebinds `scaler` and `df_scaled`, replacing the min-max results from the previous cell.
# The scaler is fitted on, and applied to, the unfiltered `df` (not `df_clean`).
standard_cols = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS']
scaler = StandardScaler()
standard_values = scaler.fit_transform(df[standard_cols])
df_scaled = pd.DataFrame(standard_values, columns=standard_cols)
df_scaled

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS
0,-1.433140,0.410909,0.825070,-0.551341
1,-0.478484,1.491752,-0.171961,-0.551341
2,-1.751359,-0.129513,0.770188,-0.551341
3,-0.584556,0.302825,0.139040,-0.551341
4,0.051881,0.951331,-0.034755,-0.551341
...,...,...,...,...
913,-0.902775,-1.210356,0.596393,-0.551341
914,1.536902,0.627078,-0.053049,1.813758
915,0.370100,-0.129513,-0.620168,-0.551341
916,0.370100,-0.129513,0.340275,-0.551341


In [28]:
# Dataset creation: split the outlier-filtered frame into 80% training / 20% test data.
# The split uses the raw (unscaled) columns of `df_clean`; random_state pins the shuffle.
# NOTE(review): the target here is 'RestingECG' although the frame also has a
# 'HeartDisease' column - confirm this is the intended label.
feature_cols = ['Age', 'RestingBP', 'Cholesterol', 'FastingBS']
target_col = 'RestingECG'
x_train, x_test, y_train, y_test = train_test_split(
    df_clean[feature_cols],
    df_clean[target_col],
    test_size=0.2,
    random_state=42,
)

# Report the shapes of each partition: heading, then features, then target.
for heading, features, target in (
    ('Data Latih', x_train, y_train),
    ('Data Uji', x_test, y_test),
):
    print(heading)
    print(features.shape)
    print(target.shape)

Data Latih
(725, 4)
(725,)
Data Uji
(182, 4)
(182,)
