In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [3]:
# load dataset
filename = 'heart_disease.csv'
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'smoking','diabetes', 'bmi', 'heart_disease']
df = pd.read_csv(filename, names=names, header=None, skiprows=1)
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,smoking,diabetes,bmi,heart_disease
0,67,1,2,111,536,0,2,88,0,1.3,3,2,3,1,0,23.4,1
1,57,1,3,109,107,0,2,119,0,5.4,2,0,3,0,1,35.4,0
2,43,1,4,171,508,0,1,113,0,3.7,3,0,7,1,1,29.9,0
3,71,0,4,90,523,0,2,152,0,4.7,2,1,3,1,0,15.2,1
4,36,1,2,119,131,0,2,128,0,5.9,3,1,3,1,0,16.7,1


In [10]:
array = df.values
print(type(array))
print(array)

<class 'numpy.ndarray'>
[[67.   1.   2.  ...  0.  23.4  1. ]
 [57.   1.   3.  ...  1.  35.4  0. ]
 [43.   1.   4.  ...  1.  29.9  0. ]
 ...
 [68.   1.   4.  ...  0.  16.1  1. ]
 [66.   0.   3.  ...  0.  27.7  0. ]
 [38.   1.   4.  ...  0.  16.3  0. ]]


In [5]:
# pisah input dan output
X = array[:, 0:8]
y = array[:, 8]
print(X.shape)
print(y.shape)

(3069, 8)
(3069,)


# 1. Rescale Data

In [11]:
# rescale data
scaler = MinMaxScaler(feature_range=(0, 1))
rescaledX = scaler.fit_transform(X)

# set print options dengan numpy
np.set_printoptions(precision=3)

# cetak data setelah rescale 5 baris pertama tiap column X
print(rescaledX[0:5, :])

[[0.809 1.    0.333 0.191 0.872 0.    1.    0.187]
 [0.596 1.    0.667 0.173 0.014 0.    1.    0.393]
 [0.298 1.    1.    0.736 0.816 0.    0.5   0.353]
 [0.894 0.    1.    0.    0.846 0.    1.    0.613]
 [0.149 1.    0.333 0.264 0.062 0.    1.    0.453]]


# 2. Standardize Data

In [12]:
from sklearn.preprocessing import StandardScaler

scaler2 = StandardScaler().fit(X)
rescaledX2 = scaler2.transform(X)

# set print options dengan numpy
np.set_printoptions(precision=3)

# cetak data setelah standardize 5 baris pertama tiap column X
print(rescaledX2[0:5, :])

[[ 1.058  0.894 -0.441 -1.086  1.286 -0.413  1.226 -1.088]
 [ 0.328  0.894  0.46  -1.149 -1.636 -0.413  1.226 -0.373]
 [-0.693  0.894  1.36   0.813  1.095 -0.413 -0.005 -0.511]
 [ 1.35  -1.118  1.36  -1.75   1.197 -0.413  1.226  0.389]
 [-1.204  0.894 -0.441 -0.833 -1.473 -0.413  1.226 -0.165]]


# 3. Normalize Data

In [13]:
from sklearn.preprocessing import Normalizer

scaler3 = Normalizer().fit(X)
normalizedX = scaler3.transform(X)

# set print options dengan numpy
np.set_printoptions(precision=3)

# cetak data setelah normalize 5 baris pertama tiap column X
print(normalizedX[0:5, :])

[[0.12  0.002 0.004 0.199 0.96  0.    0.004 0.158]
 [0.282 0.005 0.015 0.54  0.53  0.    0.01  0.589]
 [0.078 0.002 0.007 0.311 0.924 0.    0.002 0.206]
 [0.128 0.    0.007 0.162 0.94  0.    0.004 0.273]
 [0.163 0.005 0.009 0.538 0.592 0.    0.009 0.578]]


# 4. Binarize Data

In [14]:
from sklearn.preprocessing import Binarizer

binarizer = Binarizer(threshold=0.0).fit(X)
binaryX = binarizer.transform(X)

# set print options dengan numpy
np.set_printoptions(precision=3)

# cetak data setelah binarize 5 baris pertama tiap column X
print(binaryX[0:5, :])

[[1. 1. 1. 1. 1. 0. 1. 1.]
 [1. 1. 1. 1. 1. 0. 1. 1.]
 [1. 1. 1. 1. 1. 0. 1. 1.]
 [1. 0. 1. 1. 1. 0. 1. 1.]
 [1. 1. 1. 1. 1. 0. 1. 1.]]
