Import library

In [103]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

Load Dataset

In [104]:
heart_dataset = pd.read_csv('Heart_Disease_Prediction.csv')

In [105]:
heart_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270 entries, 0 to 269
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Age                      270 non-null    int64  
 1   Sex                      270 non-null    int64  
 2   Chest pain type          270 non-null    int64  
 3   BP                       270 non-null    int64  
 4   Cholesterol              270 non-null    int64  
 5   FBS over 120             270 non-null    int64  
 6   EKG results              270 non-null    int64  
 7   Max HR                   270 non-null    int64  
 8   Exercise angina          270 non-null    int64  
 9   ST depression            270 non-null    float64
 10  Slope of ST              270 non-null    int64  
 11  Number of vessels fluro  270 non-null    int64  
 12  Thallium                 270 non-null    int64  
 13  Heart Disease            270 non-null    object 
dtypes: float64(1), int64(12), 

In [106]:
heart_dataset.head()

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence


In [107]:
heart_dataset.shape

(270, 14)

In [108]:
heart_dataset['Heart Disease'].value_counts()

Absence     150
Presence    120
Name: Heart Disease, dtype: int64

In [109]:
# Memisahkan Data dan label
X = heart_dataset.drop(columns ='Heart Disease', axis=1)
Y = heart_dataset['Heart Disease']

Convert Label

In [110]:
Y_convert = {'Absence' : 0, 'Presence' : 1}
Y = Y.replace(Y_convert)

In [111]:
encoder_y = ColumnTransformer(
        [('Heart Disease', OneHotEncoder(), [0])], 
        remainder='passthrough')

In [112]:
print(X)

     Age  Sex  Chest pain type   BP  Cholesterol  FBS over 120  EKG results  \
0     70    1                4  130          322             0            2   
1     67    0                3  115          564             0            2   
2     57    1                2  124          261             0            0   
3     64    1                4  128          263             0            0   
4     74    0                2  120          269             0            2   
..   ...  ...              ...  ...          ...           ...          ...   
265   52    1                3  172          199             1            0   
266   44    1                2  120          263             0            0   
267   56    0                2  140          294             0            2   
268   57    1                4  140          192             0            0   
269   67    1                4  160          286             0            2   

     Max HR  Exercise angina  ST depression  Slope 

In [113]:
print(Y)

0      1
1      0
2      1
3      0
4      0
      ..
265    0
266    0
267    0
268    0
269    1
Name: Heart Disease, Length: 270, dtype: int64


Standarisasi Data

In [114]:
scaler = StandardScaler()

In [115]:
scaler.fit(X)

StandardScaler()

In [116]:
standarized_data = scaler.transform(X)

In [117]:
print (standarized_data)

[[ 1.71209356  0.6894997   0.87092765 ...  0.67641928  2.47268219
  -0.87570581]
 [ 1.38213977 -1.45032695 -0.18355874 ...  0.67641928 -0.71153494
   1.18927733]
 [ 0.2822938   0.6894997  -1.23804513 ... -0.95423434 -0.71153494
   1.18927733]
 ...
 [ 0.1723092  -1.45032695 -1.23804513 ...  0.67641928 -0.71153494
  -0.87570581]
 [ 0.2822938   0.6894997   0.87092765 ...  0.67641928 -0.71153494
   0.67303154]
 [ 1.38213977  0.6894997   0.87092765 ...  0.67641928  2.47268219
  -0.87570581]]


In [118]:
X = standarized_data
Y = heart_dataset['Heart Disease']

In [119]:
Y = Y.replace(Y_convert)

In [120]:
print(Y)


0      1
1      0
2      1
3      0
4      0
      ..
265    0
266    0
267    0
268    0
269    1
Name: Heart Disease, Length: 270, dtype: int64


Memisahkan Data Training dan testing 

In [121]:
x_train, x_test,y_train, y_testing = train_test_split(X, Y, test_size = 0.2, stratify=Y, random_state=2)

In [122]:
print(X.shape, x_train.shape, x_test.shape)

(270, 13) (216, 13) (54, 13)


Convert Label Training dan Testing

In [123]:
train_convert = {'Absence' : 0, 'Presence' : 1}
y_train = y_train.replace(train_convert)

In [124]:
test_convert = {"Absence":0,"Presence":1}
y_testing = y_testing.replace(test_convert)

Membuat data latih dengan menggunakan algoritma SVM

In [125]:
classifier = svm.SVC(kernel='linear')

In [126]:
classifier.fit(x_train, y_train)

SVC(kernel='linear')

In [134]:
print (Y)

0      1
1      0
2      1
3      0
4      0
      ..
265    0
266    0
267    0
268    0
269    1
Name: Heart Disease, Length: 270, dtype: int64


membuat model evaluasi untuk mengukur tingkat akurasi

In [127]:
x_train_prediction = classifier.predict(x_train)
training_data_accuracy = accuracy_score (x_train_prediction, y_train)

In [128]:
print('akurasi data training adalah :', training_data_accuracy)

akurasi data training adalah : 0.875


In [129]:
x_test_prediction = classifier.predict(x_test)
test_data_accuracy= accuracy_score(x_test_prediction, y_testing)

In [130]:
print ('akurasi data testing :', test_data_accuracy)

akurasi data testing : 0.7777777777777778


Membuat model prediksi

In [131]:
input_data = (60,0,3,120,178,1,0,96,0,0,1,0,3)

input_data_as_numpy_aray= np.array (input_data)

input_data_reshape = input_data_as_numpy_aray.reshape (1,-1)

std_data = scaler.transform (input_data_reshape)
print(std_data)

prediction= classifier.predict(std_data)

if(prediction[0] == 0):
    print ('pasien tidak punya penyakit Jantung')
else :
    print('pasien terkena penyakit Jantung')

[[ 0.61224759 -1.45032695 -0.18355874 -0.63630951 -1.38900286  2.39791576
  -1.02628472 -2.32142431 -0.7012223  -0.91856516 -0.95423434 -0.71153494
  -0.87570581]]
pasien tidak punya penyakit Jantung




Simpan Modul Ke sav

In [132]:
import pickle

In [133]:
filename = 'heart-disease.sav'
pickle.dump(classifier, open(filename, 'wb'))