## Support Vector Machine(SVM) pada data Heartdisease

In [1]:
# Import Library
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn import svm

In [2]:
# Membaca file csv
df = pd.read_csv("heartdisease.csv")

In [3]:
# Print 5 data teratas
df.head(5)

Unnamed: 0,age,gender,chest pain,rest SBP,cholesterol,fasting blood sugar > 120,rest ECG,max HR,exerc ind ang,ST by exercise,slope peak exc ST,major vessels colored,thal,diameter narrowing
0,63,0,4,145,233,1,2,150,0,2.3,2,0,1,0
1,67,0,1,160,286,0,2,108,1,1.5,0,3,0,1
2,67,0,1,120,229,0,2,129,1,2.6,0,2,2,1
3,37,0,2,130,250,0,0,187,0,3.5,2,0,0,0
4,41,1,3,130,204,0,2,172,0,1.4,1,0,0,0


## Mengubah semua fitur menjadi numeric

In [4]:
# Cek type data awal
df.dtypes

age                            int64
gender                         int64
chest pain                     int64
rest SBP                       int64
cholesterol                    int64
fasting blood sugar > 120      int64
rest ECG                       int64
max HR                         int64
exerc ind ang                  int64
ST by exercise               float64
slope peak exc ST              int64
major vessels colored         object
thal                          object
diameter narrowing             int64
dtype: object

In [5]:
# Cek ukuran/banyak data
df.shape

(303, 14)

In [6]:
# Menghapus data yang mengandung '?'
df = df[~((df['major vessels colored'] == '?' ) | (df['thal'] == '?' ))]

In [7]:
# Cek banyak data setelah dihapus
df.shape

(297, 14)

In [8]:
# Mengubah type data pada 2 colum berikut menjadi numeric
df[['major vessels colored', 'thal']] = df[['major vessels colored', 'thal']].apply(pd.to_numeric)

In [9]:
# Cek type data setelah diubah
df.dtypes

age                            int64
gender                         int64
chest pain                     int64
rest SBP                       int64
cholesterol                    int64
fasting blood sugar > 120      int64
rest ECG                       int64
max HR                         int64
exerc ind ang                  int64
ST by exercise               float64
slope peak exc ST              int64
major vessels colored          int64
thal                           int64
diameter narrowing             int64
dtype: object

## SVM

In [10]:
# Menentukan fitur
X = df[['age', 'gender', 'chest pain', 'rest SBP', 'cholesterol', 'fasting blood sugar > 120', 'rest ECG', 'max HR', 'exerc ind ang', 'ST by exercise', 'slope peak exc ST', 'major vessels colored', 'thal']]

# Menentukan target
y = df[['diameter narrowing']]

In [11]:
# Menentukan data test, data train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [12]:
# SVM classifier
clf = svm.SVC(kernel='linear') # Linear
rbf_clf = svm.SVC(kernel='rbf', gamma='scale') # Gaussian/rbf
poly_clf = svm.SVC(kernel='poly', gamma='scale') # Polynomial
sig_clf = svm.SVC(kernel='sigmoid', gamma='scale') # Sigmoid

# Train model menggunakan data
clf.fit(X_train, y_train.values.ravel())
rbf_clf.fit(X_train, y_train.values.ravel())
poly_clf.fit(X_train, y_train.values.ravel())
sig_clf.fit(X_train, y_train.values.ravel())

# Membuat prediksi
lin_y_pred = clf.predict(X_test)
rbf_y_pred = rbf_clf.predict(X_test)
poly_y_pred = poly_clf.predict(X_test)
sig_y_pred = sig_clf.predict(X_test)

# Menghitung akurasi
print("Accuracy linear:",metrics.accuracy_score(y_test, lin_y_pred))
print("Accuracy rbf:",metrics.accuracy_score(y_test, rbf_y_pred))
print("Accuracy polynomial:",metrics.accuracy_score(y_test, poly_y_pred))
print("Accuracy sigmoid:",metrics.accuracy_score(y_test, sig_y_pred))

Accuracy linear: 0.8666666666666667
Accuracy rbf: 0.5333333333333333
Accuracy polynomial: 0.6166666666666667
Accuracy sigmoid: 0.5
