In [1]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn import svm
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

### Results of SVM and Multi-Layer Perceptron Models Without Pre-Processing

The following results were obtained without any pre-processing of the Heart Disease Data.

In [2]:
data = pd.read_csv('heart-disease-dataset1.csv')
data = data.replace('?', np.nan)
data = data.dropna()

X = data.drop('result', axis = 1)
Y = data['result']

xtrain, xtest, ytrain, ytest = train_test_split(X, Y, random_state=3, test_size=0.2)
svmkernel = ['linear', 'poly', 'rbf', 'sigmoid']

accuracy_list = []

for n in svmkernel:
    sm = svm.SVC(kernel=n, random_state=1, gamma='auto')
    sm = sm.fit(xtrain, ytrain)
    ypred = sm.predict(xtest)
    accuracy = round(sm.score(xtest, ytest) * 100, 3)
    accuracy_list.append(accuracy)
    print('Accuracy of SVM', n, '%', accuracy)

mlp_descent = ['sgd', 'adam']
print()

for k in mlp_descent:
    clf = MLPClassifier(solver=k, alpha=1e-5, hidden_layer_sizes=(1, ),\
                    random_state=1, max_iter=10000, activation='identity', learning_rate_init=0.01)

    clf = clf.fit(xtrain, ytrain)
    accuracy = round(clf.score(xtest, ytest) * 100, 3)
    accuracy_list.append(accuracy)
    print('Accuracy of MLP ', k, '%', accuracy)

print()
print('The highest accuracy is %', max(accuracy_list), 'which corresponds to the Linear SVM Model')
print('The average accuracy of SVM is %', round(np.mean(accuracy_list[0:4]), 3))
print('The average accuracy of MLP is %', round(np.mean(accuracy_list[4:6]), 3))

Accuracy of SVM linear % 60.0
Accuracy of SVM poly % 56.667
Accuracy of SVM rbf % 51.667
Accuracy of SVM sigmoid % 51.667

Accuracy of MLP  sgd % 16.667
Accuracy of MLP  adam % 56.667

The highest accuracy is % 60.0 which corresponds to the Linear SVM Model
The average accuracy of SVM is % 55.0
The average accuracy of MLP is % 36.667


### Results of SVM and Multi-Layer Perceptron Models With Pre-Processing Using Data Scaling

Scaling is a common pre-processing technique which standardizes the data using statistical methods. Some models such as SVMs and Multi-Layer Perceptrons may perform better when the input data is standardized. This is why we have chosen to include this iteration of the SVM and MLP models.

In [3]:
data = pd.read_csv('heart-disease-dataset1.csv')
data = data.replace('?', np.nan)
data = data.dropna()

attributes = data.drop('result', axis = 1)
target = data['result']

scaler = StandardScaler()
scaler = scaler.fit(attributes)
attr_scaled = scaler.transform(attributes) 

xtrain, xtest, ytrain, ytest = train_test_split(attr_scaled, target, random_state=3, test_size=0.2)
svmkernel = ['linear', 'poly', 'rbf', 'sigmoid']

accuracy_list = []

for n in svmkernel:
    sm = svm.SVC(kernel=n, random_state=1, gamma='auto')
    sm = sm.fit(xtrain, ytrain)
    ypred = sm.predict(xtest)
    accuracy = round(sm.score(xtest, ytest) * 100, 3)
    accuracy_list.append(accuracy)
    print('Accuracy of SVM', n, '%', accuracy)

mlp_descent = ['sgd', 'adam']
print()

for k in mlp_descent:
    clf = MLPClassifier(solver=k, alpha=1e-5, hidden_layer_sizes=(1, ),\
                    random_state=1, max_iter=10000, activation='identity', learning_rate_init=0.01)

    clf = clf.fit(xtrain, ytrain)
    accuracy = round(clf.score(xtest, ytest) * 100, 3)
    accuracy_list.append(accuracy)
    print('Accuracy of MLP ', k, '%', accuracy)

print()
print('The highest accuracy is %', max(accuracy_list), 'which corresponds to the Linear SVM Model')
print('The average accuracy of SVM is %', round(np.mean(accuracy_list[0:4]), 3))
print('The average accuracy of MLP is %', round(np.mean(accuracy_list[4:6]), 3))

Accuracy of SVM linear % 63.333
Accuracy of SVM poly % 56.667
Accuracy of SVM rbf % 60.0
Accuracy of SVM sigmoid % 56.667

Accuracy of MLP  sgd % 56.667
Accuracy of MLP  adam % 60.0

The highest accuracy is % 63.333 which corresponds to the Linear SVM Model
The average accuracy of SVM is % 59.167
The average accuracy of MLP is % 58.334


### Results of SVM and Multi-Layer Perceptron Models With Pre-Processing By Binarizing the Result Attribute

Since the result attribute is given by an integer ranging from 0 to 4, and both SVM and Multi-Layer Perceptron work better with binary classification, we decided to see how what results we would get by binarizing the result attribute. In this case, 1 would indicate the existence of heart disease and 0 would indicate no heart disease.

In [4]:
data = pd.read_csv('heart-disease-dataset1.csv')
data = data.replace('?', np.nan)
data = data.dropna()

attributes = data.drop('result', axis = 1)
target = [i if i==0 else 1 for i in data['result']]

xtrain, xtest, ytrain, ytest = train_test_split(attributes, target, random_state=3, test_size=0.2)
svmkernel = ['linear', 'poly', 'rbf', 'sigmoid']

accuracy_list = []

for n in svmkernel:
    sm = svm.SVC(kernel=n, random_state=1, gamma='auto')
    sm = sm.fit(xtrain, ytrain)
    ypred = sm.predict(xtest)
    accuracy = round(sm.score(xtest, ytest) * 100, 3)
    accuracy_list.append(accuracy)
    print('Accuracy of SVM', n, '%', accuracy)

mlp_descent = ['sgd', 'adam']
print()

for k in mlp_descent:
    clf = MLPClassifier(solver=k, alpha=1e-5, hidden_layer_sizes=(1, ),\
                    random_state=1, max_iter=10000, activation='identity', learning_rate_init=0.01)

    clf = clf.fit(xtrain, ytrain)
    accuracy = round(clf.score(xtest, ytest) * 100, 3)
    accuracy_list.append(accuracy)
    print('Accuracy of MLP ', k, '%', accuracy)

print()
print('The highest accuracy is %', max(accuracy_list), 'which corresponds to the Linear SVM Model')
print('The average accuracy of SVM is %', round(np.mean(accuracy_list[0:4]), 3))
print('The average accuracy of MLP is %', round(np.mean(accuracy_list[4:6]), 3))

Accuracy of SVM linear % 85.0
Accuracy of SVM poly % 81.667
Accuracy of SVM rbf % 51.667
Accuracy of SVM sigmoid % 51.667

Accuracy of MLP  sgd % 48.333
Accuracy of MLP  adam % 48.333

The highest accuracy is % 85.0 which corresponds to the Linear SVM Model
The average accuracy of SVM is % 67.5
The average accuracy of MLP is % 48.333


### Conclusion

Overall, the SVM models performed better since they had higher accuracy scores. Regardless of whether pre-processing was performed on the data, and regardless of the type of pre-processing, the SVM models had higher accuracy scores. Among the SVM models, the model which utilized the linear kernel performed the best and resulted in the highest accuracy scores across all iterations of model building. Therefore, the highest performing model is the Linear SVM model.