# Comparison of Classifiers
In this file, we will be splitting the scaled Heart Disease dataset into its Training and Test set.
After that, we will run multiple machine learning classifier algorithms.
We will compare the results of the classifiers to see the accuracy of each.

## Import Statements

In [52]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt

from sklearn.model_selection import train_test_split, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, classification_report, roc_curve, auc, roc_auc_score
from sklearn import svm # Support Vector Machine Classifier
from sklearn.ensemble import RandomForestClassifier # Random Forest Classifier
from sklearn.naive_bayes import GaussianNB # Naive Bayes Classifier
from sklearn import metrics

### Read CSV File

In [9]:
df = pd.read_csv('heart_dataset_scaled.csv')

# Display CSV Contents
df.head()

Unnamed: 0,ecg_encoded,bp_encoded,chol_encoded,hr_encoded,op_encoded,st_encoded,Sex_F,Sex_M,"age_binned_(25, 45]","age_binned_(45, 65]","age_binned_(65, 85]",chest_pain_from_heart_N,chest_pain_from_heart_Y,ExerciseAngina_N,ExerciseAngina_Y,HeartDisease
0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
1,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0
2,0.5,0.5,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
3,0.0,0.5,0.5,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0


In [22]:
# Remove Validation Column (Heart Disease) from Other Attributes
cols = X.columns.to_list()
cols = cols[:-1]

X = df[cols]
y = df['HeartDisease']

# Display Contents
X.head()

Unnamed: 0,ecg_encoded,bp_encoded,chol_encoded,hr_encoded,op_encoded,st_encoded,Sex_F,Sex_M,"age_binned_(25, 45]","age_binned_(45, 65]","age_binned_(65, 85]",chest_pain_from_heart_N,chest_pain_from_heart_Y,ExerciseAngina_N
0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
1,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
2,0.5,0.5,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0
3,0.0,0.5,0.5,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0


### Split Dataset into Testing & Training

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X.values, y, test_size=0.2)

# Support Vector Machine Classification
Support Vector Machine classifier uses a Radial Basis Function ('RBF') Kernel, which is popular.

In [46]:
# Create SVM Classifier w/ RBF Kernel
svm_clf = svm.SVC(kernel='rbf')

# Train the SVM Model
svm_clf.fit(X_train, y_train)

# Prediction for Test Set
svm_y_pred = svm_clf.predict(X_test)

In [48]:
# SVM Accuracy
svm_acc = metrics.accuracy_score(y_test, svm_y_pred)
print("Accuracy:", svm_acc)

# SVM Precision
svm_pre = metrics.precision_score(y_test, svm_y_pred)
print("Precision:", svm_pre)

# SVM Recall
svm_rcl = metrics.recall_score(y_test, svm_y_pred)
print("Recall:", svm_rcl)

Accuracy: 0.8913043478260869
Precision: 0.8909090909090909
Recall: 0.9245283018867925


## Naive Bayes Classification

In [50]:
# Create a Gaussian Naive Bayes Classifier
nb_clf = GaussianNB()

# Train the Naive Bayes Model
nb_clf.fit(X_train,y_train)

#Prediction for Test Set
nb_y_pred = nb_clf.predict(X_test)

In [51]:
# NB Accuracy
nb_acc = metrics.accuracy_score(y_test, nb_y_pred)
print("Accuracy:", nb_acc)

# NB Precision
nb_pre = metrics.precision_score(y_test, nb_y_pred)
print("Precision:", nb_pre)

# NB Recall
nb_rcl = metrics.recall_score(y_test, nb_y_pred)
print("Recall:", nb_rcl)

Accuracy: 0.7989130434782609
Precision: 0.8349514563106796
Recall: 0.8113207547169812
