In [1]:
import pandas as pd
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings emitted by
# the libraries used in later cells — consider narrowing it to a specific
# category once the notebook is stable.
warnings.filterwarnings('ignore')

In [2]:
# Parse each CSV once and derive both the features and the target from the
# same frame (previously every file was read from disk twice).
train_df = pd.read_csv('heart_data_train.csv')
test_df = pd.read_csv('heart_data_test.csv')

# Features: remove the target column, then skip the first remaining column.
# NOTE(review): the skipped column is presumably a saved row index — confirm
# against the CSV header.
X_train = train_df.drop('HeartDisease', axis=1).iloc[:, 1:]
y_train = train_df.HeartDisease
X_test = test_df.drop('HeartDisease', axis=1).iloc[:, 1:]
y_test = test_df.HeartDisease

Model fitting:

In [3]:
from sklearn.svm import SVC # Import the SVM class from sklearn
from sklearn.model_selection import train_test_split, GridSearchCV # For splitting data and performing Grid Search CV
from sklearn.metrics import accuracy_score # For evaluating the accuracy of the model
from sklearnex import patch_sklearn

In [None]:
# Enable the Intel(R) Extension for Scikit-learn accelerations.
patch_sklearn()

# Patching replaces the estimator classes inside the sklearn modules; a name
# imported before the call still refers to the stock class, so re-import SVC
# here to actually pick up the accelerated implementation.
from sklearn.svm import SVC

# Number of leading training rows used for the hyperparameter search.
N_TRAIN_SAMPLES = 10000

# Create an SVM model.
# probability=True is required for predict_proba() below; without it the
# fitted SVC exposes no probability estimates and the call fails.
# random_state pins the internal cross-validation used for the probability
# calibration so repeated runs give the same probabilities.
svm_model = SVC(probability=True, random_state=42)

# Define the hyperparameter grid for parameter search
param_grid = {
    'C': [0.1, 1, 10], # Different values of regularization parameter C
    'kernel': ['linear', 'rbf'], # Different kernel types
    'gamma': ['scale', 'auto', 0.1, 1], # Different gamma values
}

# Create Grid Search CV object
grid_search = GridSearchCV(svm_model, param_grid, cv=5) # 5-fold cross-validation

# Train the SVM model with parameter search on the first N_TRAIN_SAMPLES rows
# of the training data
grid_search.fit(X_train[:N_TRAIN_SAMPLES], y_train[:N_TRAIN_SAMPLES])

# Get the best parameters and best model from Grid Search CV
best_params = grid_search.best_params_
best_svm = grid_search.best_estimator_

# Hard class predictions and positive-class (second column) probabilities
# on the held-out test set.
y_pred = best_svm.predict(X_test)
y_pred_prob = best_svm.predict_proba(X_test)[:,1]

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


Model Evaluations:

In [None]:
#from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import log_loss

In [None]:
# Threshold-free ranking quality, computed from the positive-class probabilities.
auc_score = roc_auc_score(y_test, y_pred_prob)

# Balanced error rate = 1 - balanced accuracy, computed from the hard predictions.
ber_score = 1 - balanced_accuracy_score(y_test,y_pred)

# Label-based metrics on the hard predictions.
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Log loss is defined on predicted probabilities, so it must receive
# y_pred_prob; feeding it the hard labels only measures clipped 0/1 values.
# The `eps` keyword is omitted because it is deprecated and rejected by
# recent scikit-learn releases.
logloss = log_loss(y_test, y_pred_prob)

print("AUC Score: {:.2f}".format(auc_score))
print("BER Score: {:.2f}".format(ber_score))
print("f1 Score: {:.2f}".format(f1))
print("Precision Score: {:.2f}".format(precision))
print("Recall Score: {:.2f}".format(recall))
print("Log Loss Score: {:.2f}".format(logloss))

Confusion Matrix:

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Class labels in ascending order — the same order confusion_matrix uses for
# its rows and columns. Passing them explicitly to both calls guarantees that
# the tick labels on the plot match the matrix cells (the previous hard-coded
# [1, 0] labelled the axes in the opposite order).
class_labels = sorted(set(y_test) | set(y_pred))

# Confusion matrix of the test-set predictions (rows: true, columns: predicted).
cm_test = confusion_matrix(y_test, y_pred, labels=class_labels)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm_test, display_labels=class_labels)
cm_display.plot()
plt.show()

In [None]:
ROC Curve:

In [None]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import numpy as np

# ROC analysis of the test-set probabilities: false/true positive rates over
# all thresholds, plus the area under the resulting curve.
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

lw = 2  # shared line width for the ROC curve and the diagonal reference line

# Draw on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.plot(
    fpr,
    tpr,
    color='darkorange',
    lw=lw,
    label='ROC curve (AUC = %0.2f)' % roc_auc,
)
# Diagonal reference line from (0, 0) to (1, 1).
ax.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')

# Small headroom above 1.0 on the y-axis so the curve's top edge stays visible.
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic Example')
ax.legend(loc="lower right")
plt.show()