In [87]:
## Creating SVC Model for Heart Disease Dataset

In [88]:
# Import the required modules
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
%matplotlib inline


In [89]:
# Load Heart_Disease_Prediction.csv (kept in the Resources folder) as a pandas DataFrame
file_path = Path("../Resources/Heart_Disease_Prediction.csv")
df_heart = pd.read_csv(filepath_or_buffer=file_path)

# Leave the frame as the last expression so the notebook renders it
df_heart

Unnamed: 0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,Presence
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,Absence
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,Presence
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,Absence
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,Absence
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,Absence
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,Absence
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,Absence
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,Absence


In [90]:
# Define the target: the 'Heart Disease' column, whose values are the
# strings 'Presence' / 'Absence' (see the DataFrame preview above).
target = df_heart['Heart Disease']

# Display names for the two classes. scikit-learn orders string labels
# alphabetically ('Absence' before 'Presence'), and `target_names=` in
# classification_report is matched to the labels by position, so the names
# must be listed in that same order or the per-class rows get swapped.
target_names = ["absence", "presence"]

# Define the features to include all columns except the target
features = df_heart.drop(columns=['Heart Disease'])

# Keep the feature column labels for later reference and show them
features_names = features.columns
features_names

Index(['Age', 'Sex', 'Chest pain type', 'BP', 'Cholesterol', 'FBS over 120',
       'EKG results', 'Max HR', 'Exercise angina', 'ST depression',
       'Slope of ST', 'Number of vessels fluro', 'Thallium'],
      dtype='object')

In [91]:
# Hold out a test split; the fixed random_state keeps the split reproducible
from sklearn.model_selection import train_test_split
split_parts = train_test_split(features, target, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [92]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Learn the scaling statistics from the training split only;
# fit() hands back the fitted scaler, which is kept as X_scaler
X_scaler = scaler.fit(X_train)

# Apply the same fitted transformation to the training and testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [93]:
# Train a support vector classifier on the scaled training data.
# Kernel choice: linear gave better results than poly, rbf, sigmoid.
from sklearn.svm import SVC
svc_model = SVC(kernel='linear')
svc_model.fit(X_train_scaled, y_train)

SVC(kernel='linear')

In [94]:
# Predict the class label of every row in the scaled test split
predictions = svc_model.predict(X_test_scaled)

In [95]:
# Mean accuracy of the fitted model on the held-out test split
test_accuracy = svc_model.score(X_test_scaled, y_test)
print('Test Acc: %.3f' % test_accuracy)

Test Acc: 0.882


In [96]:
# Build the confusion matrix (rows = actual class, columns = predicted class)
# and wrap it in a labelled DataFrame for display.
# NOTE(review): the labels are strings, so positions 0/1 follow sorted label
# order -- judging by the classification report, 0 is 'Absence' and 1 is
# 'Presence'; confirm before relabelling.
row_labels = ["Actual 0", "Actual 1"]
col_labels = ["Predicted 0", "Predicted 1"]
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(cm, index=row_labels, columns=col_labels)

# Fraction of test rows whose predicted label matches the true label
acc_score = accuracy_score(y_test, predictions)

In [97]:
# Report the evaluation: confusion-matrix table, overall accuracy,
# then the per-class precision / recall / f1 summary
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
report_text = classification_report(y_test, predictions)
print(report_text)

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,36,4
Actual 1,4,24


Accuracy Score : 0.8823529411764706
Classification Report
              precision    recall  f1-score   support

     Absence       0.90      0.90      0.90        40
    Presence       0.86      0.86      0.86        28

    accuracy                           0.88        68
   macro avg       0.88      0.88      0.88        68
weighted avg       0.88      0.88      0.88        68

