# Support Vector Machine

# 1: Importing Libraries and Loading Data

In [1]:
# Importing the libraries
import numpy as np 
import pandas as pd

In [2]:
# Importing the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data"
column_names = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "num"]
heart_data = pd.read_csv(url, names = column_names, na_values = "?")
print(heart_data.head())

   age  sex  cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0   28    1   2     130.0  132.0  0.0      2.0    185.0    0.0      0.0   
1   29    1   2     120.0  243.0  0.0      0.0    160.0    0.0      0.0   
2   29    1   2     140.0    NaN  0.0      0.0    170.0    0.0      0.0   
3   30    0   1     170.0  237.0  0.0      1.0    170.0    0.0      0.0   
4   31    0   2     100.0  219.0  0.0      1.0    150.0    0.0      0.0   

   slope  ca  thal  num  
0    NaN NaN   NaN    0  
1    NaN NaN   NaN    0  
2    NaN NaN   NaN    0  
3    NaN NaN   6.0    0  
4    NaN NaN   NaN    0  


# 2: Data Pre-processing

In [3]:
# Data Cleaning
heart_data.drop_duplicates(inplace = True)
heart_data.fillna(heart_data.mean(), inplace = True)

In [4]:
# Splitting the dataset into the training set and test set
from sklearn.model_selection import train_test_split
X = heart_data.iloc[:, :-1].values
y = heart_data.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [5]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# 3: Implement and Train the Model

In [6]:
# Training the Support Vector Machine model on the training set
from sklearn.svm import SVC
classifier = SVC(random_state = 0)
classifier.fit(X_train, y_train)

SVC(random_state=0)

In [7]:
# Predicting a new result
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[0 1]
 [1 1]
 [0 1]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [1 1]
 [0 1]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [0 0]
 [0 1]
 [1 1]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 1]
 [1 0]]


# 4: Evaluating Metrics

In [8]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[30  3]
 [ 9 17]]


In [9]:
# Evaluation Metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# Accuracy
ac = accuracy_score(y_test, y_pred)
print(f"Accuracy:  {ac:.2f}")
# Precision
pr = precision_score(y_test, y_pred)
print(f"Precision: {pr:.2f}")
# Recall 
re = recall_score(y_test, y_pred)
print(f"Recall:    {re:.2f}")
# F1-Score
f = f1_score(y_test, y_pred)
print(f"F1-score:  {f:.2f}")
# AUC-ROC
ra = roc_auc_score(y_test, y_pred)
print(f"AUC-ROC:   {ra:.2f}")

Accuracy:  0.80
Precision: 0.85
Recall:    0.65
F1-score:  0.74
AUC-ROC:   0.78


# 5: Hyperparameter Tuning

In [10]:
# RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Define the hyperparameters to tune
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf'],
    'gamma': [0.1, 1, 10, 100]
}

# Create a Randomized Search object
random_search = RandomizedSearchCV(
    classifier, 
    param_distributions=param_grid, 
    n_iter=10, 
    cv=5, 
    scoring='accuracy', 
    n_jobs=-1, 
    random_state=0
)

# Fit the Randomized Search the data
random_search.fit(X_train, y_train)

# Print the best hyperparameters and corresponding score
print("Randomized Search Best hyperparameters: ", random_search.best_params_)
print(f"Randomized Search Best score:  {random_search.best_score_:.4f}")

Randomized Search Best hyperparameters:  {'kernel': 'linear', 'gamma': 1, 'C': 0.1}
Randomized Search Best score:  0.8462


In [11]:
# GridSearchCV
from sklearn.model_selection import GridSearchCV

# Create a Grid Search object
grid_search = GridSearchCV(
    classifier, 
    param_grid, 
    cv=5, 
    scoring='accuracy',
    n_jobs=-1
)

# Fit the Grid Search object to the data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters and corresponding score
print("Grid Search Best hyperparameters: ", grid_search.best_params_)
print(f"Grid Search Best score: {grid_search.best_score_:.4f}")

Grid Search Best hyperparameters:  {'C': 0.1, 'gamma': 0.1, 'kernel': 'linear'}
Grid Search Best score: 0.8462
