### Libraries

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

### **Characteristics:** The ten variables (nine predictors plus the response, `chd`) are defined as follows:

- **sbp:** systolic blood pressure
- **tobacco:** cumulative tobacco (kg)
- **ldl:** low density lipoprotein cholesterol
- **adiposity**
- **famhist:** family history of heart disease (Present=1, Absent=0)
- **typea:** type-A behavior
- **obesity**
- **alcohol:** current alcohol consumption
- **age:** age at onset
- **chd:** coronary heart disease (yes=1 or no=0)

In [4]:
# Load the dataset. `pd.read_csv` already returns a DataFrame, so it is used
# directly instead of being wrapped in another `pd.DataFrame(...)` call.
df = pd.read_csv('database.csv')
# Change the name of the class column
df = df.rename(columns={'chd': 'class'})
# Preview the first ten rows
print(df.head(10))

   sbp  tobacco   ldl  adipoYesty  famhist  typea  obeYesty  alcohol  age  \
0  160    12.00  5.73       23.11  Present     49     25.30    97.20   52   
1  144     0.01  4.41       28.61   Absent     55     28.87     2.06   63   
2  118     0.08  3.48       32.28  Present     52     29.14     3.81   46   
3  170     7.50  6.41       38.03  Present     51     31.99    24.26   58   
4  134    13.60  3.50       27.78  Present     60     25.99    57.34   49   
5  132     6.20  6.47       36.21  Present     62     30.77    14.14   45   
6  142     4.05  3.38       16.20   Absent     59     20.81     2.62   38   
7  114     4.08  4.59       14.60  Present     62     23.11     6.72   58   
8  114     0.00  3.83       19.40  Present     49     24.86     2.49   29   
9  132     0.00  5.80       30.96  Present     69     30.11     0.00   53   

  class  
0   Yes  
1   Yes  
2    No  
3   Yes  
4   Yes  
5    No  
6    No  
7   Yes  
8    No  
9   Yes  


#### Support Vector Machine

In [9]:
# Categorical features to numerical with LabelEncoder.
# LabelEncoder assigns integer codes in sorted label order, so
# 'Absent' -> 0 / 'Present' -> 1 and 'No' -> 0 / 'Yes' -> 1.
df['famhist'] = LabelEncoder().fit_transform(df['famhist'])
df['class'] = LabelEncoder().fit_transform(df['class'])

# Separation of features from the class. Columns are selected by name so the
# split does not depend on 'class' being the last column of the frame.
X = df.drop(columns='class')
y = df['class']

# Train/test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the hyperparameters
hyperparameters = {'C': [1, 10, 50, 100], 'kernel': ["linear", "poly", "rbf"]}
clf = SVC()
# NOTE(review): the features are passed to the SVM unscaled; SVMs are
# sensitive to feature scale, so consider standardizing inside a Pipeline.
# Not done here because it would change the reported results.

# Define the grid search with cross-validation
GS = GridSearchCV(clf, hyperparameters, cv=10)  # Number of folds: 10
# y_train is already a 1-D Series, so it is passed as-is (no ravel() needed).
GS.fit(X_train, y_train)  # Run the grid search

# Get the best hyperparameters
best_C = GS.best_params_['C']
best_kernel = GS.best_params_['kernel']
print("Best hyperparameters found:")
print("C:", best_C, "kernel:", best_kernel)

# SVM model with the best hyperparameters.
# GridSearchCV (refit=True by default) has already refitted an SVC with the
# best parameters on the whole training set, so that estimator is reused
# instead of training an identical model a second time.
SVM = GS.best_estimator_

# Classifier accuracy on test set
y_pred = SVM.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy on the test set: {:.4f}".format(accuracy))


Best hyperparameters found:
C: 100 kernel: linear
Accuracy on the test set: 0.7204
