In [1]:
import pandas as pd
import numpy as np

In [2]:
from warnings import filterwarnings

# Silence every warning category for the rest of the session so library
# warnings do not clutter the cell outputs below.
filterwarnings(action='ignore')

```
age
sex
chest pain type (4 values)
resting blood pressure
serum cholesterol in mg/dl
fasting blood sugar > 120 mg/dl
resting electrocardiographic results (values 0,1,2)
maximum heart rate achieved
exercise induced angina
oldpeak = ST depression induced by exercise relative to rest
the slope of the peak exercise ST segment
number of major vessels (0-3) colored by fluoroscopy
thal: 0 = normal; 1 = fixed defect; 2 = reversible defect
heart disease in the patient. It is integer valued 0 = no disease and 1 = disease
```

In [3]:
# Read the heart-disease table from the working directory and preview
# its first five rows.
df = pd.read_csv(filepath_or_buffer='heart.csv')
df.head(n=5)

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
0,63,1,typical,145,233,1,2,150,0,2.3,3,0,fixed,No
1,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3,normal,Yes
2,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2,reversable,Yes
3,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0,normal,No
4,41,0,nontypical,130,204,0,2,172,0,1.4,1,0,normal,No


In [4]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(303, 14)

In [5]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Age        303 non-null    int64  
 1   Sex        303 non-null    int64  
 2   ChestPain  303 non-null    object 
 3   RestBP     303 non-null    int64  
 4   Chol       303 non-null    int64  
 5   Fbs        303 non-null    int64  
 6   RestECG    303 non-null    int64  
 7   MaxHR      303 non-null    int64  
 8   ExAng      303 non-null    int64  
 9   Oldpeak    303 non-null    float64
 10  Slope      303 non-null    int64  
 11  Ca         303 non-null    int64  
 12  Thal       303 non-null    object 
 13  AHD        303 non-null    object 
dtypes: float64(1), int64(10), object(3)
memory usage: 33.3+ KB


In [6]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

Age          0
Sex          0
ChestPain    0
RestBP       0
Chol         0
Fbs          0
RestECG      0
MaxHR        0
ExAng        0
Oldpeak      0
Slope        0
Ca           0
Thal         0
AHD          0
dtype: int64

### Label Encode categorical columns

In [7]:
from sklearn.preprocessing import LabelEncoder

# Encode each string-valued feature with its OWN LabelEncoder and keep the
# fitted encoders, so the integer codes can be mapped back to the original
# category names later (label_encoders[col].classes_ / .inverse_transform).
# Reusing a single encoder for both columns would discard the 'Thal' mapping
# as soon as it is refit on 'ChestPain'.
#
# The target column 'AHD' is intentionally left as its 'No'/'Yes' strings.
#
# NOTE(review): LabelEncoder assigns codes in sorted order of the category
# names, which imposes an arbitrary ordering on these nominal features;
# consider one-hot encoding if that ordering should not influence the model.
label_encoders = {}
for col in ('Thal', 'ChestPain'):
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [8]:
# Preview the first five rows again to inspect the encoded columns.
df.head(n=5)

Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
0,63,1,3,145,233,1,2,150,0,2.3,3,0,0,No
1,67,1,0,160,286,0,2,108,1,1.5,2,3,1,Yes
2,67,1,0,120,229,0,2,129,1,2.6,2,2,2,Yes
3,37,1,1,130,250,0,0,187,0,3.5,3,0,1,No
4,41,0,2,130,204,0,2,172,0,1.4,1,0,1,No


### Split data into features (X) and target (y)

In [9]:
# Target is the 'AHD' column; the feature matrix is every remaining column.
X = df.drop(columns=['AHD'])
y = df['AHD']

### Grid Search

In [10]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [11]:
# Hyper-parameter search must only see training rows: fitting the grid search
# on the full X, y lets the held-out test rows influence which parameters are
# chosen, which makes the later test-set metrics optimistically biased.
# The split below uses the same test_size / random_state as the split cell
# further down, so both produce identical partitions.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

# Candidate settings; GridSearchCV tries every combination with its default
# cross-validation. (gamma has no effect on the 'linear' kernel, so those
# combinations are equivalent to one another.)
param_list = {
    'gamma': [1.0, 0.1, 0.01],
    'kernel': ['linear', 'rbf']
}
svc = SVC()
gs = GridSearchCV(svc, param_grid=param_list)

# fit() returns the fitted search object itself, so `grid` and `gs` are the
# same object; cross-validation runs on the training portion only.
grid = gs.fit(X_train, y_train)

In [12]:
# Pull out the winning hyper-parameters for reuse when building the final model.
gamma_best = grid.best_params_['gamma']
kernel_best = grid.best_params_['kernel']

# Keep the dict as the LAST expression so the notebook actually displays it
# (a bare expression earlier in the cell is evaluated but never shown).
grid.best_params_

### Using best params to create model 

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the rows for evaluation; the fixed seed keeps the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.4,
)

In [14]:
# Build a fresh classifier from the hyper-parameters selected by the grid
# search, then train it on the training partition.
svc_best = SVC(
    kernel=kernel_best,
    gamma=gamma_best,
)
svc_best.fit(X_train, y_train)

In [15]:
# Build a new frame from the test features with two extra columns:
# the true label (y_act) and the model's prediction (y_pred).
pred_df = X_test.assign(
    y_act=y_test,
    y_pred=svc_best.predict(X_test),
)

In [16]:
from sklearn.metrics import confusion_matrix, classification_report

# Score the held-out predictions: confusion matrix first, then the
# per-class precision / recall / F1 text report.
cm, cr = (
    metric(pred_df['y_act'], pred_df['y_pred'])
    for metric in (confusion_matrix, classification_report)
)

print(cm, cr, sep='\n')

[[55  9]
 [10 48]]
              precision    recall  f1-score   support

          No       0.85      0.86      0.85        64
         Yes       0.84      0.83      0.83        58

    accuracy                           0.84       122
   macro avg       0.84      0.84      0.84       122
weighted avg       0.84      0.84      0.84       122

