In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [3]:
X = df.drop(columns='target')
y = df['target']

In [4]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3)
X_train.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
287,71,0,1,160,302,0,1,162,0,0.4,2,2,2
499,46,0,2,142,177,0,0,160,1,1.4,0,0,2
247,46,1,0,140,311,0,1,120,1,1.8,1,2,3
32,57,1,0,130,131,0,1,115,1,1.2,1,1,3
835,49,1,2,118,149,0,0,126,0,0.8,2,3,2


In [5]:
## feature scaling
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler

trf1 = ColumnTransformer(transformers=[
    ('scale',MinMaxScaler(), slice(0,13))
], remainder='passthrough')

In [6]:
from sklearn.svm import LinearSVC, SVC

trf2 = SVC(kernel='poly',gamma='scale',degree=6,decision_function_shape='ovr',coef0=3,C=3)

In [7]:
from sklearn.pipeline import Pipeline

pipe = Pipeline([
    ('tnf1',trf1),
    ('tnf2',trf2)
])

In [8]:
pipe.fit(X_train,y_train)

In [9]:
from sklearn.metrics import classification_report

y_pred = pipe.predict(X_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      0.95      0.98       167
           1       0.95      1.00      0.97       141

    accuracy                           0.97       308
   macro avg       0.97      0.98      0.97       308
weighted avg       0.98      0.97      0.97       308



## Cross Validation

In [10]:
from sklearn.model_selection import cross_val_score,KFold

cv = KFold(n_splits=10,shuffle=True)
cross_val_score(pipe, X,y,scoring='accuracy',cv=cv,n_jobs=-1,).mean()

1.0

## Hyperparameter Tunning

In [11]:
from sklearn.model_selection import RandomizedSearchCV

In [12]:
param_grid = {
    'tnf2__C':[1,2,3,4,5,6,7,8,9,0],
    'tnf2__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'tnf2__degree': [3,4,5,6,8,10],
    'tnf2__gamma': ['scale', 'auto'],
    'tnf2__coef0': [0.1,0.01,0.3,0.2],
    'tnf2__decision_function_shape': ['ovo', 'ovr']
}

In [13]:
gscv = RandomizedSearchCV(pipe, param_distributions=param_grid, n_iter=20, scoring='accuracy', n_jobs=-1,cv=5) 

In [14]:
gscv.fit(X_train,y_train)

10 fits failed out of a total of 100.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/pipeline.py", line 382, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/svm/_base.py", line 251, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/usr/local/lib/python3.8/dist-packages/sklearn/svm/_base.py", line 333, in _dense_fit
    ) = libsvm.fit(
  F

In [15]:
import pickle

pathurl = open('pipe.pkl','wb')
pickle.dump(pipe, pathurl)
pathurl.close()

In [16]:
X_test.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
151,54,1,1,192,283,0,0,195,0,0.0,2,1,3
429,47,1,2,108,243,0,1,152,0,0.0,2,0,2
478,39,1,2,140,321,0,0,182,0,0.0,2,0,2
412,57,1,0,150,276,0,0,112,1,0.6,1,1,1
147,41,1,0,110,172,0,0,158,0,0.0,2,0,3


In [17]:
y_test.head()

151    0
429    0
478    1
412    0
147    0
Name: target, dtype: int64