In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, RepeatedKFold, train_test_split, KFold
from sklearn.model_selection import cross_val_score, RandomizedSearchCV


from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import optuna

In [2]:
df = pd.read_csv("heart.csv")

In [3]:
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [18]:
X = df.drop("output", axis = 1)
y = df.output

X_train, X_test, y_train, y_test = train_test_split(
 X, y, test_size=0.3, random_state=0)

In [17]:
#without hyper parameter tuning
from sklearn.metrics import f1_score
from sklearn import svm


model = svm.SVC()

 
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
f1 = f1_score(y_test, y_pred, average='binary')
f1

0.7368421052631579

Grid Seach Part

In [4]:
param_grid = {'kernel': ['linear', 'poly','rbf'],
            'C': [0, 10],
            'gamma' : ['scale', 'auto'],
            'degree' : list(range(1, 3))


}

In [6]:
grid_search = GridSearchCV(SVC(), param_grid, cv=5, scoring="f1") 
grid_search.fit(X_train, y_train)


60 fits failed out of a total of 120.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
60 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/miniconda3/lib/python3.9/site-packages/sklearn/svm/_base.py", line 180, in fit
    self._validate_params()
  File "/opt/miniconda3/lib/python3.9/site-packages/sklearn/base.py", line 581, in _validate_params
    validate_parameter_constraints(
  File "/opt/miniconda3/lib/python3.9/site-packages/sklearn/utils/_param_validation.py", line 97, in validate_parameter_constraints
    raise InvalidParameter

In [7]:
print("Best parameters: {}".format(grid_search.best_params_))
print("Best F1 score: {:.2f}".format(grid_search.best_score_))

Best parameters: {'C': 10, 'degree': 1, 'gamma': 'auto', 'kernel': 'poly'}
Best cross-validation score: 0.85


In [8]:
#Best parameters: {'C': 10, 'degree': 1, 'gamma': 'auto', 'kernel': 'poly'}
#Best cross-validation score: 0.82


print("Test set score: {:.2f}".format(grid_search.score(X_test, y_test)))

Test set score: 0.84


In [9]:
print("Best estimator:\n{}".format(grid_search.best_estimator_))

Best estimator:
SVC(C=10, degree=1, gamma='auto', kernel='poly')


In [15]:
model = svm.SVC(C=10, degree=1, gamma='auto', kernel='poly')

model.fit(X_train,y_train)
y_pred = model.predict(X_test)
f1 = f1_score(y_test, y_pred, average='binary')
f1

0.8431372549019609

Optuna Part

In [8]:
from sklearn.metrics import f1_score

def objective(trial):
 
    
    C = trial.suggest_int('C', 1, 10)
    kernel = trial.suggest_categorical("kernel", ['linear', 'poly','rbf'])

    degree =trial.suggest_int("degree", 1, 3)
    gamma = trial.suggest_categorical("gamma", ["scale","auto"])

  
    model= SVC(kernel=kernel, C=C, degree = degree, gamma= gamma, probability = True)
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred, average='binary')
    
    return f1 


In [9]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

trial = study.best_trial

[32m[I 2023-03-21 22:49:25,513][0m A new study created in memory with name: no-name-a509c000-206b-41f7-93d3-109bc332a76f[0m
[32m[I 2023-03-21 22:49:25,537][0m Trial 0 finished with value: 0.6814814814814814 and parameters: {'C': 10, 'kernel': 'rbf', 'degree': 1, 'gamma': 'auto'}. Best is trial 0 with value: 0.6814814814814814.[0m
[32m[I 2023-03-21 22:49:25,879][0m Trial 1 finished with value: 0.8461538461538461 and parameters: {'C': 4, 'kernel': 'poly', 'degree': 1, 'gamma': 'auto'}. Best is trial 1 with value: 0.8461538461538461.[0m
[32m[I 2023-03-21 22:49:27,487][0m Trial 2 finished with value: 0.8316831683168316 and parameters: {'C': 2, 'kernel': 'linear', 'degree': 2, 'gamma': 'auto'}. Best is trial 1 with value: 0.8461538461538461.[0m
[32m[I 2023-03-21 22:49:27,505][0m Trial 3 finished with value: 0.6814814814814814 and parameters: {'C': 5, 'kernel': 'rbf', 'degree': 1, 'gamma': 'auto'}. Best is trial 1 with value: 0.8461538461538461.[0m
[32m[I 2023-03-21 22:49:27,

In [16]:
#parameters: {'C': 4, 'kernel': 'poly', 'degree': 1, 'gamma': 'auto'}. Best is trial 1 with value: 0.8461538461538461.
from sklearn.model_selection import cross_val_score
from sklearn import svm
model = svm.SVC(C=4, degree=1, gamma='auto', kernel='poly')
 
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
f1 = f1_score(y_test, y_pred, average='binary')
f1

0.8461538461538461