Since we generally want the model with the best scores, tuning the hyperparameters may help improve them.

Using the Breast Cancer Wisconsin dataset

In [35]:
import pandas as pd                                         # import pandas

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer             # load the breast cancer from sklearn

from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

In [36]:
# Load the breast cancer dataset and separate the features from the labels.
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target

# Hold out 30% of the samples for evaluation; the fixed random_state keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1010,
)

# Baseline: an SVC with default hyperparameters (no grid search yet).
model = SVC().fit(X_train, y_train)

# Score the baseline on the held-out set; the accuracy is the cell's output.
predictions = model.predict(X_test)
metrics.accuracy_score(y_test, predictions)




0.9415204678362573

In [37]:
# Per-class precision / recall / F1 for the untuned baseline model.
baseline_report = classification_report(y_test, predictions)
print(baseline_report)

              precision    recall  f1-score   support

           0       1.00      0.86      0.93        73
           1       0.91      1.00      0.95        98

    accuracy                           0.94       171
   macro avg       0.95      0.93      0.94       171
weighted avg       0.95      0.94      0.94       171



**Use Grid search**

In [38]:
# Hyperparameter grid explored by the grid search below.
#
# Each key must appear exactly once: a dict literal with a repeated key
# silently keeps only the last value, so a second "gamma" entry would
# discard the first one without any warning.
#
# NOTE(review): per the scikit-learn SVC documentation, `gamma` is the kernel
# coefficient for the 'rbf', 'poly' and 'sigmoid' kernels only. With
# kernel="linear" the two gamma settings should therefore yield equivalent
# models -- consider adding "rbf" to "kernel" (and numeric gamma values) or
# dropping "gamma" from this grid.
param_grid = {
    "C": [0.1, 1, 10, 100],
    "gamma": ["scale", "auto"],
    "kernel": ["linear"],
}
param_grid

{'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto'], 'kernel': ['linear']}

In [41]:
# Cross-validated search over every combination in param_grid.
# refit=True asks for the best combination to be refit so that `grid` can be
# used directly for prediction; n_jobs=-1 requests parallel fits.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    refit=True,
    verbose=3,
    n_jobs=-1,
)
grid.fit(X_train, y_train)


Fitting 5 folds for each of 8 candidates, totalling 40 fits


GridSearchCV(estimator=SVC(), n_jobs=-1,
             param_grid={'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto'],
                         'kernel': ['linear']},
             verbose=3)

In [46]:
# Predict on the held-out set with the tuned model, then report which
# hyperparameter combination won the search.
grid_predictions = grid.predict(X_test)
print(grid.best_params_)

{'C': 100, 'gamma': 'scale', 'kernel': 'linear'}


In [47]:
# Per-class precision / recall / F1 for the grid-search-tuned model.
tuned_report = classification_report(y_test, grid_predictions)
print(tuned_report)

              precision    recall  f1-score   support

           0       0.95      0.96      0.95        73
           1       0.97      0.96      0.96        98

    accuracy                           0.96       171
   macro avg       0.96      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171



Accuracy on the held-out test set improves from 0.94 to 0.96 after tuning.