# Hyperparameter Tuning with GridSearchCV

- GridSearchCV로 hyperparameter tuning을 자동화한다.

In [43]:
from sklearn.datasets import load_breast_cancer 
from sklearn.svm import SVC 
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split 
from sklearn.metrics import recall_score, precision_score
from sklearn.metrics import classification_report, confusion_matrix 

In [44]:
# Load the breast-cancer dataset and pull out the feature matrix and labels.
dataset = load_breast_cancer()
X, Y = dataset.data, dataset.target

# Hold out 30% of the samples for testing; the fixed random_state keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.30, random_state=101
)

# Display the shapes of the four resulting arrays as the cell output.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((398, 30), (171, 30), (398,), (171,))

## Model without GridSearchCV

In [45]:
# Baseline classifier: an SVC left at its default hyperparameters and
# trained on the training split, for comparison with the tuned model.
# NOTE(review): the features are passed in unscaled; SVMs are usually
# scale-sensitive, so a scaling step may be worth considering.
model = SVC()
model.fit(X=X_train, y=y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [46]:
# Predict labels for the held-out test split with the baseline model,
# then print the per-class precision / recall / F1 summary.
y_pred = model.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.95      0.85      0.90        66
           1       0.91      0.97      0.94       105

    accuracy                           0.92       171
   macro avg       0.93      0.91      0.92       171
weighted avg       0.93      0.92      0.92       171



In [47]:
# Print recall for labels 0 and 1, then precision for labels 0 and 1,
# one value per line, treating each label in turn as the positive class.
print(
    *(
        metric(y_test, y_pred, pos_label=label)
        for metric in (recall_score, precision_score)
        for label in (0, 1)
    ),
    sep="\n",
)

0.8484848484848485
0.9714285714285714
0.9491525423728814
0.9107142857142857


## Grid Search 

In [48]:
# Candidate hyperparameters: every combination of C, gamma and kernel is
# evaluated (4 * 2 * 3 = 24 candidates).
# NOTE(review): per the scikit-learn SVC docs, gamma only applies to the
# 'rbf', 'poly' and 'sigmoid' kernels, so the 'linear' candidates appear to
# be evaluated twice — a list-of-dicts grid would avoid that; confirm.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=['scale', 'auto'],
    kernel=['rbf', 'linear', 'sigmoid'],
)

# Cross-validated exhaustive search over param_grid; n_jobs=-1 runs the
# fits in parallel and verbose=3 prints progress messages.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    verbose=3,
    n_jobs=-1,
)

# Run the search on the training split only; the test split stays unseen.
grid.fit(X_train, y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  97 out of 120 | elapsed:    5.0s remaining:    1.2s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:   17.6s finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto'],
                         'kernel': ['rbf', 'linear', 'sigmoid']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [51]:
# Show the hyperparameter combination selected by the grid search.
print(grid.best_params_)
# Predict the held-out test split through the fitted search object.
grid_predictions = grid.predict(X=X_test)

{'C': 100, 'gamma': 'scale', 'kernel': 'linear'}


In [52]:
# Per-class precision / recall / F1 of the tuned model on the test split.
print(classification_report(y_true=y_test, y_pred=grid_predictions))

              precision    recall  f1-score   support

           0       0.97      0.91      0.94        66
           1       0.94      0.98      0.96       105

    accuracy                           0.95       171
   macro avg       0.96      0.95      0.95       171
weighted avg       0.95      0.95      0.95       171

