# GridSearchCV


In [50]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV


### Setting up


In [51]:
# Load the dataset and pull out the feature matrix and label vector
dataObj = load_breast_cancer()
X, y = dataObj.data, dataObj.target

# Hold out 30% of the samples for testing, stratified on the labels
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=1
)

# Standardize: statistics are learned from the training split only and then
# applied to both splits.
# NOTE(review): the scaler is fit on the whole training split before the
# grid search's cross-validation runs, so each CV validation fold has
# contributed to the scaling statistics; a Pipeline would avoid that.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# Base estimator whose hyper-parameters will be searched
svc = SVC(random_state=1)

In [52]:
# Show the classifier's current hyper-parameters as a one-column table
pd.Series(svc.get_params(), name="param").to_frame()

Unnamed: 0,param
C,1.0
break_ties,False
cache_size,200
class_weight,
coef0,0.0
decision_function_shape,ovr
degree,3
gamma,scale
kernel,rbf
max_iter,-1


### Define parameter sets

In [53]:
# Candidate values on a log scale; the same list is used for both C and gamma
param_range = [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3]

# Linear kernel: only C is searched
set1 = dict(C=param_range, kernel=['linear'])

# RBF kernel: every combination of C and gamma is searched
set2 = dict(C=param_range, gamma=param_range, kernel=['rbf'])

# A list of grids: each dict is expanded independently
param_grid = [set1, set2]

### GridSearchCV classifier

In [54]:
# Exhaustive search over param_grid, scored by accuracy with 5-fold CV;
# n_jobs=-1 asks for all available cores.
gs = GridSearchCV(
    svc,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)

In [55]:
# List every parameter of the search object (nested estimator parameters
# appear with the `estimator__` prefix), names padded/truncated to 35 chars.
for name, value in gs.get_params().items():
    print(f"{name:35.35s}: {value!s}")

cv                                 : 5
error_score                        : nan
estimator__C                       : 1.0
estimator__break_ties              : False
estimator__cache_size              : 200
estimator__class_weight            : None
estimator__coef0                   : 0.0
estimator__decision_function_shape : ovr
estimator__degree                  : 3
estimator__gamma                   : scale
estimator__kernel                  : rbf
estimator__max_iter                : -1
estimator__probability             : False
estimator__random_state            : 1
estimator__shrinking               : True
estimator__tol                     : 0.001
estimator__verbose                 : False
estimator                          : SVC(random_state=1)
n_jobs                             : -1
param_grid                         : [{'C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0], 'kernel': ['linear']}, {'C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0], 'gamma': [0.0001, 0.

### Training

In [56]:
# Run the search on the standardized training data
gs.fit(X=X_train_std, y=y_train)

In [57]:
# One row per evaluated parameter combination
df = pd.DataFrame(data=gs.cv_results_)
print(df.shape)
display(df.head(5))

(72, 16)


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,param_gamma,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002202,0.000401,0.001,0.0,0.0001,linear,,"{'C': 0.0001, 'kernel': 'linear'}",0.625,0.625,0.6375,0.632911,0.632911,0.630665,0.004919,30
1,0.002799,0.000401,0.0008,0.0004,0.001,linear,,"{'C': 0.001, 'kernel': 'linear'}",0.9125,0.95,0.9125,0.936709,0.924051,0.927152,0.014508,23
2,0.001786,0.000394,0.0004,0.00049,0.01,linear,,"{'C': 0.01, 'kernel': 'linear'}",0.975,0.9875,0.9375,0.974684,0.974684,0.969873,0.016919,9
3,0.001372,0.000485,0.000599,0.000489,0.1,linear,,"{'C': 0.1, 'kernel': 'linear'}",0.975,1.0,0.9625,0.974684,1.0,0.982437,0.015032,1
4,0.001399,0.00049,0.0004,0.00049,1.0,linear,,"{'C': 1.0, 'kernel': 'linear'}",0.9625,0.9875,0.95,0.974684,0.987342,0.972405,0.014534,8


In [58]:
# Order the results so the best-ranked parameter combinations come first
df = df.sort_values('rank_test_score')
display(df.head(5))

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,param_gamma,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
3,0.001372,0.000485,0.000599,0.000489,0.1,linear,,"{'C': 0.1, 'kernel': 'linear'}",0.975,1.0,0.9625,0.974684,1.0,0.982437,0.015032,1
57,0.002502,0.000448,0.0014,0.00049,100.0,rbf,0.001,"{'C': 100.0, 'gamma': 0.001, 'kernel': 'rbf'}",0.9625,1.0,0.975,0.974684,0.987342,0.979905,0.012754,2
50,0.002103,0.000203,0.001301,0.0004,10.0,rbf,0.01,"{'C': 10.0, 'gamma': 0.01, 'kernel': 'rbf'}",0.95,1.0,0.975,0.974684,0.987342,0.977405,0.016574,3
64,0.002,1e-06,0.0012,0.0004,1000.0,rbf,0.0001,"{'C': 1000.0, 'gamma': 0.0001, 'kernel': 'rbf'}",0.95,1.0,0.975,0.974684,0.987342,0.977405,0.016574,3
56,0.002147,0.00077,0.001853,0.000292,100.0,rbf,0.0001,"{'C': 100.0, 'gamma': 0.0001, 'kernel': 'rbf'}",0.9625,1.0,0.9375,0.974684,1.0,0.974937,0.023717,5


### Refitting
- Note that the grid search has already refit the best model on the entire training set using the best parameters. You can confirm this by inspecting the `refit` setting below.

In [59]:
# Display the search object's `refit` setting. It was not passed explicitly
# when `gs` was constructed, so the value shown is the GridSearchCV default.
gs.refit

True

In [60]:
# Best mean cross-validated score and the parameter combination that achieved it
print(gs.best_score_, gs.best_params_, sep="\n")

0.9824367088607595
{'C': 0.1, 'kernel': 'linear'}


### Test result

In [61]:
# Evaluate the refit best model on the held-out test set.
# The search was fit on standardized features, so both prediction and
# scoring must be given the standardized test matrix (not the raw X_test).
y_pred = gs.predict(X_test_std)
testing_accuracy = gs.score(X_test_std, y_test)
print(f"Testing accuracy: {testing_accuracy:6.3f}")

Testing accuracy:  0.971


In [62]:
# To do this manually: build a fresh, unfitted SVC that carries the winning
# hyper-parameters and train it on the full (standardized) training set.
# Constructing a new estimator, rather than re-fitting gs.best_estimator_
# in place, leaves the search object's own fitted model untouched and
# reproduces the refit step independently.
clf = SVC(**gs.best_estimator_.get_params())
clf.fit(X_train_std, y_train)
testing_accuracy = clf.score(X_test_std, y_test)
print(f"Testing accuracy: {testing_accuracy:6.3f}")

Testing accuracy:  0.971
