# Parameter tuning for classification models

## GridSearchCV


In [None]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV


### Setting up


In [None]:
# Breast-cancer dataset: feature matrix and class labels
dataObj = load_breast_cancer()
X, y = dataObj.data, dataObj.target

# Hold out 30% of the samples for testing, stratifying the split on y
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=1, stratify=y
)

# Fit the scaler on the training split only, then apply the same
# transformation to both the training and the test split
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

# Base classifier used as the estimator for the grid search
svc = SVC(random_state=1)

In [None]:
# Tabulate the classifier's current parameters: one row per parameter,
# with the values in a single "param" column
pd.DataFrame(svc.get_params(), index=["param"]).transpose()

### Define parameter sets

In [None]:
# Candidate values (powers of ten from 1e-4 to 1e3) shared by C and gamma
param_range = [1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3]

# Linear kernel: only C is varied
set1 = dict(C=param_range, kernel=['linear'])

# RBF kernel: C and gamma are both varied
set2 = dict(C=param_range, gamma=param_range, kernel=['rbf'])

# The full grid is the list of the two sub-grids
param_grid = [set1, set2]

### GridSearchCV classifier

In [None]:
# Grid search over param_grid with 5-fold cross-validation, scored by
# accuracy; n_jobs=-1 asks for all available processors
gs = GridSearchCV(svc, param_grid, scoring='accuracy', n_jobs=-1, cv=5)

In [None]:
# Print every parameter of the search object with its current value;
# names are left-aligned and padded/truncated to 35 characters
for name, value in gs.get_params().items():
    print(f"{name[:35]:<35}: {value!s}")

### Training

In [None]:
# Run the cross-validated search on the standardized training split
gs.fit(X=X_train_std, y=y_train)

In [None]:
# Load the search's cv_results_ into a table and preview it
df = pd.DataFrame(data=gs.cv_results_)
print(df.shape)
display(df.head(n=5))

In [None]:
# Order rows by ascending rank so the top-ranked combinations come first
df = df.sort_values(by='rank_test_score')
display(df.head(n=5))

### Refitting
- Note that the grid search has already refit the best estimator on the entire training set using the best parameters found. You can confirm this by checking the `refit` setting below.

In [None]:
# Show the search object's `refit` setting (it was not passed explicitly
# when `gs` was constructed, so this is the library default)
gs.refit

In [None]:
# Best cross-validation score, then the parameter combination behind it
print(gs.best_score_, gs.best_params_, sep="\n")

### Test result

In [None]:
# Evaluate on the held-out test split by calling score/predict directly
# on the fitted search object
testing_accuracy = gs.score(X_test_std, y_test)
y_pred = gs.predict(X_test_std)
print(f"Testing accuracy: {testing_accuracy:6.3f}")

In [None]:
# To do this manually: build a fresh, unfitted SVC from the base
# classifier's settings overridden by the best parameters found, then fit
# it on the full training split. Re-fitting gs.best_estimator_ directly
# would only re-train (and mutate) the model object held by the search,
# so it would not show an independent manual refit.
clf = SVC(**{**svc.get_params(), **gs.best_params_})
clf.fit(X_train_std, y_train)
testing_accuracy = clf.score(X_test_std, y_test)
print(f"Testing accuracy: {testing_accuracy:6.3f}")