In [1]:
from sklearn.datasets import load_boston, load_diabetes # 회귀용
from sklearn.datasets import load_iris, load_wine # 분류용
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [2]:
# Fix the seed up front so the split below is reproducible.
seed = 1234
X, y = load_iris(return_X_y=True)
# No explicit shuffle beforehand: train_test_split shuffles by default.
# Hold out 20% of the samples as the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

In [3]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test data never influences the fit.
sts = StandardScaler().fit(X_train)
X_train_scaled = sts.transform(X_train)
X_test_scaled = sts.transform(X_test)

In [4]:
# Baseline: logistic regression with default settings, scored by 5-fold
# cross-validated accuracy on the scaled training split.
clf = LogisticRegression()
scores = cross_validate(
    estimator=clf,
    X=X_train_scaled,
    y=y_train,
    scoring='accuracy',
    cv=5,
)

In [5]:
# Average the per-fold accuracy into a single baseline number.
scores['test_score'].mean()

0.9416666666666667

In [6]:
# Show which entries cross_validate returned (timings and per-fold test scores).
scores.keys()

dict_keys(['fit_time', 'score_time', 'test_score'])

In [7]:
# List the estimator's parameter names; the grid-search keys used later
# ('max_iter', 'C') are taken from this list.
clf.get_params().keys()

dict_keys(['C', 'class_weight', 'dual', 'fit_intercept', 'intercept_scaling', 'l1_ratio', 'max_iter', 'multi_class', 'n_jobs', 'penalty', 'random_state', 'solver', 'tol', 'verbose', 'warm_start'])

In [8]:
# Candidate hyperparameter values; the search evaluates every combination
# (3 values of max_iter x 2 values of C).
params = dict(
    max_iter=[50, 100, 150],
    C=[1.0, 1.2],
)
# Grid search scored by 5-fold cross-validated accuracy. refit=True retrains
# the best configuration after the search; n_jobs=-1 runs candidates in parallel.
search = GridSearchCV(
    estimator=clf,
    param_grid=params,
    scoring='accuracy',
    cv=5,
    refit=True,
    n_jobs=-1,
)

In [9]:
# Run the grid search on the scaled training split. The best_* attributes
# read in the following cells only exist after this call.
search.fit(X_train_scaled, y_train)

GridSearchCV(cv=5, estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': [1.0, 1.2], 'max_iter': [50, 100, 150]},
             scoring='accuracy')

In [10]:
# Take the estimator that was refit with the best parameter combination
# (available because the search was created with refit=True) and display it.
new_clf = search.best_estimator_
new_clf

LogisticRegression(C=1.2, max_iter=50)

In [11]:
# Mean cross-validated accuracy achieved by the best parameter combination.
search.best_score_

0.95

In [12]:
# Predict class labels for the held-out (scaled) test split with the tuned model.
# NOTE(review): these predictions are never compared against y_test in the
# visible cells — consider scoring them to report held-out accuracy.
new_clf.predict(X_test_scaled)

array([1, 1, 2, 0, 1, 0, 0, 0, 1, 2, 1, 0, 2, 1, 0, 1, 2, 0, 2, 1, 1, 1,
       1, 1, 2, 0, 2, 1, 2, 0])

In [13]:
# Re-score the tuned model on the scaled training split, this time with 10 folds.
# NOTE(review): the baseline and the grid search used cv=5, so this mean is not
# directly comparable to those numbers — confirm that 10 folds is intended.
scores = cross_validate(
    estimator=new_clf,
    X=X_train_scaled,
    y=y_train,
    scoring='accuracy',
    cv=10,
)
scores['test_score'].mean()

0.9416666666666668

```
search.fit()을 꼭 호출해야 하나? 그냥 clf = search.best_estimator_ 하면 안 되나? 안 됨! .fit()을 호출해야 그리드 탐색이 실제로 수행되고, 그 결과로 best_estimator_ 속성이 만들어짐. (호출 전에는 해당 속성이 존재하지 않음.)

refit의 용도는?: 최적의 하이퍼파라미터 조합을 찾은 뒤, 그 조합으로 fit()에 전달한 학습 데이터 전체에 대해 모델을 다시 학습시킴. refit=False이면 .best_estimator_ 속성이 만들어지지 않음. 기본값이 True이므로 따로 지정하지 않아도 됨.
```