# 18.머신러닝 GridSearch와 Pipeline

### 머신러닝 모델의 하이퍼파라미터를 간편하게 변경하면서 모델링: GridSearch
### 여러 가지 전처리와 모델링을 한꺼번에 할 수 있게 하는 Pipeline

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.datasets import load_breast_cancer, load_boston
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [3]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

In [4]:
def 모델평가(model, X, y, **설정):
    """Fit ``model`` on a train split and return its train/test scores.

    The data is divided with ``train_test_split``; every extra keyword
    argument (for example ``random_state`` or ``test_size``) is forwarded
    to it unchanged.

    Args:
        model: Object exposing ``fit(X, y)`` and ``score(X, y)``. It is
            fitted in place, so the caller's object is trained afterwards.
        X: Feature data to split.
        y: Target values aligned with ``X``.
        **설정: Keyword arguments passed straight to ``train_test_split``.

    Returns:
        A ``(train_score, test_score)`` tuple as reported by
        ``model.score`` on the respective split.
    """
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, **설정)
    model.fit(X_tr, y_tr)
    # Train score is evaluated first, then test score, as a single tuple.
    return model.score(X_tr, y_tr), model.score(X_te, y_te)

In [5]:
# Load the breast-cancer and Boston-housing datasets bundled with scikit-learn.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run.
cancer = load_breast_cancer()
boston = load_boston()

In [6]:
# Preview the first three rows of the Boston features with named columns.
pd.DataFrame(data=boston.data, columns=boston.feature_names).head(3)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03


In [7]:
from sklearn.linear_model import LogisticRegression, Ridge

In [8]:
from sklearn.svm import SVC, SVR

In [9]:
# Two-step pipeline: min-max scale the features, then fit an RBF-kernel SVR.
# The step names ('mn_scaler', 'svr') are the prefixes used in the parameter grid.
svr_pipe = Pipeline(
    steps=[
        ('mn_scaler', MinMaxScaler()),
        ('svr', SVR(kernel='rbf')),
    ],
)

In [10]:
# Logarithmic search grid for the 'svr' pipeline step.
# Keys follow the "<step name>__<parameter>" convention; values from 1 upward
# stay integers so the reported best parameters keep their integer form.
설정범위 = {
    'svr__C': [1e-3, 1e-2, 1e-1, 1, 10, 100],
    'svr__gamma': [1e-3, 1e-2, 1e-1, 1, 10, 100],
}

In [11]:
# Grid search over 설정범위 for the SVR pipeline, using 5-fold cross-validation.
튜닝기 = GridSearchCV(
    estimator=svr_pipe,
    param_grid=설정범위,
    cv=5,
)

In [12]:
# Fit the grid search on a train split (random_state=0) and display the
# (train_score, test_score) pair returned by 모델평가.
모델평가(튜닝기, boston.data, boston.target, random_state=0)

(0.9649475594671193, 0.8118756880352304)

In [13]:
# Parameter combination selected by the grid search.
튜닝기.best_params_

{'svr__C': 100, 'svr__gamma': 1}

In [14]:
# Mean cross-validated score achieved by the selected parameter combination.
튜닝기.best_score_

0.8520991095344013

In [15]:
# Show the pipeline configured with the selected parameters.
튜닝기.best_estimator_

Pipeline(memory=None,
         steps=[('mn_scaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ('svr',
                 SVR(C=100, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
                     gamma=1, kernel='rbf', max_iter=-1, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [16]:
# Keep a reference to the best pipeline; this is the object that gets pickled.
best_model = 튜닝기.best_estimator_

위의 보스턴 데이터는 이미 정제되어 있는 상태이므로, 데이터 분석 과정은 여기에서 다루지 않는다.

#### 훈련된 모델을 파일로 내보내기

In [17]:
# pickle 표준 lib
import pickle 

In [18]:
# Serialize the selected pipeline to boston_svm.pkl (relative path).
# Binary mode ('wb') is required because pickle writes bytes.
with open('boston_svm.pkl', 'wb') as 파일:
    pickle.dump(best_model, 파일)

In [19]:
# IPython shell escape (Windows `dir`): list the .pkl files in the working directory.
!dir *.pkl

 C 드라이브의 볼륨에는 이름이 없습니다.
 볼륨 일련 번호: 6E92-1B42

 C:\Users\Gzone^^~~\My_TEST\ALL_ML_AL\TEST 디렉터리

2020-06-26  오후 06:50            44,462 boston_svm.pkl
               1개 파일              44,462 바이트
               0개 디렉터리  60,033,638,400 바이트 남음


In [20]:
# Remove the pipeline name from the namespace before reloading from disk.
# NOTE(review): this deletes `svr_pipe` (the pipeline handed to GridSearchCV),
# not `best_model`, which is the object that was pickled and is reloaded later.
# If the goal is to show the model survives deletion, `best_model` is
# presumably the name that should be deleted — confirm intent.
del svr_pipe

In [21]:
# Expected to fail: `svr_pipe` was deleted in the previous cell, so this
# lookup raises NameError before predict is ever called.
svr_pipe.predict(boston.data)

NameError: name 'svr_pipe' is not defined

In [22]:
# Restore the pipeline from boston_svm.pkl and rebind it to best_model.
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source (here, the file written by this notebook).
with open('boston_svm.pkl','rb') as 파일:
    best_model = pickle.load(파일)

In [23]:
# Score the reloaded pipeline on the full Boston dataset.
# NOTE: the full dataset includes the rows the model was trained on, so this
# figure is not a held-out estimate like the test score from 모델평가.
best_model.score(boston.data, boston.target)

0.9277735876236972