In [1]:
import os
import pickle
import time
import warnings

import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

warnings.filterwarnings('ignore')

In [2]:
# Fetch the breast cancer dataset provided by sklearn.datasets.
cancer = load_breast_cancer()

# Tabular view of the feature matrix, one named column per feature.
data_df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)

In [3]:
# Hold out 33% of the samples as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.33,
    random_state=100,
)

In [4]:
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()

# Train a gradient boosting classifier with default hyperparameters and a fixed seed.
gb_clf = GradientBoostingClassifier(random_state=0)
gb_clf.fit(X_train, y_train)

# Ensure the output directory exists so open(..., 'wb') does not raise
# FileNotFoundError when the notebook runs in a fresh checkout.
os.makedirs('models', exist_ok=True)

with open('models/gbm_model.pkl', 'wb') as f:
    pickle.dump(gb_clf, f)

# NOTE: pickle.load can execute arbitrary code. It is acceptable here only
# because the file was written by this cell a few lines above; never load
# pickles from an untrusted source.
with open('models/gbm_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Score the round-tripped model once the file handle has been released.
gb_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, gb_pred)
print(f'GBM 정확도:{accuracy}')

# The reported time covers training, the pickle round trip and prediction.
print(f'GBM 수행시간 : {time.perf_counter() - start_time}')

GBM 정확도:0.9468085106382979
GBM 수행시간 : 0.8647432327270508


In [6]:
from sklearn.model_selection import GridSearchCV

# Search space: two values per hyperparameter, i.e. four candidate combinations.
params = dict(
    n_estimators=[100, 500],
    learning_rate=[0.05, 0.1],
)

# 2-fold cross-validated grid search built around the GBM estimator from the
# earlier cell; verbose=1 prints the number of fits being run.
grid_cv = GridSearchCV(estimator=gb_clf, param_grid=params, cv=2, verbose=1)
grid_cv.fit(X_train, y_train)

# Report the winning parameter combination, its mean CV score and the refit estimator.
print(f'최적 하이퍼 파라미터:\n{grid_cv.best_params_}')
print(f'최고 예측 정확도:{grid_cv.best_score_}')
print(f'{grid_cv.best_estimator_}')

Fitting 2 folds for each of 4 candidates, totalling 8 fits
최적 하이퍼 파라미터:
{'learning_rate': 0.1, 'n_estimators': 100}
최고 예측 정확도:0.9448746211077432
GradientBoostingClassifier(random_state=0)


In [7]:
# Ensure the output directory exists so the dump below does not raise
# FileNotFoundError when the notebook runs in a fresh checkout.
os.makedirs('models', exist_ok=True)

# Persist the whole fitted GridSearchCV object, not just the best estimator.
with open('models/gbm_grid_model.pkl', 'wb') as f:
    pickle.dump(grid_cv, f)

# NOTE: pickle.load can execute arbitrary code. It is acceptable here only
# because the file was written by this cell just above; never load pickles
# from an untrusted source.
with open('models/gbm_grid_model.pkl', 'rb') as f:
    model = pickle.load(f)

# Evaluate the best estimator found by the search on the held-out test set.
pred = model.best_estimator_.predict(X_test)
accuracy = accuracy_score(y_test, pred)
# Reuse the value computed above instead of calling accuracy_score a second time.
print(f'GBM 정확도:{accuracy}')


GBM 정확도:0.9468085106382979
