# model_GS_GB

#### Grid_Search
https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter  
https://www.codexa.net/hyperparameter-tuning-python/

In [1]:
# !jupyter nbconvert --to python model_GS_GB.ipynb

In [1]:
# warningの無視
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

from sklearn.metrics import f1_score

%matplotlib inline

In [3]:
def importances(model, feature_names=None, top_n=20):
    """Plot the largest feature importances of a fitted model as horizontal bars.

    Parameters
    ----------
    model
        Fitted estimator exposing ``feature_importances_``.
    feature_names : sequence of str, optional
        Labels positionally aligned with ``model.feature_importances_``.
        When omitted, the labels are resolved in this order:

        1. the notebook-level ``feature_X`` variable, if it is defined
           (this is the original behaviour);
        2. ``model.feature_names_in_``, if the model has that attribute;
        3. the positional indices of the features.
    top_n : int, default 20
        Number of features to show, counted from the most important.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be a positive integer")

    scores = np.asarray(model.feature_importances_)

    if feature_names is None:
        try:
            # Backward compatibility: earlier callers relied on this global.
            feature_names = feature_X
        except NameError:
            feature_names = getattr(model, "feature_names_in_", None)

    if feature_names is None:
        labels = np.arange(scores.size).astype(str)
    else:
        labels = np.asarray(feature_names)

    # argsort is ascending, so the tail holds the top_n largest scores and
    # the most important feature ends up as the top bar.
    top = np.argsort(scores)[-top_n:]
    positions = range(len(top))

    plt.barh(positions, scores[top], align='center')
    plt.yticks(positions, labels[top])
    plt.title('decision tree feature importance')
    plt.xlabel('feature importance')
    plt.ylabel('variable')
    plt.show()

#### ========= 要変更 (change required: edit this section per model) =========

In [4]:
# Where the fitted model is stored: the pickle is written to
# path + modelName + '.pickle'.
path = "../data/models/"
modelName = "model_GS_GB"

#### ========================

In [5]:
# Load the pre-built feature frames and their targets for each split.
# NOTE: pd.read_pickle unpickles arbitrary objects, so these files must
# come from a trusted source.
train_X, valid_X, test_X = (
    pd.read_pickle(f'../data/feature/{split}_X.pickle')
    for split in ('train', 'valid', 'test')
)

train_y, valid_y, test_y = (
    pd.read_pickle(f'../data/feature/{split}_y.pickle')
    for split in ('train', 'valid', 'test')
)

In [6]:
"""ハイパーパラメータの max_depth(木の深さ), 
# n_estimators(決定木の数)に関してグリッドサーチを行う
# グリッドサーチに使用するパラメータの値を用意
"""
# seed is reused as random_state for both the estimator and the CV splitter.
seed = 1

# Candidate values explored by the grid search (3 x 3 = 9 combinations).
params = dict(
    max_depth=[10, 20, 30],
    n_estimators=[10, 100, 500],
)

#### ========= 要変更 (change required: edit this section per model) =========

In [7]:
# Base estimator handed to the grid search; random_state is pinned to `seed`.
model = GradientBoostingClassifier(random_state=seed)

#### ========================

In [8]:
# Exhaustive search over `params`, scored with F1 on a shuffled 5-fold split.
# NOTE(review): n_jobs=1 runs every fit sequentially; consider a higher value
# if the search is too slow - confirm memory headroom first.
grid = GridSearchCV(
    model,
    params,
    scoring='f1',
    cv=KFold(n_splits=5, shuffle=True, random_state=seed),
    n_jobs=1,
    verbose=3,
    return_train_score=True,
)

In [9]:
# Run the search on the training split (9 candidates x 5 folds = 45 fits).
grid.fit(train_X, train_y)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END ..................max_depth=10, n_estimators=10; total time=  13.6s
[CV 2/5] END ..................max_depth=10, n_estimators=10; total time=  13.7s
[CV 3/5] END ..................max_depth=10, n_estimators=10; total time=  13.6s
[CV 4/5] END ..................max_depth=10, n_estimators=10; total time=  13.7s
[CV 5/5] END ..................max_depth=10, n_estimators=10; total time=  13.6s
[CV 1/5] END .................max_depth=10, n_estimators=100; total time= 2.3min


KeyboardInterrupt: 

In [None]:
# Show the estimator that the grid search selected as best.
print(grid.best_estimator_)

In [None]:
# Use the best estimator found by the grid search as the model.
# This rebinds `model`, which previously held the base estimator.
model = grid.best_estimator_

In [None]:
# Persist the model as <path><modelName>.pickle.
with open(f"{path}{modelName}.pickle", 'wb') as f:
    pickle.dump(model, f)

#### ========================

In [35]:
# Predict labels for test_X with the current model.
pred = model.predict(test_X)

In [3]:
# F1 score of `pred` measured against `test_y`.
print(f1_score(test_y, pred))

#### ========================

In [44]:
# Keep the variable names so the feature-importance plot can label its bars.
# importances() reads `feature_X` as a global, so this cell must run before
# that function is called.
feature_X = train_X.columns
feature_y = train_y.name

In [4]:
# Plot the feature importances of the current model.
importances(model)