####  Test hyperopt


### Case.1 : 　Classification Case: Quote from "Data Analysis Technic to win at  Kaggle"

In [31]:
#!pip install hyperopt

In [32]:
# ---------------------------------
# データ等の準備
# ----------------------------------
import numpy as np
import pandas as pd

# train_xは学習データ、train_yは目的変数、test_xはテストデータ
# pandasのDataFrame, Seriesで保持します。（numpyのarrayで保持することもあります）

train = pd.read_csv('./sample-data/train_preprocessed.csv')
train_x = train.drop(['target'], axis=1)
train_y = train['target']
#test_x = pd.read_csv('./input/sample-data/test_preprocessed.csv')

# 学習データを学習データとバリデーションデータに分ける
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

# xgboostによる学習・予測を行うクラス
import xgboost as xgb


class Model:

    def __init__(self, params=None):
        self.model = None
        if params is None:
            self.params = {}
        else:
            self.params = params

    def fit(self, tr_x, tr_y, va_x, va_y):
        params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
        params.update(self.params)
        num_round = 50
        dtrain = xgb.DMatrix(tr_x, label=tr_y)
        dvalid = xgb.DMatrix(va_x, label=va_y)
        watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
        self.model = xgb.train(params, dtrain, num_round, evals=watchlist)

    def predict(self, x):
        data = xgb.DMatrix(x)
        pred = self.model.predict(data)
        return pred


# -----------------------------------
# 探索するパラメータの空間の指定
# -----------------------------------
# hp.choiceでは、複数の選択肢から選ぶ
# hp.uniformでは、下限・上限を指定した一様分布から抽出する。引数は下限・上限
# hp.quniformでは、下限・上限を指定した一様分布のうち一定の間隔ごとの点から抽出する。引数は下限・上限・間隔
# hp.loguniformでは、下限・上限を指定した対数が一様分布に従う分布から抽出する。引数は下限・上限の対数をとった値

from hyperopt import hp

space = {
    'activation': hp.choice('activation', ['prelu', 'relu']),
    'dropout': hp.uniform('dropout', 0, 0.2),
    'units': hp.quniform('units', 32, 256, 32),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.01)),
}

# -----------------------------------
# hyperoptを使ったパラメータ探索
# -----------------------------------
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import log_loss


def score(params):
    # パラメータを与えたときに最小化する評価指標を指定する
    # 具体的には、モデルにパラメータを指定して学習・予測させた場合のスコアを返すようにする

    # max_depthの型を整数型に修正する
    params['max_depth'] = int(params['max_depth'])

    # Modelクラスを定義しているものとする
    # Modelクラスは、fitで学習し、predictで予測値の確率を出力する
    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    score = log_loss(va_y, va_pred)
    print(f'params: {params}, logloss: {score:.4f}')

    # 情報を記録しておく
    history.append((params, score))

    return {'loss': score, 'status': STATUS_OK}


# 探索するパラメータの空間を指定する
space = {
    'min_child_weight': hp.quniform('min_child_weight', 1, 5, 1),
    'max_depth': hp.quniform('max_depth', 3, 9, 1),
    'gamma': hp.quniform('gamma', 0, 0.4, 0.1),
}

# hyperoptによるパラメータ探索の実行
max_evals = 10
trials = Trials()
history = []
fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=max_evals)

# 記録した情報からパラメータとスコアを出力する
# （trialsからも情報が取得できるが、パラメータの取得がやや行いづらいため）
history = sorted(history, key=lambda tpl: tpl[1])
best = history[0]
print(f'best params:{best[0]}, score:{best[1]:.4f}')


[0]	train-error:0.154	eval-error:0.1624                                                                                

[1]	train-error:0.1476	eval-error:0.1604                                                                               

[2]	train-error:0.138533	eval-error:0.1512                                                                             

[3]	train-error:0.1416	eval-error:0.1516                                                                               

[4]	train-error:0.1348	eval-error:0.1456                                                                               

[5]	train-error:0.129867	eval-error:0.1428                                                                             

[6]	train-error:0.1272	eval-error:0.1404                                                                               

[7]	train-error:0.122267	eval-error:0.1424                                                                             

[8]	train-error:0.118933	eval-er

  if getattr(data, 'base', None) is not None and \



[16]	train-error:0.094133	eval-error:0.1196                                                                            

[17]	train-error:0.093467	eval-error:0.1192                                                                            

[18]	train-error:0.0908	eval-error:0.1208                                                                              

[19]	train-error:0.088533	eval-error:0.1188                                                                            

[20]	train-error:0.084533	eval-error:0.12                                                                              

[21]	train-error:0.082667	eval-error:0.1196                                                                            

[22]	train-error:0.078667	eval-error:0.1164                                                                            

[23]	train-error:0.077867	eval-error:0.114                                                                             

[24]	train-error:0.0752	eval-err

  if getattr(data, 'base', None) is not None and \



[16]	train-error:0.089467	eval-error:0.116

[17]	train-error:0.088533	eval-error:0.112                                                                             

[18]	train-error:0.083733	eval-error:0.112                                                                             

[19]	train-error:0.082933	eval-error:0.1096                                                                            

[20]	train-error:0.0812	eval-error:0.108                                                                               

[21]	train-error:0.081467	eval-error:0.1092                                                                            

[22]	train-error:0.079867	eval-error:0.1108                                                                            

[23]	train-error:0.0764	eval-error:0.1072                                                                              

[24]	train-error:0.074267	eval-error:0.11                                                                    

  if getattr(data, 'base', None) is not None and \



[16]	train-error:0.074933	eval-error:0.1136                                                                            

[17]	train-error:0.073067	eval-error:0.1108                                                                            

[18]	train-error:0.0696	eval-error:0.112                                                                               

[19]	train-error:0.0696	eval-error:0.1092                                                                              

[20]	train-error:0.067067	eval-error:0.11                                                                              

[21]	train-error:0.066267	eval-error:0.1084                                                                            

[22]	train-error:0.063333	eval-error:0.1096                                                                            

[23]	train-error:0.061733	eval-error:0.1116                                                                            

[24]	train-error:0.0568	eval-err

  if getattr(data, 'base', None) is not None and \



[10]	train-error:0.0532	eval-error:0.1112                                                                              

[11]	train-error:0.051067	eval-error:0.114                                                                             

[12]	train-error:0.0456	eval-error:0.1092                                                                              

[13]	train-error:0.042133	eval-error:0.1092                                                                            

[14]	train-error:0.038533	eval-error:0.1136                                                                            

[15]	train-error:0.0348	eval-error:0.112                                                                               

[16]	train-error:0.029867	eval-error:0.1128                                                                            

[17]	train-error:0.029333	eval-error:0.112                                                                             

[18]	train-error:0.026533	eval-e

  if getattr(data, 'base', None) is not None and \



[15]	train-error:0.096667	eval-error:0.12

[16]	train-error:0.094133	eval-error:0.1196                                                                            

[17]	train-error:0.093467	eval-error:0.1192                                                                            

[18]	train-error:0.0908	eval-error:0.1208                                                                              

[19]	train-error:0.088533	eval-error:0.1188                                                                            

[20]	train-error:0.084533	eval-error:0.12                                                                              

[21]	train-error:0.082667	eval-error:0.1196                                                                            

[22]	train-error:0.078667	eval-error:0.1164                                                                            

[23]	train-error:0.077867	eval-error:0.114                                                                    

  if getattr(data, 'base', None) is not None and \



[10]	train-error:0.0256	eval-error:0.1184                                                                              

[11]	train-error:0.024267	eval-error:0.1156                                                                            

[12]	train-error:0.02	eval-error:0.1124                                                                                

[13]	train-error:0.0184	eval-error:0.1116                                                                              

[14]	train-error:0.0152	eval-error:0.1088                                                                              

[15]	train-error:0.012933	eval-error:0.1096                                                                            

[16]	train-error:0.0124	eval-error:0.11                                                                                

[17]	train-error:0.010133	eval-error:0.1084                                                                            

[18]	train-error:0.0088	eval-err

  if getattr(data, 'base', None) is not None and \



[11]	train-error:0.0304	eval-error:0.1156                                                                              

[12]	train-error:0.026533	eval-error:0.1136                                                                            

[13]	train-error:0.0232	eval-error:0.1144                                                                              

[14]	train-error:0.0204	eval-error:0.1112                                                                              

[15]	train-error:0.0196	eval-error:0.112                                                                               

[16]	train-error:0.017467	eval-error:0.1092                                                                            

[17]	train-error:0.0152	eval-error:0.11                                                                                

[18]	train-error:0.012	eval-error:0.1088                                                                               

[19]	train-error:0.010533	eval-e

  if getattr(data, 'base', None) is not None and \



[15]	train-error:0.0772	eval-error:0.116                                                                               

[16]	train-error:0.075067	eval-error:0.1136                                                                            

[17]	train-error:0.073067	eval-error:0.1108                                                                            

[18]	train-error:0.069733	eval-error:0.112                                                                             

[19]	train-error:0.069733	eval-error:0.1092                                                                            

[20]	train-error:0.0672	eval-error:0.11                                                                                

[21]	train-error:0.066267	eval-error:0.1084                                                                            

[22]	train-error:0.0636	eval-error:0.1096                                                                              

[23]	train-error:0.061733	eval-e

  if getattr(data, 'base', None) is not None and \



[16]	train-error:0.069733	eval-error:0.11                                                                              

[17]	train-error:0.0692	eval-error:0.1088                                                                              

[18]	train-error:0.0664	eval-error:0.1024                                                                              

[19]	train-error:0.064	eval-error:0.1036                                                                               

[20]	train-error:0.062133	eval-error:0.1028                                                                            

[21]	train-error:0.059733	eval-error:0.1024                                                                            

[22]	train-error:0.055867	eval-error:0.1012                                                                            

[23]	train-error:0.055333	eval-error:0.102                                                                             

[24]	train-error:0.053733	eval-e

  if getattr(data, 'base', None) is not None and \



[10]	train-error:0.034	eval-error:0.1148                                                                               

[11]	train-error:0.0304	eval-error:0.1156                                                                              

[12]	train-error:0.026533	eval-error:0.1136                                                                            

[13]	train-error:0.0232	eval-error:0.1144                                                                              

[14]	train-error:0.0204	eval-error:0.1112                                                                              

[15]	train-error:0.0196	eval-error:0.112                                                                               

[16]	train-error:0.017467	eval-error:0.1092                                                                            

[17]	train-error:0.0152	eval-error:0.11                                                                                

[18]	train-error:0.012	eval-erro

### Case2-1 : Regression / predict Tokyo-housing /　target is price:　Modified by Bashii 2020-04-16

In [33]:
# ---------------------------------
# データ等の準備
# ----------------------------------
import numpy as np
import pandas as pd

# train_xは学習データ、train_yは目的変数
# pandasのDataFrame, Seriesで保持します。（numpyのarrayで保持することもあります）

train = pd.read_csv("housing_tokyo_chuo_line_week3.csv")
train.head()


Unnamed: 0,uid,years,minutes,sqrm,distance,renovate,express,price
0,13101-1,34,3,35,2.983801,0,0,24000000
1,13101-2,37,4,20,2.983801,0,0,13000000
2,13101-4,34,3,30,2.983801,0,0,22000000
3,13101-5,35,1,70,2.983801,0,0,69000000
4,13101-6,34,3,25,2.983801,0,0,21000000


In [34]:
train_x = train.iloc[: ,1:-1 ]
train_y = train['price']

In [35]:
# 学習データを学習データとバリデーションデータに分ける
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
#訓練用と検証用のindexを作成する
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

# xgboostによる学習・予測を行うクラス
import xgboost as xgb


class Model:

    def __init__(self, params=None):
        self.model = None
        if params is None:
            self.params = {}
        else:
            self.params = params

    def fit(self, tr_x, tr_y, va_x, va_y):
        #　訓練データで学習を行う
        #params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
        params = {'objective':'reg:squarederror', 'silent': 1 , 'random_state': 71} #回帰の場合
        params.update(self.params) #updateメソッドで辞書型データに追加を行う。
        num_round = 50
        dtrain = xgb.DMatrix(tr_x, label=tr_y) #xgbのデータ型に変換
        dvalid = xgb.DMatrix(va_x, label=va_y)
        watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
        self.model = xgb.train(params, dtrain, num_round, evals=watchlist)

    def predict(self, x):
        data = xgb.DMatrix(x)
        pred = self.model.predict(data)
        return pred


# -----------------------------------
# 探索するパラメータの空間の指定
# -----------------------------------
# hp.choiceでは、複数の選択肢から選ぶ
# hp.uniformでは、下限・上限を指定した一様分布から抽出する。引数は下限・上限
# hp.quniformでは、下限・上限を指定した一様分布のうち一定の間隔ごとの点から抽出する。引数は下限・上限・間隔
# hp.loguniformでは、下限・上限を指定した対数が一様分布に従う分布から抽出する。引数は下限・上限の対数をとった値

from hyperopt import hp

space = {
    'activation': hp.choice('activation', ['prelu', 'relu']),
    'dropout': hp.uniform('dropout', 0, 0.2),
    'units': hp.quniform('units', 32, 256, 32),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.01)),
}



In [36]:
# -----------------------------------
# hyperoptを使ったパラメータ探索
# -----------------------------------
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import log_loss
from sklearn.metrics import mean_squared_error
import math


def score(params):
    # パラメータを与えたときに最小化する評価指標を指定する
    # 具体的には、モデルにパラメータを指定して学習・予測させた場合のスコアを返すようにする

    # max_depthの型を整数型に修正する
    params['max_depth'] = int(params['max_depth'])

    # Modelクラスを定義しているものとする
    # Modelクラスは、fitで学習し、predictで予測値の確率を出力する
    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    #score = log_loss(va_y, va_pred) # modified 04-16 
    score = np.sqrt(mean_squared_error(va_y, va_pred)) # score should be rmse 
    print(f'params: {params}, logloss: {score:.4f}')

    # 情報を記録しておく
    history.append((params, score))

    return {'loss': score, 'status': STATUS_OK}




In [38]:
# 探索するパラメータの空間を指定する
space = {
    'min_child_weight': hp.quniform('min_child_weight', 1, 5, 1),
    'max_depth': hp.quniform('max_depth', 3, 9, 1),
    'gamma': hp.quniform('gamma', 0, 0.4, 0.1),
}

# hyperoptによるパラメータ探索の実行
max_evals = 10
trials = Trials()
history = []
fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=max_evals)

# 記録した情報からパラメータとスコアを出力する
# （trialsからも情報が取得できるが、パラメータの取得がやや行いづらいため）
history = sorted(history, key=lambda tpl: tpl[1])

best = history[0]
print(f'best params:{best[0]}, score:{best[1]:.4f}')


[0]	train-rmse:2.94355e+07	eval-rmse:3.09964e+07                                                                       

[1]	train-rmse:2.18563e+07	eval-rmse:2.32619e+07                                                                       

[2]	train-rmse:1.65124e+07	eval-rmse:1.77919e+07                                                                       

[3]	train-rmse:1.29489e+07	eval-rmse:1.42494e+07                                                                       

[4]	train-rmse:1.05617e+07	eval-rmse:1.18998e+07                                                                       

[5]	train-rmse:9.0191e+06	eval-rmse:1.05371e+07                                                                        

[6]	train-rmse:8.00249e+06	eval-rmse:9.64646e+06                                                                       

[7]	train-rmse:7.36213e+06	eval-rmse:9.03612e+06                                                                       

[8]	train-rmse:6.9325e+06	eval-r

  if getattr(data, 'base', None) is not None and \



[40]	train-rmse:5.19225e+06	eval-rmse:7.38418e+06                                                                      

[41]	train-rmse:5.18591e+06	eval-rmse:7.3662e+06                                                                       

[42]	train-rmse:5.17612e+06	eval-rmse:7.37158e+06                                                                      

[43]	train-rmse:5.16181e+06	eval-rmse:7.36881e+06                                                                      

[44]	train-rmse:5.15226e+06	eval-rmse:7.35815e+06                                                                      

[45]	train-rmse:5.13647e+06	eval-rmse:7.36238e+06                                                                      

[46]	train-rmse:5.13016e+06	eval-rmse:7.35519e+06                                                                      

[47]	train-rmse:5.1133e+06	eval-rmse:7.34928e+06                                                                       

[48]	train-rmse:5.08768e+06	eval

  if getattr(data, 'base', None) is not None and \



[34]	train-rmse:5.39425e+06	eval-rmse:7.36763e+06

[35]	train-rmse:5.37571e+06	eval-rmse:7.35825e+06                                                                      

[36]	train-rmse:5.37185e+06	eval-rmse:7.347e+06                                                                        

[37]	train-rmse:5.34848e+06	eval-rmse:7.34544e+06                                                                      

[38]	train-rmse:5.31707e+06	eval-rmse:7.30276e+06                                                                      

[39]	train-rmse:5.29708e+06	eval-rmse:7.31634e+06                                                                      

[40]	train-rmse:5.26102e+06	eval-rmse:7.31671e+06                                                                      

[41]	train-rmse:5.24064e+06	eval-rmse:7.30068e+06                                                                      

[42]	train-rmse:5.2226e+06	eval-rmse:7.28629e+06                                                      

  if getattr(data, 'base', None) is not None and \




[41]	train-rmse:5.18591e+06	eval-rmse:7.3662e+06                                                                       

[42]	train-rmse:5.17612e+06	eval-rmse:7.37158e+06                                                                      

[43]	train-rmse:5.16181e+06	eval-rmse:7.36881e+06                                                                      

[44]	train-rmse:5.15226e+06	eval-rmse:7.35815e+06                                                                      

[45]	train-rmse:5.13647e+06	eval-rmse:7.36238e+06                                                                      

[46]	train-rmse:5.13016e+06	eval-rmse:7.35519e+06                                                                      

[47]	train-rmse:5.1133e+06	eval-rmse:7.34928e+06                                                                       

[48]	train-rmse:5.08768e+06	eval-rmse:7.35499e+06                                                                      

[49]	train-rmse:5.07174e+06	eva

  if getattr(data, 'base', None) is not None and \



[39]	train-rmse:5.28825e+06	eval-rmse:7.25832e+06

[40]	train-rmse:5.27666e+06	eval-rmse:7.25719e+06                                                                      

[41]	train-rmse:5.26668e+06	eval-rmse:7.23716e+06                                                                      

[42]	train-rmse:5.23996e+06	eval-rmse:7.23797e+06                                                                      

[43]	train-rmse:5.21024e+06	eval-rmse:7.23131e+06                                                                      

[44]	train-rmse:5.2082e+06	eval-rmse:7.23484e+06                                                                       

[45]	train-rmse:5.20546e+06	eval-rmse:7.23444e+06                                                                      

[46]	train-rmse:5.18873e+06	eval-rmse:7.24415e+06                                                                      

[47]	train-rmse:5.18621e+06	eval-rmse:7.25137e+06                                                     

  if getattr(data, 'base', None) is not None and \



[35]	train-rmse:4.60736e+06	eval-rmse:7.01102e+06

[36]	train-rmse:4.60138e+06	eval-rmse:7.00948e+06                                                                      

[37]	train-rmse:4.57916e+06	eval-rmse:7.01256e+06                                                                      

[38]	train-rmse:4.56168e+06	eval-rmse:7.03015e+06                                                                      

[39]	train-rmse:4.55093e+06	eval-rmse:7.02813e+06                                                                      

[40]	train-rmse:4.53538e+06	eval-rmse:7.01681e+06                                                                      

[41]	train-rmse:4.52167e+06	eval-rmse:7.03315e+06                                                                      

[42]	train-rmse:4.51673e+06	eval-rmse:7.03466e+06                                                                      

[43]	train-rmse:4.4997e+06	eval-rmse:7.03873e+06                                                      

  if getattr(data, 'base', None) is not None and \



[24]	train-rmse:4.4519e+06	eval-rmse:7.3004e+06                                                                        

[25]	train-rmse:4.42587e+06	eval-rmse:7.28407e+06                                                                      

[26]	train-rmse:4.39262e+06	eval-rmse:7.27029e+06                                                                      

[27]	train-rmse:4.38264e+06	eval-rmse:7.27344e+06                                                                      

[28]	train-rmse:4.33939e+06	eval-rmse:7.25238e+06                                                                      

[29]	train-rmse:4.30681e+06	eval-rmse:7.23762e+06                                                                      

[30]	train-rmse:4.27604e+06	eval-rmse:7.17808e+06                                                                      

[31]	train-rmse:4.24118e+06	eval-rmse:7.15887e+06                                                                      

[32]	train-rmse:4.2195e+06	eval-

  if getattr(data, 'base', None) is not None and \



[34]	train-rmse:3.4749e+06	eval-rmse:7.53595e+06                                                                       

[35]	train-rmse:3.43743e+06	eval-rmse:7.53247e+06                                                                      

[36]	train-rmse:3.39442e+06	eval-rmse:7.54861e+06                                                                      

[37]	train-rmse:3.37424e+06	eval-rmse:7.54837e+06                                                                      

[38]	train-rmse:3.32507e+06	eval-rmse:7.5632e+06                                                                       

[39]	train-rmse:3.27314e+06	eval-rmse:7.56691e+06                                                                      

[40]	train-rmse:3.26377e+06	eval-rmse:7.54944e+06                                                                      

[41]	train-rmse:3.25243e+06	eval-rmse:7.5378e+06                                                                       

[42]	train-rmse:3.22185e+06	eval

  if getattr(data, 'base', None) is not None and \



[31]	train-rmse:3.74115e+06	eval-rmse:7.48385e+06                                                                      

[32]	train-rmse:3.73603e+06	eval-rmse:7.48787e+06                                                                      

[33]	train-rmse:3.67837e+06	eval-rmse:7.47523e+06                                                                      

[34]	train-rmse:3.6557e+06	eval-rmse:7.47349e+06                                                                       

[35]	train-rmse:3.63781e+06	eval-rmse:7.48165e+06                                                                      

[36]	train-rmse:3.6209e+06	eval-rmse:7.46244e+06                                                                       

[37]	train-rmse:3.60752e+06	eval-rmse:7.4677e+06                                                                       

[38]	train-rmse:3.60116e+06	eval-rmse:7.46663e+06                                                                      

[39]	train-rmse:3.5673e+06	eval-

  if getattr(data, 'base', None) is not None and \



[39]	train-rmse:5.28825e+06	eval-rmse:7.25832e+06                                                                      

[40]	train-rmse:5.27666e+06	eval-rmse:7.25719e+06                                                                      

[41]	train-rmse:5.26668e+06	eval-rmse:7.23716e+06                                                                      

[42]	train-rmse:5.23996e+06	eval-rmse:7.23797e+06                                                                      

[43]	train-rmse:5.21024e+06	eval-rmse:7.23131e+06                                                                      

[44]	train-rmse:5.2082e+06	eval-rmse:7.23484e+06                                                                       

[45]	train-rmse:5.20546e+06	eval-rmse:7.23444e+06                                                                      

[46]	train-rmse:5.18873e+06	eval-rmse:7.24415e+06                                                                      

[47]	train-rmse:5.18621e+06	eval

  if getattr(data, 'base', None) is not None and \



[27]	train-rmse:5.48724e+06	eval-rmse:7.5612e+06

[28]	train-rmse:5.46448e+06	eval-rmse:7.56011e+06                                                                      

[29]	train-rmse:5.43931e+06	eval-rmse:7.54382e+06                                                                      

[30]	train-rmse:5.41306e+06	eval-rmse:7.53205e+06                                                                      

[31]	train-rmse:5.39564e+06	eval-rmse:7.53452e+06                                                                      

[32]	train-rmse:5.3821e+06	eval-rmse:7.52224e+06                                                                       

[33]	train-rmse:5.37651e+06	eval-rmse:7.52232e+06                                                                      

[34]	train-rmse:5.35846e+06	eval-rmse:7.50786e+06                                                                      

[35]	train-rmse:5.35467e+06	eval-rmse:7.51084e+06                                                      

In [41]:
print(f'best params:{best[0]}, score:{best[1]:.4f}')

best params:{'gamma': 0.0, 'max_depth': 5, 'min_child_weight': 4.0}, score:7035681.1410


In [40]:
### Best score RMSE  =  7192525.9042

### Case2-2 Fixed parameter xgboost 

In [42]:
# 学習データを学習データとバリデーションデータに分ける
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
#訓練用と検証用のindexを作成する
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

dtrain = xgb.DMatrix(tr_x, label=tr_y) #xgbのデータ型に変換
dvalid = xgb.DMatrix(va_x, label=va_y)


  if getattr(data, 'base', None) is not None and \


In [43]:
params = {'objective':'reg:squarederror', 'silent':1 , 'random_state':71}
num_round =50
watchlist = [(dtrain,'train'),(dvalid,'eval')]

In [44]:
model = xgb.train(params, dtrain,num_round, evals=watchlist)

[0]	train-rmse:2.92318e+07	eval-rmse:3.07929e+07
[1]	train-rmse:2.14017e+07	eval-rmse:2.2768e+07
[2]	train-rmse:1.59455e+07	eval-rmse:1.74258e+07
[3]	train-rmse:1.21721e+07	eval-rmse:1.36784e+07
[4]	train-rmse:9.5989e+06	eval-rmse:1.12043e+07
[5]	train-rmse:7.94583e+06	eval-rmse:9.78007e+06
[6]	train-rmse:6.86056e+06	eval-rmse:8.97579e+06
[7]	train-rmse:6.11143e+06	eval-rmse:8.4136e+06
[8]	train-rmse:5.66383e+06	eval-rmse:8.11738e+06
[9]	train-rmse:5.37088e+06	eval-rmse:7.91797e+06
[10]	train-rmse:5.16083e+06	eval-rmse:7.74709e+06
[11]	train-rmse:5.02754e+06	eval-rmse:7.69371e+06
[12]	train-rmse:4.93783e+06	eval-rmse:7.67085e+06
[13]	train-rmse:4.7933e+06	eval-rmse:7.65224e+06
[14]	train-rmse:4.70651e+06	eval-rmse:7.57702e+06
[15]	train-rmse:4.60675e+06	eval-rmse:7.55909e+06
[16]	train-rmse:4.53352e+06	eval-rmse:7.55255e+06
[17]	train-rmse:4.46646e+06	eval-rmse:7.53673e+06
[18]	train-rmse:4.41877e+06	eval-rmse:7.54778e+06
[19]	train-rmse:4.37953e+06	eval-rmse:7.51305e+06
[20]	train-rms

In [45]:
### Best score RMSE  =  7330625.00000     ( hyper-opt : 7192525.9042 )

### Case 3-1 : lightgbm -> very poor rmse = 41,972,989.9949

In [48]:
# 学習データを学習データとバリデーションデータに分ける
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
#訓練用と検証用のindexを作成する
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

# -----------------------------------
# lightgbmの実装
# -----------------------------------
import lightgbm as lgb
from sklearn.metrics import log_loss

# 特徴量と目的変数をlightgbmのデータ構造に変換する
lgb_train = lgb.Dataset(tr_x, tr_y)
lgb_eval = lgb.Dataset(va_x, va_y)

# ハイパーパラメータの設定
params = {'objective': 'regression', 'seed': 71, 'verbose': 0, 'metrics': 'rmse','max_depth' : 5}
# params = {'metric': 'rmse','max_depth' : 9} # Quote from web 
num_round = 100

# 学習の実行
# カテゴリ変数をパラメータで指定している
# バリデーションデータもモデルに渡し、学習の進行とともにスコアがどう変わるかモニタリングする
#categorical_features = ['product', 'medical_info_b2', 'medical_info_b3']
model = lgb.train(params, lgb_train, num_boost_round=num_round,
                  #categorical_feature=categorical_features,
                  valid_names=['train', 'valid'], valid_sets=[lgb_train, lgb_eval])


#model = lgb.train(params,
#                lgb_train,
#                valid_sets=lgb_eval,
#                num_boost_round=10000,
#                early_stopping_rounds=100,
#                verbose_eval=50)

# バリデーションデータでのスコアの確認
va_pred = model.predict(va_x)
score = np.sqrt(mean_squared_error(va_y, va_pred)) # score should be rmse 
print(f'rmse: {score:.4f}')

# 予測#
#pred = model.predict(test_x)


[1]	train's rmse: 1.99253e+07	valid's rmse: 2.08145e+07
[2]	train's rmse: 1.8397e+07	valid's rmse: 1.9357e+07
[3]	train's rmse: 1.70462e+07	valid's rmse: 1.81049e+07
[4]	train's rmse: 1.58082e+07	valid's rmse: 1.69065e+07
[5]	train's rmse: 1.47116e+07	valid's rmse: 1.57885e+07
[6]	train's rmse: 1.37086e+07	valid's rmse: 1.48291e+07
[7]	train's rmse: 1.28342e+07	valid's rmse: 1.39542e+07
[8]	train's rmse: 1.20143e+07	valid's rmse: 1.31459e+07
[9]	train's rmse: 1.13204e+07	valid's rmse: 1.25095e+07
[10]	train's rmse: 1.0709e+07	valid's rmse: 1.19486e+07
[11]	train's rmse: 1.01556e+07	valid's rmse: 1.14441e+07
[12]	train's rmse: 9.65342e+06	valid's rmse: 1.09688e+07
[13]	train's rmse: 9.18699e+06	valid's rmse: 1.05277e+07
[14]	train's rmse: 8.81142e+06	valid's rmse: 1.01851e+07
[15]	train's rmse: 8.46774e+06	valid's rmse: 9.84986e+06
[16]	train's rmse: 8.16015e+06	valid's rmse: 9.56064e+06
[17]	train's rmse: 7.89006e+06	valid's rmse: 9.31167e+06
[18]	train's rmse: 7.65666e+06	valid's rmse

# hyperopt and lightgbm

In [52]:
# 学習データを学習データとバリデーションデータに分ける
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
#訓練用と検証用のindexを作成する
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

# xgboostによる学習・予測を行うクラス
import xgboost as xgb


class Model:

    def __init__(self, params=None):
        self.model = None
        if params is None:
            self.params = {}
        else:
            self.params = params

    def fit(self, tr_x, tr_y, va_x, va_y):
        #　訓練データで学習を行う
        #params = {'objective': 'binary:logistic', 'silent': 1, 'random_state': 71}
        params = {'objective':'reg:squarederror', 'silent': 1 , 'random_state': 71} #回帰の場合
        params.update(self.params) #updateメソッドで辞書型データに追加を行う。
        num_round = 50
        lgb_train = lgb.Dataset(tr_x, tr_y)
        lgb_eval = lgb.Dataset(va_x, va_y)
        watchlist = [(lgb_train, 'train'), (lgb_eval, 'eval')]
        self.model = xgb.train(params, lgb_train, num_round, evals=watchlist)
        
        model = lgb.train(params, lgb_train, num_boost_round=num_round,
                  #categorical_feature=categorical_features,
                  valid_names=['train', 'valid'], valid_sets=[lgb_train, lgb_eval])

    def predict(self, x):
        data = xgb.DMatrix(x)
        pred = self.model.predict(data)
        return pred


# -----------------------------------
# 探索するパラメータの空間の指定
# -----------------------------------
# hp.choiceでは、複数の選択肢から選ぶ
# hp.uniformでは、下限・上限を指定した一様分布から抽出する。引数は下限・上限
# hp.quniformでは、下限・上限を指定した一様分布のうち一定の間隔ごとの点から抽出する。引数は下限・上限・間隔
# hp.loguniformでは、下限・上限を指定した対数が一様分布に従う分布から抽出する。引数は下限・上限の対数をとった値

from hyperopt import hp

space = {
    'activation': hp.choice('activation', ['prelu', 'relu']),
    'dropout': hp.uniform('dropout', 0, 0.2),
    'units': hp.quniform('units', 32, 256, 32),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.01)),
}



In [53]:
# -----------------------------------
# hyperoptを使ったパラメータ探索
# -----------------------------------
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import log_loss
from sklearn.metrics import mean_squared_error
import math


def score(params):
    # パラメータを与えたときに最小化する評価指標を指定する
    # 具体的には、モデルにパラメータを指定して学習・予測させた場合のスコアを返すようにする

    # max_depthの型を整数型に修正する
    params['max_depth'] = int(params['max_depth'])

    # Modelクラスを定義しているものとする
    # Modelクラスは、fitで学習し、predictで予測値の確率を出力する
    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    #score = log_loss(va_y, va_pred) # modified 04-16 
    score = np.sqrt(mean_squared_error(va_y, va_pred)) # score should be rmse 
    print(f'params: {params}, logloss: {score:.4f}')

    # 情報を記録しておく
    history.append((params, score))

    return {'loss': score, 'status': STATUS_OK}




In [54]:
# 探索するパラメータの空間を指定する
space = {
    'min_child_weight': hp.quniform('min_child_weight', 1, 5, 1),
    'max_depth': hp.quniform('max_depth', 3, 9, 1),
    'gamma': hp.quniform('gamma', 0, 0.4, 0.1),
}

# hyperoptによるパラメータ探索の実行
max_evals = 10
trials = Trials()
history = []
fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=max_evals)

# 記録した情報からパラメータとスコアを出力する
# （trialsからも情報が取得できるが、パラメータの取得がやや行いづらいため）
history = sorted(history, key=lambda tpl: tpl[1])

best = history[0]
print(f'best params:{best[0]}, score:{best[1]:.4f}')


  0%|                                                                           | 0/10 [00:00<?, ?trial/s, best loss=?]

job exception: invalid cache item: Dataset



  0%|                                                                           | 0/10 [00:00<?, ?trial/s, best loss=?]


TypeError: invalid cache item: Dataset

In [55]:
print(f'best params:{best[0]}, score:{best[1]:.4f}')

best params:{'gamma': 0.0, 'max_depth': 5, 'min_child_weight': 4.0}, score:7035681.1410
