####  Test hyperopt


### Case 1: Classification — quoted from "Data Analysis Techniques to Win Kaggle"

In [41]:
#!pip install hyperopt

In [42]:
# ---------------------------------
# Data preparation
# ---------------------------------
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

# train_x holds the features and train_y the target, kept as a pandas
# DataFrame / Series (NumPy arrays would work as well).
train = pd.read_csv('./sample-data/train_preprocessed.csv')
train_y = train['target']
train_x = train.drop(columns=['target'])
# test_x is not used in this section, so loading it stays disabled:
# test_x = pd.read_csv('./input/sample-data/test_preprocessed.csv')

# Split the training data into a training part and a validation part:
# the first fold of a shuffled 4-fold split is held out for validation.
kf = KFold(n_splits=4, shuffle=True, random_state=71)
tr_idx, va_idx = next(kf.split(train_x))
tr_x, tr_y = train_x.iloc[tr_idx], train_y.iloc[tr_idx]
va_x, va_y = train_x.iloc[va_idx], train_y.iloc[va_idx]

# xgboostによる学習・予測を行うクラス
import xgboost as xgb


class Model:
    """Thin wrapper around ``xgb.train`` / ``Booster.predict``.

    The default objective is ``binary:logistic``; any entry of the ``params``
    dict given to the constructor overrides the defaults used in ``fit``.
    """

    def __init__(self, params=None):
        """Remember the booster parameters to apply on top of the defaults.

        Args:
            params: Optional dict of XGBoost parameters. ``None`` means that
                only the defaults defined in ``fit`` are used.
        """
        self.model = None  # trained booster, set by fit()
        self.params = {} if params is None else params

    def fit(self, tr_x, tr_y, va_x, va_y):
        """Train a booster for 50 rounds on the training data.

        Both the training and the validation set are passed to ``xgb.train``
        as the watch list, so the evaluation metric of each is logged after
        every boosting round.

        Args:
            tr_x: Training features.
            tr_y: Training labels.
            va_x: Validation features.
            va_y: Validation labels.
        """
        # ``silent`` is deprecated in favour of ``verbosity`` (recent XGBoost
        # releases warn that it is unused); ``verbosity=0`` keeps the library
        # quiet without affecting the per-round evaluation log.
        params = {'objective': 'binary:logistic', 'verbosity': 0, 'random_state': 71}
        params.update(self.params)  # caller-supplied values win over the defaults
        num_round = 50
        dtrain = xgb.DMatrix(tr_x, label=tr_y)
        dvalid = xgb.DMatrix(va_x, label=va_y)
        watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
        self.model = xgb.train(params, dtrain, num_round, evals=watchlist)

    def predict(self, x):
        """Return the trained booster's predictions for ``x``.

        Args:
            x: Features in any form accepted by ``xgb.DMatrix``.

        Returns:
            The result of ``Booster.predict`` on ``x``.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
        """
        if self.model is None:
            raise RuntimeError('Model.predict() called before Model.fit()')
        return self.model.predict(xgb.DMatrix(x))


# -----------------------------------
# Specifying the parameter search space
# -----------------------------------
# hp.choice     : pick one of several options.
# hp.uniform    : sample from a uniform distribution; arguments are the lower and upper bound.
# hp.quniform   : sample from evenly spaced points of a uniform distribution; arguments are lower bound, upper bound and step.
# hp.loguniform : sample so that the logarithm of the value is uniform; arguments are the logs of the lower and upper bound.

from hyperopt import hp

# Illustrative space showing each sampler once. ``space`` is reassigned with
# the XGBoost parameters further down, before ``fmin`` is called.
space = dict(
    activation=hp.choice('activation', ['prelu', 'relu']),
    dropout=hp.uniform('dropout', 0, 0.2),
    units=hp.quniform('units', 32, 256, 32),
    learning_rate=hp.loguniform('learning_rate', np.log(0.00001), np.log(0.01)),
)

# -----------------------------------
# hyperoptを使ったパラメータ探索
# -----------------------------------
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import log_loss


def score(params):
    """Objective for hyperopt: validation log loss of a model trained with ``params``.

    Args:
        params: Dict of parameters sampled from the search space. Its
            ``max_depth`` entry is converted to ``int`` in place.

    Returns:
        Dict with the log loss under ``'loss'`` and ``STATUS_OK`` under
        ``'status'``.

    Side effects:
        Prints the parameters with their loss and appends ``(params, loss)``
        to the module-level ``history`` list.
    """
    # The sampled max_depth arrives as a float; convert it to an integer.
    params['max_depth'] = int(params['max_depth'])

    # Train on the training fold, then predict on the validation fold.
    clf = Model(params)
    clf.fit(tr_x, tr_y, va_x, va_y)
    loss = log_loss(va_y, clf.predict(va_x))
    print(f'params: {params}, logloss: {loss:.4f}')

    # Keep our own record of every evaluation.
    history.append((params, loss))

    return {'loss': loss, 'status': STATUS_OK}


# Search space for the parameters being tuned.
space = dict(
    min_child_weight=hp.quniform('min_child_weight', 1, 5, 1),
    max_depth=hp.quniform('max_depth', 3, 9, 1),
    gamma=hp.quniform('gamma', 0, 0.4, 0.1),
)

# Run the hyperopt parameter search.
max_evals = 10
history = []
trials = Trials()
fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=max_evals)

# Print the best parameters and score from our own record
# (Trials holds the information too, but the parameters are awkward to read back).
history.sort(key=lambda entry: entry[1])
best = history[0]
print(f'best params:{best[0]}, score:{best[1]:.4f}')


[0]	train-error:0.12107	eval-error:0.14720                                                                             

[1]	train-error:0.10800	eval-error:0.14160                                                                             

[2]	train-error:0.09747	eval-error:0.13840                                                                             

[3]	train-error:0.09480	eval-error:0.13640                                                                             

[4]	train-error:0.08987	eval-error:0.12960                                                                             

[5]	train-error:0.08573	eval-error:0.12960                                                                             

[6]	train-error:0.07827	eval-error:0.12360                                                                             

[7]	train-error:0.07587	eval-error:0.12240                                                                             

[8]	train-error:0.06880	eval-err

[16]	train-error:0.09173	eval-error:0.11240                                                                            

[17]	train-error:0.09053	eval-error:0.11400                                                                            

[18]	train-error:0.08653	eval-error:0.11360                                                                            

[19]	train-error:0.08413	eval-error:0.11000                                                                            

[20]	train-error:0.08227	eval-error:0.11080                                                                            

[21]	train-error:0.08173	eval-error:0.10760                                                                            

[22]	train-error:0.08013	eval-error:0.10800                                                                            

[23]	train-error:0.07840	eval-error:0.10760                                                                            

[24]	train-error:0.07627	eval-er

[32]	train-error:0.02720	eval-error:0.10040                                                                            

[33]	train-error:0.02733	eval-error:0.10040                                                                            

[34]	train-error:0.02560	eval-error:0.10120                                                                            

[35]	train-error:0.02493	eval-error:0.09960                                                                            

[36]	train-error:0.02333	eval-error:0.09880                                                                            

[37]	train-error:0.02307	eval-error:0.09760                                                                            

[38]	train-error:0.02120	eval-error:0.09840                                                                            

[39]	train-error:0.02147	eval-error:0.09920                                                                            

[40]	train-error:0.02040	eval-er

[48]	train-error:0.03080	eval-error:0.09760                                                                            

[49]	train-error:0.02987	eval-error:0.09240                                                                            

params: {'gamma': 0.2, 'max_depth': 5, 'min_child_weight': 4.0}, logloss: 0.2213                                       
[0]	train-error:0.11147	eval-error:0.14280                                                                             

[1]	train-error:0.09920	eval-error:0.13960                                                                             

[2]	train-error:0.08840	eval-error:0.13560                                                                             

[3]	train-error:0.08467	eval-error:0.13120                                                                             

[4]	train-error:0.07627	eval-error:0.12400                                                                             

[5]	train-error:0.06907	eval-erro

[13]	train-error:0.02613	eval-error:0.11440                                                                            

[14]	train-error:0.02147	eval-error:0.11440                                                                            

[15]	train-error:0.01933	eval-error:0.11760                                                                            

[16]	train-error:0.01800	eval-error:0.11480                                                                            

[17]	train-error:0.01573	eval-error:0.11000                                                                            

[18]	train-error:0.01453	eval-error:0.11000                                                                            

[19]	train-error:0.01173	eval-error:0.10720                                                                            

[20]	train-error:0.00933	eval-error:0.10600                                                                            

[21]	train-error:0.00840	eval-er

[29]	train-error:0.08920	eval-error:0.11400                                                                            

[30]	train-error:0.08760	eval-error:0.11240                                                                            

[31]	train-error:0.08613	eval-error:0.10880                                                                            

[32]	train-error:0.08400	eval-error:0.11160                                                                            

[33]	train-error:0.08320	eval-error:0.11120                                                                            

[34]	train-error:0.08173	eval-error:0.11160                                                                            

[35]	train-error:0.08227	eval-error:0.11040                                                                            

[36]	train-error:0.08227	eval-error:0.10920                                                                            

[37]	train-error:0.08107	eval-er

[45]	train-error:0.00840	eval-error:0.09720                                                                            

[46]	train-error:0.00747	eval-error:0.09800                                                                            

[47]	train-error:0.00560	eval-error:0.09560                                                                            

[48]	train-error:0.00560	eval-error:0.09520                                                                            

[49]	train-error:0.00507	eval-error:0.09520                                                                            

params: {'gamma': 0.30000000000000004, 'max_depth': 7, 'min_child_weight': 4.0}, logloss: 0.2190                       
[0]	train-error:0.13000	eval-error:0.15240                                                                             

[1]	train-error:0.11707	eval-error:0.14560                                                                             

[2]	train-error:0.11333	eval-erro

[10]	train-error:0.11147	eval-error:0.13200                                                                            

[11]	train-error:0.10507	eval-error:0.12760                                                                            

[12]	train-error:0.10307	eval-error:0.12480                                                                            

[13]	train-error:0.10147	eval-error:0.12120                                                                            

[14]	train-error:0.09413	eval-error:0.12080                                                                            

[15]	train-error:0.09160	eval-error:0.11840                                                                            

[16]	train-error:0.08947	eval-error:0.11600                                                                            

[17]	train-error:0.08853	eval-error:0.11200                                                                            

[18]	train-error:0.08373	eval-er

### Case 2: Regression — predicting Tokyo housing prices (target: price). Modified by Bashii, 2020-04-16

In [43]:
# ---------------------------------
# Data preparation
# ---------------------------------
import numpy as np
import pandas as pd

# train_x will hold the features and train_y the target.
# Both are kept as a pandas DataFrame / Series (NumPy arrays would work as well).

train = pd.read_csv("housing_tokyo_chuo_line_week3.csv")
train.head()


Unnamed: 0,uid,years,minutes,sqrm,distance,renovate,express,price
0,13101-1,34,3,35,2.983801,0,0,24000000
1,13101-2,37,4,20,2.983801,0,0,13000000
2,13101-4,34,3,30,2.983801,0,0,22000000
3,13101-5,35,1,70,2.983801,0,0,69000000
4,13101-6,34,3,25,2.983801,0,0,21000000


In [44]:
# Features: positional slice that drops the first and the last column
# (presumably `uid` and `price`, judging by the head() output - confirm).
train_x = train.iloc[:, 1:-1]
# Target: the price column.
train_y = train['price']

In [45]:
# Split the training data into a training part and a validation part.
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
# Row positions for training and validation, taken from the first fold.
tr_idx, va_idx = next(kf.split(train_x))
tr_x, tr_y = train_x.iloc[tr_idx], train_y.iloc[tr_idx]
va_x, va_y = train_x.iloc[va_idx], train_y.iloc[va_idx]

# xgboostによる学習・予測を行うクラス
import xgboost as xgb


class Model:
    """Thin wrapper around ``xgb.train`` / ``Booster.predict`` for regression.

    The default objective is ``reg:squarederror``; any entry of the ``params``
    dict given to the constructor overrides the defaults used in ``fit``.
    """

    def __init__(self, params=None):
        """Remember the booster parameters to apply on top of the defaults.

        Args:
            params: Optional dict of XGBoost parameters. ``None`` means that
                only the defaults defined in ``fit`` are used.
        """
        self.model = None  # trained booster, set by fit()
        self.params = {} if params is None else params

    def fit(self, tr_x, tr_y, va_x, va_y):
        """Train a booster for 50 rounds on the training data.

        Both the training and the validation set are passed to ``xgb.train``
        as the watch list, so the evaluation metric of each is logged after
        every boosting round.

        Args:
            tr_x: Training features.
            tr_y: Training target values.
            va_x: Validation features.
            va_y: Validation target values.
        """
        # Regression defaults. ``silent`` is deprecated in favour of
        # ``verbosity`` (recent XGBoost releases warn that it is unused);
        # ``verbosity=0`` keeps the library quiet without affecting the
        # per-round evaluation log.
        params = {'objective': 'reg:squarederror', 'verbosity': 0, 'random_state': 71}
        params.update(self.params)  # caller-supplied values win over the defaults
        num_round = 50
        dtrain = xgb.DMatrix(tr_x, label=tr_y)  # convert to XGBoost's data container
        dvalid = xgb.DMatrix(va_x, label=va_y)
        watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
        self.model = xgb.train(params, dtrain, num_round, evals=watchlist)

    def predict(self, x):
        """Return the trained booster's predictions for ``x``.

        Args:
            x: Features in any form accepted by ``xgb.DMatrix``.

        Returns:
            The result of ``Booster.predict`` on ``x``.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
        """
        if self.model is None:
            raise RuntimeError('Model.predict() called before Model.fit()')
        return self.model.predict(xgb.DMatrix(x))


# -----------------------------------
# Specifying the parameter search space
# -----------------------------------
# hp.choice     : pick one of several options.
# hp.uniform    : sample from a uniform distribution; arguments are the lower and upper bound.
# hp.quniform   : sample from evenly spaced points of a uniform distribution; arguments are lower bound, upper bound and step.
# hp.loguniform : sample so that the logarithm of the value is uniform; arguments are the logs of the lower and upper bound.

from hyperopt import hp

# Illustrative space showing each sampler once. ``space`` is reassigned with
# the XGBoost parameters further down, before ``fmin`` is called.
space = dict(
    activation=hp.choice('activation', ['prelu', 'relu']),
    dropout=hp.uniform('dropout', 0, 0.2),
    units=hp.quniform('units', 32, 256, 32),
    learning_rate=hp.loguniform('learning_rate', np.log(0.00001), np.log(0.01)),
)



In [46]:
# -----------------------------------
# hyperoptを使ったパラメータ探索
# -----------------------------------
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import log_loss
from sklearn.metrics import mean_squared_error
import math


def score(params):
    """Objective for hyperopt: validation RMSE of a model trained with ``params``.

    Args:
        params: Dict of parameters sampled from the search space. Its
            ``max_depth`` entry is converted to ``int`` in place.

    Returns:
        Dict with the RMSE under ``'loss'`` and ``STATUS_OK`` under
        ``'status'``.

    Side effects:
        Prints the parameters with their RMSE and appends ``(params, rmse)``
        to the module-level ``history`` list.
    """
    # The sampled max_depth arrives as a float; convert it to an integer.
    params['max_depth'] = int(params['max_depth'])

    # Train on the training fold, then predict on the validation fold.
    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    # Regression is scored by RMSE. Only ASCII whitespace may separate code
    # from a trailing comment; a full-width space there is a SyntaxError.
    rmse = np.sqrt(mean_squared_error(va_y, va_pred))
    print(f'params: {params}, rmse: {rmse:.4f}')

    # Keep our own record of every evaluation.
    history.append((params, rmse))

    return {'loss': rmse, 'status': STATUS_OK}




### Seems OK.

In [52]:
# Search space for the parameters being tuned.
space = dict(
    min_child_weight=hp.quniform('min_child_weight', 1, 5, 1),
    max_depth=hp.quniform('max_depth', 3, 9, 1),
    gamma=hp.quniform('gamma', 0, 0.4, 0.1),
)

# Run the hyperopt parameter search.
max_evals = 10
history = []
trials = Trials()
fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=max_evals)

# Print the best parameters and score from our own record
# (Trials holds the information too, but the parameters are awkward to read back).
history.sort(key=lambda entry: entry[1])

best = history[0]
print(f'best params:{best[0]}, score:{best[1]:.4f}')


[0]	train-rmse:29435498.00000	eval-rmse:30996432.00000                                                                 

[1]	train-rmse:21856338.00000	eval-rmse:23261902.00000                                                                 

[2]	train-rmse:16512384.00000	eval-rmse:17791948.00000                                                                 

[3]	train-rmse:12948883.00000	eval-rmse:14249404.00000                                                                 

[4]	train-rmse:10558537.00000	eval-rmse:11928664.00000                                                                 

[5]	train-rmse:9016975.00000	eval-rmse:10486821.00000                                                                  

[6]	train-rmse:7988780.50000	eval-rmse:9530399.00000                                                                   

[7]	train-rmse:7353711.50000	eval-rmse:8956440.00000                                                                   

[8]	train-rmse:6927239.50000	eva

[16]	train-rmse:5877326.00000	eval-rmse:7721957.50000                                                                  

[17]	train-rmse:5800356.00000	eval-rmse:7659926.00000                                                                  

[18]	train-rmse:5757075.50000	eval-rmse:7613188.50000                                                                  

[19]	train-rmse:5735687.50000	eval-rmse:7608182.50000                                                                  

[20]	train-rmse:5710833.00000	eval-rmse:7598942.00000                                                                  

[21]	train-rmse:5662755.50000	eval-rmse:7551708.00000                                                                  

[22]	train-rmse:5626047.00000	eval-rmse:7557417.50000                                                                  

[23]	train-rmse:5610692.50000	eval-rmse:7540442.00000                                                                  

[24]	train-rmse:5553174.50000	ev

[32]	train-rmse:5438668.00000	eval-rmse:7391925.00000                                                                  

[33]	train-rmse:5424165.50000	eval-rmse:7374377.50000                                                                  

[34]	train-rmse:5394249.00000	eval-rmse:7367628.50000                                                                  

[35]	train-rmse:5375711.00000	eval-rmse:7358249.00000                                                                  

[36]	train-rmse:5371854.50000	eval-rmse:7347002.50000                                                                  

[37]	train-rmse:5348478.50000	eval-rmse:7345444.00000                                                                  

[38]	train-rmse:5317074.00000	eval-rmse:7302764.50000                                                                  

[39]	train-rmse:5297084.00000	eval-rmse:7316342.00000                                                                  

[40]	train-rmse:5261023.00000	ev

[48]	train-rmse:4291274.50000	eval-rmse:7179207.50000                                                                  

[49]	train-rmse:4271636.50000	eval-rmse:7192526.00000                                                                  

params: {'gamma': 0.1, 'max_depth': 5, 'min_child_weight': 2.0}, logloss: 7192525.9042                                 
[0]	train-rmse:29197456.00000	eval-rmse:30748972.00000                                                                 

[1]	train-rmse:21326804.00000	eval-rmse:22743830.00000                                                                 

[2]	train-rmse:15828735.00000	eval-rmse:17301856.00000                                                                 

[3]	train-rmse:12045661.00000	eval-rmse:13631660.00000                                                                 

[4]	train-rmse:9465156.00000	eval-rmse:11343278.00000                                                                  

[5]	train-rmse:7760337.00000	eval

[13]	train-rmse:3431083.50000	eval-rmse:7568931.00000                                                                  

[14]	train-rmse:3339463.50000	eval-rmse:7567750.50000                                                                  

[15]	train-rmse:3266171.50000	eval-rmse:7564847.50000                                                                  

[16]	train-rmse:3173496.25000	eval-rmse:7536088.50000                                                                  

[17]	train-rmse:3101754.75000	eval-rmse:7525839.00000                                                                  

[18]	train-rmse:3070456.00000	eval-rmse:7520495.50000                                                                  

[19]	train-rmse:3005150.00000	eval-rmse:7529858.00000                                                                  

[20]	train-rmse:2963856.50000	eval-rmse:7518665.50000                                                                  

[21]	train-rmse:2948939.75000	ev

[29]	train-rmse:4754988.00000	eval-rmse:7245392.50000                                                                  

[30]	train-rmse:4687356.00000	eval-rmse:7216067.00000                                                                  

[31]	train-rmse:4660442.00000	eval-rmse:7183057.50000                                                                  

[32]	train-rmse:4643130.50000	eval-rmse:7188991.50000                                                                  

[33]	train-rmse:4639504.00000	eval-rmse:7184203.50000                                                                  

[34]	train-rmse:4625270.00000	eval-rmse:7140429.00000                                                                  

[35]	train-rmse:4596422.50000	eval-rmse:7142141.00000                                                                  

[36]	train-rmse:4587303.50000	eval-rmse:7141541.50000                                                                  

[37]	train-rmse:4567300.00000	ev

[45]	train-rmse:4204068.00000	eval-rmse:7257617.50000                                                                  

[46]	train-rmse:4182157.25000	eval-rmse:7236545.00000                                                                  

[47]	train-rmse:4158484.75000	eval-rmse:7212256.00000                                                                  

[48]	train-rmse:4129728.00000	eval-rmse:7210117.50000                                                                  

[49]	train-rmse:4107439.75000	eval-rmse:7204438.50000                                                                  

params: {'gamma': 0.2, 'max_depth': 5, 'min_child_weight': 1.0}, logloss: 7204438.6248                                 
[0]	train-rmse:29181452.00000	eval-rmse:30727258.00000                                                                 

[1]	train-rmse:21288358.00000	eval-rmse:22730372.00000                                                                 

[2]	train-rmse:15769544.00000	eva

[10]	train-rmse:4541307.50000	eval-rmse:7611319.00000                                                                  

[11]	train-rmse:4371276.50000	eval-rmse:7534963.50000                                                                  

[12]	train-rmse:4230074.00000	eval-rmse:7506927.50000                                                                  

[13]	train-rmse:4081998.50000	eval-rmse:7500632.00000                                                                  

[14]	train-rmse:3983694.00000	eval-rmse:7490631.00000                                                                  

[15]	train-rmse:3915417.00000	eval-rmse:7466450.00000                                                                  

[16]	train-rmse:3845254.00000	eval-rmse:7461328.00000                                                                  

[17]	train-rmse:3790541.00000	eval-rmse:7433821.00000                                                                  

[18]	train-rmse:3771323.25000	ev

In [53]:
# Show the best (lowest-score) entry again; `best` was set by the search cell above.
print(f'best params:{best[0]}, score:{best[1]:.4f}')

best params:{'gamma': 0.1, 'max_depth': 5, 'min_child_weight': 2.0}, score:7192525.9042


### Another Trial

In [54]:
# Split the training data into a training part and a validation part.
from sklearn.model_selection import KFold

kf = KFold(n_splits=4, shuffle=True, random_state=71)
# Row positions for training and validation, taken from the first fold.
tr_idx, va_idx = next(kf.split(train_x))
tr_x, tr_y = train_x.iloc[tr_idx], train_y.iloc[tr_idx]
va_x, va_y = train_x.iloc[va_idx], train_y.iloc[va_idx]

# Convert both parts to XGBoost's DMatrix container.
dvalid = xgb.DMatrix(va_x, label=va_y)
dtrain = xgb.DMatrix(tr_x, label=tr_y)


In [55]:
# Baseline regression parameters (no tuning). ``silent`` is deprecated in
# favour of ``verbosity``; ``verbosity=0`` keeps the library quiet without
# affecting the per-round evaluation log.
params = {'objective': 'reg:squarederror', 'verbosity': 0, 'random_state': 71}
num_round = 50
# Data sets whose metric is logged after every boosting round.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]

In [56]:
# Train the baseline booster; metrics for every watch-list entry are logged each round.
model = xgb.train(params, dtrain, num_boost_round=num_round, evals=watchlist)

[0]	train-rmse:29231778.00000	eval-rmse:30792920.00000
[1]	train-rmse:21401708.00000	eval-rmse:22767962.00000
[2]	train-rmse:15945462.00000	eval-rmse:17425754.00000
[3]	train-rmse:12172091.00000	eval-rmse:13678410.00000
[4]	train-rmse:9598904.00000	eval-rmse:11204296.00000
[5]	train-rmse:7945833.50000	eval-rmse:9780067.00000
[6]	train-rmse:6860563.00000	eval-rmse:8975791.00000
[7]	train-rmse:6111426.00000	eval-rmse:8413604.00000
[8]	train-rmse:5663827.50000	eval-rmse:8117376.00000
[9]	train-rmse:5370885.00000	eval-rmse:7917968.00000
[10]	train-rmse:5160830.50000	eval-rmse:7747086.00000
[11]	train-rmse:5027537.00000	eval-rmse:7693710.00000
[12]	train-rmse:4937834.00000	eval-rmse:7670854.50000
[13]	train-rmse:4793298.00000	eval-rmse:7652235.50000
[14]	train-rmse:4706511.50000	eval-rmse:7577021.00000
[15]	train-rmse:4606752.00000	eval-rmse:7559090.00000
[16]	train-rmse:4533517.00000	eval-rmse:7552550.50000
[17]	train-rmse:4466463.00000	eval-rmse:7536730.50000
[18]	train-rmse:4418774.00000