In [1]:
import numpy as np 
import pandas as pd

In [2]:
# Load the preprocessed training table, then separate the label column from the features.
train = pd.read_csv('input/sample-data/train_preprocessed.csv')
train_y = train['target']
train_x = train.drop(columns=['target'])

# Load the preprocessed test table.
test_x = pd.read_csv('input/sample-data/test_preprocessed.csv')

In [3]:
from sklearn.model_selection import KFold

# Shuffled 4-fold splitter; the fixed random_state keeps fold membership repeatable.
kf = KFold(
    n_splits=4,
    shuffle=True,
    random_state=71,
)

In [4]:
# Only the first fold produced by the splitter is used, giving one train/validation split.
tr_idx, va_idx = next(iter(kf.split(train_x)))
tr_x, tr_y = train_x.iloc[tr_idx], train_y.iloc[tr_idx]
va_x, va_y = train_x.iloc[va_idx], train_y.iloc[va_idx]

In [5]:
import xgboost as xgb

In [6]:
class Model:
    """Small wrapper that trains an XGBoost booster via ``xgb.train`` and predicts with it.

    Parameters
    ----------
    params : dict, optional
        XGBoost parameters. They are merged over the defaults defined in
        ``fit`` (``objective``, ``verbosity``, ``random_state``), so any key
        given here wins.
    num_round : int, optional
        Number of boosting rounds handed to ``xgb.train``. Defaults to 10.

    Attributes
    ----------
    model
        The trained booster returned by ``xgb.train``; ``None`` until ``fit``
        has been called.
    """

    def __init__(self, params=None, num_round=10):
        self.model = None
        # Shallow copy: later edits to the caller's dict must not silently
        # change the settings this instance trains with.
        self.params = {} if params is None else dict(params)
        self.num_round = num_round

    def fit(self, tr_x, tr_y, va_x, va_y):
        """Train the booster on ``(tr_x, tr_y)``.

        ``(va_x, va_y)`` is only placed on the evaluation watchlist, so its
        metric is reported each round; no early stopping is configured.
        """
        params = {
            'objective': 'binary:logistic',
            'verbosity': 0,
            'random_state': 71,
        }
        # User-supplied parameters override the defaults above.
        params.update(self.params)
        dtrain = xgb.DMatrix(tr_x, label=tr_y)
        dvalid = xgb.DMatrix(va_x, label=va_y)
        watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
        self.model = xgb.train(params, dtrain, self.num_round, evals=watchlist)

    def predict(self, x):
        """Return the trained booster's predictions for ``x``.

        ``fit`` must have been called first; otherwise ``self.model`` is
        still ``None`` and the call fails.
        """
        data = xgb.DMatrix(x)
        return self.model.predict(data)

In [7]:
from hyperopt import hp

In [9]:
# Example search space for a neural-network model. It is not consumed by the
# XGBoost search in this notebook: `space` is rebound before fmin is called.
space = {
    # Categorical choice between the two activation names.
    'activation': hp.choice('activation', ['prelu', 'relu']),
    # hp.uniform takes (label, low, high): sample dropout uniformly from [0.0, 0.2].
    'dropout': hp.uniform('dropout', 0.0, 0.2),
    # Quantized uniform between 32 and 256 with step q=32.
    'units': hp.quniform('units', 32, 256, 32),
    # Log-uniform between 1e-5 and 1e-2 (bounds are given in log space).
    'learning_rate': hp.loguniform('learning_rate', np.log(0.00001), np.log(0.01)),
}

In [10]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import log_loss

In [12]:
def score(params):
    """Objective function for hyperopt: train once and report validation log loss.

    Parameters
    ----------
    params : dict
        One sampled point of the search space. Must contain ``'max_depth'``.
        The dict passed in is not modified.

    Returns
    -------
    dict
        ``{'loss': <validation log loss>, 'status': STATUS_OK}``.

    Side effects
    ------------
    Prints the evaluated parameters and loss, and appends
    ``(params, loss)`` to the module-level ``history`` list, which must exist
    before this function is called.
    """
    # Work on a copy so the dict owned by the caller is left untouched.
    params = dict(params)
    # The sampled depth arrives as a float (e.g. 8.0); store it as an int.
    params['max_depth'] = int(params['max_depth'])

    model = Model(params)
    model.fit(tr_x, tr_y, va_x, va_y)
    va_pred = model.predict(va_x)
    # Named `logloss` rather than `score` to avoid shadowing this function.
    logloss = log_loss(va_y, va_pred)
    print(f'params:{params}, logloss:{logloss:.4f}')

    history.append((params, logloss))
    return {'loss': logloss, 'status': STATUS_OK}


In [14]:
# Define the parameter space to search (XGBoost parameters, each quantized with step q).
space = dict(
    min_child_weight=hp.quniform('min_child_weight', 1, 5, 1),
    max_depth=hp.quniform('max_depth', 3, 9, 1),
    gamma=hp.quniform('gamma', 0, 0.4, 0.1),
)

In [15]:
# Run the parameter search with hyperopt (TPE), minimizing the loss returned by score().
# NOTE(review): fmin is called without `rstate`; if repeatable trials are needed,
# pass a seeded generator of the type the installed hyperopt version expects.
history = []        # score() appends one (params, logloss) tuple per trial
trials = Trials()
max_evals = 10
fmin(fn=score, space=space, algo=tpe.suggest, max_evals=max_evals, trials=trials)

[0]	train-logloss:0.55704	eval-logloss:0.56275        

[1]	train-logloss:0.47971	eval-logloss:0.48930        

[2]	train-logloss:0.43033	eval-logloss:0.44067        

[3]	train-logloss:0.39670	eval-logloss:0.40974        

[4]	train-logloss:0.36958	eval-logloss:0.38426        

[5]	train-logloss:0.35162	eval-logloss:0.36757        

[6]	train-logloss:0.33714	eval-logloss:0.35444        

[7]	train-logloss:0.32096	eval-logloss:0.34284        

[8]	train-logloss:0.30766	eval-logloss:0.33135        

[9]	train-logloss:0.29785	eval-logloss:0.32446        

params:{'gamma': 0.4, 'max_depth': 4, 'min_child_weight': 3.0}, logloss:0.3245
[0]	train-logloss:0.53467	eval-logloss:0.54419                                  

[1]	train-logloss:0.44254	eval-logloss:0.46497                                  

[2]	train-logloss:0.38290	eval-logloss:0.41454                                  

[3]	train-logloss:0.33583	eval-logloss:0.37390                                  

[4]	train-logloss:0.30268	eval-lo

[3]	train-logloss:0.35645	eval-logloss:0.38633                                   

[4]	train-logloss:0.32557	eval-logloss:0.36268                                   

[5]	train-logloss:0.30369	eval-logloss:0.34502                                   

[6]	train-logloss:0.28432	eval-logloss:0.33176                                   

[7]	train-logloss:0.26706	eval-logloss:0.32074                                   

[8]	train-logloss:0.25570	eval-logloss:0.31406                                   

[9]	train-logloss:0.24234	eval-logloss:0.30401                                   

params:{'gamma': 0.2, 'max_depth': 6, 'min_child_weight': 3.0}, logloss:0.3040   
100%|██████████| 10/10 [00:01<00:00,  9.39trial/s, best loss: 0.27979850967898967]


{'gamma': 0.1, 'max_depth': 8.0, 'min_child_weight': 1.0}

In [18]:
# Report the parameters and score of the best trial from the recorded history.
# Sorting ascending by the second tuple element (logloss) puts the best trial first.
history = sorted(history, key=lambda record: record[1])
best = history[0]
best_trial_params, best_logloss = best
print(f'best_params:{best_trial_params}, score:{best_logloss:.4f}')

best_params:{'gamma': 0.1, 'max_depth': 8, 'min_child_weight': 1.0}, score:0.2798
