#### Hyperparameter tuning with Hyperopt

In [None]:
# Baseline LightGBM settings, passed as keyword arguments to the classifier.
# Options currently switched off: feature_fraction=0.75, bagging_fraction=0.75,
# bagging_freq=1, max_depth=8.
lgb_params = dict(
    # learning task
    objective='binary',
    metric='auc',
    # boosting schedule
    n_estimators=100,
    learning_rate=0.01,
    # regularisation
    min_gain_to_split=0.001,
    reg_lambda=0.1,
    # tree shape (2**5 == 32)
    num_leaves=32,
    min_data_in_leaf=32,
    # class imbalance handling / importance reporting
    is_unbalance=True,
    importance_type='gain',
    # runtime
    nthread=-1,
    bagging_seed=42,
)

In [None]:
from hyperopt import fmin, tpe, hp

# Weight of the (train AUC - test AUC) gap that `objective` subtracts from the
# test AUC; 0 disables the penalty.
alpha = 0.5

# Candidate tree counts: 30, 40, ..., 490.
n_estimators_grid = list(range(30, 500, 10))

# Hyperopt search space, one entry per sampled hyper-parameter.
space = dict(
    n_estimators=hp.choice('n_estimators', n_estimators_grid),
    # log-uniform between 0.001 and 0.5
    learning_rate=hp.loguniform('learning_rate', np.log(0.001), np.log(0.5)),
    # quniform yields floats on a grid; consumers must cast to int where needed
    max_depth=hp.quniform('max_depth', 3, 10, 1),
    colsample_bytree=hp.quniform('colsample_bytree', 0.8, 1, 0.01),
    min_child_samples=hp.quniform('min_child_samples', 2, 300, 1),
    reg_lambda=hp.uniform('reg_lambda', 0.0, 5),
    reg_alpha=hp.uniform('reg_alpha', 0.0, 5),
)

def objective(params):
    """Hyperopt objective: train LightGBM with the sampled values, return a loss.

    Reads the notebook globals ``X_train``, ``y_train``, ``X_test``, ``y_test``,
    ``alpha``, ``lgb`` and ``roc_auc_score``.

    Parameters
    ----------
    params : dict
        One point sampled from the search space. Must contain
        ``learning_rate``, ``max_depth``, ``colsample_bytree``,
        ``min_child_samples``, ``reg_lambda`` and ``reg_alpha``;
        ``n_estimators`` is optional and falls back to 100.

    Returns
    -------
    float
        Negative score (hyperopt minimises). With ``alpha > 0`` the score is
        the test AUC minus ``alpha`` times the train/test AUC gap, otherwise
        it is the plain test AUC.
    """
    # hp.quniform yields floats; LightGBM needs integers for these parameters.
    max_depth = int(params['max_depth'])

    # Built under a new name so the sampled `params` argument is not shadowed.
    model_params = {
        'objective': 'binary',
        'metric': 'auc',
        'is_unbalance': True,
        'importance_type': 'gain',

        'nthread': -1,
        'bagging_seed': 42,

        # Previously hard-coded to 100 even though the space samples it.
        'n_estimators': int(params.get('n_estimators', 100)),
        'learning_rate': params['learning_rate'],
        # Leaf budget tied to depth: a full binary tree of `max_depth` levels.
        'num_leaves': 2 ** max_depth,
        'max_depth': max_depth,
        'colsample_bytree': params['colsample_bytree'],
        'min_child_samples': int(params['min_child_samples']),
        'reg_lambda': params['reg_lambda'],
        'reg_alpha': params['reg_alpha'],
    }

    # Fix: the model used to be built from the global `lgb_params`, so every
    # trial trained the same baseline model and the sampled values were ignored.
    model_lgb = lgb.LGBMClassifier(**model_params)

    model_lgb.fit(
        X_train,
        y_train,
        eval_metric="auc",
        eval_set=[(X_train, y_train), (X_test, y_test)],
    )

    # NOTE(review): the score is computed on X_test, so hyper-parameters are
    # selected on the same data later used to report performance.
    forecast_lgb_test = model_lgb.predict_proba(X_test)[:, 1]
    score_test = roc_auc_score(y_test, forecast_lgb_test)

    if alpha > 0:
        forecast_lgb_train = model_lgb.predict_proba(X_train)[:, 1]
        score_train = roc_auc_score(y_train, forecast_lgb_train)

        # Penalise over-fitting: subtract a share of the train/test AUC gap.
        score_total = score_test - alpha * (score_train - score_test)
    else:
        score_total = score_test

    print("score {:.5f}; params {}".format(score_test, model_params))
    return -score_total

# Minimise `objective` over `space` with the TPE sampler for 50 evaluations.
# No `rstate` is passed, so presumably the result varies between runs - TODO confirm.
best = fmin(objective, space, algo=tpe.suggest, max_evals=50)

In [None]:
# Show the raw result of the search.
# NOTE(review): for hp.choice dimensions (here `n_estimators`) hyperopt reports
# the index of the chosen option rather than the option itself - verify before
# reusing these numbers as model parameters.
best

In [None]:
from hyperopt import space_eval

# `best` from fmin cannot be passed to the model as-is: hp.choice dimensions are
# reported as the index of the chosen option and hp.quniform dimensions as
# floats. space_eval maps the result back onto the actual values in the space.
best_params = dict(space_eval(space, best))

# LightGBM requires integers for these parameters.
for int_param in ('n_estimators', 'max_depth', 'min_child_samples'):
    if int_param in best_params:
        best_params[int_param] = int(best_params[int_param])

# NOTE(review): `objective` also sets num_leaves = 2**max_depth and fixed options
# such as is_unbalance; they are not part of `best`, so this model does not
# replicate the tuned configuration exactly - confirm whether it should.
clf_lgb_best = lgb.LGBMClassifier(**best_params)
clf_lgb_best

# Output recorded before this fix, when `best` was unpacked directly
# (n_estimators=12 is the hp.choice index, not a tree count; max_depth and
# min_child_samples are floats):
# LGBMClassifier(colsample_bytree=0.93, learning_rate=0.10366005116679088,
#                max_depth=7.0, min_child_samples=98.0, n_estimators=12,
#                reg_alpha=4.396672045765595, reg_lambda=2.0620178916994973)

In [None]:
# Hyper-parameters copied by hand from an earlier search result, with the
# integer-valued ones written as ints.
# NOTE(review): n_estimators=12 lies outside the searched grid (30..490 step 10);
# it looks like the hp.choice index, which would correspond to 150 trees - confirm.
best_lgb_kwargs = {
    'colsample_bytree': 0.93,
    'learning_rate': 0.10366005116679088,
    'max_depth': 7,
    'min_child_samples': 98,
    'n_estimators': 12,
    'reg_alpha': 4.396672045765595,
    'reg_lambda': 2.0620178916994973,
}
clf_lgb_best = lgb.LGBMClassifier(**best_lgb_kwargs)

# Fit on the training split; the fitted estimator is the cell output.
clf_lgb_best.fit(X_train, y_train)

In [None]:
# 4-fold splitter; all other KFold options are left at their defaults.
kfold = model_selection.KFold(n_splits=4)

# ROC-AUC of the selected model on each fold, using all available cores.
AUC_cv_best = cross_val_score(
    estimator=clf_lgb_best,
    X=X_not_target,
    y=y,
    scoring='roc_auc',
    cv=kfold,
    n_jobs=-1,
)
AUC_cv_best