## LightGBM template

In [None]:
import inspect

from hyperopt import hp, tpe
from hyperopt.fmin import fmin
import lightgbm as lgbm
import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score, StratifiedKFold

In [None]:
def gini(truth, predictions):
    """Return the unnormalized Gini coefficient of ``predictions`` against ``truth``.

    Rows are ranked by descending prediction; ties are broken by original
    position so the result is deterministic. The score is derived from the
    cumulative sum of ``truth`` taken in that ranked order.

    Args:
        truth: Sequence of ground-truth values (e.g. 0/1 labels).
        predictions: Sequence of predicted scores, same length as ``truth``.

    Returns:
        The Gini coefficient as a float. Divide by ``gini(truth, truth)`` to
        normalize. The value is not finite when ``truth`` sums to zero,
        because the cumulative share is divided by that sum.
    """
    n_samples = len(truth)
    # Columns: truth, prediction, original index. The builtin ``float`` is
    # used because the ``np.float`` alias no longer exists in current NumPy.
    g = np.asarray(np.c_[truth, predictions, np.arange(n_samples)], dtype=float)
    # lexsort treats the LAST key as primary: descending prediction first,
    # then ascending original index for ties.
    g = g[np.lexsort((g[:, 2], -1 * g[:, 1]))]
    gs = g[:, 0].cumsum().sum() / g[:, 0].sum()
    gs -= (n_samples + 1) / 2.
    return gs / n_samples

In [None]:
def gini_lgb(truth, predictions):
    """Return the normalized Gini score as a ``(name, value, flag)`` triple.

    The value is ``gini(truth, predictions)`` divided by the Gini of a
    perfect ordering, ``gini(truth, truth)``.

    NOTE(review): the trailing ``True`` is presumably the "higher is better"
    flag of LightGBM's custom-metric convention -- confirm against the
    training call that consumes this function.
    """
    achieved = gini(truth, predictions)
    ideal = gini(truth, truth)
    return 'gini', achieved / ideal, True

In [None]:
def gini_sklearn(truth, predictions):
    """Return the normalized Gini score for use as a scikit-learn metric.

    Args:
        truth: Ground-truth labels.
        predictions: Predicted scores for the same samples.

    Returns:
        ``gini(truth, predictions)`` divided by the Gini of a perfect
        ordering, ``gini(truth, truth)``.
    """
    return gini(truth, predictions) / gini(truth, truth)


# Newer scikit-learn releases replace ``needs_proba`` with ``response_method``
# (and eventually drop ``needs_proba``), while older releases only know
# ``needs_proba``. Inspect the installed signature and use whichever spelling
# it supports, so the scorer is fed predicted probabilities in both cases.
if "response_method" in inspect.signature(make_scorer).parameters:
    gini_scorer = make_scorer(
        gini_sklearn,
        greater_is_better=True,
        response_method="predict_proba",
    )
else:
    gini_scorer = make_scorer(
        gini_sklearn,
        greater_is_better=True,
        needs_proba=True,
    )

In [None]:
def objective(params):
    """Hyperopt objective: negated cross-validated Gini of an LGBMClassifier.

    Args:
        params: Mapping sampled by hyperopt with the keys ``'num_leaves'``
            and ``'colsample_bytree'``.

    Returns:
        The negative mean cross-validated Gini score. Hyperopt's ``fmin``
        minimizes its objective, so the higher-is-better score is negated.

    NOTE(review): reads ``X``, ``Y`` and ``gini_scorer`` from module scope;
    ``X`` and ``Y`` are not defined in this file and must be provided by an
    earlier cell.
    """
    params = {
        # The sampled value is cast to int before being handed to LightGBM.
        'num_leaves': int(params['num_leaves']),
        # Keep three decimals, but pass a real number rather than a string.
        'colsample_bytree': round(float(params['colsample_bytree']), 3),
    }

    clf = lgbm.LGBMClassifier(
        n_estimators=500,
        learning_rate=0.01,
        **params
    )

    score = cross_val_score(clf, X, Y, scoring=gini_scorer, cv=StratifiedKFold()).mean()
    print("Gini {:.3f} params {}".format(score, params))
    # fmin searches for the MINIMUM; negate so a larger Gini is preferred.
    return -score

# Search space handed to hyperopt: a quantized-uniform draw for the number of
# leaves and a uniform draw for the per-tree column subsampling ratio.
space = dict(
    num_leaves=hp.quniform('num_leaves', 8, 128, 2),
    colsample_bytree=hp.uniform('colsample_bytree', 0.3, 1.0),
)

# Run 10 evaluations of the objective using the TPE suggestion algorithm.
best = fmin(objective, space, algo=tpe.suggest, max_evals=10)

In [None]:
# Report the parameter set hyperopt settled on.
print("Hyperopt estimated optimum", best)

In [None]:
# Settings for the final classifier.
# NOTE(review): num_leaves / colsample_bytree are presumably copied from an
# earlier hyperopt run -- confirm they still match the current search result.
_lgbm_settings = {
    'n_estimators': 10000,
    'learning_rate': 0.1,
    'num_leaves': 14,
    'colsample_bytree': 0.4668461475067747,
}
lgbm_model = lgbm.LGBMClassifier(**_lgbm_settings)

In [None]:
# Fit the final classifier on the training split.
# NOTE(review): X_train / y_train are not defined in this file -- presumably
# created by an earlier cell; confirm before running.
lgbm_model.fit(X_train, y_train)

In [None]:
# Make predictions for the test data. LGBMClassifier.predict already returns
# class labels (not probabilities), so they are used directly without the
# per-element rounding the template used to apply.
y_pred = lgbm_model.predict(X_test)
predictions = y_pred

# Evaluate predictions: fraction of test samples whose label was predicted
# correctly, printed as a percentage.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

In [None]:
# Find optimal threshold
# NOTE(review): the threshold is selected on X_test / y_test, the same data
# used for the accuracy report -- confirm this is intended, since it makes the
# reported scores optimistic.

# Look in list of 50 values between 0.35 and 0.55.
thresholds = np.linspace(0.35, 0.55, 50)
auc = {}
acc = {}

# The positive-class probabilities do not depend on the threshold, so run the
# model once instead of once per candidate threshold.
positive_proba = lgbm_model.predict_proba(X_test)[:, 1]

# See the score for the predictions under different trigger thresholds.
# Keys are ("Threshold:", t) tuples and values are ("Score:", score) tuples.
for t in thresholds:
    predictions = positive_proba > t
    auc["Threshold:", t] = "Score:", roc_auc_score(y_test, predictions)
    acc["Threshold:", t] = "Score:", accuracy_score(y_test, predictions)

In [None]:
# For each metric, print a heading followed by the (key, value) entry whose
# value -- a ("Score:", score) tuple -- compares highest.
for _title, _scores in (
    ("Best AUC result:", auc),
    ("Best accuracy result:", acc),
):
    print(_title)
    print(max(_scores.items(), key=lambda entry: entry[1]))