Load the modules and data.

In [None]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
import time

# Read the training and test tables from the input directory (paths are
# relative to the notebook's working directory).
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ("../input/train.csv", "../input/test.csv")
)

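Add simple row-wise summary features (sum, min, max, mean, standard deviation, skewness, kurtosis, median) computed across the raw variable columns.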

In [None]:

# Column labels at positions 2..201 of the training frame: the 200 columns the
# row statistics are computed over. Presumably these follow 'ID_code' and
# 'target' -- verify against the CSV header. Slicing by position keeps this
# cell safe to re-run, because the new columns are appended after position 201.
index = train.columns.values[2:202]

# New column name -> name of the pandas row-wise reduction that produces it.
row_reductions = {
    'sum': 'sum',
    'min': 'min',
    'max': 'max',
    'mean': 'mean',
    'std': 'std',
    'skew': 'skew',
    'kurt': 'kurtosis',
    'med': 'median',
}

for df in [test, train]:
    # Take the raw columns once per frame so that every statistic is computed
    # on the same input, unaffected by the columns being added.
    raw_values = df[index]
    for new_column, reduction in row_reductions.items():
        df[new_column] = getattr(raw_values, reduction)(axis=1)

# Model inputs: every column except the row identifier and the label, so the
# freshly added statistics are included.
non_feature_columns = ['ID_code', 'target']
features = [c for c in train.columns if c not in non_feature_columns]
labels = train['target']

Set the parameters for the LightGBM classifier.

In [None]:
# LightGBM training parameters, grouped by purpose. Keys and values are passed
# to lgb.train unchanged.
params = {}

# Task definition and evaluation metric.
params.update({
    'objective': 'binary',
    'metric': 'auc',
    'boost': 'gbdt',
    'boost_from_average': 'false',
})

# Tree shape and leaf constraints (max_depth of -1 means no depth limit).
params.update({
    'num_leaves': 13,
    'max_depth': -1,
    'min_data_in_leaf': 80,
    'min_sum_hessian_in_leaf': 10.0,
})

# Learning rate plus row/column subsampling: rows are re-sampled every
# `bagging_freq` iterations, features are sub-sampled per tree.
params.update({
    'learning_rate': 0.0083,
    'bagging_freq': 5,
    'bagging_fraction': 0.335,
    'feature_fraction': 0.041,
})

# Execution settings and logging level.
params.update({
    'num_threads': 8,
    'tree_learner': 'serial',
    'verbosity': 1,
})

Train the model on 15 folds.

In [None]:
# Stratified 15-fold cross-validation.
# shuffle=True is required for random_state to have any effect: with
# shuffle=False scikit-learn >= 0.24 raises a ValueError, and older releases
# silently ignore the seed.
folds = StratifiedKFold(n_splits=15, shuffle=True, random_state=42)

# Out-of-fold predictions for the training rows, and the fold-averaged
# predictions for the test rows.
oof = np.zeros(len(train))
predictions = np.zeros(len(test))

# Select the model inputs once instead of re-slicing the full frames (which
# also hold the ID and label columns) on every fold.
train_features = train[features]
test_features = test[features]

print('Training LGB...')
start = time.time()
# split() only needs the number of rows and the labels to build the folds.
for fold, (train_idx, val_idx) in enumerate(folds.split(train_features, labels)):
    print("Fold: " + str((fold + 1)))
    trn_data = lgb.Dataset(train_features.iloc[train_idx], label=labels.iloc[train_idx])
    val_data = lgb.Dataset(train_features.iloc[val_idx], label=labels.iloc[val_idx])

    # Up to 1,000,000 boosting rounds; training stops once the validation
    # metric has not improved for 4000 rounds. Progress is logged every 5000
    # rounds. The callbacks replace the `early_stopping_rounds` and
    # `verbose_eval` keyword arguments, which LightGBM 4.0 removed.
    clf = lgb.train(params, trn_data, 1000000, valid_sets=[trn_data, val_data],
                    callbacks=[lgb.early_stopping(stopping_rounds=4000),
                               lgb.log_evaluation(period=5000)])

    # Score the held-out rows and accumulate this fold's share of the test
    # prediction, both at the best iteration found by early stopping.
    oof[val_idx] = clf.predict(train_features.iloc[val_idx], num_iteration=clf.best_iteration)
    predictions += clf.predict(test_features, num_iteration=clf.best_iteration) / folds.n_splits

# AUC over all out-of-fold predictions.
print("CV score: " + str(roc_auc_score(labels, oof)))
end = time.time()
print("Training time: " + str(end-start) + ' seconds')

Submit the predictions.

In [None]:
# Pair each test ID with its averaged prediction and write the submission file
# without the DataFrame index.
sub = pd.DataFrame({
    "ID_code": test.ID_code.values,
    "target": predictions,
})
sub.to_csv('submission.csv', index=False)