In [1]:
import gc
import os
import logging
import datetime
import warnings
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import StratifiedKFold, train_test_split

warnings.filterwarnings('ignore')

In [2]:
# Logging setup
def get_logger():
    """Configure root logging and return the notebook's 'main' logger.

    The root handler is installed through ``logging.basicConfig`` with a
    ``[LEVEL]timestamp:name:message`` layout, and the returned logger is
    set to DEBUG so every message it receives is emitted.
    """
    logging.basicConfig(format='[%(levelname)s]%(asctime)s:%(name)s:%(message)s')
    main_logger = logging.getLogger('main')
    main_logger.setLevel(logging.DEBUG)
    return main_logger

In [3]:
# Notebook-wide logger; the functions defined in later cells call `logger.info(...)` on this global.
logger = get_logger()

In [4]:
def read_data(nrows=None, input_dir='./input'):
    """Load the train and test CSV files into DataFrames.

    Parameters
    ----------
    nrows : int or None, optional
        Row limit forwarded to ``pd.read_csv`` for ``train.csv`` only;
        ``test.csv`` is always read in full. ``None`` reads every row.
    input_dir : str, optional
        Directory that contains ``train.csv`` and ``test.csv``.
        Defaults to ``'./input'``, the location used so far.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train_df, test_df)``.
    """
    logger.info('Input data')
    train_df = pd.read_csv(os.path.join(input_dir, 'train.csv'), nrows=nrows)
    test_df = pd.read_csv(os.path.join(input_dir, 'test.csv'))
    return train_df, test_df

In [5]:
def process_data(train_df, test_df):
    """Add pairwise sum/difference features to both frames, in place.

    For each pair ``(a, b)`` in the hard-coded list below, two columns are
    added to ``train_df`` and to ``test_df``:

    * ``'<a>_plus_<b>'``  = ``df[a] + df[b]``
    * ``'<a>_minus_<b>'`` = ``df[a] - df[b]``

    The source columns are kept: ``var_44`` takes part in every pair, so it
    cannot be removed after the first one.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames containing the ``var_*`` columns named in the pair list.
        Both are modified in place.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The same ``(train_df, test_df)`` objects that were passed in.
    """
    logger.info('Features engineering')
    enginering_feats = [('var_26', 'var_44'), ('var_44', 'var_123'), ('var_44', 'var_155')]

    for df in (test_df, train_df):
        for left, right in enginering_feats:
            df['%s_plus_%s' % (left, right)] = df[left] + df[right]
            # Operand order now matches the column name; the previous code
            # stored right - left under the '<left>_minus_<right>' label.
            df['%s_minus_%s' % (left, right)] = df[left] - df[right]
        # Previously tried and disabled: rounding every feature to 2 decimals,
        # and row-wise sum/min/max/mean/std/skew/kurtosis/median aggregates.
    print('Train and test shape:', train_df.shape, test_df.shape)
    return train_df, test_df

In [12]:
def _lgb_params(bagging_fraction, feature_fraction, learning_rate):
    """Build the LightGBM parameter dict shared by the three model variants.

    Only the three arguments differ between ``run_model_1``, ``run_model_2``
    and ``run_model_3``; every other entry is common to all of them.
    """
    return {
        'bagging_freq': 5,
        'bagging_fraction': bagging_fraction,
        'boost_from_average': 'false',
        'boost': 'gbdt',
        'feature_fraction': feature_fraction,
        'learning_rate': learning_rate,
        'max_depth': -1,
        'metric': 'auc',
        'min_data_in_leaf': 80,
        'min_sum_hessian_in_leaf': 10.0,
        'num_leaves': 13,
        'num_threads': 8,
        'tree_learner': 'serial',
        'objective': 'binary',
        'verbosity': 1,
    }


def _run_lgb_cv(train_df, test_df, param, n_splits=20, num_round=1000000,
                early_stopping_rounds=4000, verbose_eval=1000):
    """Train one LightGBM booster per stratified fold and collect predictions.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Training frame with a ``'target'`` column. Every column except
        ``'ID_code'`` and ``'target'`` is used as a feature.
    test_df : pandas.DataFrame
        Frame containing the same feature columns.
    param : dict
        Parameters passed to ``lgb.train``.
    n_splits : int
        Number of stratified folds.
    num_round : int
        Upper bound on boosting rounds; early stopping ends training sooner.
    early_stopping_rounds : int
        Rounds without validation improvement before training stops.
    verbose_eval : int
        Evaluation logging period, in rounds.

    Returns
    -------
    predictions : numpy.ndarray
        Test-set predictions averaged over the folds, length ``len(test_df)``.
    oof : numpy.ndarray
        Out-of-fold predictions for the training rows, length ``len(train_df)``.
    """
    logger.info('Prepare the model')
    features = [c for c in train_df.columns if c not in ['ID_code', 'target']]
    target = train_df['target']
    logger.info('Run model')

    # No random_state: it has no effect with shuffle=False and recent
    # scikit-learn rejects the combination. The folds are the same either way.
    folds = StratifiedKFold(n_splits=n_splits, shuffle=False)
    oof = np.zeros(len(train_df))
    predictions = np.zeros(len(test_df))
    train_features = train_df[features]
    test_features = test_df[features]
    # Stratified splitting depends only on y, so a zero placeholder replaces X
    # and avoids materialising train_df.values (an object array, because of
    # the ID_code column).
    placeholder = np.zeros(len(train_df))
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(placeholder, target.values)):
        print("Fold {}".format(fold_))
        trn_data = lgb.Dataset(train_features.iloc[trn_idx], label=target.iloc[trn_idx])
        val_data = lgb.Dataset(train_features.iloc[val_idx], label=target.iloc[val_idx])
        # NOTE(review): verbose_eval / early_stopping_rounds are keyword
        # arguments of the LightGBM version this notebook was written for;
        # newer releases expect callbacks instead. Confirm before upgrading.
        clf = lgb.train(param, trn_data, num_round,
                        valid_sets=[trn_data, val_data],
                        verbose_eval=verbose_eval,
                        early_stopping_rounds=early_stopping_rounds)
        oof[val_idx] = clf.predict(train_features.iloc[val_idx], num_iteration=clf.best_iteration)
        predictions += clf.predict(test_features, num_iteration=clf.best_iteration) / folds.n_splits
    score = roc_auc_score(target, oof)
    print("CV score: {:<8.5f}".format(score))
    return predictions, oof


def run_model_1(train_df, test_df):
    """20-fold LightGBM CV with bagging 0.38, feature fraction 0.045, learning rate 0.0095.

    Returns ``(predictions, oof)``: fold-averaged test predictions and
    out-of-fold training predictions.
    """
    param = _lgb_params(bagging_fraction=0.38, feature_fraction=0.045, learning_rate=0.0095)
    return _run_lgb_cv(train_df, test_df, param)


def run_model_2(train_df, test_df):
    """20-fold LightGBM CV with bagging 0.38, feature fraction 0.045, learning rate 0.01.

    Returns ``(predictions, oof)``: fold-averaged test predictions and
    out-of-fold training predictions.
    """
    param = _lgb_params(bagging_fraction=0.38, feature_fraction=0.045, learning_rate=0.01)
    return _run_lgb_cv(train_df, test_df, param)


def run_model_3(train_df, test_df):
    """20-fold LightGBM CV with bagging 0.4, feature fraction 0.05, learning rate 0.01.

    Returns ``(predictions, oof)``: fold-averaged test predictions and
    out-of-fold training predictions.
    """
    param = _lgb_params(bagging_fraction=0.4, feature_fraction=0.05, learning_rate=0.01)
    return _run_lgb_cv(train_df, test_df, param)

In [7]:
def submit(test_df, predictions, score, name='model_1'):
    """Write a submission CSV for the given test predictions.

    The file is created in the current working directory as
    ``submission_<name>_<score>.csv`` with the score printed to five
    decimals, and holds two columns: ``ID_code`` and ``target``.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Test frame; only its ``ID_code`` column is read.
    predictions : array-like
        One prediction per row of ``test_df``.
    score : float
        Score used only to label the file name.
    name : str, optional
        Model label used in the file name.
    """
    logger.info('Prepare submission')
    sub = pd.DataFrame({"ID_code": test_df.ID_code.values})
    sub["target"] = predictions
    # '{:.5f}' rather than '{:<8.5f}': the width-8 left alignment padded the
    # score and left a space in the file name just before '.csv'.
    sub.to_csv("submission_{}_{:.5f}.csv".format(name, score), index=False)

In [14]:
def read_process_train_submit(nrows=None):
    """Run the pipeline end to end for models 1 and 2 and write both submissions.

    Parameters
    ----------
    nrows : int or None, optional
        Forwarded to ``read_data`` to limit the number of rows read.
    """
    train_df, test_df = read_data(nrows)
    #train_df, test_df = process_data(train_df, test_df)
    target = train_df['target']
    # run_model_* return (test predictions, out-of-fold predictions), so the
    # score used to label the file is computed here from the OOF array.
    predictions, oof = run_model_1(train_df, test_df)
    submit(test_df, predictions, roc_auc_score(target, oof))
    predictions_1, oof_1 = run_model_2(train_df, test_df)
    submit(test_df, predictions_1, roc_auc_score(target, oof_1), 'model_2')

In [11]:
# Load the full data, add the engineered pair features, then cross-validate
# model 1. `predictions` are its test-set predictions and `oof` its
# out-of-fold predictions on the training rows.
train_df, test_df = read_data()
train_df, test_df = process_data(train_df, test_df)
predictions, oof = run_model_1(train_df, test_df)
#submit(test_df, predictions, score)

[INFO]2019-03-12 16:14:10,728:main:Input data
[INFO]2019-03-12 16:14:24,525:main:Features engineering
[INFO]2019-03-12 16:14:27,842:main:Prepare the model
[INFO]2019-03-12 16:14:27,843:main:Run model


Train and test shape: (200000, 208) (200000, 207)
Fold 0
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.897457	valid_1's auc: 0.877774
[2000]	training's auc: 0.909275	valid_1's auc: 0.884993
[3000]	training's auc: 0.916672	valid_1's auc: 0.888644
[4000]	training's auc: 0.922195	valid_1's auc: 0.891044
[5000]	training's auc: 0.926906	valid_1's auc: 0.892696
[6000]	training's auc: 0.930939	valid_1's auc: 0.89354
[7000]	training's auc: 0.934578	valid_1's auc: 0.893961
[8000]	training's auc: 0.937972	valid_1's auc: 0.894476
[9000]	training's auc: 0.941233	valid_1's auc: 0.894671
[10000]	training's auc: 0.944321	valid_1's auc: 0.894864
[11000]	training's auc: 0.94736	valid_1's auc: 0.895028
[12000]	training's auc: 0.950202	valid_1's auc: 0.894941
[13000]	training's auc: 0.953018	valid_1's auc: 0.895066
[14000]	training's auc: 0.955653	valid_1's auc: 0.894973
Early stopping, best iteration is:
[10815]	training's auc: 0.94682	valid_1's auc: 0.895129
F

[5000]	training's auc: 0.926698	valid_1's auc: 0.895648
[6000]	training's auc: 0.930711	valid_1's auc: 0.896555
[7000]	training's auc: 0.934387	valid_1's auc: 0.896983
[8000]	training's auc: 0.937799	valid_1's auc: 0.897466
[9000]	training's auc: 0.941056	valid_1's auc: 0.897781
[10000]	training's auc: 0.944137	valid_1's auc: 0.897972
[11000]	training's auc: 0.94718	valid_1's auc: 0.897882
[12000]	training's auc: 0.950062	valid_1's auc: 0.89794
[13000]	training's auc: 0.952831	valid_1's auc: 0.897725
Early stopping, best iteration is:
[9627]	training's auc: 0.943002	valid_1's auc: 0.898072
Fold 9
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.896468	valid_1's auc: 0.885247
[2000]	training's auc: 0.908654	valid_1's auc: 0.894126
[3000]	training's auc: 0.916246	valid_1's auc: 0.898604
[4000]	training's auc: 0.921946	valid_1's auc: 0.900179
[5000]	training's auc: 0.926619	valid_1's auc: 0.901616
[6000]	training's auc: 0.93063	valid_1's auc: 0.9025

Fold 17
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.896201	valid_1's auc: 0.888203
[2000]	training's auc: 0.908457	valid_1's auc: 0.898285
[3000]	training's auc: 0.916111	valid_1's auc: 0.903429
[4000]	training's auc: 0.921738	valid_1's auc: 0.906498
[5000]	training's auc: 0.926412	valid_1's auc: 0.90785
[6000]	training's auc: 0.930373	valid_1's auc: 0.908661
[7000]	training's auc: 0.934004	valid_1's auc: 0.909119
[8000]	training's auc: 0.937445	valid_1's auc: 0.909307
[9000]	training's auc: 0.940752	valid_1's auc: 0.90928
[10000]	training's auc: 0.943875	valid_1's auc: 0.909524
[11000]	training's auc: 0.94693	valid_1's auc: 0.909755
[12000]	training's auc: 0.949831	valid_1's auc: 0.909782
[13000]	training's auc: 0.952642	valid_1's auc: 0.909675
[14000]	training's auc: 0.955299	valid_1's auc: 0.909671
[15000]	training's auc: 0.957863	valid_1's auc: 0.90943
[16000]	training's auc: 0.960378	valid_1's auc: 0.909467
Early stopping, best iteratio

In [None]:
# Cross-validate model 2 on the same frames; `predictions_1` / `oof_1` are its
# test-set and out-of-fold predictions.
predictions_1, oof_1 = run_model_2(train_df, test_df)
#submit(test_df, predictions_1, score, 'model_2')

[INFO]2019-03-12 18:13:10,531:main:Prepare the model
[INFO]2019-03-12 18:13:10,533:main:Run model


Fold 0
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.898205	valid_1's auc: 0.878127
[2000]	training's auc: 0.910071	valid_1's auc: 0.88537
[3000]	training's auc: 0.917568	valid_1's auc: 0.889241
[4000]	training's auc: 0.923172	valid_1's auc: 0.891694
[5000]	training's auc: 0.927947	valid_1's auc: 0.893369
[6000]	training's auc: 0.932077	valid_1's auc: 0.894162
[7000]	training's auc: 0.935832	valid_1's auc: 0.894606
[8000]	training's auc: 0.939325	valid_1's auc: 0.895001
[9000]	training's auc: 0.942695	valid_1's auc: 0.894997
[10000]	training's auc: 0.945882	valid_1's auc: 0.89505
[11000]	training's auc: 0.948998	valid_1's auc: 0.89501
[12000]	training's auc: 0.951954	valid_1's auc: 0.894939
[13000]	training's auc: 0.954831	valid_1's auc: 0.895112
[14000]	training's auc: 0.957551	valid_1's auc: 0.894951
[15000]	training's auc: 0.960231	valid_1's auc: 0.894753
[16000]	training's auc: 0.962761	valid_1's auc: 0.894664
Early stopping, best iteratio

[1000]	training's auc: 0.897566	valid_1's auc: 0.876537
[2000]	training's auc: 0.90972	valid_1's auc: 0.886766
[3000]	training's auc: 0.917342	valid_1's auc: 0.892249
[4000]	training's auc: 0.923048	valid_1's auc: 0.89453
[5000]	training's auc: 0.927691	valid_1's auc: 0.895717
[6000]	training's auc: 0.931794	valid_1's auc: 0.896325
[7000]	training's auc: 0.93559	valid_1's auc: 0.896648
[8000]	training's auc: 0.939117	valid_1's auc: 0.896924
[9000]	training's auc: 0.942504	valid_1's auc: 0.897167
[10000]	training's auc: 0.945663	valid_1's auc: 0.897253
[11000]	training's auc: 0.948786	valid_1's auc: 0.897089
[12000]	training's auc: 0.951767	valid_1's auc: 0.897099
[13000]	training's auc: 0.954606	valid_1's auc: 0.896829
Early stopping, best iteration is:
[9521]	training's auc: 0.944173	valid_1's auc: 0.897434
Fold 9
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.897272	valid_1's auc: 0.885732
[2000]	training's auc: 0.909573	valid_1's auc: 0.8945

[2000]	training's auc: 0.90935	valid_1's auc: 0.898861
[3000]	training's auc: 0.917083	valid_1's auc: 0.903875
[4000]	training's auc: 0.922732	valid_1's auc: 0.906839
[5000]	training's auc: 0.927516	valid_1's auc: 0.908246
[6000]	training's auc: 0.93157	valid_1's auc: 0.908981
[7000]	training's auc: 0.935332	valid_1's auc: 0.909475
[8000]	training's auc: 0.938869	valid_1's auc: 0.909707
[9000]	training's auc: 0.942299	valid_1's auc: 0.909737
[10000]	training's auc: 0.945531	valid_1's auc: 0.909833
[11000]	training's auc: 0.948672	valid_1's auc: 0.910048
[12000]	training's auc: 0.951625	valid_1's auc: 0.909927
[13000]	training's auc: 0.95453	valid_1's auc: 0.909755
[14000]	training's auc: 0.957261	valid_1's auc: 0.909648
[15000]	training's auc: 0.959882	valid_1's auc: 0.90931
Early stopping, best iteration is:
[11072]	training's auc: 0.948875	valid_1's auc: 0.910103
Fold 18
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.896914	valid_1's auc: 0.8

In [25]:
# Cross-validate model 3; `predictions_2` / `oof_2` are its test-set and
# out-of-fold predictions.
# NOTE(review): the disabled submit line below passes `predictions_1`, not
# `predictions_2` -- fix it before re-enabling.
predictions_2, oof_2 = run_model_3(train_df, test_df)
#submit(test_df, predictions_1, score, 'model_3')

[INFO]2019-03-12 12:07:41,010:main:Prepare the model
[INFO]2019-03-12 12:07:41,011:main:Run model


Fold 0
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.898963	valid_1's auc: 0.879443
[2000]	training's auc: 0.910857	valid_1's auc: 0.888608
[3000]	training's auc: 0.918607	valid_1's auc: 0.893338
[4000]	training's auc: 0.924418	valid_1's auc: 0.896423
[5000]	training's auc: 0.9292	valid_1's auc: 0.898378
[6000]	training's auc: 0.933456	valid_1's auc: 0.89912
[7000]	training's auc: 0.937393	valid_1's auc: 0.899814
[8000]	training's auc: 0.9411	valid_1's auc: 0.900267
[9000]	training's auc: 0.944591	valid_1's auc: 0.900735
[10000]	training's auc: 0.94786	valid_1's auc: 0.900824
[11000]	training's auc: 0.951008	valid_1's auc: 0.90081
[12000]	training's auc: 0.954086	valid_1's auc: 0.900658
[13000]	training's auc: 0.95704	valid_1's auc: 0.900667
[14000]	training's auc: 0.95991	valid_1's auc: 0.900572
Early stopping, best iteration is:
[10487]	training's auc: 0.949417	valid_1's auc: 0.900918
Fold 1
Training until validation scores don't improve for

[7000]	training's auc: 0.937077	valid_1's auc: 0.903839
[8000]	training's auc: 0.940751	valid_1's auc: 0.904143
[9000]	training's auc: 0.944275	valid_1's auc: 0.904393
[10000]	training's auc: 0.947633	valid_1's auc: 0.904389
[11000]	training's auc: 0.950799	valid_1's auc: 0.904232
[12000]	training's auc: 0.953871	valid_1's auc: 0.903955
[13000]	training's auc: 0.956821	valid_1's auc: 0.903623
Early stopping, best iteration is:
[9613]	training's auc: 0.94636	valid_1's auc: 0.904541
Fold 10
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.898088	valid_1's auc: 0.887067
[2000]	training's auc: 0.910235	valid_1's auc: 0.895774
[3000]	training's auc: 0.917874	valid_1's auc: 0.900864
[4000]	training's auc: 0.923744	valid_1's auc: 0.903518
[5000]	training's auc: 0.928703	valid_1's auc: 0.905242
[6000]	training's auc: 0.932989	valid_1's auc: 0.905917
[7000]	training's auc: 0.936934	valid_1's auc: 0.906562
[8000]	training's auc: 0.940591	valid_1's auc: 0.9

In [42]:
# CV AUC of the unweighted mean of the three models' out-of-fold predictions.
roc_auc_score(train_df.target.values, (oof_1+oof+oof_2)/3)

0.9008632381434583

In [55]:
# Write one submission per model plus two blends. The numeric argument is a
# hand-entered literal that `submit` only uses to label the file name.
# NOTE(review): the 0.42 / 0.58 weights presumably come from the alpha grid
# search cell -- confirm, since that cell never displays its result.
submit(test_df, predictions, 0.90047, 'model_1')
submit(test_df, predictions_1, 0.90031, 'model_2')
submit(test_df, predictions_2, 0.90058, 'model_3')
submit(test_df, (predictions+predictions_1+predictions_2)/3, 0.90086, 'average_3_models')
submit(test_df, 0.42*predictions_1 + 0.58 * predictions_2, 0.90089, 'average(0.42)_2_3_models')

[INFO]2019-03-12 14:29:07,302:main:Prepare submission
[INFO]2019-03-12 14:29:07,831:main:Prepare submission
[INFO]2019-03-12 14:29:08,359:main:Prepare submission
[INFO]2019-03-12 14:29:08,938:main:Prepare submission
[INFO]2019-03-12 14:29:09,511:main:Prepare submission


In [44]:
# Empty frame that will hold each model's test-set predictions, one column per model.
models_predictions=pd.DataFrame()

In [45]:
# Test-set predictions of models 1-3. The column names match those of
# `blend_features`, so a model fitted on one frame can score the other.
models_predictions['target_1'] = predictions
models_predictions['target_2'] = predictions_1
models_predictions['target_3'] = predictions_2

In [46]:
# Empty frame that will hold each model's out-of-fold predictions, one column per model.
blend_features = pd.DataFrame()

In [47]:
# Out-of-fold predictions of models 1-3 on the training rows; these are the
# inputs of the logistic-regression stacker fitted further down.
blend_features['target_1'] = oof
blend_features['target_2'] = oof_1
blend_features['target_3'] = oof_2

In [51]:
# Preview the 51 evenly spaced blend weights in [0, 1] (step 0.02) scanned by the search loop below.
np.linspace(0., 1., 51)

array([0.  , 0.02, 0.04, 0.06, 0.08, 0.1 , 0.12, 0.14, 0.16, 0.18, 0.2 ,
       0.22, 0.24, 0.26, 0.28, 0.3 , 0.32, 0.34, 0.36, 0.38, 0.4 , 0.42,
       0.44, 0.46, 0.48, 0.5 , 0.52, 0.54, 0.56, 0.58, 0.6 , 0.62, 0.64,
       0.66, 0.68, 0.7 , 0.72, 0.74, 0.76, 0.78, 0.8 , 0.82, 0.84, 0.86,
       0.88, 0.9 , 0.92, 0.94, 0.96, 0.98, 1.  ])

In [52]:
# Grid-search the weight `alpha` of a two-model blend,
#     alpha * oof_1 + (1 - alpha) * oof_2,
# keeping the alpha with the highest AUC on the out-of-fold predictions.
# The result stays in `max_score` / `max_alpha`; nothing is displayed here.
# NOTE(review): 0.5 is presumably the AUC of a random ranking, used as the
# floor a blend must beat -- confirm.
max_score = 0.5
max_alpha = 0.
for alpha in np.linspace(0., 1., 51):
    alpha_predictions = alpha * oof_1 + (1.-alpha)*oof_2
    score = roc_auc_score(train_df.target.values, alpha_predictions)
    # Strict '<' keeps the first (smallest) alpha when several tie.
    if max_score < score:
        max_score = score
        max_alpha = alpha

In [57]:
from sklearn.linear_model import LogisticRegression

In [58]:
# Meta-learner for stacking, created with scikit-learn's default settings.
lr = LogisticRegression()

In [63]:
# Stack the three LightGBM models: fit a logistic regression on their
# out-of-fold predictions (`blend_features`) with 12-fold stratified CV, and
# average the stacker's predictions on the models' test-set outputs
# (`models_predictions`) over the folds.
#
# Results go to `stack_oof` / `stack_predictions` so that model 1's `oof` and
# `predictions` are not overwritten. `random_state` is omitted because it has
# no effect with shuffle=False and recent scikit-learn rejects the pair.
folds = StratifiedKFold(n_splits=12, shuffle=False)
stack_oof = np.zeros(len(train_df))
stack_predictions = np.zeros(len(test_df))
stack_target = train_df.target
for fold_, (trn_idx, val_idx) in enumerate(folds.split(blend_features, stack_target.values)):
    print("Fold {}".format(fold_))
    # `lr` is refitted from scratch on each fold's training part.
    lr.fit(blend_features.iloc[trn_idx], stack_target.iloc[trn_idx])
    stack_oof[val_idx] = lr.predict_proba(blend_features.iloc[val_idx])[:, 1]
    stack_predictions += lr.predict_proba(models_predictions)[:, 1] / folds.n_splits
score = roc_auc_score(stack_target, stack_oof)
print("CV score: {:<8.5f}".format(score))

Fold 0
Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
Fold 6
Fold 7
Fold 8
Fold 9
Fold 10
Fold 11
CV score: 0.90084 


In [12]:
# Re-run models 1 and 2 on the raw features (no `process_data`) and write one
# submission per model. `run_model_*` return (test predictions, out-of-fold
# predictions), so each CV score is computed from the OOF array, and each
# submission is labelled with its own model's score.
train_df, test_df = read_data()
predictions, oof = run_model_1(train_df, test_df)
score = roc_auc_score(train_df['target'], oof)
submit(test_df, predictions, score, 'model_1_bis')
predictions_1, oof_1 = run_model_2(train_df, test_df)
score_1 = roc_auc_score(train_df['target'], oof_1)
submit(test_df, predictions_1, score_1, 'model_2_bis')


[INFO]2019-03-12 02:36:29,991:main:Input data
[INFO]2019-03-12 02:36:43,425:main:Prepare the model
[INFO]2019-03-12 02:36:43,426:main:Run model


Fold 0
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.905372	valid_1's auc: 0.885864
[2000]	training's auc: 0.914113	valid_1's auc: 0.891452
[3000]	training's auc: 0.920626	valid_1's auc: 0.894854
[4000]	training's auc: 0.926155	valid_1's auc: 0.896845
[5000]	training's auc: 0.931039	valid_1's auc: 0.897769
[6000]	training's auc: 0.935557	valid_1's auc: 0.898404
[7000]	training's auc: 0.939719	valid_1's auc: 0.899061
[8000]	training's auc: 0.943735	valid_1's auc: 0.899239
[9000]	training's auc: 0.947461	valid_1's auc: 0.899447
[10000]	training's auc: 0.951051	valid_1's auc: 0.899315
[11000]	training's auc: 0.954376	valid_1's auc: 0.89938
[12000]	training's auc: 0.957558	valid_1's auc: 0.899317
Early stopping, best iteration is:
[9216]	training's auc: 0.948261	valid_1's auc: 0.899542
Fold 1
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.90512	valid_1's auc: 0.888327
[2000]	training's auc: 0.914124	valid_

[9000]	training's auc: 0.947175	valid_1's auc: 0.903622
[10000]	training's auc: 0.950779	valid_1's auc: 0.903488
Early stopping, best iteration is:
[7286]	training's auc: 0.940581	valid_1's auc: 0.903748
Fold 10
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.904531	valid_1's auc: 0.895531
[2000]	training's auc: 0.913261	valid_1's auc: 0.900155
[3000]	training's auc: 0.919932	valid_1's auc: 0.903293
[4000]	training's auc: 0.925489	valid_1's auc: 0.904834
[5000]	training's auc: 0.930442	valid_1's auc: 0.905729
[6000]	training's auc: 0.93489	valid_1's auc: 0.90614
[7000]	training's auc: 0.939135	valid_1's auc: 0.906941
[8000]	training's auc: 0.943118	valid_1's auc: 0.90717
[9000]	training's auc: 0.946916	valid_1's auc: 0.90736
[10000]	training's auc: 0.950525	valid_1's auc: 0.907147
[11000]	training's auc: 0.953976	valid_1's auc: 0.907063
[12000]	training's auc: 0.957182	valid_1's auc: 0.907098
Early stopping, best iteration is:
[9195]	training's 

[INFO]2019-03-12 03:21:42,653:main:Prepare submission


CV score: 0.89942 


[INFO]2019-03-12 03:21:43,139:main:Prepare the model
[INFO]2019-03-12 03:21:43,140:main:Run model


Fold 0
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.905684	valid_1's auc: 0.886762
[2000]	training's auc: 0.913304	valid_1's auc: 0.891134
[3000]	training's auc: 0.918813	valid_1's auc: 0.89401
[4000]	training's auc: 0.923336	valid_1's auc: 0.895835
[5000]	training's auc: 0.927349	valid_1's auc: 0.896895
[6000]	training's auc: 0.931076	valid_1's auc: 0.897694
[7000]	training's auc: 0.934621	valid_1's auc: 0.898318
[8000]	training's auc: 0.93798	valid_1's auc: 0.898472
[9000]	training's auc: 0.941235	valid_1's auc: 0.898774
[10000]	training's auc: 0.944421	valid_1's auc: 0.898735
[11000]	training's auc: 0.947437	valid_1's auc: 0.898734
[12000]	training's auc: 0.950359	valid_1's auc: 0.898563
Early stopping, best iteration is:
[9201]	training's auc: 0.941911	valid_1's auc: 0.898869
Fold 1
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.905448	valid_1's auc: 0.889827
[2000]	training's auc: 0.913011	valid_

[1000]	training's auc: 0.90517	valid_1's auc: 0.893151
[2000]	training's auc: 0.912774	valid_1's auc: 0.897807
[3000]	training's auc: 0.918268	valid_1's auc: 0.900156
[4000]	training's auc: 0.923024	valid_1's auc: 0.901745
[5000]	training's auc: 0.927196	valid_1's auc: 0.902803
[6000]	training's auc: 0.930958	valid_1's auc: 0.903359
[7000]	training's auc: 0.934545	valid_1's auc: 0.903771
[8000]	training's auc: 0.937977	valid_1's auc: 0.90359
[9000]	training's auc: 0.94123	valid_1's auc: 0.903837
[10000]	training's auc: 0.944376	valid_1's auc: 0.903678
[11000]	training's auc: 0.947374	valid_1's auc: 0.903627
[12000]	training's auc: 0.950254	valid_1's auc: 0.903585
Early stopping, best iteration is:
[8984]	training's auc: 0.941175	valid_1's auc: 0.903877
Fold 10
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.905169	valid_1's auc: 0.8963
[2000]	training's auc: 0.912566	valid_1's auc: 0.900695
[3000]	training's auc: 0.917997	valid_1's auc: 0.903323

[INFO]2019-03-12 04:10:38,866:main:Prepare submission


CV score: 0.89956 


In [14]:
# Re-write model 2's submission labelled with model 2's own value (`score_1`);
# `score` belongs to model 1.
submit(test_df, predictions_1, score_1, 'model_2_bis')

[INFO]2019-03-12 09:44:52,537:main:Prepare submission


In [15]:
# Equal-weight blend of the model 1 and model 2 test predictions.
# NOTE(review): `score` is whatever the earlier cell left in that name, not the
# CV score of this blend; the number in the file name is only a label.
submit(test_df, (predictions_1 + predictions)/2, score, 'model_1_2_bis')

[INFO]2019-03-12 09:44:54,443:main:Prepare submission


In [None]:
# (stray cell: it contained only the undefined name `m`, which raises NameError on Run All)

In [None]:
# Reload the raw CSVs and re-apply the pair features, replacing whatever
# `train_df` / `test_df` currently hold.
train_df, test_df = read_data()
train_df, test_df = process_data(train_df, test_df)