In [1]:
import gc
import os
import logging
import datetime
import warnings
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import StratifiedKFold, train_test_split

# Silence every Python warning for the rest of the session. Note that this also
# hides deprecation notices raised by the libraries imported above.
warnings.filterwarnings('ignore')

In [2]:
#logger
def get_logger():
    """Configure root logging output and return the notebook's 'main' logger.

    The root handler is set up with a ``[LEVEL]timestamp:name:message`` layout
    and the returned logger is switched to DEBUG so every message is emitted.

    Returns:
        logging.Logger: the logger registered under the name ``'main'``.
    """
    logging.basicConfig(format='[%(levelname)s]%(asctime)s:%(name)s:%(message)s')
    main_logger = logging.getLogger('main')
    main_logger.setLevel(logging.DEBUG)
    return main_logger

In [3]:
# Module-level logger; the helper functions defined in the following cells log through it.
logger = get_logger()

In [4]:
def read_data(nrows=None):
    """Load the train and test CSV files from ``./input``.

    Args:
        nrows: optional cap on the number of rows read from the *train* file
            only; the test file is always read in full.

    Returns:
        tuple: ``(train_df, test_df)`` as pandas DataFrames.
    """
    logger.info('Input data')
    train_frame = pd.read_csv('./input/train.csv', nrows=nrows)
    test_frame = pd.read_csv('./input/test.csv')
    return train_frame, test_frame

In [9]:
def process_data(train_df, test_df):
    """Add row-wise summary statistics of the raw features to both frames.

    For every row the sum, min, max, mean, standard deviation, skewness,
    kurtosis and median over the feature columns are appended as the columns
    ``sum``, ``min``, ``max``, ``mean``, ``std``, ``skew``, ``kurt`` and ``med``.

    The frames are modified in place and also returned. The statistic columns
    themselves are excluded from the feature list, so calling this function
    again on already-processed frames recomputes the same values instead of
    folding the previous statistics into the new ones.

    Args:
        train_df: training frame; every column except ``ID_code``, ``target``
            and the statistic columns is treated as a feature.
        test_df: test frame; must contain the same feature columns.

    Returns:
        tuple: ``(train_df, test_df)`` — the same objects that were passed in.
    """
    logger.info('Features engineering')
    stat_cols = ['sum', 'min', 'max', 'mean', 'std', 'skew', 'kurt', 'med']
    excluded = set(['ID_code', 'target'] + stat_cols)
    idx = [c for c in train_df.columns if c not in excluded]

    for df in [test_df, train_df]:
        # Select the feature block once; the new columns are written to `df`
        # and therefore never leak into the statistics computed below.
        base = df[idx]
        df['sum'] = base.sum(axis=1)
        df['min'] = base.min(axis=1)
        df['max'] = base.max(axis=1)
        df['mean'] = base.mean(axis=1)
        df['std'] = base.std(axis=1)
        df['skew'] = base.skew(axis=1)
        df['kurt'] = base.kurtosis(axis=1)
        df['med'] = base.median(axis=1)
    print('Train and test shape:',train_df.shape, test_df.shape)
    return train_df, test_df

In [38]:
def _lgb_params(bagging_fraction, feature_fraction, learning_rate):
    """Return the LightGBM parameters shared by all model variants.

    Only the three arguments differ between run_model_1/2/3; every other
    setting is identical across them.
    """
    return {
        'bagging_freq': 5,
        'bagging_fraction': bagging_fraction,
        'boost_from_average':'false',
        'boost': 'gbdt',
        'feature_fraction': feature_fraction,
        'learning_rate': learning_rate,
        'max_depth': -1,
        'metric':'auc',
        'min_data_in_leaf': 80,
        'min_sum_hessian_in_leaf': 10.0,
        'num_leaves': 13,
        'num_threads': 8,
        'tree_learner': 'serial',
        'objective': 'binary',
        'verbosity': 1,
    }


def _run_lgb_cv(train_df, test_df, param):
    """Train LightGBM with 12-fold stratified CV and return (test predictions, OOF predictions).

    Every column of ``train_df`` except ``ID_code`` and ``target`` is used as
    a feature. Each fold trains with early stopping on its validation part;
    the out-of-fold predictions fill ``oof`` and the test predictions are the
    mean of the per-fold predictions. The overall OOF AUC is printed.
    """
    logger.info('Prepare the model')
    features = [c for c in train_df.columns if c not in ['ID_code', 'target']]
    target = train_df['target']
    logger.info('Run model')
    num_round = 1000000
    # shuffle=False makes the split deterministic, so no random_state is
    # passed: it would have no effect, and newer scikit-learn releases reject
    # a random_state combined with shuffle=False.
    folds = StratifiedKFold(n_splits=12, shuffle=False)
    oof = np.zeros(len(train_df))
    predictions = np.zeros(len(test_df))
    # The splitter only needs the number of rows and the labels; passing the
    # frame itself avoids materialising `train_df.values` as an object array.
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(train_df, target)):
        print("Fold {}".format(fold_))
        trn_data = lgb.Dataset(train_df.iloc[trn_idx][features], label=target.iloc[trn_idx])
        val_data = lgb.Dataset(train_df.iloc[val_idx][features], label=target.iloc[val_idx])
        # NOTE(review): `verbose_eval` / `early_stopping_rounds` are keyword
        # arguments of older LightGBM releases; confirm the pinned version
        # before upgrading, as later releases expect callbacks instead.
        clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=1000, early_stopping_rounds = 4000)
        oof[val_idx] = clf.predict(train_df.iloc[val_idx][features], num_iteration=clf.best_iteration)
        predictions += clf.predict(test_df[features], num_iteration=clf.best_iteration) / folds.n_splits
    score = roc_auc_score(target, oof)
    print("CV score: {:<8.5f}".format(score))
    return predictions, oof


def run_model_1(train_df, test_df):
    """Model 1: bagging_fraction=0.38, feature_fraction=0.045, learning_rate=0.0095.

    Returns:
        tuple: ``(predictions, oof)`` — averaged test predictions and the
        out-of-fold predictions for the training rows.
    """
    return _run_lgb_cv(train_df, test_df, _lgb_params(0.38, 0.045, 0.0095))


def run_model_2(train_df, test_df):
    """Model 2: bagging_fraction=0.38, feature_fraction=0.045, learning_rate=0.01.

    Returns:
        tuple: ``(predictions, oof)`` — averaged test predictions and the
        out-of-fold predictions for the training rows.
    """
    return _run_lgb_cv(train_df, test_df, _lgb_params(0.38, 0.045, 0.01))


def run_model_3(train_df, test_df):
    """Model 3: bagging_fraction=0.4, feature_fraction=0.05, learning_rate=0.01.

    Returns:
        tuple: ``(predictions, oof)`` — averaged test predictions and the
        out-of-fold predictions for the training rows.
    """
    return _run_lgb_cv(train_df, test_df, _lgb_params(0.4, 0.05, 0.01))

In [7]:
def submit(test_df, predictions, score, name='model_1'):
    """Write a submission CSV with the columns ``ID_code`` and ``target``.

    The file is created in the current working directory and named
    ``submission_<name>_<score>.csv`` with the score rendered to five
    decimals. The score is not padded, so the file name contains no space
    before the extension.

    Args:
        test_df: frame providing the ``ID_code`` column.
        predictions: one predicted value per row of ``test_df``.
        score: numeric validation score, used only in the file name.
        name: label of the model, used only in the file name.
    """
    logger.info('Prepare submission')
    sub = pd.DataFrame({"ID_code": test_df.ID_code.values})
    sub["target"] = predictions
    sub.to_csv("submission_{}_{:.5f}.csv".format(name, score), index=False)

In [14]:
def read_process_train_submit(nrows=None):
    """Run the whole pipeline: load data, train models 1 and 2, write both submissions.

    ``run_model_1`` / ``run_model_2`` return ``(predictions, oof)``; the scalar
    score that ``submit`` puts into the file name is therefore computed here
    as the AUC of the out-of-fold predictions.

    Args:
        nrows: optional cap on the number of training rows read.
    """
    train_df, test_df = read_data(nrows)
    # Feature engineering is currently disabled:
    #train_df, test_df = process_data(train_df, test_df)
    target = train_df['target']
    predictions, oof = run_model_1(train_df, test_df)
    submit(test_df, predictions, roc_auc_score(target, oof))
    predictions_1, oof_1 = run_model_2(train_df, test_df)
    submit(test_df, predictions_1, roc_auc_score(target, oof_1), 'model_2')

In [39]:
# Load the raw data and train model 1, keeping its test predictions and
# out-of-fold predictions. Feature engineering and submission are disabled here.
train_df, test_df = read_data()
#train_df, test_df = process_data(train_df, test_df)
predictions, oof = run_model_1(train_df, test_df)
#submit(test_df, predictions, score)

[INFO]2019-03-12 13:25:19,781:main:Input data
[INFO]2019-03-12 13:25:31,879:main:Prepare the model
[INFO]2019-03-12 13:25:31,880:main:Run model


Fold 0
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.900259	valid_1's auc: 0.882001
[2000]	training's auc: 0.910857	valid_1's auc: 0.889734
[3000]	training's auc: 0.917914	valid_1's auc: 0.894287
[4000]	training's auc: 0.923261	valid_1's auc: 0.896737
[5000]	training's auc: 0.927762	valid_1's auc: 0.89834
[6000]	training's auc: 0.931813	valid_1's auc: 0.899458
[7000]	training's auc: 0.935478	valid_1's auc: 0.900155
[8000]	training's auc: 0.939029	valid_1's auc: 0.90061
[9000]	training's auc: 0.94234	valid_1's auc: 0.900935
[10000]	training's auc: 0.945503	valid_1's auc: 0.901085
[11000]	training's auc: 0.948489	valid_1's auc: 0.901197
[12000]	training's auc: 0.951395	valid_1's auc: 0.901387
[13000]	training's auc: 0.954271	valid_1's auc: 0.901348
[14000]	training's auc: 0.95698	valid_1's auc: 0.901393
[15000]	training's auc: 0.959637	valid_1's auc: 0.901314
[16000]	training's auc: 0.962104	valid_1's auc: 0.901307
Early stopping, best iteration

[13000]	training's auc: 0.954299	valid_1's auc: 0.898599
[14000]	training's auc: 0.956995	valid_1's auc: 0.89834
[15000]	training's auc: 0.959571	valid_1's auc: 0.898343
Early stopping, best iteration is:
[11498]	training's auc: 0.950033	valid_1's auc: 0.898885
Fold 9
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.899554	valid_1's auc: 0.886862
[2000]	training's auc: 0.910243	valid_1's auc: 0.894999
[3000]	training's auc: 0.917343	valid_1's auc: 0.899146
[4000]	training's auc: 0.922849	valid_1's auc: 0.901376
[5000]	training's auc: 0.92745	valid_1's auc: 0.902894
[6000]	training's auc: 0.931529	valid_1's auc: 0.903334
[7000]	training's auc: 0.935219	valid_1's auc: 0.903693
[8000]	training's auc: 0.938708	valid_1's auc: 0.903698
[9000]	training's auc: 0.941979	valid_1's auc: 0.903725
[10000]	training's auc: 0.945183	valid_1's auc: 0.90354
[11000]	training's auc: 0.948237	valid_1's auc: 0.903536
[12000]	training's auc: 0.951218	valid_1's auc: 0.9

In [22]:
# Train model 2 on the same frames; keep its test and out-of-fold predictions.
predictions_1, oof_1 = run_model_2(train_df, test_df)
#submit(test_df, predictions_1, score, 'model_2')

[INFO]2019-03-12 11:09:46,774:main:Prepare the model
[INFO]2019-03-12 11:09:46,775:main:Run model


Fold 0
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.900817	valid_1's auc: 0.882566
[2000]	training's auc: 0.911609	valid_1's auc: 0.890527
[3000]	training's auc: 0.91882	valid_1's auc: 0.895024
[4000]	training's auc: 0.924238	valid_1's auc: 0.897505
[5000]	training's auc: 0.928813	valid_1's auc: 0.898706
[6000]	training's auc: 0.93292	valid_1's auc: 0.899616
[7000]	training's auc: 0.9367	valid_1's auc: 0.900266
[8000]	training's auc: 0.940384	valid_1's auc: 0.900674
[9000]	training's auc: 0.943791	valid_1's auc: 0.901017
[10000]	training's auc: 0.947041	valid_1's auc: 0.901076
[11000]	training's auc: 0.950164	valid_1's auc: 0.901232
[12000]	training's auc: 0.953156	valid_1's auc: 0.901272
[13000]	training's auc: 0.956073	valid_1's auc: 0.90135
[14000]	training's auc: 0.95885	valid_1's auc: 0.901287
[15000]	training's auc: 0.961534	valid_1's auc: 0.901268
[16000]	training's auc: 0.964052	valid_1's auc: 0.901224
Early stopping, best iteration i

[14000]	training's auc: 0.958843	valid_1's auc: 0.897778
[15000]	training's auc: 0.961467	valid_1's auc: 0.897702
Early stopping, best iteration is:
[11309]	training's auc: 0.951126	valid_1's auc: 0.898232
Fold 9
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.900041	valid_1's auc: 0.887346
[2000]	training's auc: 0.911004	valid_1's auc: 0.895565
[3000]	training's auc: 0.918243	valid_1's auc: 0.899618
[4000]	training's auc: 0.923783	valid_1's auc: 0.901734
[5000]	training's auc: 0.928492	valid_1's auc: 0.903085
[6000]	training's auc: 0.932657	valid_1's auc: 0.903423
[7000]	training's auc: 0.936453	valid_1's auc: 0.903756
[8000]	training's auc: 0.940108	valid_1's auc: 0.903653
[9000]	training's auc: 0.943478	valid_1's auc: 0.903792
[10000]	training's auc: 0.946744	valid_1's auc: 0.903544
[11000]	training's auc: 0.949936	valid_1's auc: 0.903413
[12000]	training's auc: 0.952997	valid_1's auc: 0.903179
Early stopping, best iteration is:
[8554]	traini

In [25]:
# Train model 3 on the same frames; keep its test and out-of-fold predictions.
# NOTE(review): the disabled call below still passes predictions_1, not
# predictions_2 — fix it before re-enabling.
predictions_2, oof_2 = run_model_3(train_df, test_df)
#submit(test_df, predictions_1, score, 'model_3')

[INFO]2019-03-12 12:07:41,010:main:Prepare the model
[INFO]2019-03-12 12:07:41,011:main:Run model


Fold 0
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.898963	valid_1's auc: 0.879443
[2000]	training's auc: 0.910857	valid_1's auc: 0.888608
[3000]	training's auc: 0.918607	valid_1's auc: 0.893338
[4000]	training's auc: 0.924418	valid_1's auc: 0.896423
[5000]	training's auc: 0.9292	valid_1's auc: 0.898378
[6000]	training's auc: 0.933456	valid_1's auc: 0.89912
[7000]	training's auc: 0.937393	valid_1's auc: 0.899814
[8000]	training's auc: 0.9411	valid_1's auc: 0.900267
[9000]	training's auc: 0.944591	valid_1's auc: 0.900735
[10000]	training's auc: 0.94786	valid_1's auc: 0.900824
[11000]	training's auc: 0.951008	valid_1's auc: 0.90081
[12000]	training's auc: 0.954086	valid_1's auc: 0.900658
[13000]	training's auc: 0.95704	valid_1's auc: 0.900667
[14000]	training's auc: 0.95991	valid_1's auc: 0.900572
Early stopping, best iteration is:
[10487]	training's auc: 0.949417	valid_1's auc: 0.900918
Fold 1
Training until validation scores don't improve for

[7000]	training's auc: 0.937077	valid_1's auc: 0.903839
[8000]	training's auc: 0.940751	valid_1's auc: 0.904143
[9000]	training's auc: 0.944275	valid_1's auc: 0.904393
[10000]	training's auc: 0.947633	valid_1's auc: 0.904389
[11000]	training's auc: 0.950799	valid_1's auc: 0.904232
[12000]	training's auc: 0.953871	valid_1's auc: 0.903955
[13000]	training's auc: 0.956821	valid_1's auc: 0.903623
Early stopping, best iteration is:
[9613]	training's auc: 0.94636	valid_1's auc: 0.904541
Fold 10
Training until validation scores don't improve for 4000 rounds.
[1000]	training's auc: 0.898088	valid_1's auc: 0.887067
[2000]	training's auc: 0.910235	valid_1's auc: 0.895774
[3000]	training's auc: 0.917874	valid_1's auc: 0.900864
[4000]	training's auc: 0.923744	valid_1's auc: 0.903518
[5000]	training's auc: 0.928703	valid_1's auc: 0.905242
[6000]	training's auc: 0.932989	valid_1's auc: 0.905917
[7000]	training's auc: 0.936934	valid_1's auc: 0.906562
[8000]	training's auc: 0.940591	valid_1's auc: 0.9

In [42]:
# AUC of the equal-weight average of the three models' out-of-fold predictions.
roc_auc_score(train_df.target.values, (oof_1+oof+oof_2)/3)

0.9008632381434583

In [55]:
# Write one submission per model plus two blends. The scores are typed in by
# hand and only end up in the output file names.
submit(test_df, predictions, 0.90047, 'model_1')
submit(test_df, predictions_1, 0.90031, 'model_2')
submit(test_df, predictions_2, 0.90058, 'model_3')
# Equal-weight average of the three models.
submit(test_df, (predictions+predictions_1+predictions_2)/3, 0.90086, 'average_3_models')
# Weighted blend of models 2 and 3.
# NOTE(review): 0.42 is presumably the best alpha found by the grid-search cell — confirm.
submit(test_df, 0.42*predictions_1 + 0.58 * predictions_2, 0.90089, 'average(0.42)_2_3_models')

[INFO]2019-03-12 14:29:07,302:main:Prepare submission
[INFO]2019-03-12 14:29:07,831:main:Prepare submission
[INFO]2019-03-12 14:29:08,359:main:Prepare submission
[INFO]2019-03-12 14:29:08,938:main:Prepare submission
[INFO]2019-03-12 14:29:09,511:main:Prepare submission


In [44]:
# Empty frame that will hold each model's test-set predictions, one column per model.
models_predictions=pd.DataFrame()

In [45]:
# Test-set predictions of models 1-3; the column names match those of blend_features.
models_predictions['target_1'] = predictions
models_predictions['target_2'] = predictions_1
models_predictions['target_3'] = predictions_2

In [46]:
# Empty frame that will hold each model's out-of-fold predictions, one column per model.
blend_features = pd.DataFrame()

In [47]:
# Out-of-fold predictions of models 1-3; these are the inputs of the stacking model.
blend_features['target_1'] = oof
blend_features['target_2'] = oof_1
blend_features['target_3'] = oof_2

In [51]:
# Preview of the blend-weight grid: 51 evenly spaced values from 0 to 1 (step 0.02).
np.linspace(0., 1., 51)

array([0.  , 0.02, 0.04, 0.06, 0.08, 0.1 , 0.12, 0.14, 0.16, 0.18, 0.2 ,
       0.22, 0.24, 0.26, 0.28, 0.3 , 0.32, 0.34, 0.36, 0.38, 0.4 , 0.42,
       0.44, 0.46, 0.48, 0.5 , 0.52, 0.54, 0.56, 0.58, 0.6 , 0.62, 0.64,
       0.66, 0.68, 0.7 , 0.72, 0.74, 0.76, 0.78, 0.8 , 0.82, 0.84, 0.86,
       0.88, 0.9 , 0.92, 0.94, 0.96, 0.98, 1.  ])

In [52]:
# Grid-search the weight `alpha` of the blend alpha*model_2 + (1-alpha)*model_3
# on the out-of-fold predictions, keeping the weight with the highest AUC.
# 0.5 (the AUC of a random ranking) is the baseline a blend has to beat.
max_score = 0.5
max_alpha = 0.
for alpha in np.linspace(0., 1., 51):
    alpha_predictions = alpha * oof_1 + (1.-alpha)*oof_2
    score = roc_auc_score(train_df.target.values, alpha_predictions)
    if max_score < score:
        max_score = score
        max_alpha = alpha
# Display the outcome so the chosen weight is visible in the notebook.
max_alpha, max_score

In [57]:
from sklearn.linear_model import LogisticRegression

In [58]:
# Logistic regression with default settings; fitted per fold by the stacking cell below.
lr = LogisticRegression()

In [63]:
# Stack the three models: fit a logistic regression on their out-of-fold
# predictions with 12-fold stratified CV, and average the per-fold predictions
# on the test set.
#
# The results go into blend_* names so the model-1 variables `oof`,
# `predictions` and `score` are not overwritten; re-running earlier cells
# therefore keeps using the real model-1 outputs.
#
# shuffle=False makes the split deterministic, so no random_state is passed:
# it would have no effect, and newer scikit-learn releases reject a
# random_state combined with shuffle=False.
folds = StratifiedKFold(n_splits=12, shuffle=False)
blend_oof = np.zeros(len(train_df))
blend_predictions = np.zeros(len(test_df))
# The splitter only needs the row count and the labels, so the small
# blend_features frame is passed instead of materialising train_df.values.
for fold_, (trn_idx, val_idx) in enumerate(folds.split(blend_features, train_df.target)):
    print("Fold {}".format(fold_))
    trn_data, trn_label = blend_features.iloc[trn_idx], train_df.target.iloc[trn_idx]
    val_data = blend_features.iloc[val_idx]
    lr.fit(trn_data, trn_label)
    blend_oof[val_idx] = lr.predict_proba(val_data)[:,1]
    blend_predictions += lr.predict_proba(models_predictions)[:,1] / folds.n_splits
blend_score = roc_auc_score(train_df.target, blend_oof)
print("CV score: {:<8.5f}".format(blend_score))

Fold 0
Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
Fold 6
Fold 7
Fold 8
Fold 9
Fold 10
Fold 11
CV score: 0.90084 


In [12]:
# Re-run models 1 and 2 from freshly loaded data and write their submissions.
# run_model_* return (test predictions, out-of-fold predictions), so the scalar
# score used in each file name is the AUC of that model's own OOF predictions.
train_df, test_df = read_data()
predictions, oof = run_model_1(train_df, test_df)
score = roc_auc_score(train_df['target'], oof)
submit(test_df, predictions, score, 'model_1_bis')
predictions_1, oof_1 = run_model_2(train_df, test_df)
score_1 = roc_auc_score(train_df['target'], oof_1)
submit(test_df, predictions_1, score_1, 'model_2_bis')


[INFO]2019-03-12 02:36:29,991:main:Input data
[INFO]2019-03-12 02:36:43,425:main:Prepare the model
[INFO]2019-03-12 02:36:43,426:main:Run model


Fold 0
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.905372	valid_1's auc: 0.885864
[2000]	training's auc: 0.914113	valid_1's auc: 0.891452
[3000]	training's auc: 0.920626	valid_1's auc: 0.894854
[4000]	training's auc: 0.926155	valid_1's auc: 0.896845
[5000]	training's auc: 0.931039	valid_1's auc: 0.897769
[6000]	training's auc: 0.935557	valid_1's auc: 0.898404
[7000]	training's auc: 0.939719	valid_1's auc: 0.899061
[8000]	training's auc: 0.943735	valid_1's auc: 0.899239
[9000]	training's auc: 0.947461	valid_1's auc: 0.899447
[10000]	training's auc: 0.951051	valid_1's auc: 0.899315
[11000]	training's auc: 0.954376	valid_1's auc: 0.89938
[12000]	training's auc: 0.957558	valid_1's auc: 0.899317
Early stopping, best iteration is:
[9216]	training's auc: 0.948261	valid_1's auc: 0.899542
Fold 1
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.90512	valid_1's auc: 0.888327
[2000]	training's auc: 0.914124	valid_

[9000]	training's auc: 0.947175	valid_1's auc: 0.903622
[10000]	training's auc: 0.950779	valid_1's auc: 0.903488
Early stopping, best iteration is:
[7286]	training's auc: 0.940581	valid_1's auc: 0.903748
Fold 10
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.904531	valid_1's auc: 0.895531
[2000]	training's auc: 0.913261	valid_1's auc: 0.900155
[3000]	training's auc: 0.919932	valid_1's auc: 0.903293
[4000]	training's auc: 0.925489	valid_1's auc: 0.904834
[5000]	training's auc: 0.930442	valid_1's auc: 0.905729
[6000]	training's auc: 0.93489	valid_1's auc: 0.90614
[7000]	training's auc: 0.939135	valid_1's auc: 0.906941
[8000]	training's auc: 0.943118	valid_1's auc: 0.90717
[9000]	training's auc: 0.946916	valid_1's auc: 0.90736
[10000]	training's auc: 0.950525	valid_1's auc: 0.907147
[11000]	training's auc: 0.953976	valid_1's auc: 0.907063
[12000]	training's auc: 0.957182	valid_1's auc: 0.907098
Early stopping, best iteration is:
[9195]	training's 

[INFO]2019-03-12 03:21:42,653:main:Prepare submission


CV score: 0.89942 


[INFO]2019-03-12 03:21:43,139:main:Prepare the model
[INFO]2019-03-12 03:21:43,140:main:Run model


Fold 0
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.905684	valid_1's auc: 0.886762
[2000]	training's auc: 0.913304	valid_1's auc: 0.891134
[3000]	training's auc: 0.918813	valid_1's auc: 0.89401
[4000]	training's auc: 0.923336	valid_1's auc: 0.895835
[5000]	training's auc: 0.927349	valid_1's auc: 0.896895
[6000]	training's auc: 0.931076	valid_1's auc: 0.897694
[7000]	training's auc: 0.934621	valid_1's auc: 0.898318
[8000]	training's auc: 0.93798	valid_1's auc: 0.898472
[9000]	training's auc: 0.941235	valid_1's auc: 0.898774
[10000]	training's auc: 0.944421	valid_1's auc: 0.898735
[11000]	training's auc: 0.947437	valid_1's auc: 0.898734
[12000]	training's auc: 0.950359	valid_1's auc: 0.898563
Early stopping, best iteration is:
[9201]	training's auc: 0.941911	valid_1's auc: 0.898869
Fold 1
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.905448	valid_1's auc: 0.889827
[2000]	training's auc: 0.913011	valid_

[1000]	training's auc: 0.90517	valid_1's auc: 0.893151
[2000]	training's auc: 0.912774	valid_1's auc: 0.897807
[3000]	training's auc: 0.918268	valid_1's auc: 0.900156
[4000]	training's auc: 0.923024	valid_1's auc: 0.901745
[5000]	training's auc: 0.927196	valid_1's auc: 0.902803
[6000]	training's auc: 0.930958	valid_1's auc: 0.903359
[7000]	training's auc: 0.934545	valid_1's auc: 0.903771
[8000]	training's auc: 0.937977	valid_1's auc: 0.90359
[9000]	training's auc: 0.94123	valid_1's auc: 0.903837
[10000]	training's auc: 0.944376	valid_1's auc: 0.903678
[11000]	training's auc: 0.947374	valid_1's auc: 0.903627
[12000]	training's auc: 0.950254	valid_1's auc: 0.903585
Early stopping, best iteration is:
[8984]	training's auc: 0.941175	valid_1's auc: 0.903877
Fold 10
Training until validation scores don't improve for 3500 rounds.
[1000]	training's auc: 0.905169	valid_1's auc: 0.8963
[2000]	training's auc: 0.912566	valid_1's auc: 0.900695
[3000]	training's auc: 0.917997	valid_1's auc: 0.903323

[INFO]2019-03-12 04:10:38,866:main:Prepare submission


CV score: 0.89956 


In [14]:
# Re-write the model-2 submission.
# NOTE(review): `score` was assigned from run_model_1 in the preceding run cell,
# while these predictions come from model 2 — presumably `score_1` was intended; confirm.
submit(test_df, predictions_1, score, 'model_2_bis')

[INFO]2019-03-12 09:44:52,537:main:Prepare submission


In [15]:
# Submit the equal-weight average of models 1 and 2.
# NOTE(review): the file name carries `score`, which was assigned from
# run_model_1 alone and is not the score of this blend — confirm this is acceptable.
submit(test_df, (predictions_1 + predictions)/2, score, 'model_1_2_bis')

[INFO]2019-03-12 09:44:54,443:main:Prepare submission


In [None]:
# NOTE(review): no name `m` is defined anywhere in the visible notebook, so running
# this cell would presumably raise NameError — likely a stray keystroke; consider deleting it.
m

In [None]:
# Reload the raw data and add the row-wise statistic features (this cell has not been executed).
train_df, test_df = read_data()
train_df, test_df = process_data(train_df, test_df)