# Training Models

This notebook contains the code used for training the following learning models:

-  **Standard GBDT** (_baseline 1_)
-  **Adversarial Boosting** (_baseline 2_)
-  **Non-Interferent GBDT** (our proposal)

# Documentation

 - http://lightgbm.readthedocs.io/en/latest/
 - http://lightgbm.readthedocs.io/en/latest/Python-Intro.html
 - https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide

In [1]:
import sys
import os
import numpy as np
import pandas as pd
import lightgbm
import pickle
import json
import functools
from os import listdir
from os.path import isfile, join
from sklearn.svm import SVC
from sklearn.metrics import f1_score

# Data Preparation

In [2]:
def label_encode(dataset, categorical_features):
    """Return a copy of ``dataset`` with its categorical columns integer-encoded.

    Every column whose name appears in ``categorical_features`` is cast to the
    pandas ``category`` dtype and then replaced by its category codes, stored
    as ``np.int32``. The remaining columns are copied unchanged and the input
    frame itself is not modified.
    """
    encoded = dataset.copy()
    selected = [name for name in encoded.columns if name in categorical_features]
    for name in selected:
        as_category = encoded[name].astype('category')
        encoded[name] = as_category.cat.codes.astype(np.int32)
    return encoded

def _print_split_summary(train, valid, test):
    """Print the shapes of the three partitions and each one's share of all rows."""
    total_rows = train.shape[0] + valid.shape[0] + test.shape[0]
    print ("Train/Valid/Test sizes:", train.shape, valid.shape, test.shape)
    print ("Train/Valid/Test split: {:.2f} {:.2f} {:.2f}"
               .format( train.shape[0]/total_rows,
                        valid.shape[0]/total_rows,
                        test.shape[0] /total_rows ) )


def load_atk_train_valid_test(atk_train_file, atk_valid_file, atk_test_file, 
                              train_split=0.6, valid_split=0.2, force=False):
    """Load, re-split and label-encode the train/valid/test CSV files, with caching.

    The three input files are read with ``pd.read_csv``, concatenated, their
    object-typed columns are label-encoded jointly (so codes are consistent
    across partitions), and the rows are split again by position into
    train/valid/test. The result is cached next to the inputs as
    ``<file>.cat.bz2`` plus ``<train file>.cat.json`` (the list of encoded
    column names) and re-used on later calls.

    Parameters
    ----------
    atk_train_file, atk_valid_file, atk_test_file : str
        Paths of the original CSV files.
    train_split, valid_split : float
        Fractions assigned to train and valid; the remainder becomes test.
        If an ``instance_id`` column is present the fractions are applied to
        the number of instance groups (rows sharing an id stay together),
        otherwise to the number of rows.
    force : bool
        Rebuild the cache even if all cached files exist.

    Returns
    -------
    (train_cat, valid_cat, test_cat, cat_fx)
        The three encoded DataFrames and the list of encoded column names.

    Notes
    -----
    The cache check only tests whether the files exist; it does not record
    the split fractions. Pass ``force=True`` after changing ``train_split``
    or ``valid_split``.
    """
    train_cache  = atk_train_file+".cat.bz2"
    valid_cache  = atk_valid_file+".cat.bz2"
    test_cache   = atk_test_file+".cat.bz2"
    cat_fx_cache = atk_train_file+".cat.json"

    cache_complete = all(os.path.exists(path) for path in
                         (train_cache, valid_cache, test_cache, cat_fx_cache))

    if cache_complete and not force:
        print ("Loading pre-processed files...")

        train_cat = pd.read_csv(train_cache)
        valid_cat = pd.read_csv(valid_cache)
        test_cat  = pd.read_csv(test_cache)

        with open(cat_fx_cache, 'r') as fp:
            cat_fx = json.load(fp)

        return train_cat, valid_cat, test_cat, cat_fx

    print ("Pre-processing original files...")

    print ("Loading:", atk_train_file)
    print ("Loading:", atk_valid_file)
    print ("Loading:", atk_test_file)

    train = pd.read_csv(atk_train_file)
    valid = pd.read_csv(atk_valid_file)
    test  = pd.read_csv(atk_test_file)

    _print_split_summary(train, valid, test)

    # work out how many rows go to train and valid
    if 'instance_id' in train.columns.values:
        print ('   ... with instance ids')
        # Shift the ids of valid and test so they keep increasing across the
        # three files. The offset is the id of the last row of the preceding
        # partition, looked up by column name so the result does not depend
        # on where 'instance_id' sits in the file.
        valid['instance_id'] += train['instance_id'].iloc[-1]
        test['instance_id']  += valid['instance_id'].iloc[-1]
        assert train['instance_id'].max() < valid['instance_id'].min(), "Instance ID mismatch"
        assert valid['instance_id'].max() < test['instance_id'].min(), "Instance ID mismatch"

        # number of rows per instance id, in increasing id order
        groups = np.concatenate( [ part['instance_id'].value_counts().sort_index().values
                                   for part in (train, valid, test) ] )

        num_train_groups = int( len(groups)*train_split )
        num_valid_groups = int( len(groups)*valid_split )
        train_size = int( groups[:num_train_groups].sum() )
        valid_size = int( groups[num_train_groups:num_train_groups+num_valid_groups].sum() )
    else:
        full_size = len(train) + len(valid) + len(test)
        train_size = int( full_size*train_split )
        valid_size = int( full_size*valid_split )

    # concat so that label encoding sees every category of every partition
    full = pd.concat( [train, valid, test] )

    # names of the categorical (object-typed) columns
    cat_fx = list(full.columns.values[np.where(full.dtypes=='object')[0]])
    full = label_encode(full, cat_fx)
    with open(cat_fx_cache, 'w') as fp:
        json.dump(cat_fx, fp)
    print ("CatFX:", cat_fx)

    # positional split of the concatenated frame
    train_cat = full.iloc[0:train_size,:]
    valid_cat = full.iloc[train_size:train_size+valid_size,:]
    test_cat  = full.iloc[train_size+valid_size:,:]

    assert len(train_cat)+len(valid_cat)+len(test_cat)==len(full), "Split sizes mismatch"

    _print_split_summary(train_cat, valid_cat, test_cat)

    # save to file
    print ("Saving processed files *.cat.bz2")
    train_cat.to_csv(train_cache, compression="bz2", index=False)
    valid_cat.to_csv(valid_cache, compression="bz2", index=False)
    test_cat.to_csv (test_cache,  compression="bz2", index=False)

    # return data
    return train_cat, valid_cat, test_cat, cat_fx


# Objective Functions

## Standard

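The following function, <code>optimize_log_loss</code>, implements the objective that is minimized when training the _standard_ and _baseline_ models: the average binary log loss over the training set. Because it is passed to LightGBM as a custom objective, it does not return the loss value itself but its gradient and Hessian with respect to the raw predictions $h(\mathbf{x})$. The formula below assumes labels $y \in \{-1, +1\}$.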

# $L$ = <code>optimize_log_loss</code>

$$
L = \frac{1}{|\mathcal{D}|} \cdot \sum_{(\mathbf{x},y) \in \mathcal{D}}\ell(h(\mathbf{x}), y)
$$

where:

$$
\ell(h(\mathbf{x}), y) = \log\left(1+e^{-y\,h(\mathbf{x})}\right)
$$

In [3]:
def optimize_log_loss(preds, train_data):
    """Gradient and Hessian of the average binary log loss (custom objective).

    The per-sample loss is ``log(1 + exp(-y * h))`` where ``h`` is the raw
    prediction and ``y`` the label returned by ``train_data.get_label()``
    (the formula assumes labels in {-1, +1}).

    Parameters
    ----------
    preds : array-like
        Raw model scores, one per sample.
    train_data : object exposing ``get_label()``
        Dataset providing the labels.

    Returns
    -------
    (grads, hess) : tuple of arrays
        First and second derivatives of the loss with respect to ``preds``,
        each scaled by ``1 / len(preds)`` so that the average loss is
        optimized.
    """
    labels = train_data.get_label()
    margins = preds * labels

    # log(1 + exp(m)) and log(1 + exp(-m)), evaluated without overflow.
    log1p_exp_pos = np.logaddexp(0.0, margins)
    log1p_exp_neg = np.logaddexp(0.0, -margins)

    # d/dh log(1 + exp(-y h)) = -y / (1 + exp(y h))
    grads = -labels * np.exp(-log1p_exp_pos)

    # d^2/dh^2 = y^2 * exp(m) / (1 + exp(m))^2
    #          = y^2 * exp(-log(1 + exp(m)) - log(1 + exp(-m)))
    # The direct form yields inf/inf = nan once exp(m) overflows; this form
    # smoothly underflows to 0 instead.
    hess = labels**2 * np.exp(-log1p_exp_pos - log1p_exp_neg)

    # this is to optimize average logloss
    norm = 1.0/len(preds)

    return grads * norm, hess * norm

# Evaluation Metrics

## Standard

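The following function, <code>avg_log_loss</code>, is the metric used to evaluate every learned model (_standard_, _adversarial-boosting_, or _non-interferent_). It computes the binary log loss $\log(1+e^{-y\,h(\mathbf{x})})$ of each sample from the raw prediction $h(\mathbf{x})$ and averages it over the dataset; lower values are better.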

In [4]:
def logistic(x):
    """Logistic (sigmoid) function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

In [5]:
def logit(p):
    """Log-odds ``log(p / (1 - p))`` of a probability ``p``, applied element-wise."""
    odds = p / (1 - p)
    return np.log(odds)

# <code>avg_log_loss</code>

In [6]:
# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
def avg_log_loss(preds, train_data):
    """Average binary log loss, in the form LightGBM expects from ``feval``.

    Parameters
    ----------
    preds : array-like
        Raw model scores, one per sample.
    train_data : object exposing ``get_label()``
        Dataset providing the labels (the formula assumes labels in {-1, +1}).

    Returns
    -------
    (name, value, is_higher_better)
        ``'avg_binary_log_loss'``, the mean of ``log(1 + exp(-y * h))`` over
        all samples, and ``False`` because lower is better.
    """
    labels = train_data.get_label()
    # logaddexp(0, z) == log(1 + exp(z)) but does not overflow for large z,
    # so a single badly misclassified sample no longer turns the mean into inf.
    losses = np.logaddexp(0.0, -preds*labels)
    avg_loss = np.mean(losses)

    return 'avg_binary_log_loss', avg_loss, False

# Training Standard GBDT (_baseline 1_)

In [7]:
def train_gradient_boosting_baseline( train_file, valid_file, test_file,
                                output_model_file,
                                num_trees_grid=(1000,),
                                learning_rates=(0.01, 0.05, 0.1),
                                num_leaves_grid=(8, 16, 24, 32)):
    """Grid-search a standard LightGBM GBDT and save the best model per tree budget.

    For every ``num_trees`` in ``num_trees_grid`` one model is trained for each
    (learning_rate, num_leaves) pair using ``optimize_log_loss`` as objective
    and ``avg_log_loss`` as metric. The configuration with the lowest
    validation loss (at its best boosting round) is saved to
    ``<output_model_file>_T<trees>_S<lr*1000>_L<leaves>_R<best round>.model``.

    Parameters
    ----------
    train_file, valid_file, test_file : str
        Dataset paths, forwarded to ``load_atk_train_valid_test``. The last
        column of each frame is used as the label.
    output_model_file : str
        Prefix of the saved model file name.
    num_trees_grid, learning_rates, num_leaves_grid : iterable
        Hyper-parameter grid; the defaults reproduce the original experiment.

    Returns
    -------
    pandas.DataFrame
        One row per trained configuration with columns ``num_trees``,
        ``learning_rate``, ``num_leaves``, ``best_round`` and
        ``avg_binary_log_loss``.

    Raises
    ------
    AssertionError
        If the training frame contains an ``instance_id`` column.
    """
    result_columns = ['num_trees', 'learning_rate', 'num_leaves', 'best_round', 'avg_binary_log_loss']
    # Collected as plain dicts and turned into a DataFrame once at the end:
    # growing a frame row by row is quadratic and DataFrame.append no longer
    # exists in pandas >= 2.0.
    records = []

    # load train/valid/test
    train, valid, test, cat_fx = load_atk_train_valid_test(train_file, valid_file, test_file)

    assert "instance_id" not in train.columns.values, "Wrong training set file for GBDT"

    # get index of categorical features 
    cat_fx = np.where(train.columns.isin(cat_fx))[0]
    cat_fx = [int(x) for x in cat_fx]
    print ("CatFX:", train.columns.values[cat_fx])

    # Feature / label arrays do not depend on the hyper-parameters.
    train_x, train_y = train.iloc[:,:-1].values, train.iloc[:,-1].values
    valid_x, valid_y = valid.iloc[:,:-1].values, valid.iloc[:,-1].values

    for num_trees in num_trees_grid:
        best_model = None
        best_setting = None
        best_loss = np.inf
        for learning_rate in learning_rates:
            for num_leaves in num_leaves_grid:
                # datasets (rebuilt for every run, as in the original code)
                lgbm_train = lightgbm.Dataset(data=train_x, 
                                              label=train_y,
                                              categorical_feature = cat_fx)

                lgbm_valid = lightgbm.Dataset(data=valid_x, 
                                              label=valid_y,
                                              categorical_feature = cat_fx)

                # run train
                # NOTE(review): `fobj`, `evals_result` and `verbose_eval` are
                # keyword arguments of older LightGBM releases; they appear to
                # have been removed in LightGBM 4.0 — confirm the pinned version.
                lgbm_params = { 'learning_rate': learning_rate, 
                                'num_leaves': num_leaves} 
                lgbm_info = {}
                lgbm_model = lightgbm.train(lgbm_params, lgbm_train, 
                                            num_boost_round = num_trees,
                                            fobj            = optimize_log_loss, 
                                            feval           = avg_log_loss,
                                            evals_result    = lgbm_info,
                                            valid_sets      = [lgbm_train, lgbm_valid], 
                                            valid_names     = ['train', 'valid'],
                                            verbose_eval    = 50)

                valid_losses = lgbm_info['valid']['avg_binary_log_loss']
                best_valid_iter = int(np.argmin(valid_losses))
                run_loss = valid_losses[best_valid_iter]

                if run_loss < best_loss:
                    best_model = lgbm_model
                    best_loss = run_loss
                    best_setting = {'num_trees': num_trees,
                                    'learning_rate': learning_rate,
                                    'num_leaves': num_leaves,
                                    'best_round': best_valid_iter + 1}

                # update experimental results
                records.append({'num_trees': num_trees, 
                                'learning_rate': learning_rate,
                                'num_leaves': num_leaves, 
                                'best_round': best_valid_iter + 1, 
                                'avg_binary_log_loss': run_loss})

        if best_model is None:
            # Empty grid, or every run produced a non-finite validation loss.
            print ("No model selected for num_trees =", num_trees)
            continue

        # save file
        # round() rather than truncation so that e.g. 0.029 is encoded as 0029
        model_file_name = "{:s}_T{:d}_S{:04d}_L{:d}_R{:d}.model".format(output_model_file,
                                                                        int(best_setting['num_trees']),
                                                                        int(round(best_setting['learning_rate']*1000)),
                                                                        int(best_setting['num_leaves']),
                                                                        best_setting['best_round']
                                                                       )

        # NOTE(review): save_model is called without `num_iteration`, so the
        # file presumably holds every boosted tree; the best round is only
        # recorded in the file name — confirm consumers pass it at predict time.
        best_model.save_model(model_file_name)
        print ("Model saved to", model_file_name)

        # reload from disk as a sanity check of the saved file
        best_model = lightgbm.Booster(model_file=model_file_name)
        print ("Check valid score:", avg_log_loss(preds=best_model.predict(valid_x),
                                                  train_data=lgbm_valid))

    return pd.DataFrame(records, columns=result_columns)

In [8]:
# Set the condition to False to skip training the standard GBDT baseline.
if True:
    # Train on the original census splits and keep the grid-search summary.
    experiments = train_gradient_boosting_baseline(
        train_file="../data/census/train_ori.csv.bz2",
        valid_file="../data/census/valid_ori.csv.bz2",
        test_file="../data/census/test_ori.csv.bz2",
        output_model_file="../out/models/std_gbdt_census",
    )

    # Persist the per-configuration results next to the saved model.
    experiments.to_csv('../out/models/std_gbdt_census.csv', index=False)

    print (experiments)

Loading pre-processed files...
CatFX: ['workclass' 'marital_status' 'occupation' 'relationship' 'race' 'sex'
 'native_country']




[5]	train's avg_binary_log_loss: 0.669909	valid's avg_binary_log_loss: 0.670078
[10]	train's avg_binary_log_loss: 0.648818	valid's avg_binary_log_loss: 0.649129
[15]	train's avg_binary_log_loss: 0.629588	valid's avg_binary_log_loss: 0.630009
[20]	train's avg_binary_log_loss: 0.611985	valid's avg_binary_log_loss: 0.612517
[25]	train's avg_binary_log_loss: 0.595845	valid's avg_binary_log_loss: 0.59644
[30]	train's avg_binary_log_loss: 0.580993	valid's avg_binary_log_loss: 0.581579
[35]	train's avg_binary_log_loss: 0.567319	valid's avg_binary_log_loss: 0.5679
[40]	train's avg_binary_log_loss: 0.554711	valid's avg_binary_log_loss: 0.555295
[45]	train's avg_binary_log_loss: 0.543066	valid's avg_binary_log_loss: 0.54366
[50]	train's avg_binary_log_loss: 0.532162	valid's avg_binary_log_loss: 0.532738
[55]	train's avg_binary_log_loss: 0.522024	valid's avg_binary_log_loss: 0.52252
[60]	train's avg_binary_log_loss: 0.51261	valid's avg_binary_log_loss: 0.513041
[65]	train's avg_binary_log_loss: 0

[520]	train's avg_binary_log_loss: 0.358103	valid's avg_binary_log_loss: 0.358191
[525]	train's avg_binary_log_loss: 0.357919	valid's avg_binary_log_loss: 0.358036
[530]	train's avg_binary_log_loss: 0.357724	valid's avg_binary_log_loss: 0.357877
[535]	train's avg_binary_log_loss: 0.357482	valid's avg_binary_log_loss: 0.357634
[540]	train's avg_binary_log_loss: 0.35728	valid's avg_binary_log_loss: 0.357434
[545]	train's avg_binary_log_loss: 0.357055	valid's avg_binary_log_loss: 0.357209
[550]	train's avg_binary_log_loss: 0.356865	valid's avg_binary_log_loss: 0.357027
[555]	train's avg_binary_log_loss: 0.356637	valid's avg_binary_log_loss: 0.356794
[560]	train's avg_binary_log_loss: 0.35645	valid's avg_binary_log_loss: 0.356618
[565]	train's avg_binary_log_loss: 0.355358	valid's avg_binary_log_loss: 0.355598
[570]	train's avg_binary_log_loss: 0.353476	valid's avg_binary_log_loss: 0.353847
[575]	train's avg_binary_log_loss: 0.352452	valid's avg_binary_log_loss: 0.352882
[580]	train's avg_

[30]	train's avg_binary_log_loss: 0.57478	valid's avg_binary_log_loss: 0.57532
[35]	train's avg_binary_log_loss: 0.560308	valid's avg_binary_log_loss: 0.560931
[40]	train's avg_binary_log_loss: 0.546978	valid's avg_binary_log_loss: 0.54769
[45]	train's avg_binary_log_loss: 0.534676	valid's avg_binary_log_loss: 0.535443
[50]	train's avg_binary_log_loss: 0.523237	valid's avg_binary_log_loss: 0.524062
[55]	train's avg_binary_log_loss: 0.512658	valid's avg_binary_log_loss: 0.513482
[60]	train's avg_binary_log_loss: 0.502939	valid's avg_binary_log_loss: 0.503755
[65]	train's avg_binary_log_loss: 0.493872	valid's avg_binary_log_loss: 0.494672
[70]	train's avg_binary_log_loss: 0.485491	valid's avg_binary_log_loss: 0.486293
[75]	train's avg_binary_log_loss: 0.477702	valid's avg_binary_log_loss: 0.47848
[80]	train's avg_binary_log_loss: 0.470375	valid's avg_binary_log_loss: 0.471151
[85]	train's avg_binary_log_loss: 0.463571	valid's avg_binary_log_loss: 0.464338
[90]	train's avg_binary_log_loss

[555]	train's avg_binary_log_loss: 0.323467	valid's avg_binary_log_loss: 0.328284
[560]	train's avg_binary_log_loss: 0.322962	valid's avg_binary_log_loss: 0.327853
[565]	train's avg_binary_log_loss: 0.32184	valid's avg_binary_log_loss: 0.32683
[570]	train's avg_binary_log_loss: 0.321095	valid's avg_binary_log_loss: 0.326201
[575]	train's avg_binary_log_loss: 0.320654	valid's avg_binary_log_loss: 0.325841
[580]	train's avg_binary_log_loss: 0.320197	valid's avg_binary_log_loss: 0.325468
[585]	train's avg_binary_log_loss: 0.31922	valid's avg_binary_log_loss: 0.324608
[590]	train's avg_binary_log_loss: 0.318798	valid's avg_binary_log_loss: 0.324265
[595]	train's avg_binary_log_loss: 0.318134	valid's avg_binary_log_loss: 0.323697
[600]	train's avg_binary_log_loss: 0.31773	valid's avg_binary_log_loss: 0.323377
[605]	train's avg_binary_log_loss: 0.316847	valid's avg_binary_log_loss: 0.322596
[610]	train's avg_binary_log_loss: 0.316468	valid's avg_binary_log_loss: 0.32229
[615]	train's avg_bin

[65]	train's avg_binary_log_loss: 0.490004	valid's avg_binary_log_loss: 0.491388
[70]	train's avg_binary_log_loss: 0.481492	valid's avg_binary_log_loss: 0.482869
[75]	train's avg_binary_log_loss: 0.473549	valid's avg_binary_log_loss: 0.474932
[80]	train's avg_binary_log_loss: 0.466175	valid's avg_binary_log_loss: 0.467574
[85]	train's avg_binary_log_loss: 0.459321	valid's avg_binary_log_loss: 0.460729
[90]	train's avg_binary_log_loss: 0.452956	valid's avg_binary_log_loss: 0.454402
[95]	train's avg_binary_log_loss: 0.447022	valid's avg_binary_log_loss: 0.448508
[100]	train's avg_binary_log_loss: 0.441464	valid's avg_binary_log_loss: 0.442947
[105]	train's avg_binary_log_loss: 0.436267	valid's avg_binary_log_loss: 0.437765
[110]	train's avg_binary_log_loss: 0.431449	valid's avg_binary_log_loss: 0.432945
[115]	train's avg_binary_log_loss: 0.426954	valid's avg_binary_log_loss: 0.42846
[120]	train's avg_binary_log_loss: 0.422774	valid's avg_binary_log_loss: 0.424304
[125]	train's avg_binary

[590]	train's avg_binary_log_loss: 0.311059	valid's avg_binary_log_loss: 0.320784
[595]	train's avg_binary_log_loss: 0.310525	valid's avg_binary_log_loss: 0.320358
[600]	train's avg_binary_log_loss: 0.310191	valid's avg_binary_log_loss: 0.320122
[605]	train's avg_binary_log_loss: 0.309859	valid's avg_binary_log_loss: 0.319868
[610]	train's avg_binary_log_loss: 0.309367	valid's avg_binary_log_loss: 0.319484
[615]	train's avg_binary_log_loss: 0.308883	valid's avg_binary_log_loss: 0.319117
[620]	train's avg_binary_log_loss: 0.30858	valid's avg_binary_log_loss: 0.318904
[625]	train's avg_binary_log_loss: 0.308283	valid's avg_binary_log_loss: 0.3187
[630]	train's avg_binary_log_loss: 0.307988	valid's avg_binary_log_loss: 0.318509
[635]	train's avg_binary_log_loss: 0.307687	valid's avg_binary_log_loss: 0.318305
[640]	train's avg_binary_log_loss: 0.307234	valid's avg_binary_log_loss: 0.317962
[645]	train's avg_binary_log_loss: 0.306951	valid's avg_binary_log_loss: 0.317756
[650]	train's avg_b

[95]	train's avg_binary_log_loss: 0.445089	valid's avg_binary_log_loss: 0.447268
[100]	train's avg_binary_log_loss: 0.439488	valid's avg_binary_log_loss: 0.441724
[105]	train's avg_binary_log_loss: 0.434263	valid's avg_binary_log_loss: 0.436567
[110]	train's avg_binary_log_loss: 0.429395	valid's avg_binary_log_loss: 0.431775
[115]	train's avg_binary_log_loss: 0.424854	valid's avg_binary_log_loss: 0.427282
[120]	train's avg_binary_log_loss: 0.420626	valid's avg_binary_log_loss: 0.423081
[125]	train's avg_binary_log_loss: 0.416686	valid's avg_binary_log_loss: 0.41917
[130]	train's avg_binary_log_loss: 0.412978	valid's avg_binary_log_loss: 0.415531
[135]	train's avg_binary_log_loss: 0.409529	valid's avg_binary_log_loss: 0.412135
[140]	train's avg_binary_log_loss: 0.406261	valid's avg_binary_log_loss: 0.408918
[145]	train's avg_binary_log_loss: 0.403116	valid's avg_binary_log_loss: 0.405818
[150]	train's avg_binary_log_loss: 0.400182	valid's avg_binary_log_loss: 0.402927
[155]	train's avg_

[605]	train's avg_binary_log_loss: 0.303355	valid's avg_binary_log_loss: 0.317482
[610]	train's avg_binary_log_loss: 0.303078	valid's avg_binary_log_loss: 0.31733
[615]	train's avg_binary_log_loss: 0.302919	valid's avg_binary_log_loss: 0.317304
[620]	train's avg_binary_log_loss: 0.302451	valid's avg_binary_log_loss: 0.316934
[625]	train's avg_binary_log_loss: 0.302101	valid's avg_binary_log_loss: 0.316693
[630]	train's avg_binary_log_loss: 0.301828	valid's avg_binary_log_loss: 0.316534
[635]	train's avg_binary_log_loss: 0.301447	valid's avg_binary_log_loss: 0.316272
[640]	train's avg_binary_log_loss: 0.300985	valid's avg_binary_log_loss: 0.315899
[645]	train's avg_binary_log_loss: 0.300641	valid's avg_binary_log_loss: 0.315658
[650]	train's avg_binary_log_loss: 0.300184	valid's avg_binary_log_loss: 0.315301
[655]	train's avg_binary_log_loss: 0.29966	valid's avg_binary_log_loss: 0.314867
[660]	train's avg_binary_log_loss: 0.299326	valid's avg_binary_log_loss: 0.314632
[665]	train's avg_

[140]	train's avg_binary_log_loss: 0.328364	valid's avg_binary_log_loss: 0.330197
[145]	train's avg_binary_log_loss: 0.326021	valid's avg_binary_log_loss: 0.328099
[150]	train's avg_binary_log_loss: 0.323929	valid's avg_binary_log_loss: 0.326134
[155]	train's avg_binary_log_loss: 0.323464	valid's avg_binary_log_loss: 0.325791
[160]	train's avg_binary_log_loss: 0.321519	valid's avg_binary_log_loss: 0.324027
[165]	train's avg_binary_log_loss: 0.32117	valid's avg_binary_log_loss: 0.32383
[170]	train's avg_binary_log_loss: 0.319412	valid's avg_binary_log_loss: 0.322193
[175]	train's avg_binary_log_loss: 0.31785	valid's avg_binary_log_loss: 0.320869
[180]	train's avg_binary_log_loss: 0.317439	valid's avg_binary_log_loss: 0.320575
[185]	train's avg_binary_log_loss: 0.315956	valid's avg_binary_log_loss: 0.319213
[190]	train's avg_binary_log_loss: 0.315642	valid's avg_binary_log_loss: 0.319003
[195]	train's avg_binary_log_loss: 0.314288	valid's avg_binary_log_loss: 0.317798
[200]	train's avg_b

[660]	train's avg_binary_log_loss: 0.287535	valid's avg_binary_log_loss: 0.29522
[665]	train's avg_binary_log_loss: 0.287476	valid's avg_binary_log_loss: 0.295191
[670]	train's avg_binary_log_loss: 0.28733	valid's avg_binary_log_loss: 0.295092
[675]	train's avg_binary_log_loss: 0.28726	valid's avg_binary_log_loss: 0.295065
[680]	train's avg_binary_log_loss: 0.287202	valid's avg_binary_log_loss: 0.295082
[685]	train's avg_binary_log_loss: 0.287149	valid's avg_binary_log_loss: 0.295021
[690]	train's avg_binary_log_loss: 0.286971	valid's avg_binary_log_loss: 0.294888
[695]	train's avg_binary_log_loss: 0.286901	valid's avg_binary_log_loss: 0.294867
[700]	train's avg_binary_log_loss: 0.286682	valid's avg_binary_log_loss: 0.294715
[705]	train's avg_binary_log_loss: 0.286503	valid's avg_binary_log_loss: 0.294545
[710]	train's avg_binary_log_loss: 0.286369	valid's avg_binary_log_loss: 0.294493
[715]	train's avg_binary_log_loss: 0.286307	valid's avg_binary_log_loss: 0.29448
[720]	train's avg_bi

[185]	train's avg_binary_log_loss: 0.300285	valid's avg_binary_log_loss: 0.309161
[190]	train's avg_binary_log_loss: 0.299152	valid's avg_binary_log_loss: 0.308213
[195]	train's avg_binary_log_loss: 0.298827	valid's avg_binary_log_loss: 0.308099
[200]	train's avg_binary_log_loss: 0.29748	valid's avg_binary_log_loss: 0.306913
[205]	train's avg_binary_log_loss: 0.29679	valid's avg_binary_log_loss: 0.306448
[210]	train's avg_binary_log_loss: 0.295855	valid's avg_binary_log_loss: 0.305751
[215]	train's avg_binary_log_loss: 0.295548	valid's avg_binary_log_loss: 0.305635
[220]	train's avg_binary_log_loss: 0.294934	valid's avg_binary_log_loss: 0.305144
[225]	train's avg_binary_log_loss: 0.294319	valid's avg_binary_log_loss: 0.304732
[230]	train's avg_binary_log_loss: 0.293493	valid's avg_binary_log_loss: 0.304086
[235]	train's avg_binary_log_loss: 0.292946	valid's avg_binary_log_loss: 0.303757
[240]	train's avg_binary_log_loss: 0.292669	valid's avg_binary_log_loss: 0.303701
[245]	train's avg_

[710]	train's avg_binary_log_loss: 0.269607	valid's avg_binary_log_loss: 0.294755
[715]	train's avg_binary_log_loss: 0.269425	valid's avg_binary_log_loss: 0.29473
[720]	train's avg_binary_log_loss: 0.269321	valid's avg_binary_log_loss: 0.294729
[725]	train's avg_binary_log_loss: 0.26925	valid's avg_binary_log_loss: 0.29471
[730]	train's avg_binary_log_loss: 0.269072	valid's avg_binary_log_loss: 0.294624
[735]	train's avg_binary_log_loss: 0.268968	valid's avg_binary_log_loss: 0.294628
[740]	train's avg_binary_log_loss: 0.268857	valid's avg_binary_log_loss: 0.294675
[745]	train's avg_binary_log_loss: 0.268761	valid's avg_binary_log_loss: 0.294687
[750]	train's avg_binary_log_loss: 0.268674	valid's avg_binary_log_loss: 0.294715
[755]	train's avg_binary_log_loss: 0.268575	valid's avg_binary_log_loss: 0.29475
[760]	train's avg_binary_log_loss: 0.268441	valid's avg_binary_log_loss: 0.294745
[765]	train's avg_binary_log_loss: 0.26834	valid's avg_binary_log_loss: 0.29474
[770]	train's avg_bina

[245]	train's avg_binary_log_loss: 0.284103	valid's avg_binary_log_loss: 0.302488
[250]	train's avg_binary_log_loss: 0.283768	valid's avg_binary_log_loss: 0.30246
[255]	train's avg_binary_log_loss: 0.283477	valid's avg_binary_log_loss: 0.302415
[260]	train's avg_binary_log_loss: 0.282964	valid's avg_binary_log_loss: 0.302158
[265]	train's avg_binary_log_loss: 0.282637	valid's avg_binary_log_loss: 0.302135
[270]	train's avg_binary_log_loss: 0.282317	valid's avg_binary_log_loss: 0.302087
[275]	train's avg_binary_log_loss: 0.281836	valid's avg_binary_log_loss: 0.301903
[280]	train's avg_binary_log_loss: 0.281347	valid's avg_binary_log_loss: 0.301677
[285]	train's avg_binary_log_loss: 0.280915	valid's avg_binary_log_loss: 0.301521
[290]	train's avg_binary_log_loss: 0.280439	valid's avg_binary_log_loss: 0.301296
[295]	train's avg_binary_log_loss: 0.280142	valid's avg_binary_log_loss: 0.301285
[300]	train's avg_binary_log_loss: 0.279584	valid's avg_binary_log_loss: 0.300941
[305]	train's avg

[755]	train's avg_binary_log_loss: 0.257237	valid's avg_binary_log_loss: 0.298359
[760]	train's avg_binary_log_loss: 0.257096	valid's avg_binary_log_loss: 0.298413
[765]	train's avg_binary_log_loss: 0.256938	valid's avg_binary_log_loss: 0.298451
[770]	train's avg_binary_log_loss: 0.256807	valid's avg_binary_log_loss: 0.298493
[775]	train's avg_binary_log_loss: 0.256672	valid's avg_binary_log_loss: 0.298539
[780]	train's avg_binary_log_loss: 0.256472	valid's avg_binary_log_loss: 0.298474
[785]	train's avg_binary_log_loss: 0.256331	valid's avg_binary_log_loss: 0.298537
[790]	train's avg_binary_log_loss: 0.256113	valid's avg_binary_log_loss: 0.298583
[795]	train's avg_binary_log_loss: 0.25598	valid's avg_binary_log_loss: 0.298638
[800]	train's avg_binary_log_loss: 0.255837	valid's avg_binary_log_loss: 0.298647
[805]	train's avg_binary_log_loss: 0.255652	valid's avg_binary_log_loss: 0.298587
[810]	train's avg_binary_log_loss: 0.255527	valid's avg_binary_log_loss: 0.298679
[815]	train's avg

[260]	train's avg_binary_log_loss: 0.277312	valid's avg_binary_log_loss: 0.304017
[265]	train's avg_binary_log_loss: 0.276786	valid's avg_binary_log_loss: 0.303853
[270]	train's avg_binary_log_loss: 0.276114	valid's avg_binary_log_loss: 0.303552
[275]	train's avg_binary_log_loss: 0.275733	valid's avg_binary_log_loss: 0.303569
[280]	train's avg_binary_log_loss: 0.275406	valid's avg_binary_log_loss: 0.303599
[285]	train's avg_binary_log_loss: 0.275084	valid's avg_binary_log_loss: 0.303597
[290]	train's avg_binary_log_loss: 0.274572	valid's avg_binary_log_loss: 0.30351
[295]	train's avg_binary_log_loss: 0.274232	valid's avg_binary_log_loss: 0.303464
[300]	train's avg_binary_log_loss: 0.273799	valid's avg_binary_log_loss: 0.303416
[305]	train's avg_binary_log_loss: 0.273351	valid's avg_binary_log_loss: 0.303259
[310]	train's avg_binary_log_loss: 0.273053	valid's avg_binary_log_loss: 0.303188
[315]	train's avg_binary_log_loss: 0.272789	valid's avg_binary_log_loss: 0.303188
[320]	train's avg

[770]	train's avg_binary_log_loss: 0.246455	valid's avg_binary_log_loss: 0.301866
[775]	train's avg_binary_log_loss: 0.246121	valid's avg_binary_log_loss: 0.301824
[780]	train's avg_binary_log_loss: 0.245948	valid's avg_binary_log_loss: 0.301891
[785]	train's avg_binary_log_loss: 0.24576	valid's avg_binary_log_loss: 0.302015
[790]	train's avg_binary_log_loss: 0.245587	valid's avg_binary_log_loss: 0.302068
[795]	train's avg_binary_log_loss: 0.245416	valid's avg_binary_log_loss: 0.302148
[800]	train's avg_binary_log_loss: 0.245269	valid's avg_binary_log_loss: 0.302187
[805]	train's avg_binary_log_loss: 0.245113	valid's avg_binary_log_loss: 0.302236
[810]	train's avg_binary_log_loss: 0.244948	valid's avg_binary_log_loss: 0.30225
[815]	train's avg_binary_log_loss: 0.244798	valid's avg_binary_log_loss: 0.3023
[820]	train's avg_binary_log_loss: 0.244629	valid's avg_binary_log_loss: 0.302347
[825]	train's avg_binary_log_loss: 0.244476	valid's avg_binary_log_loss: 0.302416
[830]	train's avg_bi

[285]	train's avg_binary_log_loss: 0.289856	valid's avg_binary_log_loss: 0.297265
[290]	train's avg_binary_log_loss: 0.289715	valid's avg_binary_log_loss: 0.297142
[295]	train's avg_binary_log_loss: 0.289543	valid's avg_binary_log_loss: 0.297002
[300]	train's avg_binary_log_loss: 0.289085	valid's avg_binary_log_loss: 0.296578
[305]	train's avg_binary_log_loss: 0.288972	valid's avg_binary_log_loss: 0.296536
[310]	train's avg_binary_log_loss: 0.288552	valid's avg_binary_log_loss: 0.29616
[315]	train's avg_binary_log_loss: 0.288101	valid's avg_binary_log_loss: 0.295799
[320]	train's avg_binary_log_loss: 0.287976	valid's avg_binary_log_loss: 0.295777
[325]	train's avg_binary_log_loss: 0.287799	valid's avg_binary_log_loss: 0.295721
[330]	train's avg_binary_log_loss: 0.28768	valid's avg_binary_log_loss: 0.295654
[335]	train's avg_binary_log_loss: 0.287585	valid's avg_binary_log_loss: 0.295618
[340]	train's avg_binary_log_loss: 0.287482	valid's avg_binary_log_loss: 0.295567
[345]	train's avg_

[805]	train's avg_binary_log_loss: 0.273858	valid's avg_binary_log_loss: 0.291219
[810]	train's avg_binary_log_loss: 0.273803	valid's avg_binary_log_loss: 0.291205
[815]	train's avg_binary_log_loss: 0.273727	valid's avg_binary_log_loss: 0.291215
[820]	train's avg_binary_log_loss: 0.273678	valid's avg_binary_log_loss: 0.291225
[825]	train's avg_binary_log_loss: 0.27359	valid's avg_binary_log_loss: 0.291271
[830]	train's avg_binary_log_loss: 0.273518	valid's avg_binary_log_loss: 0.291251
[835]	train's avg_binary_log_loss: 0.273441	valid's avg_binary_log_loss: 0.291326
[840]	train's avg_binary_log_loss: 0.273395	valid's avg_binary_log_loss: 0.291338
[845]	train's avg_binary_log_loss: 0.273343	valid's avg_binary_log_loss: 0.29133
[850]	train's avg_binary_log_loss: 0.273124	valid's avg_binary_log_loss: 0.291236
[855]	train's avg_binary_log_loss: 0.27305	valid's avg_binary_log_loss: 0.291246
[860]	train's avg_binary_log_loss: 0.27292	valid's avg_binary_log_loss: 0.291225
[865]	train's avg_bi

[320]	train's avg_binary_log_loss: 0.271861	valid's avg_binary_log_loss: 0.295991
[325]	train's avg_binary_log_loss: 0.271586	valid's avg_binary_log_loss: 0.296049
[330]	train's avg_binary_log_loss: 0.271186	valid's avg_binary_log_loss: 0.295909
[335]	train's avg_binary_log_loss: 0.270944	valid's avg_binary_log_loss: 0.295982
[340]	train's avg_binary_log_loss: 0.270492	valid's avg_binary_log_loss: 0.295852
[345]	train's avg_binary_log_loss: 0.27025	valid's avg_binary_log_loss: 0.295909
[350]	train's avg_binary_log_loss: 0.269972	valid's avg_binary_log_loss: 0.295878
[355]	train's avg_binary_log_loss: 0.269555	valid's avg_binary_log_loss: 0.295865
[360]	train's avg_binary_log_loss: 0.269356	valid's avg_binary_log_loss: 0.295909
[365]	train's avg_binary_log_loss: 0.269146	valid's avg_binary_log_loss: 0.295876
[370]	train's avg_binary_log_loss: 0.268957	valid's avg_binary_log_loss: 0.295838
[375]	train's avg_binary_log_loss: 0.268421	valid's avg_binary_log_loss: 0.29575
[380]	train's avg_

[845]	train's avg_binary_log_loss: 0.25129	valid's avg_binary_log_loss: 0.297428
[850]	train's avg_binary_log_loss: 0.251155	valid's avg_binary_log_loss: 0.297438
[855]	train's avg_binary_log_loss: 0.251006	valid's avg_binary_log_loss: 0.297512
[860]	train's avg_binary_log_loss: 0.250903	valid's avg_binary_log_loss: 0.297567
[865]	train's avg_binary_log_loss: 0.250786	valid's avg_binary_log_loss: 0.297669
[870]	train's avg_binary_log_loss: 0.250492	valid's avg_binary_log_loss: 0.297485
[875]	train's avg_binary_log_loss: 0.250342	valid's avg_binary_log_loss: 0.29754
[880]	train's avg_binary_log_loss: 0.250108	valid's avg_binary_log_loss: 0.297557
[885]	train's avg_binary_log_loss: 0.249994	valid's avg_binary_log_loss: 0.29756
[890]	train's avg_binary_log_loss: 0.249871	valid's avg_binary_log_loss: 0.297565
[895]	train's avg_binary_log_loss: 0.249706	valid's avg_binary_log_loss: 0.297622
[900]	train's avg_binary_log_loss: 0.249554	valid's avg_binary_log_loss: 0.297701
[905]	train's avg_b

[350]	train's avg_binary_log_loss: 0.258877	valid's avg_binary_log_loss: 0.299266
[355]	train's avg_binary_log_loss: 0.258215	valid's avg_binary_log_loss: 0.299036
[360]	train's avg_binary_log_loss: 0.257831	valid's avg_binary_log_loss: 0.298982
[365]	train's avg_binary_log_loss: 0.257506	valid's avg_binary_log_loss: 0.299113
[370]	train's avg_binary_log_loss: 0.257179	valid's avg_binary_log_loss: 0.299145
[375]	train's avg_binary_log_loss: 0.256742	valid's avg_binary_log_loss: 0.299044
[380]	train's avg_binary_log_loss: 0.256415	valid's avg_binary_log_loss: 0.299049
[385]	train's avg_binary_log_loss: 0.256175	valid's avg_binary_log_loss: 0.299026
[390]	train's avg_binary_log_loss: 0.25588	valid's avg_binary_log_loss: 0.299095
[395]	train's avg_binary_log_loss: 0.255629	valid's avg_binary_log_loss: 0.299091
[400]	train's avg_binary_log_loss: 0.255017	valid's avg_binary_log_loss: 0.298998
[405]	train's avg_binary_log_loss: 0.254752	valid's avg_binary_log_loss: 0.299128
[410]	train's avg

[875]	train's avg_binary_log_loss: 0.233273	valid's avg_binary_log_loss: 0.304623
[880]	train's avg_binary_log_loss: 0.233107	valid's avg_binary_log_loss: 0.304582
[885]	train's avg_binary_log_loss: 0.232932	valid's avg_binary_log_loss: 0.304628
[890]	train's avg_binary_log_loss: 0.232729	valid's avg_binary_log_loss: 0.304652
[895]	train's avg_binary_log_loss: 0.232547	valid's avg_binary_log_loss: 0.30477
[900]	train's avg_binary_log_loss: 0.232301	valid's avg_binary_log_loss: 0.304647
[905]	train's avg_binary_log_loss: 0.232166	valid's avg_binary_log_loss: 0.304736
[910]	train's avg_binary_log_loss: 0.232015	valid's avg_binary_log_loss: 0.304736
[915]	train's avg_binary_log_loss: 0.231782	valid's avg_binary_log_loss: 0.304865
[920]	train's avg_binary_log_loss: 0.231617	valid's avg_binary_log_loss: 0.30494
[925]	train's avg_binary_log_loss: 0.231451	valid's avg_binary_log_loss: 0.304995
[930]	train's avg_binary_log_loss: 0.231278	valid's avg_binary_log_loss: 0.305036
[935]	train's avg_

[395]	train's avg_binary_log_loss: 0.245897	valid's avg_binary_log_loss: 0.301369
[400]	train's avg_binary_log_loss: 0.245551	valid's avg_binary_log_loss: 0.301422
[405]	train's avg_binary_log_loss: 0.245218	valid's avg_binary_log_loss: 0.301505
[410]	train's avg_binary_log_loss: 0.244901	valid's avg_binary_log_loss: 0.30163
[415]	train's avg_binary_log_loss: 0.244565	valid's avg_binary_log_loss: 0.301771
[420]	train's avg_binary_log_loss: 0.244267	valid's avg_binary_log_loss: 0.301832
[425]	train's avg_binary_log_loss: 0.243947	valid's avg_binary_log_loss: 0.301879
[430]	train's avg_binary_log_loss: 0.243629	valid's avg_binary_log_loss: 0.301932
[435]	train's avg_binary_log_loss: 0.2433	valid's avg_binary_log_loss: 0.302096
[440]	train's avg_binary_log_loss: 0.242848	valid's avg_binary_log_loss: 0.302176
[445]	train's avg_binary_log_loss: 0.242437	valid's avg_binary_log_loss: 0.302247
[450]	train's avg_binary_log_loss: 0.242125	valid's avg_binary_log_loss: 0.302394
[455]	train's avg_b

[905]	train's avg_binary_log_loss: 0.217825	valid's avg_binary_log_loss: 0.310889
[910]	train's avg_binary_log_loss: 0.21762	valid's avg_binary_log_loss: 0.310993
[915]	train's avg_binary_log_loss: 0.217439	valid's avg_binary_log_loss: 0.31103
[920]	train's avg_binary_log_loss: 0.21713	valid's avg_binary_log_loss: 0.311057
[925]	train's avg_binary_log_loss: 0.216913	valid's avg_binary_log_loss: 0.311215
[930]	train's avg_binary_log_loss: 0.216726	valid's avg_binary_log_loss: 0.311218
[935]	train's avg_binary_log_loss: 0.216538	valid's avg_binary_log_loss: 0.311361
[940]	train's avg_binary_log_loss: 0.216333	valid's avg_binary_log_loss: 0.311421
[945]	train's avg_binary_log_loss: 0.216134	valid's avg_binary_log_loss: 0.311524
[950]	train's avg_binary_log_loss: 0.215922	valid's avg_binary_log_loss: 0.311636
[955]	train's avg_binary_log_loss: 0.215702	valid's avg_binary_log_loss: 0.311738
[960]	train's avg_binary_log_loss: 0.215515	valid's avg_binary_log_loss: 0.311809
[965]	train's avg_b