# Light Gradient-Boosting Machine (LightGBM)

## Import Libraries and Data Loading

In [55]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
import pickle
import lightgbm as lgb

train_df_raw = pd.read_feather('../data/processed/train_dataset.feather')
val_df_raw = pd.read_feather('../data/processed/validation_dataset.feather')
test_df_raw = pd.read_feather('../data/processed/test_dataset.feather')

## Drop unneccessary columns

In [56]:
train_df = train_df_raw.drop(['index','qid1','qid2'],axis=1)
val_df = val_df_raw.drop(['index','qid1','qid2'],axis=1)
test_df = test_df_raw.drop(['index','qid1','qid2'],axis=1)
train_df.head(5)

Unnamed: 0,question1,question2,q1_cleaned,q2_cleaned,q1_trimmed,q2_trimmed,q1_start,q2_start,q1_topic,q2_topic,...,same_starting,same_ending,wmdistance,dist_cosine,dist_cityblock,dist_canberra,dist_euclidean,dist_minkowski,is_duplicate,bert_prob_0
0,Has Ancient Chaldea been scientifically tested?,Has Ancient History been scientifically tested...,ha ancient chaldea been scientifically tested,ha ancient history been scientifically tested ...,ha ancient chaldea been scientifically tested,ha ancient history been scientifically tested ...,ha,ha,Free/Ocatopm/Software/Website,Relationship/Girl/Guy/People/Life,...,1,0,0.192389,0.238296,34.632359,167.718247,2.658521,2.658521,0,0.412712
1,What makes someone a good teacher?,What should be the first and most important qu...,what make someone a good teacher,what should be the first and most important qu...,what make someone a good teacher,what should be the first and most important qu...,what,what,Social Media/Gadget/Email,English/Law/Writing,...,1,1,0.34309,0.097663,24.251799,145.539053,1.837212,1.837212,1,0.210485
2,Which is the best state for education for a in...,What is the best state in the U.S for internat...,which is the best state for education for a in...,what is the best state in the united state for...,which is the best state for education for a in...,what is the best state in the united state for...,which,what,Purpose/Energy,Job/College/University,...,0,0,0.098867,0.015203,9.078124,89.63496,0.659214,0.659214,1,0.153916
3,What are the most common ridge patterns in fin...,Which ridge pattern is the least common in fin...,what are the most common ridge pattern in fing...,which ridge pattern is the least common in fin...,what are the most common ridge pattern in fing...,which ridge pattern is the least common in fin...,what,which,Language/Relationship,Language/Relationship,...,0,1,0.129774,0.039147,13.554628,110.654286,0.986643,0.986643,0,0.906878
4,Social History: When did Americans start givin...,Today is my nephew 6th birthday. What is the b...,social history when did american start giving ...,today is my nephew 6th birthday what is the be...,social history when did american start giving ...,today is my nephew 6th birthday what is the be...,social,today,Self-help/Learn/Business,Job/College/University,...,0,0,0.230667,0.115745,25.036501,168.859774,1.817673,1.817673,0,0.999865


### Here, we take only columns that are not string, hence omitting questions, topics, starting words, etc.

In [57]:
X_train, X_val, X_test = train_df.select_dtypes(include=np.number).drop('is_duplicate',axis = 1), val_df.select_dtypes(include=np.number).drop('is_duplicate', axis =1), test_df.select_dtypes(include=np.number).drop('is_duplicate', axis =1)
y_train, y_val = train_df[['is_duplicate']], val_df[['is_duplicate']]

In [58]:
X_train.shape
y_train.shape

(323429, 25)

(323429, 1)

In [59]:
X_train.head(5)

Unnamed: 0,length_diff,same_question,lc_substring,lc_subsequence,jaccard_dist,common_words,common_ratio,levenshtein,fuzz_qratio,fuzz_wratio,...,same_topic,same_starting,same_ending,wmdistance,dist_cosine,dist_cityblock,dist_canberra,dist_euclidean,dist_minkowski,bert_prob_0
0,-95,0,27,39,0.219697,5,0.027473,0.428571,43,86,...,0,1,0,0.192389,0.238296,34.632359,167.718247,2.658521,2.658521,0.412712
1,-36,0,15,27,0.148148,4,0.039604,0.534653,53,86,...,0,1,1,0.34309,0.097663,24.251799,145.539053,1.837212,1.837212,0.210485
2,13,0,22,52,0.494382,9,0.060403,0.697987,77,90,...,0,0,0,0.098867,0.015203,9.078124,89.63496,0.659214,0.659214,0.153916
3,0,0,16,34,0.455882,6,0.056075,0.635514,64,81,...,1,0,1,0.129774,0.039147,13.554628,110.654286,0.986643,0.986643,0.906878
4,-7,0,10,27,0.0625,2,0.015748,0.425197,43,51,...,0,0,0,0.230667,0.115745,25.036501,168.859774,1.817673,1.817673,0.999865


In [60]:
X_train.describe()

Unnamed: 0,length_diff,same_question,lc_substring,lc_subsequence,jaccard_dist,common_words,common_ratio,levenshtein,fuzz_qratio,fuzz_wratio,...,same_topic,same_starting,same_ending,wmdistance,dist_cosine,dist_cityblock,dist_canberra,dist_euclidean,dist_minkowski,bert_prob_0
count,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,...,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0,323429.0
mean,-0.509824,0.0,18.301099,34.451104,0.283078,5.062957,0.047562,0.620982,61.946223,76.51121,...,0.477765,0.494501,0.285806,inf,0.0938,20.409918,131.646328,1.505318,1.505318,0.639239
std,32.680816,0.0,14.550269,16.76143,0.227183,3.085492,0.025286,0.183955,18.376092,15.286261,...,0.499506,0.499971,0.451798,,0.081258,8.979431,36.493995,0.670176,0.670176,0.425
min,-1080.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000812
25%,-11.0,0.0,10.0,24.0,0.106599,3.0,0.028571,0.46729,47.0,66.0,...,0.0,0.0,0.0,0.1613421,0.040284,14.751558,110.957854,1.082978,1.082978,0.112546
50%,0.0,0.0,15.0,31.0,0.225,5.0,0.046512,0.608696,61.0,85.0,...,0.0,0.0,0.0,0.2195602,0.073056,19.610207,134.248227,1.441646,1.441646,0.950934
75%,12.0,0.0,22.0,41.0,0.410959,7.0,0.066667,0.767123,76.0,86.0,...,1.0,1.0,1.0,0.2815077,0.121864,25.196007,155.439775,1.858509,1.858509,0.999382
max,487.0,0.0,221.0,280.0,1.0,41.0,0.2,1.0,100.0,100.0,...,1.0,1.0,1.0,inf,1.032621,118.01944,300.0,8.676815,8.676815,0.999884


## Data Processing

As earlier found, there are some infinity values caused by divison error. We proceed to replace them with 0.

In [61]:
X_train.replace([np.inf, -np.inf], 0, inplace=True)
X_val.replace([np.inf, -np.inf], 0, inplace=True)
X_test.replace([np.inf, -np.inf], 0, inplace=True)

In [62]:
columns_to_scale = ['length_diff','jaccard_dist','common_ratio','levenshtein','fuzz_qratio','fuzz_wratio','wmdistance','dist_cosine','dist_cityblock',
                   'dist_canberra','dist_euclidean','dist_minkowski']

scaler = StandardScaler()

X_train.loc[:,columns_to_scale] = scaler.fit_transform(X_train.loc[:,columns_to_scale])
X_val.loc[:,columns_to_scale] = scaler.transform(X_val.loc[:,columns_to_scale])
X_test.loc[:,columns_to_scale] = scaler.transform(X_test.loc[:,columns_to_scale])

## 1. Building Base LightGBM

In [63]:
lgb_params = {"boosting": "gbdt", 'learning_rate': 0.05,
 "feature_fraction": 0.6, "bagging_freq": 1, "bagging_fraction": 0.8 , 'n_estimators': 100000,
 "metric": 'mae', "lambda_l1": 0.1, 'num_leaves': 32, 'min_data_in_leaf': 50, "num_threads": 8,
 "bagging_seed" : 1024,
 "seed": 1024,
 'feature_fraction_seed': 1024,
 }

In [64]:
clf = lgb.LGBMClassifier(**lgb_params)
clf.fit(X_train, y_train, eval_set=[(X_train, y_train),(X_val, y_val)], verbose = 1, eval_metric ='logloss',early_stopping_rounds=200)     

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[1]	training's binary_logloss: 0.617363	training's l1: 0.446079	valid_1's binary_logloss: 0.617394	valid_1's l1: 0.446076
[2]	training's binary_logloss: 0.580554	training's l1: 0.427401	valid_1's binary_logloss: 0.580609	valid_1's l1: 0.427398
[3]	training's binary_logloss: 0.547408	training's l1: 0.409698	valid_1's binary_logloss: 0.547502	valid_1's l1: 0.409704
[4]	training's binary_logloss: 0.538216	training's l1: 0.404609	valid_1's binary_logloss: 0.538308	valid_1's l1: 0.404616
[5]	training's binary_logloss: 0.52617	training's l1: 0.397762	valid_1's binary_logloss: 0.526283	valid_1's l1: 0.397794
[6]	training's binary_logloss: 0.518561	training's l1: 0.393279	valid_1's binary_logloss: 0.518668	valid_1's l1: 0.393315
[7]	training's binary_logloss: 0.507397	training's l1: 0.386622	valid_1's binary_logloss: 0.507475	valid_1's l1: 0.386636
[8]	training's binary_logloss: 0.497087	training's l1: 0.380248	valid_1's binary_logloss: 0.497147	valid_1's l1: 0.380243
[9]	training's binary_log

[65]	training's binary_logloss: 0.1683	training's l1: 0.127999	valid_1's binary_logloss: 0.168986	valid_1's l1: 0.128268
[66]	training's binary_logloss: 0.165563	training's l1: 0.125182	valid_1's binary_logloss: 0.166272	valid_1's l1: 0.125465
[67]	training's binary_logloss: 0.162961	training's l1: 0.122496	valid_1's binary_logloss: 0.163662	valid_1's l1: 0.122783
[68]	training's binary_logloss: 0.160509	training's l1: 0.119947	valid_1's binary_logloss: 0.161225	valid_1's l1: 0.12024
[69]	training's binary_logloss: 0.158231	training's l1: 0.117524	valid_1's binary_logloss: 0.158933	valid_1's l1: 0.117816
[70]	training's binary_logloss: 0.158028	training's l1: 0.117292	valid_1's binary_logloss: 0.158756	valid_1's l1: 0.117593
[71]	training's binary_logloss: 0.157819	training's l1: 0.117056	valid_1's binary_logloss: 0.158576	valid_1's l1: 0.117368
[72]	training's binary_logloss: 0.155678	training's l1: 0.114772	valid_1's binary_logloss: 0.156439	valid_1's l1: 0.115096
[73]	training's bin

[141]	training's binary_logloss: 0.120885	training's l1: 0.0735334	valid_1's binary_logloss: 0.122835	valid_1's l1: 0.0742966
[142]	training's binary_logloss: 0.120838	training's l1: 0.0735208	valid_1's binary_logloss: 0.122814	valid_1's l1: 0.0742919
[143]	training's binary_logloss: 0.120686	training's l1: 0.0732883	valid_1's binary_logloss: 0.122667	valid_1's l1: 0.0740634
[144]	training's binary_logloss: 0.120538	training's l1: 0.0730645	valid_1's binary_logloss: 0.122538	valid_1's l1: 0.073847
[145]	training's binary_logloss: 0.120451	training's l1: 0.0730027	valid_1's binary_logloss: 0.122475	valid_1's l1: 0.073795
[146]	training's binary_logloss: 0.120395	training's l1: 0.0729889	valid_1's binary_logloss: 0.122445	valid_1's l1: 0.0737846
[147]	training's binary_logloss: 0.120245	training's l1: 0.0727777	valid_1's binary_logloss: 0.122325	valid_1's l1: 0.073574
[148]	training's binary_logloss: 0.120195	training's l1: 0.0727681	valid_1's binary_logloss: 0.122315	valid_1's l1: 0.073

[212]	training's binary_logloss: 0.115911	training's l1: 0.0684277	valid_1's binary_logloss: 0.119982	valid_1's l1: 0.0696584
[213]	training's binary_logloss: 0.11586	training's l1: 0.0683951	valid_1's binary_logloss: 0.119962	valid_1's l1: 0.069631
[214]	training's binary_logloss: 0.115796	training's l1: 0.0683652	valid_1's binary_logloss: 0.119955	valid_1's l1: 0.0696032
[215]	training's binary_logloss: 0.115751	training's l1: 0.0683263	valid_1's binary_logloss: 0.119949	valid_1's l1: 0.0695681
[216]	training's binary_logloss: 0.115703	training's l1: 0.0683039	valid_1's binary_logloss: 0.119916	valid_1's l1: 0.0695536
[217]	training's binary_logloss: 0.115668	training's l1: 0.0682974	valid_1's binary_logloss: 0.119916	valid_1's l1: 0.0695575
[218]	training's binary_logloss: 0.115625	training's l1: 0.068271	valid_1's binary_logloss: 0.119901	valid_1's l1: 0.0695399
[219]	training's binary_logloss: 0.115575	training's l1: 0.0682432	valid_1's binary_logloss: 0.119873	valid_1's l1: 0.069

[286]	training's binary_logloss: 0.112974	training's l1: 0.0670205	valid_1's binary_logloss: 0.119624	valid_1's l1: 0.0687525
[287]	training's binary_logloss: 0.112945	training's l1: 0.0670106	valid_1's binary_logloss: 0.119618	valid_1's l1: 0.0687496
[288]	training's binary_logloss: 0.112913	training's l1: 0.0669959	valid_1's binary_logloss: 0.11961	valid_1's l1: 0.0687388
[289]	training's binary_logloss: 0.112873	training's l1: 0.0669855	valid_1's binary_logloss: 0.119603	valid_1's l1: 0.0687354
[290]	training's binary_logloss: 0.112837	training's l1: 0.0669705	valid_1's binary_logloss: 0.119598	valid_1's l1: 0.0687296
[291]	training's binary_logloss: 0.112803	training's l1: 0.0669471	valid_1's binary_logloss: 0.119594	valid_1's l1: 0.0687116
[292]	training's binary_logloss: 0.112766	training's l1: 0.0669179	valid_1's binary_logloss: 0.119576	valid_1's l1: 0.0686921
[293]	training's binary_logloss: 0.112734	training's l1: 0.0669105	valid_1's binary_logloss: 0.119581	valid_1's l1: 0.0

[364]	training's binary_logloss: 0.110582	training's l1: 0.0661052	valid_1's binary_logloss: 0.119511	valid_1's l1: 0.068381
[365]	training's binary_logloss: 0.110552	training's l1: 0.0660938	valid_1's binary_logloss: 0.119516	valid_1's l1: 0.068378
[366]	training's binary_logloss: 0.110517	training's l1: 0.0660723	valid_1's binary_logloss: 0.119516	valid_1's l1: 0.0683646
[367]	training's binary_logloss: 0.110487	training's l1: 0.0660674	valid_1's binary_logloss: 0.119523	valid_1's l1: 0.0683696
[368]	training's binary_logloss: 0.110456	training's l1: 0.0660587	valid_1's binary_logloss: 0.119519	valid_1's l1: 0.0683705
[369]	training's binary_logloss: 0.11043	training's l1: 0.0660444	valid_1's binary_logloss: 0.119514	valid_1's l1: 0.0683608
[370]	training's binary_logloss: 0.110396	training's l1: 0.0660364	valid_1's binary_logloss: 0.119503	valid_1's l1: 0.0683595
[371]	training's binary_logloss: 0.110373	training's l1: 0.0660335	valid_1's binary_logloss: 0.119505	valid_1's l1: 0.068

[430]	training's binary_logloss: 0.108924	training's l1: 0.0655977	valid_1's binary_logloss: 0.1198	valid_1's l1: 0.068331
[431]	training's binary_logloss: 0.108894	training's l1: 0.0655848	valid_1's binary_logloss: 0.119791	valid_1's l1: 0.0683293
[432]	training's binary_logloss: 0.108857	training's l1: 0.0655649	valid_1's binary_logloss: 0.119774	valid_1's l1: 0.0683171
[433]	training's binary_logloss: 0.10883	training's l1: 0.0655565	valid_1's binary_logloss: 0.119773	valid_1's l1: 0.0683179
[434]	training's binary_logloss: 0.108792	training's l1: 0.0655427	valid_1's binary_logloss: 0.119748	valid_1's l1: 0.0683099
[435]	training's binary_logloss: 0.10876	training's l1: 0.0655291	valid_1's binary_logloss: 0.119749	valid_1's l1: 0.068308
[436]	training's binary_logloss: 0.108727	training's l1: 0.0655197	valid_1's binary_logloss: 0.119747	valid_1's l1: 0.0683103
[437]	training's binary_logloss: 0.10869	training's l1: 0.0655071	valid_1's binary_logloss: 0.119739	valid_1's l1: 0.0683047

[504]	training's binary_logloss: 0.106859	training's l1: 0.0647958	valid_1's binary_logloss: 0.119565	valid_1's l1: 0.0680445
[505]	training's binary_logloss: 0.106833	training's l1: 0.0647879	valid_1's binary_logloss: 0.119556	valid_1's l1: 0.0680395
[506]	training's binary_logloss: 0.10681	training's l1: 0.0647735	valid_1's binary_logloss: 0.119543	valid_1's l1: 0.0680278
[507]	training's binary_logloss: 0.106784	training's l1: 0.0647669	valid_1's binary_logloss: 0.119549	valid_1's l1: 0.0680274
[508]	training's binary_logloss: 0.106754	training's l1: 0.0647592	valid_1's binary_logloss: 0.119556	valid_1's l1: 0.0680285
[509]	training's binary_logloss: 0.106719	training's l1: 0.0647379	valid_1's binary_logloss: 0.119559	valid_1's l1: 0.0680138
[510]	training's binary_logloss: 0.106694	training's l1: 0.0647244	valid_1's binary_logloss: 0.119549	valid_1's l1: 0.0680047
[511]	training's binary_logloss: 0.10667	training's l1: 0.0647257	valid_1's binary_logloss: 0.119544	valid_1's l1: 0.06

LGBMClassifier(bagging_fraction=0.8, bagging_freq=1, bagging_seed=1024,
               boosting='gbdt', feature_fraction=0.6,
               feature_fraction_seed=1024, lambda_l1=0.1, learning_rate=0.05,
               metric='mae', min_data_in_leaf=50, n_estimators=100000,
               num_leaves=32, num_threads=8, seed=1024)

In [92]:
preds_train = clf.predict(X_train)
preds_prob_train = clf.predict_proba(X_train)
preds_val = clf.predict(X_val)
preds_prob_val = clf.predict_proba(X_val)

In [66]:
print("The train log loss is:", log_loss(y_train, preds_prob_train))
print("The train precision is:", precision_score(y_train, preds_train))
print("The validation log loss is:", log_loss(y_val, preds_prob_val))
print("The validation precision is:", precision_score(y_val, preds_val))

The train log loss is: 0.11092481595205556
The train precision is: 0.9342233826801652
The validation log loss is: 0.1194613686613149
The validation precision is: 0.9332585899887885


#### Get predicted probablity for test set for second layer stacking
- Here, we only want the predicted probability of one class, hence we will only fetch the second column, which is predicted probability of being duplicated.

In [67]:
preds_prob_test = clf.predict_proba(X_test)[:, 1]

## 2. Refine with more features from TFIDF
- Now we will try with the dataset with vectorized features

In [68]:
train_df_raw = pd.read_feather('../data/processed/x_train_dataset.feather')
val_df_raw = pd.read_feather('../data/processed/x_validation_dataset.feather')
test_df_raw = pd.read_feather('../data/processed/x_test_dataset.feather')

In [69]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    train_df_raw.head(5)

Unnamed: 0,index,question1,question2,q1_start,q2_start,q1_topic,q2_topic,length_diff,same_question,lc_substring,lc_subsequence,jaccard_dist,common_words,common_ratio,levenshtein,fuzz_qratio,fuzz_wratio,q2_question_mark_count,q1_question_mark_count,question_mark_count_diff,freq_q1+q2,freq_q1-q2,same_topic,same_starting,same_ending,wmdistance,dist_cosine,dist_cityblock,dist_canberra,dist_euclidean,dist_minkowski,q1_cleaned_v2,q2_cleaned_v2,q1_vector,q2_vector,q1_vec_0,q1_vec_1,q1_vec_2,q1_vec_3,q1_vec_4,q2_vec_0,q2_vec_1,q2_vec_2,q2_vec_3,q2_vec_4,q1_Assessment/Word/Home,q1_Best/Way/Visit,q1_Country/Car/Show/Television,q1_Engine/Password/Search,q1_English/Law/Writing,q1_Food/Health,q1_Free/Ocatopm/Software/Website,q1_India/Government/China,q1_Interview/Difference/Drug,q1_Job/College/University,q1_Language/Relationship,q1_Long/Review/Work/Compare,q1_Lose/Weight/Time/Travel/Salary,q1_Money/Bank/Online,q1_Movie/Video Game/Youtube,q1_Politics/Trump/Election,q1_Problem/Increase,q1_Purpose/Energy,q1_Quora/Question/Google/Answer,q1_Relationship/Girl/Guy/People/Life,q1_Self-help/Learn/Business,q1_Sex/Woman/Man,q1_Social Media/Gadget/Email,q1_United States/Day,q1_Year/New/Stock/Company,q2_Assessment/Word/Home,q2_Best/Way/Visit,q2_Country/Car/Show/Television,q2_Engine/Password/Search,q2_English/Law/Writing,q2_Food/Health,q2_Free/Ocatopm/Software/Website,q2_India/Government/China,q2_Interview/Difference/Drug,q2_Job/College/University,q2_Language/Relationship,q2_Long/Review/Work/Compare,q2_Lose/Weight/Time/Travel/Salary,q2_Money/Bank/Online,q2_Movie/Video Game/Youtube,q2_Politics/Trump/Election,q2_Problem/Increase,q2_Purpose/Energy,q2_Quora/Question/Google/Answer,q2_Relationship/Girl/Guy/People/Life,q2_Self-help/Learn/Business,q2_Sex/Woman/Man,q2_Social Media/Gadget/Email,q2_United States/Day,q2_Year/New/Stock/Company,is_duplicate
0,33525,Has Ancient Chaldea been scientifically tested?,Has Ancient History been scientifically tested...,ha,ha,Free/Ocatopm/Software/Website,Relationship/Girl/Guy/People/Life,-95,0,27,39,0.219697,5,0.027473,0.428571,43,86,4,1,-3,11,-9,0,1,0,0.192389,0.238296,34.632359,167.718247,2.658521,2.658521,ha ancient chaldea scientifically tested,ha ancient history scientifically tested real ...,"[0.031408057, 0.10707768, 0.32202494, -0.23581...","[0.14967845, 0.2725273, -0.03749804, -0.142553...",0.031408,0.107078,0.322025,-0.235812,0.065765,0.149678,0.272527,-0.037498,-0.142554,0.114115,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1,369925,What makes someone a good teacher?,What should be the first and most important qu...,what,what,Social Media/Gadget/Email,English/Law/Writing,-36,0,15,27,0.148148,4,0.039604,0.534653,53,86,1,1,0,5,-3,0,1,1,0.34309,0.097663,24.251799,145.539053,1.837212,1.837212,make someone good teacher,first important quality good teacher,"[-0.005436562, 0.03173224, 0.027050504, 0.0323...","[0.008946306, 0.057755146, 0.05656157, 0.01483...",-0.005437,0.031732,0.027051,0.032324,0.018609,0.008946,0.057755,0.056562,0.014834,0.035993,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,176858,Which is the best state for education for a in...,What is the best state in the U.S for internat...,which,what,Purpose/Energy,Job/College/University,13,0,22,52,0.494382,9,0.060403,0.697987,77,90,1,1,0,2,0,0,0,0,0.098867,0.015203,9.078124,89.63496,0.659214,0.659214,best state education international student uni...,best state united state international student,"[0.17608762, 0.09855839, 0.28985745, -0.232633...","[0.11724252, 0.14971006, 0.29985344, -0.303659...",0.176088,0.098558,0.289857,-0.232634,-0.038018,0.117243,0.14971,0.299853,-0.30366,-0.060581,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,293958,What are the most common ridge patterns in fin...,Which ridge pattern is the least common in fin...,what,which,Language/Relationship,Language/Relationship,0,0,16,34,0.455882,6,0.056075,0.635514,64,81,1,1,0,2,0,1,0,1,0.129774,0.039147,13.554628,110.654286,0.986643,0.986643,common ridge pattern fingerprint,ridge pattern least common fingerprint,"[0.25171572, -0.024394771, 0.11217328, -0.0639...","[0.29362243, 0.050006576, 0.08588519, -0.15508...",0.251716,-0.024395,0.112173,-0.063949,-0.12859,0.293622,0.050007,0.085885,-0.15508,-0.202898,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,182359,Social History: When did Americans start givin...,Today is my nephew 6th birthday. What is the b...,social,today,Self-help/Learn/Business,Job/College/University,-7,0,10,27,0.0625,2,0.015748,0.425197,43,51,1,1,0,2,0,0,0,0,0.230667,0.115745,25.036501,168.859774,1.817673,1.817673,social history american start giving birthday ...,today nephew 6th birthday best gift give,"[0.09621305, 0.1520336, 0.030658064, 0.1090956...","[-0.21848702, 0.02774278, -0.096341625, 0.3227...",0.096213,0.152034,0.030658,0.109096,0.195408,-0.218487,0.027743,-0.096342,0.322707,0.008506,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [70]:
train_df = train_df_raw.drop(['index','q1_start','q2_start','q1_topic','q2_topic','question1','question2','q1_cleaned_v2','q2_cleaned_v2'],axis=1)
val_df = val_df_raw.drop(['index','q1_start','q2_start','q1_topic','q2_topic','question1','question2','q1_cleaned_v2','q2_cleaned_v2'],axis=1)
test_df = test_df_raw.drop(['index','q1_start','q2_start','q1_topic','q2_topic','question1','question2','q1_cleaned_v2','q2_cleaned_v2'],axis=1)

In [71]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    train_df.head(5)
train_df['same_question'].unique()
train_df['same_topic'].unique()

Unnamed: 0,length_diff,same_question,lc_substring,lc_subsequence,jaccard_dist,common_words,common_ratio,levenshtein,fuzz_qratio,fuzz_wratio,q2_question_mark_count,q1_question_mark_count,question_mark_count_diff,freq_q1+q2,freq_q1-q2,same_topic,same_starting,same_ending,wmdistance,dist_cosine,dist_cityblock,dist_canberra,dist_euclidean,dist_minkowski,q1_vector,q2_vector,q1_vec_0,q1_vec_1,q1_vec_2,q1_vec_3,q1_vec_4,q2_vec_0,q2_vec_1,q2_vec_2,q2_vec_3,q2_vec_4,q1_Assessment/Word/Home,q1_Best/Way/Visit,q1_Country/Car/Show/Television,q1_Engine/Password/Search,q1_English/Law/Writing,q1_Food/Health,q1_Free/Ocatopm/Software/Website,q1_India/Government/China,q1_Interview/Difference/Drug,q1_Job/College/University,q1_Language/Relationship,q1_Long/Review/Work/Compare,q1_Lose/Weight/Time/Travel/Salary,q1_Money/Bank/Online,q1_Movie/Video Game/Youtube,q1_Politics/Trump/Election,q1_Problem/Increase,q1_Purpose/Energy,q1_Quora/Question/Google/Answer,q1_Relationship/Girl/Guy/People/Life,q1_Self-help/Learn/Business,q1_Sex/Woman/Man,q1_Social Media/Gadget/Email,q1_United States/Day,q1_Year/New/Stock/Company,q2_Assessment/Word/Home,q2_Best/Way/Visit,q2_Country/Car/Show/Television,q2_Engine/Password/Search,q2_English/Law/Writing,q2_Food/Health,q2_Free/Ocatopm/Software/Website,q2_India/Government/China,q2_Interview/Difference/Drug,q2_Job/College/University,q2_Language/Relationship,q2_Long/Review/Work/Compare,q2_Lose/Weight/Time/Travel/Salary,q2_Money/Bank/Online,q2_Movie/Video Game/Youtube,q2_Politics/Trump/Election,q2_Problem/Increase,q2_Purpose/Energy,q2_Quora/Question/Google/Answer,q2_Relationship/Girl/Guy/People/Life,q2_Self-help/Learn/Business,q2_Sex/Woman/Man,q2_Social Media/Gadget/Email,q2_United States/Day,q2_Year/New/Stock/Company,is_duplicate
0,-95,0,27,39,0.219697,5,0.027473,0.428571,43,86,4,1,-3,11,-9,0,1,0,0.192389,0.238296,34.632359,167.718247,2.658521,2.658521,"[0.031408057, 0.10707768, 0.32202494, -0.23581...","[0.14967845, 0.2725273, -0.03749804, -0.142553...",0.031408,0.107078,0.322025,-0.235812,0.065765,0.149678,0.272527,-0.037498,-0.142554,0.114115,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1,-36,0,15,27,0.148148,4,0.039604,0.534653,53,86,1,1,0,5,-3,0,1,1,0.34309,0.097663,24.251799,145.539053,1.837212,1.837212,"[-0.005436562, 0.03173224, 0.027050504, 0.0323...","[0.008946306, 0.057755146, 0.05656157, 0.01483...",-0.005437,0.031732,0.027051,0.032324,0.018609,0.008946,0.057755,0.056562,0.014834,0.035993,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,13,0,22,52,0.494382,9,0.060403,0.697987,77,90,1,1,0,2,0,0,0,0,0.098867,0.015203,9.078124,89.63496,0.659214,0.659214,"[0.17608762, 0.09855839, 0.28985745, -0.232633...","[0.11724252, 0.14971006, 0.29985344, -0.303659...",0.176088,0.098558,0.289857,-0.232634,-0.038018,0.117243,0.14971,0.299853,-0.30366,-0.060581,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,0,0,16,34,0.455882,6,0.056075,0.635514,64,81,1,1,0,2,0,1,0,1,0.129774,0.039147,13.554628,110.654286,0.986643,0.986643,"[0.25171572, -0.024394771, 0.11217328, -0.0639...","[0.29362243, 0.050006576, 0.08588519, -0.15508...",0.251716,-0.024395,0.112173,-0.063949,-0.12859,0.293622,0.050007,0.085885,-0.15508,-0.202898,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,-7,0,10,27,0.0625,2,0.015748,0.425197,43,51,1,1,0,2,0,0,0,0,0.230667,0.115745,25.036501,168.859774,1.817673,1.817673,"[0.09621305, 0.1520336, 0.030658064, 0.1090956...","[-0.21848702, 0.02774278, -0.096341625, 0.3227...",0.096213,0.152034,0.030658,0.109096,0.195408,-0.218487,0.027743,-0.096342,0.322707,0.008506,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


array([0])

array([0, 1])

### Data Preparation
- Scaling
- Filtering

In [72]:
train_df = train_df.select_dtypes(include=np.number)
val_df = val_df.select_dtypes(include=np.number)
test_df = test_df.select_dtypes(include=np.number)

In [73]:
column_names = train_df.columns

In [74]:
train_df['same_question'].unique()
train_df['same_topic'].unique()

array([0])

array([0, 1])

In [75]:
train_df.replace([np.inf, -np.inf], 0, inplace=True)
val_df.replace([np.inf, -np.inf], 0, inplace=True)
test_df.replace([np.inf, -np.inf], 0, inplace=True)

columns_to_scale = ['length_diff','jaccard_dist','common_ratio','levenshtein','fuzz_qratio','fuzz_wratio','wmdistance','dist_cosine','dist_cityblock',
                   'dist_canberra','dist_euclidean','dist_minkowski']

scaler = StandardScaler()
    

train_df.loc[:,columns_to_scale] = scaler.fit_transform(train_df.loc[:,columns_to_scale])
val_df.loc[:,columns_to_scale] = scaler.transform(val_df.loc[:,columns_to_scale])
test_df.loc[:,columns_to_scale] = scaler.transform(test_df.loc[:,columns_to_scale])


In [76]:
train_df = pd.DataFrame(train_df, columns=column_names)
val_df =  pd.DataFrame(val_df, columns=column_names)
test_df =  pd.DataFrame(test_df, columns=column_names)

In [77]:
train_df['is_duplicate'] = train_df['is_duplicate'].astype(int)
val_df['is_duplicate'] = val_df['is_duplicate'].astype(int)
test_df['is_duplicate'] = test_df['is_duplicate'].astype(int)

In [78]:
predictors = train_df.loc[:, train_df.columns != 'is_duplicate'].columns.tolist()
target = 'is_duplicate'
categorical_features = ['same_question','same_topic','same_starting','same_ending']

In [79]:
train_df['same_question'].unique()
train_df['same_topic'].unique()
train_df.head(5)

array([0])

array([0, 1])

Unnamed: 0,length_diff,same_question,lc_substring,lc_subsequence,jaccard_dist,common_words,common_ratio,levenshtein,fuzz_qratio,fuzz_wratio,...,q2_Problem/Increase,q2_Purpose/Energy,q2_Quora/Question/Google/Answer,q2_Relationship/Girl/Guy/People/Life,q2_Self-help/Learn/Business,q2_Sex/Woman/Man,q2_Social Media/Gadget/Email,q2_United States/Day,q2_Year/New/Stock/Company,is_duplicate
0,-2.891308,0,27,39,-0.278987,5,-0.794481,-1.045966,-1.031027,0.620741,...,0,0,0,1,0,0,0,0,0,0
1,-1.085965,0,15,27,-0.593926,4,-0.314716,-0.469292,-0.486841,0.620741,...,0,0,0,0,0,0,0,0,0,1
2,0.413388,0,22,52,0.930104,9,0.507816,0.418606,0.819206,0.882414,...,0,0,0,0,0,0,0,0,0,1
3,0.0156,0,16,34,0.760639,6,0.336659,0.078998,0.111764,0.293649,...,0,0,0,0,0,0,0,0,0,0
4,-0.198593,0,10,27,-0.970927,2,-1.258153,-1.064311,-1.031027,-1.668901,...,0,0,0,0,0,0,0,0,0,0


In [80]:
def lgb_modelfit_nocv(params, dtrain, dvalid, predictors, target='target', objective='binary', metrics='binary_logloss',
                 feval=None, early_stopping=20, num_boost_round=3000, categorical_features=None):
    lgb_params = {
        'boosting_type': 'gbdt',
        'objective': objective,
        'metric':metrics,
        'n_estimators': 100000,
        'lambda_l1': 0.1, 
        'bagging_seed' : 1024,
        'categorical_feature' : categorical_features
    }

    lgb_params.update(params)

    lgbtrain= lgb.Dataset(dtrain[predictors].values, label=dtrain[target].values,
                          feature_name=predictors,
                          categorical_feature=categorical_features
                          )
    lgbval = lgb.Dataset(dvalid[predictors].values, label=dvalid[target].values,
                          feature_name=predictors,
                          categorical_feature=categorical_features
                          )

    evals_results = {}

    bst = lgb.train(lgb_params, 
                     lgbtrain, 
                     valid_sets=[lgbtrain, lgbval], 
                     valid_names=['train','valid'], 
                     evals_result=evals_results, 
                     num_boost_round=num_boost_round,
                     early_stopping_rounds = early_stopping,
                     feval=feval)

    n_estimators = bst.best_iteration
    print("\nModel Report")
    print("n_estimators : ", n_estimators)
    for metric in metrics:
        print(metric+":", evals_results['valid'][metric][n_estimators-1])
    
    return bst

In [81]:
params = {
    'learning_rate': 0.05,
    'num_leaves': 7,  # 2^max_depth - 1
    'max_depth': 3,  # -1 means no limit
    'min_child_samples': 100,  # Minimum number of data need in a child(min_data_in_leaf)
    'max_bin': 100,  # Number of bucketed bin for feature values
    'scale_pos_weight':0.360 # because training data is unbalanced 
}
bst = lgb_modelfit_nocv(params, 
                        train_df, 
                        val_df, 
                        predictors, 
                        target, 
                        objective='binary',
                        metrics=['binary_logloss','average_precision'],
                        early_stopping=30, 
                        categorical_features = categorical_features)

Please use categorical_feature argument of the Dataset constructor to pass this parameter.


[LightGBM] [Info] Number of positive: 119410, number of negative: 204019
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2690
[LightGBM] [Info] Number of data points in the train set: 323429, number of used features: 83
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.369200 -> initscore=-0.535650
[LightGBM] [Info] Start training from score -0.535650
[1]	train's binary_logloss: 0.647678	train's average_precision: 0.673146	valid's binary_logloss: 0.647588	valid's average_precision: 0.675135
Training until validation scores don't improve for 30 rounds
[2]	train's binary_logloss: 0.637821	train's average_precision: 0.68599	valid's binary_logloss: 0.637649	valid's average_precision: 0.687449
[3]	train's binary_logloss: 0.628679	train's average_precision: 0.700316	valid's binary_logloss: 0.628473	valid's average_precision: 0.701806
[4]	train's binary_logloss: 0.620416	train's average_precision: 0.710175	valid's binary_logloss: 0.620081	valid's average



[5]	train's binary_logloss: 0.612574	train's average_precision: 0.716263	valid's binary_logloss: 0.612165	valid's average_precision: 0.718058
[6]	train's binary_logloss: 0.605425	train's average_precision: 0.725147	valid's binary_logloss: 0.604911	valid's average_precision: 0.727264
[7]	train's binary_logloss: 0.598932	train's average_precision: 0.727127	valid's binary_logloss: 0.598349	valid's average_precision: 0.729199
[8]	train's binary_logloss: 0.593121	train's average_precision: 0.741294	valid's binary_logloss: 0.592436	valid's average_precision: 0.743525
[9]	train's binary_logloss: 0.587822	train's average_precision: 0.743312	valid's binary_logloss: 0.587135	valid's average_precision: 0.745037
[10]	train's binary_logloss: 0.583135	train's average_precision: 0.745408	valid's binary_logloss: 0.582396	valid's average_precision: 0.747273
[11]	train's binary_logloss: 0.578721	train's average_precision: 0.746081	valid's binary_logloss: 0.577966	valid's average_precision: 0.747598
[12]

[65]	train's binary_logloss: 0.508241	train's average_precision: 0.792933	valid's binary_logloss: 0.50717	valid's average_precision: 0.794333
[66]	train's binary_logloss: 0.507788	train's average_precision: 0.793281	valid's binary_logloss: 0.50668	valid's average_precision: 0.794782
[67]	train's binary_logloss: 0.507386	train's average_precision: 0.793592	valid's binary_logloss: 0.506321	valid's average_precision: 0.795065
[68]	train's binary_logloss: 0.506777	train's average_precision: 0.794072	valid's binary_logloss: 0.505769	valid's average_precision: 0.795573
[69]	train's binary_logloss: 0.506461	train's average_precision: 0.794357	valid's binary_logloss: 0.505494	valid's average_precision: 0.795823
[70]	train's binary_logloss: 0.50589	train's average_precision: 0.794884	valid's binary_logloss: 0.504922	valid's average_precision: 0.796072
[71]	train's binary_logloss: 0.505184	train's average_precision: 0.795362	valid's binary_logloss: 0.504247	valid's average_precision: 0.796447
[7

[125]	train's binary_logloss: 0.484338	train's average_precision: 0.814586	valid's binary_logloss: 0.483864	valid's average_precision: 0.815122
[126]	train's binary_logloss: 0.484089	train's average_precision: 0.81485	valid's binary_logloss: 0.483609	valid's average_precision: 0.815344
[127]	train's binary_logloss: 0.48372	train's average_precision: 0.81527	valid's binary_logloss: 0.483253	valid's average_precision: 0.815721
[128]	train's binary_logloss: 0.483454	train's average_precision: 0.815441	valid's binary_logloss: 0.482978	valid's average_precision: 0.815895
[129]	train's binary_logloss: 0.48293	train's average_precision: 0.815979	valid's binary_logloss: 0.48245	valid's average_precision: 0.816451
[130]	train's binary_logloss: 0.482659	train's average_precision: 0.816186	valid's binary_logloss: 0.482169	valid's average_precision: 0.816659
[131]	train's binary_logloss: 0.482407	train's average_precision: 0.816479	valid's binary_logloss: 0.481942	valid's average_precision: 0.8169

[185]	train's binary_logloss: 0.468704	train's average_precision: 0.827822	valid's binary_logloss: 0.468706	valid's average_precision: 0.827742
[186]	train's binary_logloss: 0.468578	train's average_precision: 0.827857	valid's binary_logloss: 0.468578	valid's average_precision: 0.82776
[187]	train's binary_logloss: 0.468446	train's average_precision: 0.827937	valid's binary_logloss: 0.468458	valid's average_precision: 0.827819
[188]	train's binary_logloss: 0.468276	train's average_precision: 0.828112	valid's binary_logloss: 0.4683	valid's average_precision: 0.828005
[189]	train's binary_logloss: 0.468008	train's average_precision: 0.828234	valid's binary_logloss: 0.468055	valid's average_precision: 0.828086
[190]	train's binary_logloss: 0.467721	train's average_precision: 0.828445	valid's binary_logloss: 0.467765	valid's average_precision: 0.828301
[191]	train's binary_logloss: 0.467625	train's average_precision: 0.828551	valid's binary_logloss: 0.467679	valid's average_precision: 0.82

[244]	train's binary_logloss: 0.459183	train's average_precision: 0.834689	valid's binary_logloss: 0.459515	valid's average_precision: 0.834406
[245]	train's binary_logloss: 0.459087	train's average_precision: 0.834795	valid's binary_logloss: 0.459422	valid's average_precision: 0.834514
[246]	train's binary_logloss: 0.458963	train's average_precision: 0.834915	valid's binary_logloss: 0.459291	valid's average_precision: 0.834662
[247]	train's binary_logloss: 0.458723	train's average_precision: 0.835067	valid's binary_logloss: 0.459067	valid's average_precision: 0.834808
[248]	train's binary_logloss: 0.45861	train's average_precision: 0.835126	valid's binary_logloss: 0.458959	valid's average_precision: 0.83485
[249]	train's binary_logloss: 0.458478	train's average_precision: 0.835203	valid's binary_logloss: 0.458841	valid's average_precision: 0.834906
[250]	train's binary_logloss: 0.458364	train's average_precision: 0.835296	valid's binary_logloss: 0.458732	valid's average_precision: 0.8

[305]	train's binary_logloss: 0.452648	train's average_precision: 0.839112	valid's binary_logloss: 0.453233	valid's average_precision: 0.838583
[306]	train's binary_logloss: 0.452537	train's average_precision: 0.839195	valid's binary_logloss: 0.453118	valid's average_precision: 0.838684
[307]	train's binary_logloss: 0.452437	train's average_precision: 0.839263	valid's binary_logloss: 0.453045	valid's average_precision: 0.838718
[308]	train's binary_logloss: 0.452302	train's average_precision: 0.83938	valid's binary_logloss: 0.452926	valid's average_precision: 0.838806
[309]	train's binary_logloss: 0.452232	train's average_precision: 0.83942	valid's binary_logloss: 0.45287	valid's average_precision: 0.838833
[310]	train's binary_logloss: 0.452169	train's average_precision: 0.839456	valid's binary_logloss: 0.45281	valid's average_precision: 0.838868
[311]	train's binary_logloss: 0.452086	train's average_precision: 0.83948	valid's binary_logloss: 0.452737	valid's average_precision: 0.8388

[364]	train's binary_logloss: 0.447741	train's average_precision: 0.842404	valid's binary_logloss: 0.448703	valid's average_precision: 0.841646
[365]	train's binary_logloss: 0.447663	train's average_precision: 0.842464	valid's binary_logloss: 0.448644	valid's average_precision: 0.841696
[366]	train's binary_logloss: 0.447632	train's average_precision: 0.842488	valid's binary_logloss: 0.448623	valid's average_precision: 0.841706
[367]	train's binary_logloss: 0.447571	train's average_precision: 0.842507	valid's binary_logloss: 0.448569	valid's average_precision: 0.841718
[368]	train's binary_logloss: 0.447508	train's average_precision: 0.842551	valid's binary_logloss: 0.448511	valid's average_precision: 0.841762
[369]	train's binary_logloss: 0.447444	train's average_precision: 0.842616	valid's binary_logloss: 0.448443	valid's average_precision: 0.841843
[370]	train's binary_logloss: 0.447405	train's average_precision: 0.842656	valid's binary_logloss: 0.448397	valid's average_precision: 0

[422]	train's binary_logloss: 0.444259	train's average_precision: 0.844685	valid's binary_logloss: 0.445567	valid's average_precision: 0.843659
[423]	train's binary_logloss: 0.444213	train's average_precision: 0.844736	valid's binary_logloss: 0.445524	valid's average_precision: 0.843702
[424]	train's binary_logloss: 0.444145	train's average_precision: 0.844797	valid's binary_logloss: 0.445468	valid's average_precision: 0.843744
[425]	train's binary_logloss: 0.444098	train's average_precision: 0.84484	valid's binary_logloss: 0.445432	valid's average_precision: 0.843777
[426]	train's binary_logloss: 0.444049	train's average_precision: 0.844885	valid's binary_logloss: 0.445371	valid's average_precision: 0.843853
[427]	train's binary_logloss: 0.444003	train's average_precision: 0.844924	valid's binary_logloss: 0.445327	valid's average_precision: 0.843886
[428]	train's binary_logloss: 0.443962	train's average_precision: 0.844934	valid's binary_logloss: 0.445289	valid's average_precision: 0.

[480]	train's binary_logloss: 0.441259	train's average_precision: 0.846725	valid's binary_logloss: 0.442914	valid's average_precision: 0.845365
[481]	train's binary_logloss: 0.441199	train's average_precision: 0.846763	valid's binary_logloss: 0.44287	valid's average_precision: 0.845385
[482]	train's binary_logloss: 0.441155	train's average_precision: 0.846797	valid's binary_logloss: 0.442843	valid's average_precision: 0.845409
[483]	train's binary_logloss: 0.441067	train's average_precision: 0.846846	valid's binary_logloss: 0.442769	valid's average_precision: 0.845449
[484]	train's binary_logloss: 0.44103	train's average_precision: 0.846885	valid's binary_logloss: 0.442745	valid's average_precision: 0.845468
[485]	train's binary_logloss: 0.440996	train's average_precision: 0.846914	valid's binary_logloss: 0.442714	valid's average_precision: 0.845495
[486]	train's binary_logloss: 0.440948	train's average_precision: 0.846944	valid's binary_logloss: 0.442662	valid's average_precision: 0.8

[538]	train's binary_logloss: 0.438776	train's average_precision: 0.848372	valid's binary_logloss: 0.440734	valid's average_precision: 0.846765
[539]	train's binary_logloss: 0.438724	train's average_precision: 0.848418	valid's binary_logloss: 0.440689	valid's average_precision: 0.846798
[540]	train's binary_logloss: 0.438689	train's average_precision: 0.848437	valid's binary_logloss: 0.440647	valid's average_precision: 0.846822
[541]	train's binary_logloss: 0.438607	train's average_precision: 0.848507	valid's binary_logloss: 0.440568	valid's average_precision: 0.84689
[542]	train's binary_logloss: 0.438571	train's average_precision: 0.848539	valid's binary_logloss: 0.440539	valid's average_precision: 0.846921
[543]	train's binary_logloss: 0.438516	train's average_precision: 0.848569	valid's binary_logloss: 0.440479	valid's average_precision: 0.846958
[544]	train's binary_logloss: 0.43846	train's average_precision: 0.8486	valid's binary_logloss: 0.440424	valid's average_precision: 0.846

[595]	train's binary_logloss: 0.436506	train's average_precision: 0.849852	valid's binary_logloss: 0.438751	valid's average_precision: 0.848042
[596]	train's binary_logloss: 0.436484	train's average_precision: 0.849864	valid's binary_logloss: 0.438728	valid's average_precision: 0.848056
[597]	train's binary_logloss: 0.436473	train's average_precision: 0.849875	valid's binary_logloss: 0.43872	valid's average_precision: 0.848066
[598]	train's binary_logloss: 0.436414	train's average_precision: 0.849904	valid's binary_logloss: 0.438669	valid's average_precision: 0.848086
[599]	train's binary_logloss: 0.436374	train's average_precision: 0.84995	valid's binary_logloss: 0.438644	valid's average_precision: 0.848112
[600]	train's binary_logloss: 0.43634	train's average_precision: 0.849975	valid's binary_logloss: 0.438605	valid's average_precision: 0.848143
[601]	train's binary_logloss: 0.436264	train's average_precision: 0.850027	valid's binary_logloss: 0.438551	valid's average_precision: 0.84

[652]	train's binary_logloss: 0.434595	train's average_precision: 0.851126	valid's binary_logloss: 0.437101	valid's average_precision: 0.849096
[653]	train's binary_logloss: 0.434555	train's average_precision: 0.851147	valid's binary_logloss: 0.437063	valid's average_precision: 0.849115
[654]	train's binary_logloss: 0.434526	train's average_precision: 0.851173	valid's binary_logloss: 0.437034	valid's average_precision: 0.849139
[655]	train's binary_logloss: 0.434487	train's average_precision: 0.85119	valid's binary_logloss: 0.437002	valid's average_precision: 0.849151
[656]	train's binary_logloss: 0.434444	train's average_precision: 0.851207	valid's binary_logloss: 0.436966	valid's average_precision: 0.849162
[657]	train's binary_logloss: 0.434404	train's average_precision: 0.851231	valid's binary_logloss: 0.436928	valid's average_precision: 0.849188
[658]	train's binary_logloss: 0.434373	train's average_precision: 0.851233	valid's binary_logloss: 0.436899	valid's average_precision: 0.

[710]	train's binary_logloss: 0.432855	train's average_precision: 0.852254	valid's binary_logloss: 0.435634	valid's average_precision: 0.850033
[711]	train's binary_logloss: 0.432815	train's average_precision: 0.852279	valid's binary_logloss: 0.435595	valid's average_precision: 0.850059
[712]	train's binary_logloss: 0.432797	train's average_precision: 0.852282	valid's binary_logloss: 0.435578	valid's average_precision: 0.850064
[713]	train's binary_logloss: 0.43276	train's average_precision: 0.852317	valid's binary_logloss: 0.435558	valid's average_precision: 0.850081
[714]	train's binary_logloss: 0.432739	train's average_precision: 0.852333	valid's binary_logloss: 0.435532	valid's average_precision: 0.850103
[715]	train's binary_logloss: 0.432719	train's average_precision: 0.852356	valid's binary_logloss: 0.435512	valid's average_precision: 0.850122
[716]	train's binary_logloss: 0.432692	train's average_precision: 0.852372	valid's binary_logloss: 0.435486	valid's average_precision: 0.

[768]	train's binary_logloss: 0.431306	train's average_precision: 0.853321	valid's binary_logloss: 0.434289	valid's average_precision: 0.850938
[769]	train's binary_logloss: 0.431277	train's average_precision: 0.853346	valid's binary_logloss: 0.434261	valid's average_precision: 0.85096
[770]	train's binary_logloss: 0.431269	train's average_precision: 0.853353	valid's binary_logloss: 0.434255	valid's average_precision: 0.850964
[771]	train's binary_logloss: 0.431254	train's average_precision: 0.853365	valid's binary_logloss: 0.434253	valid's average_precision: 0.850968
[772]	train's binary_logloss: 0.431225	train's average_precision: 0.853391	valid's binary_logloss: 0.434235	valid's average_precision: 0.850981
[773]	train's binary_logloss: 0.4312	train's average_precision: 0.853407	valid's binary_logloss: 0.434211	valid's average_precision: 0.850999
[774]	train's binary_logloss: 0.431165	train's average_precision: 0.853432	valid's binary_logloss: 0.434179	valid's average_precision: 0.85

[825]	train's binary_logloss: 0.429774	train's average_precision: 0.854379	valid's binary_logloss: 0.433012	valid's average_precision: 0.851784
[826]	train's binary_logloss: 0.42976	train's average_precision: 0.854391	valid's binary_logloss: 0.433003	valid's average_precision: 0.851795
[827]	train's binary_logloss: 0.429736	train's average_precision: 0.854397	valid's binary_logloss: 0.432991	valid's average_precision: 0.851793
[828]	train's binary_logloss: 0.429723	train's average_precision: 0.8544	valid's binary_logloss: 0.432983	valid's average_precision: 0.851796
[829]	train's binary_logloss: 0.429672	train's average_precision: 0.854427	valid's binary_logloss: 0.432947	valid's average_precision: 0.851817
[830]	train's binary_logloss: 0.429638	train's average_precision: 0.854445	valid's binary_logloss: 0.432916	valid's average_precision: 0.851826
[831]	train's binary_logloss: 0.42961	train's average_precision: 0.854461	valid's binary_logloss: 0.432886	valid's average_precision: 0.851

[884]	train's binary_logloss: 0.428236	train's average_precision: 0.855385	valid's binary_logloss: 0.431774	valid's average_precision: 0.852548
[885]	train's binary_logloss: 0.428223	train's average_precision: 0.855395	valid's binary_logloss: 0.431761	valid's average_precision: 0.852563
[886]	train's binary_logloss: 0.428199	train's average_precision: 0.855408	valid's binary_logloss: 0.431745	valid's average_precision: 0.852571
[887]	train's binary_logloss: 0.428174	train's average_precision: 0.855418	valid's binary_logloss: 0.431726	valid's average_precision: 0.852574
[888]	train's binary_logloss: 0.428156	train's average_precision: 0.855428	valid's binary_logloss: 0.431706	valid's average_precision: 0.852587
[889]	train's binary_logloss: 0.428131	train's average_precision: 0.855448	valid's binary_logloss: 0.431687	valid's average_precision: 0.852599
[890]	train's binary_logloss: 0.428102	train's average_precision: 0.855469	valid's binary_logloss: 0.431656	valid's average_precision: 0

[942]	train's binary_logloss: 0.426928	train's average_precision: 0.85627	valid's binary_logloss: 0.43073	valid's average_precision: 0.853211
[943]	train's binary_logloss: 0.426905	train's average_precision: 0.856284	valid's binary_logloss: 0.430702	valid's average_precision: 0.853228
[944]	train's binary_logloss: 0.426891	train's average_precision: 0.856293	valid's binary_logloss: 0.43069	valid's average_precision: 0.853239
[945]	train's binary_logloss: 0.42687	train's average_precision: 0.856303	valid's binary_logloss: 0.430678	valid's average_precision: 0.853238
[946]	train's binary_logloss: 0.426856	train's average_precision: 0.856316	valid's binary_logloss: 0.430671	valid's average_precision: 0.853248
[947]	train's binary_logloss: 0.426843	train's average_precision: 0.856328	valid's binary_logloss: 0.430652	valid's average_precision: 0.853268
[948]	train's binary_logloss: 0.426828	train's average_precision: 0.856339	valid's binary_logloss: 0.43064	valid's average_precision: 0.8532

[1000]	train's binary_logloss: 0.425892	train's average_precision: 0.856957	valid's binary_logloss: 0.429919	valid's average_precision: 0.853712
[1001]	train's binary_logloss: 0.425874	train's average_precision: 0.856964	valid's binary_logloss: 0.429914	valid's average_precision: 0.853712
[1002]	train's binary_logloss: 0.425845	train's average_precision: 0.856986	valid's binary_logloss: 0.429885	valid's average_precision: 0.85373
[1003]	train's binary_logloss: 0.42582	train's average_precision: 0.857005	valid's binary_logloss: 0.429873	valid's average_precision: 0.853744
[1004]	train's binary_logloss: 0.425815	train's average_precision: 0.857009	valid's binary_logloss: 0.429869	valid's average_precision: 0.853748
[1005]	train's binary_logloss: 0.4258	train's average_precision: 0.857024	valid's binary_logloss: 0.429867	valid's average_precision: 0.853752
[1006]	train's binary_logloss: 0.425784	train's average_precision: 0.857032	valid's binary_logloss: 0.429848	valid's average_precision

[1057]	train's binary_logloss: 0.424739	train's average_precision: 0.857752	valid's binary_logloss: 0.429031	valid's average_precision: 0.854292
[1058]	train's binary_logloss: 0.424726	train's average_precision: 0.857762	valid's binary_logloss: 0.429024	valid's average_precision: 0.854299
[1059]	train's binary_logloss: 0.424712	train's average_precision: 0.857771	valid's binary_logloss: 0.429014	valid's average_precision: 0.854306
[1060]	train's binary_logloss: 0.424689	train's average_precision: 0.857783	valid's binary_logloss: 0.428996	valid's average_precision: 0.85431
[1061]	train's binary_logloss: 0.424681	train's average_precision: 0.857788	valid's binary_logloss: 0.428992	valid's average_precision: 0.854311
[1062]	train's binary_logloss: 0.424647	train's average_precision: 0.85781	valid's binary_logloss: 0.428975	valid's average_precision: 0.85432
[1063]	train's binary_logloss: 0.424627	train's average_precision: 0.857825	valid's binary_logloss: 0.42897	valid's average_precision

[1116]	train's binary_logloss: 0.423714	train's average_precision: 0.858465	valid's binary_logloss: 0.428281	valid's average_precision: 0.854781
[1117]	train's binary_logloss: 0.423705	train's average_precision: 0.858472	valid's binary_logloss: 0.428274	valid's average_precision: 0.854785
[1118]	train's binary_logloss: 0.423691	train's average_precision: 0.858485	valid's binary_logloss: 0.428269	valid's average_precision: 0.854793
[1119]	train's binary_logloss: 0.423683	train's average_precision: 0.858492	valid's binary_logloss: 0.428265	valid's average_precision: 0.854798
[1120]	train's binary_logloss: 0.423669	train's average_precision: 0.858502	valid's binary_logloss: 0.428252	valid's average_precision: 0.854807
[1121]	train's binary_logloss: 0.423654	train's average_precision: 0.858514	valid's binary_logloss: 0.42825	valid's average_precision: 0.854809
[1122]	train's binary_logloss: 0.423622	train's average_precision: 0.858532	valid's binary_logloss: 0.428227	valid's average_precis

[1176]	train's binary_logloss: 0.422659	train's average_precision: 0.859159	valid's binary_logloss: 0.427509	valid's average_precision: 0.855283
[1177]	train's binary_logloss: 0.422637	train's average_precision: 0.859174	valid's binary_logloss: 0.42748	valid's average_precision: 0.855304
[1178]	train's binary_logloss: 0.422614	train's average_precision: 0.859187	valid's binary_logloss: 0.427463	valid's average_precision: 0.855306
[1179]	train's binary_logloss: 0.422589	train's average_precision: 0.859211	valid's binary_logloss: 0.427442	valid's average_precision: 0.855321
[1180]	train's binary_logloss: 0.422579	train's average_precision: 0.859218	valid's binary_logloss: 0.42744	valid's average_precision: 0.855323
[1181]	train's binary_logloss: 0.422569	train's average_precision: 0.859225	valid's binary_logloss: 0.427431	valid's average_precision: 0.855332
[1182]	train's binary_logloss: 0.422555	train's average_precision: 0.859238	valid's binary_logloss: 0.427416	valid's average_precisi

[1233]	train's binary_logloss: 0.421884	train's average_precision: 0.859692	valid's binary_logloss: 0.426938	valid's average_precision: 0.855678
[1234]	train's binary_logloss: 0.421869	train's average_precision: 0.859706	valid's binary_logloss: 0.426925	valid's average_precision: 0.855692
[1235]	train's binary_logloss: 0.421844	train's average_precision: 0.85972	valid's binary_logloss: 0.426893	valid's average_precision: 0.85571
[1236]	train's binary_logloss: 0.421832	train's average_precision: 0.859729	valid's binary_logloss: 0.426888	valid's average_precision: 0.855713
[1237]	train's binary_logloss: 0.421807	train's average_precision: 0.85974	valid's binary_logloss: 0.426876	valid's average_precision: 0.855714
[1238]	train's binary_logloss: 0.421786	train's average_precision: 0.859754	valid's binary_logloss: 0.426855	valid's average_precision: 0.855722
[1239]	train's binary_logloss: 0.421765	train's average_precision: 0.85977	valid's binary_logloss: 0.426837	valid's average_precision

[1291]	train's binary_logloss: 0.420987	train's average_precision: 0.860315	valid's binary_logloss: 0.426265	valid's average_precision: 0.856081
[1292]	train's binary_logloss: 0.420978	train's average_precision: 0.860319	valid's binary_logloss: 0.426264	valid's average_precision: 0.856083
[1293]	train's binary_logloss: 0.420967	train's average_precision: 0.860329	valid's binary_logloss: 0.42625	valid's average_precision: 0.856094
[1294]	train's binary_logloss: 0.420943	train's average_precision: 0.860346	valid's binary_logloss: 0.426224	valid's average_precision: 0.85611
[1295]	train's binary_logloss: 0.420911	train's average_precision: 0.860368	valid's binary_logloss: 0.426193	valid's average_precision: 0.85613
[1296]	train's binary_logloss: 0.420897	train's average_precision: 0.86038	valid's binary_logloss: 0.426181	valid's average_precision: 0.856141
[1297]	train's binary_logloss: 0.420876	train's average_precision: 0.860392	valid's binary_logloss: 0.42616	valid's average_precision:

[1350]	train's binary_logloss: 0.42007	train's average_precision: 0.860938	valid's binary_logloss: 0.425557	valid's average_precision: 0.856546
[1351]	train's binary_logloss: 0.42006	train's average_precision: 0.860943	valid's binary_logloss: 0.425552	valid's average_precision: 0.856548
[1352]	train's binary_logloss: 0.420033	train's average_precision: 0.860964	valid's binary_logloss: 0.425519	valid's average_precision: 0.856569
[1353]	train's binary_logloss: 0.420007	train's average_precision: 0.86098	valid's binary_logloss: 0.425503	valid's average_precision: 0.856572
[1354]	train's binary_logloss: 0.419975	train's average_precision: 0.861006	valid's binary_logloss: 0.425471	valid's average_precision: 0.856595
[1355]	train's binary_logloss: 0.419961	train's average_precision: 0.861014	valid's binary_logloss: 0.425461	valid's average_precision: 0.856596
[1356]	train's binary_logloss: 0.419948	train's average_precision: 0.86102	valid's binary_logloss: 0.425453	valid's average_precision

[1409]	train's binary_logloss: 0.419267	train's average_precision: 0.861472	valid's binary_logloss: 0.424988	valid's average_precision: 0.856849
[1410]	train's binary_logloss: 0.419262	train's average_precision: 0.861475	valid's binary_logloss: 0.424984	valid's average_precision: 0.856852
[1411]	train's binary_logloss: 0.419253	train's average_precision: 0.861484	valid's binary_logloss: 0.424979	valid's average_precision: 0.856857
[1412]	train's binary_logloss: 0.41924	train's average_precision: 0.861494	valid's binary_logloss: 0.424977	valid's average_precision: 0.856857
[1413]	train's binary_logloss: 0.419229	train's average_precision: 0.8615	valid's binary_logloss: 0.424972	valid's average_precision: 0.856858
[1414]	train's binary_logloss: 0.419217	train's average_precision: 0.861504	valid's binary_logloss: 0.424963	valid's average_precision: 0.856859
[1415]	train's binary_logloss: 0.41921	train's average_precision: 0.861509	valid's binary_logloss: 0.424964	valid's average_precision

[1466]	train's binary_logloss: 0.418527	train's average_precision: 0.861971	valid's binary_logloss: 0.424508	valid's average_precision: 0.857114
[1467]	train's binary_logloss: 0.418509	train's average_precision: 0.86198	valid's binary_logloss: 0.424482	valid's average_precision: 0.857131
[1468]	train's binary_logloss: 0.4185	train's average_precision: 0.861982	valid's binary_logloss: 0.424475	valid's average_precision: 0.857133
[1469]	train's binary_logloss: 0.418491	train's average_precision: 0.861989	valid's binary_logloss: 0.424471	valid's average_precision: 0.857137
[1470]	train's binary_logloss: 0.418473	train's average_precision: 0.862001	valid's binary_logloss: 0.42446	valid's average_precision: 0.85714
[1471]	train's binary_logloss: 0.418452	train's average_precision: 0.862014	valid's binary_logloss: 0.424436	valid's average_precision: 0.857152
[1472]	train's binary_logloss: 0.418439	train's average_precision: 0.862026	valid's binary_logloss: 0.424428	valid's average_precision:

[1522]	train's binary_logloss: 0.417829	train's average_precision: 0.862431	valid's binary_logloss: 0.424026	valid's average_precision: 0.857413
[1523]	train's binary_logloss: 0.417802	train's average_precision: 0.862443	valid's binary_logloss: 0.424008	valid's average_precision: 0.85742
[1524]	train's binary_logloss: 0.417789	train's average_precision: 0.862449	valid's binary_logloss: 0.423992	valid's average_precision: 0.85743
[1525]	train's binary_logloss: 0.417769	train's average_precision: 0.862458	valid's binary_logloss: 0.42397	valid's average_precision: 0.857439
[1526]	train's binary_logloss: 0.417757	train's average_precision: 0.862466	valid's binary_logloss: 0.423965	valid's average_precision: 0.857441
[1527]	train's binary_logloss: 0.417729	train's average_precision: 0.862489	valid's binary_logloss: 0.423945	valid's average_precision: 0.857454
[1528]	train's binary_logloss: 0.417711	train's average_precision: 0.862503	valid's binary_logloss: 0.423927	valid's average_precisio

[1579]	train's binary_logloss: 0.41701	train's average_precision: 0.862976	valid's binary_logloss: 0.423417	valid's average_precision: 0.857802
[1580]	train's binary_logloss: 0.416998	train's average_precision: 0.862984	valid's binary_logloss: 0.42342	valid's average_precision: 0.857798
[1581]	train's binary_logloss: 0.416967	train's average_precision: 0.863004	valid's binary_logloss: 0.423391	valid's average_precision: 0.857811
[1582]	train's binary_logloss: 0.416956	train's average_precision: 0.863009	valid's binary_logloss: 0.423384	valid's average_precision: 0.857814
[1583]	train's binary_logloss: 0.416932	train's average_precision: 0.863028	valid's binary_logloss: 0.423362	valid's average_precision: 0.857829
[1584]	train's binary_logloss: 0.416922	train's average_precision: 0.863033	valid's binary_logloss: 0.423358	valid's average_precision: 0.857828
[1585]	train's binary_logloss: 0.416909	train's average_precision: 0.863045	valid's binary_logloss: 0.423354	valid's average_precisi

[1637]	train's binary_logloss: 0.416285	train's average_precision: 0.863479	valid's binary_logloss: 0.422953	valid's average_precision: 0.858088
[1638]	train's binary_logloss: 0.416277	train's average_precision: 0.863485	valid's binary_logloss: 0.422945	valid's average_precision: 0.858092
[1639]	train's binary_logloss: 0.416268	train's average_precision: 0.863489	valid's binary_logloss: 0.42294	valid's average_precision: 0.858095
[1640]	train's binary_logloss: 0.41626	train's average_precision: 0.863492	valid's binary_logloss: 0.422931	valid's average_precision: 0.858098
[1641]	train's binary_logloss: 0.416255	train's average_precision: 0.863497	valid's binary_logloss: 0.422928	valid's average_precision: 0.858098
[1642]	train's binary_logloss: 0.416246	train's average_precision: 0.8635	valid's binary_logloss: 0.422927	valid's average_precision: 0.858096
[1643]	train's binary_logloss: 0.416226	train's average_precision: 0.863517	valid's binary_logloss: 0.422921	valid's average_precision

[1698]	train's binary_logloss: 0.415546	train's average_precision: 0.86399	valid's binary_logloss: 0.422426	valid's average_precision: 0.858434
[1699]	train's binary_logloss: 0.415538	train's average_precision: 0.863995	valid's binary_logloss: 0.422418	valid's average_precision: 0.858438
[1700]	train's binary_logloss: 0.41553	train's average_precision: 0.863998	valid's binary_logloss: 0.42242	valid's average_precision: 0.858438
[1701]	train's binary_logloss: 0.415518	train's average_precision: 0.864008	valid's binary_logloss: 0.422418	valid's average_precision: 0.858443
[1702]	train's binary_logloss: 0.415507	train's average_precision: 0.864015	valid's binary_logloss: 0.422412	valid's average_precision: 0.858449
[1703]	train's binary_logloss: 0.415488	train's average_precision: 0.864024	valid's binary_logloss: 0.422402	valid's average_precision: 0.858445
[1704]	train's binary_logloss: 0.415475	train's average_precision: 0.864033	valid's binary_logloss: 0.422397	valid's average_precisio

[1756]	train's binary_logloss: 0.414921	train's average_precision: 0.864398	valid's binary_logloss: 0.42205	valid's average_precision: 0.858631
[1757]	train's binary_logloss: 0.414908	train's average_precision: 0.864406	valid's binary_logloss: 0.422033	valid's average_precision: 0.858644
[1758]	train's binary_logloss: 0.414894	train's average_precision: 0.864418	valid's binary_logloss: 0.422024	valid's average_precision: 0.85865
[1759]	train's binary_logloss: 0.41488	train's average_precision: 0.864434	valid's binary_logloss: 0.422012	valid's average_precision: 0.85867
[1760]	train's binary_logloss: 0.414858	train's average_precision: 0.864449	valid's binary_logloss: 0.421989	valid's average_precision: 0.858686
[1761]	train's binary_logloss: 0.414843	train's average_precision: 0.864463	valid's binary_logloss: 0.421979	valid's average_precision: 0.858698
[1762]	train's binary_logloss: 0.414832	train's average_precision: 0.864469	valid's binary_logloss: 0.421973	valid's average_precision

[1815]	train's binary_logloss: 0.414188	train's average_precision: 0.864931	valid's binary_logloss: 0.421544	valid's average_precision: 0.858984
[1816]	train's binary_logloss: 0.414174	train's average_precision: 0.864939	valid's binary_logloss: 0.421531	valid's average_precision: 0.858989
[1817]	train's binary_logloss: 0.414161	train's average_precision: 0.864951	valid's binary_logloss: 0.421522	valid's average_precision: 0.858992
[1818]	train's binary_logloss: 0.414153	train's average_precision: 0.864955	valid's binary_logloss: 0.421518	valid's average_precision: 0.858993
[1819]	train's binary_logloss: 0.414136	train's average_precision: 0.864966	valid's binary_logloss: 0.421504	valid's average_precision: 0.859
[1820]	train's binary_logloss: 0.414122	train's average_precision: 0.864976	valid's binary_logloss: 0.4215	valid's average_precision: 0.859
[1821]	train's binary_logloss: 0.414114	train's average_precision: 0.864982	valid's binary_logloss: 0.421495	valid's average_precision: 0.

[1874]	train's binary_logloss: 0.413581	train's average_precision: 0.865334	valid's binary_logloss: 0.421217	valid's average_precision: 0.859164
[1875]	train's binary_logloss: 0.413571	train's average_precision: 0.865341	valid's binary_logloss: 0.421208	valid's average_precision: 0.859168
[1876]	train's binary_logloss: 0.413562	train's average_precision: 0.865345	valid's binary_logloss: 0.421201	valid's average_precision: 0.85917
[1877]	train's binary_logloss: 0.413557	train's average_precision: 0.865348	valid's binary_logloss: 0.421197	valid's average_precision: 0.859172
[1878]	train's binary_logloss: 0.413552	train's average_precision: 0.865351	valid's binary_logloss: 0.421197	valid's average_precision: 0.859172
[1879]	train's binary_logloss: 0.413545	train's average_precision: 0.865358	valid's binary_logloss: 0.421196	valid's average_precision: 0.859172
[1880]	train's binary_logloss: 0.413538	train's average_precision: 0.865364	valid's binary_logloss: 0.421188	valid's average_precis

[1932]	train's binary_logloss: 0.412989	train's average_precision: 0.865722	valid's binary_logloss: 0.420867	valid's average_precision: 0.859356
[1933]	train's binary_logloss: 0.412978	train's average_precision: 0.865729	valid's binary_logloss: 0.420863	valid's average_precision: 0.85936
[1934]	train's binary_logloss: 0.412971	train's average_precision: 0.865732	valid's binary_logloss: 0.420866	valid's average_precision: 0.85936
[1935]	train's binary_logloss: 0.412963	train's average_precision: 0.865736	valid's binary_logloss: 0.420864	valid's average_precision: 0.859359
[1936]	train's binary_logloss: 0.412954	train's average_precision: 0.865745	valid's binary_logloss: 0.420862	valid's average_precision: 0.859363
[1937]	train's binary_logloss: 0.412948	train's average_precision: 0.865748	valid's binary_logloss: 0.420862	valid's average_precision: 0.859362
[1938]	train's binary_logloss: 0.412941	train's average_precision: 0.865752	valid's binary_logloss: 0.420857	valid's average_precisi

[1989]	train's binary_logloss: 0.412359	train's average_precision: 0.866171	valid's binary_logloss: 0.420492	valid's average_precision: 0.859605
[1990]	train's binary_logloss: 0.412352	train's average_precision: 0.866175	valid's binary_logloss: 0.420485	valid's average_precision: 0.859606
[1991]	train's binary_logloss: 0.41234	train's average_precision: 0.866185	valid's binary_logloss: 0.420476	valid's average_precision: 0.859613
[1992]	train's binary_logloss: 0.412331	train's average_precision: 0.86619	valid's binary_logloss: 0.420471	valid's average_precision: 0.859618
[1993]	train's binary_logloss: 0.412321	train's average_precision: 0.866202	valid's binary_logloss: 0.420462	valid's average_precision: 0.859633
[1994]	train's binary_logloss: 0.412314	train's average_precision: 0.866207	valid's binary_logloss: 0.420454	valid's average_precision: 0.859637
[1995]	train's binary_logloss: 0.412299	train's average_precision: 0.866215	valid's binary_logloss: 0.420441	valid's average_precisi

[2048]	train's binary_logloss: 0.411783	train's average_precision: 0.866562	valid's binary_logloss: 0.420154	valid's average_precision: 0.859811
[2049]	train's binary_logloss: 0.411776	train's average_precision: 0.866566	valid's binary_logloss: 0.42015	valid's average_precision: 0.859811
[2050]	train's binary_logloss: 0.411768	train's average_precision: 0.866572	valid's binary_logloss: 0.420151	valid's average_precision: 0.859813
[2051]	train's binary_logloss: 0.411762	train's average_precision: 0.866577	valid's binary_logloss: 0.420149	valid's average_precision: 0.859812
[2052]	train's binary_logloss: 0.411752	train's average_precision: 0.866585	valid's binary_logloss: 0.420139	valid's average_precision: 0.859818
[2053]	train's binary_logloss: 0.411732	train's average_precision: 0.866599	valid's binary_logloss: 0.420121	valid's average_precision: 0.859829
[2054]	train's binary_logloss: 0.411713	train's average_precision: 0.866611	valid's binary_logloss: 0.420101	valid's average_precis

[2106]	train's binary_logloss: 0.411222	train's average_precision: 0.866952	valid's binary_logloss: 0.419839	valid's average_precision: 0.859972
[2107]	train's binary_logloss: 0.411216	train's average_precision: 0.866955	valid's binary_logloss: 0.419839	valid's average_precision: 0.859971
[2108]	train's binary_logloss: 0.411208	train's average_precision: 0.86696	valid's binary_logloss: 0.419842	valid's average_precision: 0.859966
[2109]	train's binary_logloss: 0.411201	train's average_precision: 0.866965	valid's binary_logloss: 0.41984	valid's average_precision: 0.859969
[2110]	train's binary_logloss: 0.411197	train's average_precision: 0.866968	valid's binary_logloss: 0.419836	valid's average_precision: 0.859972
[2111]	train's binary_logloss: 0.411191	train's average_precision: 0.866971	valid's binary_logloss: 0.419834	valid's average_precision: 0.85997
[2112]	train's binary_logloss: 0.411176	train's average_precision: 0.866985	valid's binary_logloss: 0.419822	valid's average_precisio

[2163]	train's binary_logloss: 0.410663	train's average_precision: 0.867348	valid's binary_logloss: 0.419521	valid's average_precision: 0.860139
[2164]	train's binary_logloss: 0.410656	train's average_precision: 0.867354	valid's binary_logloss: 0.419521	valid's average_precision: 0.860137
[2165]	train's binary_logloss: 0.410651	train's average_precision: 0.867358	valid's binary_logloss: 0.419519	valid's average_precision: 0.860138
[2166]	train's binary_logloss: 0.410646	train's average_precision: 0.867361	valid's binary_logloss: 0.419517	valid's average_precision: 0.860138
[2167]	train's binary_logloss: 0.410631	train's average_precision: 0.867374	valid's binary_logloss: 0.419507	valid's average_precision: 0.860148
[2168]	train's binary_logloss: 0.410624	train's average_precision: 0.867379	valid's binary_logloss: 0.4195	valid's average_precision: 0.860154
[2169]	train's binary_logloss: 0.41061	train's average_precision: 0.867389	valid's binary_logloss: 0.419486	valid's average_precisio

[2221]	train's binary_logloss: 0.410132	train's average_precision: 0.867692	valid's binary_logloss: 0.419256	valid's average_precision: 0.86028
[2222]	train's binary_logloss: 0.41012	train's average_precision: 0.867703	valid's binary_logloss: 0.419243	valid's average_precision: 0.860289
[2223]	train's binary_logloss: 0.410111	train's average_precision: 0.867712	valid's binary_logloss: 0.419244	valid's average_precision: 0.860286
[2224]	train's binary_logloss: 0.410103	train's average_precision: 0.867719	valid's binary_logloss: 0.419238	valid's average_precision: 0.860292
[2225]	train's binary_logloss: 0.410092	train's average_precision: 0.867725	valid's binary_logloss: 0.419228	valid's average_precision: 0.8603
[2226]	train's binary_logloss: 0.410085	train's average_precision: 0.867729	valid's binary_logloss: 0.419219	valid's average_precision: 0.860304
[2227]	train's binary_logloss: 0.410073	train's average_precision: 0.867736	valid's binary_logloss: 0.419209	valid's average_precision

[2279]	train's binary_logloss: 0.409616	train's average_precision: 0.868071	valid's binary_logloss: 0.41898	valid's average_precision: 0.860452
[2280]	train's binary_logloss: 0.409601	train's average_precision: 0.868077	valid's binary_logloss: 0.418974	valid's average_precision: 0.860451
[2281]	train's binary_logloss: 0.409592	train's average_precision: 0.868082	valid's binary_logloss: 0.418964	valid's average_precision: 0.860452
[2282]	train's binary_logloss: 0.409582	train's average_precision: 0.868091	valid's binary_logloss: 0.418953	valid's average_precision: 0.860457
[2283]	train's binary_logloss: 0.409575	train's average_precision: 0.868096	valid's binary_logloss: 0.418956	valid's average_precision: 0.860453
[2284]	train's binary_logloss: 0.409569	train's average_precision: 0.8681	valid's binary_logloss: 0.418956	valid's average_precision: 0.860452
[2285]	train's binary_logloss: 0.409559	train's average_precision: 0.868108	valid's binary_logloss: 0.418945	valid's average_precisio

[2336]	train's binary_logloss: 0.409117	train's average_precision: 0.868419	valid's binary_logloss: 0.418675	valid's average_precision: 0.860618
[2337]	train's binary_logloss: 0.40911	train's average_precision: 0.868424	valid's binary_logloss: 0.418668	valid's average_precision: 0.860621
[2338]	train's binary_logloss: 0.409108	train's average_precision: 0.868425	valid's binary_logloss: 0.418663	valid's average_precision: 0.860624
[2339]	train's binary_logloss: 0.409101	train's average_precision: 0.86843	valid's binary_logloss: 0.418662	valid's average_precision: 0.860622
[2340]	train's binary_logloss: 0.409089	train's average_precision: 0.868436	valid's binary_logloss: 0.418645	valid's average_precision: 0.860631
[2341]	train's binary_logloss: 0.409079	train's average_precision: 0.868441	valid's binary_logloss: 0.41864	valid's average_precision: 0.860634
[2342]	train's binary_logloss: 0.409073	train's average_precision: 0.868446	valid's binary_logloss: 0.418637	valid's average_precisio

[2393]	train's binary_logloss: 0.408651	train's average_precision: 0.868737	valid's binary_logloss: 0.418434	valid's average_precision: 0.86075
[2394]	train's binary_logloss: 0.408642	train's average_precision: 0.868744	valid's binary_logloss: 0.418424	valid's average_precision: 0.860758
[2395]	train's binary_logloss: 0.408632	train's average_precision: 0.868751	valid's binary_logloss: 0.41842	valid's average_precision: 0.860759
[2396]	train's binary_logloss: 0.408627	train's average_precision: 0.868756	valid's binary_logloss: 0.418421	valid's average_precision: 0.86076
[2397]	train's binary_logloss: 0.408618	train's average_precision: 0.868761	valid's binary_logloss: 0.418409	valid's average_precision: 0.860764
[2398]	train's binary_logloss: 0.408612	train's average_precision: 0.868765	valid's binary_logloss: 0.418405	valid's average_precision: 0.860764
[2399]	train's binary_logloss: 0.408598	train's average_precision: 0.868774	valid's binary_logloss: 0.418399	valid's average_precisio

[2450]	train's binary_logloss: 0.40817	train's average_precision: 0.869065	valid's binary_logloss: 0.418149	valid's average_precision: 0.86093
[2451]	train's binary_logloss: 0.408158	train's average_precision: 0.869072	valid's binary_logloss: 0.418145	valid's average_precision: 0.860933
[2452]	train's binary_logloss: 0.408149	train's average_precision: 0.869081	valid's binary_logloss: 0.418137	valid's average_precision: 0.860938
[2453]	train's binary_logloss: 0.408143	train's average_precision: 0.869085	valid's binary_logloss: 0.418134	valid's average_precision: 0.860941
[2454]	train's binary_logloss: 0.408134	train's average_precision: 0.869093	valid's binary_logloss: 0.418129	valid's average_precision: 0.860946
[2455]	train's binary_logloss: 0.408124	train's average_precision: 0.869102	valid's binary_logloss: 0.418118	valid's average_precision: 0.860952
[2456]	train's binary_logloss: 0.408118	train's average_precision: 0.869107	valid's binary_logloss: 0.418119	valid's average_precisi

[2508]	train's binary_logloss: 0.407678	train's average_precision: 0.869417	valid's binary_logloss: 0.417904	valid's average_precision: 0.861062
[2509]	train's binary_logloss: 0.407666	train's average_precision: 0.869428	valid's binary_logloss: 0.417905	valid's average_precision: 0.861063
[2510]	train's binary_logloss: 0.407654	train's average_precision: 0.869437	valid's binary_logloss: 0.417906	valid's average_precision: 0.861059
[2511]	train's binary_logloss: 0.40764	train's average_precision: 0.869448	valid's binary_logloss: 0.417902	valid's average_precision: 0.861063
[2512]	train's binary_logloss: 0.407631	train's average_precision: 0.869454	valid's binary_logloss: 0.417901	valid's average_precision: 0.86106
[2513]	train's binary_logloss: 0.407625	train's average_precision: 0.869458	valid's binary_logloss: 0.417904	valid's average_precision: 0.861055
[2514]	train's binary_logloss: 0.407618	train's average_precision: 0.869462	valid's binary_logloss: 0.417898	valid's average_precisi

[2565]	train's binary_logloss: 0.407201	train's average_precision: 0.869741	valid's binary_logloss: 0.417702	valid's average_precision: 0.861158
[2566]	train's binary_logloss: 0.407198	train's average_precision: 0.869742	valid's binary_logloss: 0.417701	valid's average_precision: 0.861158
[2567]	train's binary_logloss: 0.407191	train's average_precision: 0.869746	valid's binary_logloss: 0.417699	valid's average_precision: 0.86116
[2568]	train's binary_logloss: 0.40718	train's average_precision: 0.869754	valid's binary_logloss: 0.417692	valid's average_precision: 0.861165
[2569]	train's binary_logloss: 0.407171	train's average_precision: 0.869762	valid's binary_logloss: 0.417684	valid's average_precision: 0.861167
[2570]	train's binary_logloss: 0.407164	train's average_precision: 0.869766	valid's binary_logloss: 0.417678	valid's average_precision: 0.861172
[2571]	train's binary_logloss: 0.407157	train's average_precision: 0.869771	valid's binary_logloss: 0.41768	valid's average_precisio

[2621]	train's binary_logloss: 0.40673	train's average_precision: 0.870078	valid's binary_logloss: 0.417453	valid's average_precision: 0.861283
[2622]	train's binary_logloss: 0.406721	train's average_precision: 0.870083	valid's binary_logloss: 0.417445	valid's average_precision: 0.861288
[2623]	train's binary_logloss: 0.406706	train's average_precision: 0.870092	valid's binary_logloss: 0.41743	valid's average_precision: 0.861298
[2624]	train's binary_logloss: 0.4067	train's average_precision: 0.870097	valid's binary_logloss: 0.417433	valid's average_precision: 0.861295
[2625]	train's binary_logloss: 0.406692	train's average_precision: 0.870106	valid's binary_logloss: 0.417427	valid's average_precision: 0.861302
[2626]	train's binary_logloss: 0.406684	train's average_precision: 0.870114	valid's binary_logloss: 0.417423	valid's average_precision: 0.861302
[2627]	train's binary_logloss: 0.406679	train's average_precision: 0.870117	valid's binary_logloss: 0.417424	valid's average_precision

[2680]	train's binary_logloss: 0.406288	train's average_precision: 0.870401	valid's binary_logloss: 0.417208	valid's average_precision: 0.861436
[2681]	train's binary_logloss: 0.406279	train's average_precision: 0.870406	valid's binary_logloss: 0.417204	valid's average_precision: 0.86144
[2682]	train's binary_logloss: 0.40627	train's average_precision: 0.870414	valid's binary_logloss: 0.417199	valid's average_precision: 0.861443
[2683]	train's binary_logloss: 0.406255	train's average_precision: 0.870423	valid's binary_logloss: 0.417197	valid's average_precision: 0.861442
[2684]	train's binary_logloss: 0.406248	train's average_precision: 0.870429	valid's binary_logloss: 0.417193	valid's average_precision: 0.861445
[2685]	train's binary_logloss: 0.406244	train's average_precision: 0.870432	valid's binary_logloss: 0.417195	valid's average_precision: 0.861443
[2686]	train's binary_logloss: 0.406238	train's average_precision: 0.870435	valid's binary_logloss: 0.417195	valid's average_precisi

[2737]	train's binary_logloss: 0.405817	train's average_precision: 0.870728	valid's binary_logloss: 0.417006	valid's average_precision: 0.861572
[2738]	train's binary_logloss: 0.40581	train's average_precision: 0.870731	valid's binary_logloss: 0.417005	valid's average_precision: 0.861575
[2739]	train's binary_logloss: 0.405797	train's average_precision: 0.870737	valid's binary_logloss: 0.416991	valid's average_precision: 0.861582
[2740]	train's binary_logloss: 0.40579	train's average_precision: 0.870743	valid's binary_logloss: 0.416989	valid's average_precision: 0.861584
[2741]	train's binary_logloss: 0.40578	train's average_precision: 0.870752	valid's binary_logloss: 0.416981	valid's average_precision: 0.861589
[2742]	train's binary_logloss: 0.405769	train's average_precision: 0.870759	valid's binary_logloss: 0.416975	valid's average_precision: 0.861594
[2743]	train's binary_logloss: 0.405767	train's average_precision: 0.870761	valid's binary_logloss: 0.416974	valid's average_precisio

[2794]	train's binary_logloss: 0.405317	train's average_precision: 0.871072	valid's binary_logloss: 0.416725	valid's average_precision: 0.861738
[2795]	train's binary_logloss: 0.405303	train's average_precision: 0.871083	valid's binary_logloss: 0.416717	valid's average_precision: 0.861742
[2796]	train's binary_logloss: 0.405298	train's average_precision: 0.87109	valid's binary_logloss: 0.416714	valid's average_precision: 0.861747
[2797]	train's binary_logloss: 0.405289	train's average_precision: 0.871097	valid's binary_logloss: 0.416705	valid's average_precision: 0.861751
[2798]	train's binary_logloss: 0.405278	train's average_precision: 0.871106	valid's binary_logloss: 0.4167	valid's average_precision: 0.861754
[2799]	train's binary_logloss: 0.405272	train's average_precision: 0.871111	valid's binary_logloss: 0.4167	valid's average_precision: 0.861753
[2800]	train's binary_logloss: 0.405266	train's average_precision: 0.871114	valid's binary_logloss: 0.416694	valid's average_precision:

[2851]	train's binary_logloss: 0.404912	train's average_precision: 0.871368	valid's binary_logloss: 0.416537	valid's average_precision: 0.861825
[2852]	train's binary_logloss: 0.404902	train's average_precision: 0.871372	valid's binary_logloss: 0.416534	valid's average_precision: 0.861825
[2853]	train's binary_logloss: 0.404896	train's average_precision: 0.871375	valid's binary_logloss: 0.416529	valid's average_precision: 0.861826
[2854]	train's binary_logloss: 0.404884	train's average_precision: 0.871385	valid's binary_logloss: 0.416526	valid's average_precision: 0.861829
[2855]	train's binary_logloss: 0.404875	train's average_precision: 0.871391	valid's binary_logloss: 0.416521	valid's average_precision: 0.861832
[2856]	train's binary_logloss: 0.404873	train's average_precision: 0.871392	valid's binary_logloss: 0.416517	valid's average_precision: 0.861834
[2857]	train's binary_logloss: 0.404869	train's average_precision: 0.871394	valid's binary_logloss: 0.416517	valid's average_preci

[2907]	train's binary_logloss: 0.404472	train's average_precision: 0.871669	valid's binary_logloss: 0.416282	valid's average_precision: 0.861947
[2908]	train's binary_logloss: 0.404465	train's average_precision: 0.871673	valid's binary_logloss: 0.416275	valid's average_precision: 0.86195
[2909]	train's binary_logloss: 0.40446	train's average_precision: 0.871676	valid's binary_logloss: 0.416268	valid's average_precision: 0.861957
[2910]	train's binary_logloss: 0.404455	train's average_precision: 0.871679	valid's binary_logloss: 0.416262	valid's average_precision: 0.86196
[2911]	train's binary_logloss: 0.404446	train's average_precision: 0.871684	valid's binary_logloss: 0.416258	valid's average_precision: 0.861961
[2912]	train's binary_logloss: 0.404438	train's average_precision: 0.87169	valid's binary_logloss: 0.416251	valid's average_precision: 0.861969
[2913]	train's binary_logloss: 0.404431	train's average_precision: 0.871695	valid's binary_logloss: 0.416249	valid's average_precision

[2964]	train's binary_logloss: 0.404067	train's average_precision: 0.871961	valid's binary_logloss: 0.416092	valid's average_precision: 0.862073
[2965]	train's binary_logloss: 0.404058	train's average_precision: 0.871968	valid's binary_logloss: 0.41609	valid's average_precision: 0.862074
[2966]	train's binary_logloss: 0.404047	train's average_precision: 0.871978	valid's binary_logloss: 0.416081	valid's average_precision: 0.862079
[2967]	train's binary_logloss: 0.404038	train's average_precision: 0.871983	valid's binary_logloss: 0.416072	valid's average_precision: 0.862081
[2968]	train's binary_logloss: 0.404032	train's average_precision: 0.871992	valid's binary_logloss: 0.416067	valid's average_precision: 0.862085
[2969]	train's binary_logloss: 0.404024	train's average_precision: 0.871996	valid's binary_logloss: 0.416061	valid's average_precision: 0.862088
[2970]	train's binary_logloss: 0.404017	train's average_precision: 0.872002	valid's binary_logloss: 0.416059	valid's average_precis

[3025]	train's binary_logloss: 0.403621	train's average_precision: 0.872272	valid's binary_logloss: 0.415849	valid's average_precision: 0.862167
[3026]	train's binary_logloss: 0.403616	train's average_precision: 0.872274	valid's binary_logloss: 0.415849	valid's average_precision: 0.862166
[3027]	train's binary_logloss: 0.403612	train's average_precision: 0.872277	valid's binary_logloss: 0.415849	valid's average_precision: 0.862166
[3028]	train's binary_logloss: 0.403601	train's average_precision: 0.872284	valid's binary_logloss: 0.415838	valid's average_precision: 0.862169
[3029]	train's binary_logloss: 0.40359	train's average_precision: 0.872293	valid's binary_logloss: 0.415835	valid's average_precision: 0.862171
[3030]	train's binary_logloss: 0.40358	train's average_precision: 0.872299	valid's binary_logloss: 0.415826	valid's average_precision: 0.862172
[3031]	train's binary_logloss: 0.403573	train's average_precision: 0.872303	valid's binary_logloss: 0.415823	valid's average_precisi

[3083]	train's binary_logloss: 0.403211	train's average_precision: 0.872558	valid's binary_logloss: 0.415614	valid's average_precision: 0.86229
[3084]	train's binary_logloss: 0.403205	train's average_precision: 0.872562	valid's binary_logloss: 0.41561	valid's average_precision: 0.862292
[3085]	train's binary_logloss: 0.403198	train's average_precision: 0.872567	valid's binary_logloss: 0.415605	valid's average_precision: 0.862294
[3086]	train's binary_logloss: 0.403193	train's average_precision: 0.872571	valid's binary_logloss: 0.415607	valid's average_precision: 0.862291
[3087]	train's binary_logloss: 0.403186	train's average_precision: 0.872576	valid's binary_logloss: 0.415603	valid's average_precision: 0.862293
[3088]	train's binary_logloss: 0.403178	train's average_precision: 0.872583	valid's binary_logloss: 0.415597	valid's average_precision: 0.862298
[3089]	train's binary_logloss: 0.403173	train's average_precision: 0.872587	valid's binary_logloss: 0.415596	valid's average_precisi

[3140]	train's binary_logloss: 0.40279	train's average_precision: 0.872837	valid's binary_logloss: 0.415408	valid's average_precision: 0.862405
[3141]	train's binary_logloss: 0.402785	train's average_precision: 0.872841	valid's binary_logloss: 0.415398	valid's average_precision: 0.862416
[3142]	train's binary_logloss: 0.402779	train's average_precision: 0.872845	valid's binary_logloss: 0.415396	valid's average_precision: 0.862418
[3143]	train's binary_logloss: 0.402773	train's average_precision: 0.872849	valid's binary_logloss: 0.415397	valid's average_precision: 0.862416
[3144]	train's binary_logloss: 0.402769	train's average_precision: 0.872852	valid's binary_logloss: 0.415397	valid's average_precision: 0.862413
[3145]	train's binary_logloss: 0.402763	train's average_precision: 0.872856	valid's binary_logloss: 0.415392	valid's average_precision: 0.862415
[3146]	train's binary_logloss: 0.402756	train's average_precision: 0.872861	valid's binary_logloss: 0.415391	valid's average_precis

[3198]	train's binary_logloss: 0.402407	train's average_precision: 0.873112	valid's binary_logloss: 0.415231	valid's average_precision: 0.862503
[3199]	train's binary_logloss: 0.402398	train's average_precision: 0.873117	valid's binary_logloss: 0.415225	valid's average_precision: 0.862505
[3200]	train's binary_logloss: 0.402391	train's average_precision: 0.873121	valid's binary_logloss: 0.415218	valid's average_precision: 0.862509
[3201]	train's binary_logloss: 0.402384	train's average_precision: 0.873125	valid's binary_logloss: 0.41522	valid's average_precision: 0.862507
[3202]	train's binary_logloss: 0.402378	train's average_precision: 0.87313	valid's binary_logloss: 0.415216	valid's average_precision: 0.862512
[3203]	train's binary_logloss: 0.402373	train's average_precision: 0.873133	valid's binary_logloss: 0.415219	valid's average_precision: 0.862511
[3204]	train's binary_logloss: 0.402366	train's average_precision: 0.873138	valid's binary_logloss: 0.415218	valid's average_precisi

[3255]	train's binary_logloss: 0.402027	train's average_precision: 0.87337	valid's binary_logloss: 0.415082	valid's average_precision: 0.862566
[3256]	train's binary_logloss: 0.402019	train's average_precision: 0.873375	valid's binary_logloss: 0.415077	valid's average_precision: 0.862566
[3257]	train's binary_logloss: 0.402013	train's average_precision: 0.873379	valid's binary_logloss: 0.415075	valid's average_precision: 0.862568
[3258]	train's binary_logloss: 0.402005	train's average_precision: 0.873385	valid's binary_logloss: 0.415063	valid's average_precision: 0.862576
[3259]	train's binary_logloss: 0.401996	train's average_precision: 0.87339	valid's binary_logloss: 0.415064	valid's average_precision: 0.862571
[3260]	train's binary_logloss: 0.401989	train's average_precision: 0.873395	valid's binary_logloss: 0.415061	valid's average_precision: 0.862574
[3261]	train's binary_logloss: 0.401981	train's average_precision: 0.873402	valid's binary_logloss: 0.41506	valid's average_precisio

[3313]	train's binary_logloss: 0.401646	train's average_precision: 0.873641	valid's binary_logloss: 0.414917	valid's average_precision: 0.862632
[3314]	train's binary_logloss: 0.401638	train's average_precision: 0.873645	valid's binary_logloss: 0.414915	valid's average_precision: 0.862632
[3315]	train's binary_logloss: 0.401633	train's average_precision: 0.87365	valid's binary_logloss: 0.414913	valid's average_precision: 0.862633
[3316]	train's binary_logloss: 0.401628	train's average_precision: 0.873653	valid's binary_logloss: 0.414913	valid's average_precision: 0.86263
[3317]	train's binary_logloss: 0.40162	train's average_precision: 0.873657	valid's binary_logloss: 0.414907	valid's average_precision: 0.862635
[3318]	train's binary_logloss: 0.401615	train's average_precision: 0.87366	valid's binary_logloss: 0.414906	valid's average_precision: 0.862633
[3319]	train's binary_logloss: 0.401606	train's average_precision: 0.873666	valid's binary_logloss: 0.414898	valid's average_precision

[3372]	train's binary_logloss: 0.401256	train's average_precision: 0.873912	valid's binary_logloss: 0.414751	valid's average_precision: 0.862727
[3373]	train's binary_logloss: 0.40125	train's average_precision: 0.873915	valid's binary_logloss: 0.414746	valid's average_precision: 0.86273
[3374]	train's binary_logloss: 0.401238	train's average_precision: 0.873922	valid's binary_logloss: 0.414737	valid's average_precision: 0.862735
[3375]	train's binary_logloss: 0.40123	train's average_precision: 0.873926	valid's binary_logloss: 0.414738	valid's average_precision: 0.86273
[3376]	train's binary_logloss: 0.401223	train's average_precision: 0.873932	valid's binary_logloss: 0.414731	valid's average_precision: 0.862737
[3377]	train's binary_logloss: 0.401217	train's average_precision: 0.873936	valid's binary_logloss: 0.414729	valid's average_precision: 0.862739
[3378]	train's binary_logloss: 0.401208	train's average_precision: 0.873943	valid's binary_logloss: 0.414727	valid's average_precision

[3431]	train's binary_logloss: 0.400875	train's average_precision: 0.874173	valid's binary_logloss: 0.414601	valid's average_precision: 0.86278
[3432]	train's binary_logloss: 0.400869	train's average_precision: 0.874177	valid's binary_logloss: 0.414595	valid's average_precision: 0.862782
[3433]	train's binary_logloss: 0.400864	train's average_precision: 0.874179	valid's binary_logloss: 0.414595	valid's average_precision: 0.862781
[3434]	train's binary_logloss: 0.40086	train's average_precision: 0.874182	valid's binary_logloss: 0.414594	valid's average_precision: 0.862782
[3435]	train's binary_logloss: 0.400855	train's average_precision: 0.874185	valid's binary_logloss: 0.414595	valid's average_precision: 0.862781
[3436]	train's binary_logloss: 0.400846	train's average_precision: 0.874188	valid's binary_logloss: 0.414587	valid's average_precision: 0.862786
[3437]	train's binary_logloss: 0.40084	train's average_precision: 0.874192	valid's binary_logloss: 0.414581	valid's average_precisio

[3488]	train's binary_logloss: 0.400488	train's average_precision: 0.874429	valid's binary_logloss: 0.414402	valid's average_precision: 0.862912
[3489]	train's binary_logloss: 0.400485	train's average_precision: 0.874432	valid's binary_logloss: 0.414402	valid's average_precision: 0.862911
[3490]	train's binary_logloss: 0.400479	train's average_precision: 0.874435	valid's binary_logloss: 0.414403	valid's average_precision: 0.862912
[3491]	train's binary_logloss: 0.400473	train's average_precision: 0.87444	valid's binary_logloss: 0.4144	valid's average_precision: 0.862916
[3492]	train's binary_logloss: 0.400466	train's average_precision: 0.874444	valid's binary_logloss: 0.414401	valid's average_precision: 0.862913
[3493]	train's binary_logloss: 0.400458	train's average_precision: 0.87445	valid's binary_logloss: 0.414396	valid's average_precision: 0.862916
[3494]	train's binary_logloss: 0.400453	train's average_precision: 0.874453	valid's binary_logloss: 0.414395	valid's average_precision

[3545]	train's binary_logloss: 0.400116	train's average_precision: 0.874675	valid's binary_logloss: 0.414255	valid's average_precision: 0.862972
[3546]	train's binary_logloss: 0.400112	train's average_precision: 0.874678	valid's binary_logloss: 0.414257	valid's average_precision: 0.86297
[3547]	train's binary_logloss: 0.400102	train's average_precision: 0.874686	valid's binary_logloss: 0.414252	valid's average_precision: 0.862968
[3548]	train's binary_logloss: 0.400099	train's average_precision: 0.874688	valid's binary_logloss: 0.414252	valid's average_precision: 0.862969
[3549]	train's binary_logloss: 0.400095	train's average_precision: 0.874693	valid's binary_logloss: 0.41425	valid's average_precision: 0.862971
[3550]	train's binary_logloss: 0.400088	train's average_precision: 0.874697	valid's binary_logloss: 0.414247	valid's average_precision: 0.862972
[3551]	train's binary_logloss: 0.400083	train's average_precision: 0.874702	valid's binary_logloss: 0.414244	valid's average_precisi

[3602]	train's binary_logloss: 0.399731	train's average_precision: 0.87494	valid's binary_logloss: 0.414092	valid's average_precision: 0.863059
[3603]	train's binary_logloss: 0.399728	train's average_precision: 0.874943	valid's binary_logloss: 0.414088	valid's average_precision: 0.863061
[3604]	train's binary_logloss: 0.399724	train's average_precision: 0.874944	valid's binary_logloss: 0.414092	valid's average_precision: 0.863061
[3605]	train's binary_logloss: 0.399715	train's average_precision: 0.874951	valid's binary_logloss: 0.414089	valid's average_precision: 0.863061
[3606]	train's binary_logloss: 0.399708	train's average_precision: 0.874955	valid's binary_logloss: 0.414085	valid's average_precision: 0.863061
[3607]	train's binary_logloss: 0.399702	train's average_precision: 0.874958	valid's binary_logloss: 0.414086	valid's average_precision: 0.863059
[3608]	train's binary_logloss: 0.399698	train's average_precision: 0.874961	valid's binary_logloss: 0.414086	valid's average_precis

[3661]	train's binary_logloss: 0.399383	train's average_precision: 0.875162	valid's binary_logloss: 0.41394	valid's average_precision: 0.863148
[3662]	train's binary_logloss: 0.399376	train's average_precision: 0.875169	valid's binary_logloss: 0.413934	valid's average_precision: 0.863151
[3663]	train's binary_logloss: 0.399368	train's average_precision: 0.875175	valid's binary_logloss: 0.413934	valid's average_precision: 0.863151
[3664]	train's binary_logloss: 0.39936	train's average_precision: 0.875183	valid's binary_logloss: 0.413932	valid's average_precision: 0.863153
[3665]	train's binary_logloss: 0.399357	train's average_precision: 0.875185	valid's binary_logloss: 0.413931	valid's average_precision: 0.863153
[3666]	train's binary_logloss: 0.399352	train's average_precision: 0.875188	valid's binary_logloss: 0.413933	valid's average_precision: 0.863152
[3667]	train's binary_logloss: 0.399344	train's average_precision: 0.875196	valid's binary_logloss: 0.413929	valid's average_precisi

[3719]	train's binary_logloss: 0.399047	train's average_precision: 0.875397	valid's binary_logloss: 0.413781	valid's average_precision: 0.863242
[3720]	train's binary_logloss: 0.39904	train's average_precision: 0.875403	valid's binary_logloss: 0.413778	valid's average_precision: 0.863244
[3721]	train's binary_logloss: 0.399035	train's average_precision: 0.875406	valid's binary_logloss: 0.41378	valid's average_precision: 0.863241
[3722]	train's binary_logloss: 0.39903	train's average_precision: 0.87541	valid's binary_logloss: 0.413781	valid's average_precision: 0.863239
[3723]	train's binary_logloss: 0.399023	train's average_precision: 0.875416	valid's binary_logloss: 0.413773	valid's average_precision: 0.863245
[3724]	train's binary_logloss: 0.399019	train's average_precision: 0.875421	valid's binary_logloss: 0.413771	valid's average_precision: 0.863246
[3725]	train's binary_logloss: 0.399017	train's average_precision: 0.875422	valid's binary_logloss: 0.413771	valid's average_precision

[3776]	train's binary_logloss: 0.39869	train's average_precision: 0.875649	valid's binary_logloss: 0.413571	valid's average_precision: 0.863356
[3777]	train's binary_logloss: 0.398688	train's average_precision: 0.875651	valid's binary_logloss: 0.41357	valid's average_precision: 0.863357
[3778]	train's binary_logloss: 0.398683	train's average_precision: 0.875653	valid's binary_logloss: 0.41357	valid's average_precision: 0.863356
[3779]	train's binary_logloss: 0.398678	train's average_precision: 0.875657	valid's binary_logloss: 0.413565	valid's average_precision: 0.863357
[3780]	train's binary_logloss: 0.398673	train's average_precision: 0.875661	valid's binary_logloss: 0.413565	valid's average_precision: 0.863357
[3781]	train's binary_logloss: 0.398669	train's average_precision: 0.875663	valid's binary_logloss: 0.413566	valid's average_precision: 0.863356
[3782]	train's binary_logloss: 0.398668	train's average_precision: 0.875664	valid's binary_logloss: 0.413566	valid's average_precisio

[3836]	train's binary_logloss: 0.398329	train's average_precision: 0.875896	valid's binary_logloss: 0.413449	valid's average_precision: 0.863424
[3837]	train's binary_logloss: 0.398322	train's average_precision: 0.875901	valid's binary_logloss: 0.413445	valid's average_precision: 0.863425
[3838]	train's binary_logloss: 0.398315	train's average_precision: 0.875907	valid's binary_logloss: 0.413444	valid's average_precision: 0.863421
[3839]	train's binary_logloss: 0.398309	train's average_precision: 0.875912	valid's binary_logloss: 0.413442	valid's average_precision: 0.863424
[3840]	train's binary_logloss: 0.398302	train's average_precision: 0.875915	valid's binary_logloss: 0.413444	valid's average_precision: 0.863424
[3841]	train's binary_logloss: 0.398296	train's average_precision: 0.875921	valid's binary_logloss: 0.413437	valid's average_precision: 0.863427
[3842]	train's binary_logloss: 0.398289	train's average_precision: 0.875926	valid's binary_logloss: 0.413437	valid's average_preci

[3894]	train's binary_logloss: 0.397967	train's average_precision: 0.876136	valid's binary_logloss: 0.413309	valid's average_precision: 0.863492
[3895]	train's binary_logloss: 0.39796	train's average_precision: 0.876139	valid's binary_logloss: 0.413305	valid's average_precision: 0.863493
[3896]	train's binary_logloss: 0.397956	train's average_precision: 0.876141	valid's binary_logloss: 0.413308	valid's average_precision: 0.863493
[3897]	train's binary_logloss: 0.397948	train's average_precision: 0.876146	valid's binary_logloss: 0.413303	valid's average_precision: 0.863494
[3898]	train's binary_logloss: 0.397942	train's average_precision: 0.876151	valid's binary_logloss: 0.413299	valid's average_precision: 0.863496
[3899]	train's binary_logloss: 0.397937	train's average_precision: 0.876156	valid's binary_logloss: 0.413301	valid's average_precision: 0.863493
[3900]	train's binary_logloss: 0.397932	train's average_precision: 0.876159	valid's binary_logloss: 0.413304	valid's average_precis

[3951]	train's binary_logloss: 0.397621	train's average_precision: 0.876374	valid's binary_logloss: 0.413189	valid's average_precision: 0.863555
[3952]	train's binary_logloss: 0.397616	train's average_precision: 0.876378	valid's binary_logloss: 0.413184	valid's average_precision: 0.863557
[3953]	train's binary_logloss: 0.397611	train's average_precision: 0.876382	valid's binary_logloss: 0.413183	valid's average_precision: 0.863558
[3954]	train's binary_logloss: 0.397607	train's average_precision: 0.876386	valid's binary_logloss: 0.413178	valid's average_precision: 0.86356
[3955]	train's binary_logloss: 0.397602	train's average_precision: 0.876389	valid's binary_logloss: 0.413178	valid's average_precision: 0.863561
[3956]	train's binary_logloss: 0.397596	train's average_precision: 0.876392	valid's binary_logloss: 0.413174	valid's average_precision: 0.863566
[3957]	train's binary_logloss: 0.397591	train's average_precision: 0.876396	valid's binary_logloss: 0.413172	valid's average_precis

[4008]	train's binary_logloss: 0.397307	train's average_precision: 0.876594	valid's binary_logloss: 0.413046	valid's average_precision: 0.863627
[4009]	train's binary_logloss: 0.397302	train's average_precision: 0.876596	valid's binary_logloss: 0.41304	valid's average_precision: 0.86363
[4010]	train's binary_logloss: 0.397297	train's average_precision: 0.8766	valid's binary_logloss: 0.413036	valid's average_precision: 0.863634
[4011]	train's binary_logloss: 0.397294	train's average_precision: 0.876602	valid's binary_logloss: 0.413035	valid's average_precision: 0.863636
[4012]	train's binary_logloss: 0.39729	train's average_precision: 0.876605	valid's binary_logloss: 0.413033	valid's average_precision: 0.86364
[4013]	train's binary_logloss: 0.397289	train's average_precision: 0.876606	valid's binary_logloss: 0.413034	valid's average_precision: 0.863639
[4014]	train's binary_logloss: 0.397282	train's average_precision: 0.876611	valid's binary_logloss: 0.413035	valid's average_precision: 

[4065]	train's binary_logloss: 0.396993	train's average_precision: 0.87681	valid's binary_logloss: 0.412902	valid's average_precision: 0.863707
[4066]	train's binary_logloss: 0.396988	train's average_precision: 0.876812	valid's binary_logloss: 0.412901	valid's average_precision: 0.863707
[4067]	train's binary_logloss: 0.396983	train's average_precision: 0.876815	valid's binary_logloss: 0.412903	valid's average_precision: 0.863706
[4068]	train's binary_logloss: 0.396978	train's average_precision: 0.876819	valid's binary_logloss: 0.412903	valid's average_precision: 0.863707
[4069]	train's binary_logloss: 0.396975	train's average_precision: 0.876822	valid's binary_logloss: 0.412902	valid's average_precision: 0.863709
[4070]	train's binary_logloss: 0.39697	train's average_precision: 0.876824	valid's binary_logloss: 0.4129	valid's average_precision: 0.863707
[4071]	train's binary_logloss: 0.396963	train's average_precision: 0.876829	valid's binary_logloss: 0.412892	valid's average_precision

[4122]	train's binary_logloss: 0.396666	train's average_precision: 0.877034	valid's binary_logloss: 0.412767	valid's average_precision: 0.863788
[4123]	train's binary_logloss: 0.396664	train's average_precision: 0.877035	valid's binary_logloss: 0.412769	valid's average_precision: 0.863785
[4124]	train's binary_logloss: 0.39666	train's average_precision: 0.877038	valid's binary_logloss: 0.412768	valid's average_precision: 0.863786
[4125]	train's binary_logloss: 0.396653	train's average_precision: 0.877043	valid's binary_logloss: 0.412762	valid's average_precision: 0.863793
[4126]	train's binary_logloss: 0.396647	train's average_precision: 0.877048	valid's binary_logloss: 0.412767	valid's average_precision: 0.863789
[4127]	train's binary_logloss: 0.39664	train's average_precision: 0.877052	valid's binary_logloss: 0.412767	valid's average_precision: 0.863789
[4128]	train's binary_logloss: 0.396633	train's average_precision: 0.877057	valid's binary_logloss: 0.412759	valid's average_precisi

[4181]	train's binary_logloss: 0.396315	train's average_precision: 0.87728	valid's binary_logloss: 0.412638	valid's average_precision: 0.863843
[4182]	train's binary_logloss: 0.396309	train's average_precision: 0.877284	valid's binary_logloss: 0.412637	valid's average_precision: 0.863843
[4183]	train's binary_logloss: 0.396302	train's average_precision: 0.877289	valid's binary_logloss: 0.412633	valid's average_precision: 0.863846
[4184]	train's binary_logloss: 0.396295	train's average_precision: 0.877296	valid's binary_logloss: 0.412628	valid's average_precision: 0.863849
[4185]	train's binary_logloss: 0.396292	train's average_precision: 0.877298	valid's binary_logloss: 0.412627	valid's average_precision: 0.863849
[4186]	train's binary_logloss: 0.396285	train's average_precision: 0.877301	valid's binary_logloss: 0.41262	valid's average_precision: 0.86385
[4187]	train's binary_logloss: 0.396278	train's average_precision: 0.877306	valid's binary_logloss: 0.412617	valid's average_precisio

[4238]	train's binary_logloss: 0.39598	train's average_precision: 0.877507	valid's binary_logloss: 0.412485	valid's average_precision: 0.863926
[4239]	train's binary_logloss: 0.395975	train's average_precision: 0.87751	valid's binary_logloss: 0.412486	valid's average_precision: 0.863926
[4240]	train's binary_logloss: 0.395969	train's average_precision: 0.877516	valid's binary_logloss: 0.412488	valid's average_precision: 0.863924
[4241]	train's binary_logloss: 0.395965	train's average_precision: 0.877517	valid's binary_logloss: 0.412487	valid's average_precision: 0.863925
[4242]	train's binary_logloss: 0.39596	train's average_precision: 0.87752	valid's binary_logloss: 0.412483	valid's average_precision: 0.863924
[4243]	train's binary_logloss: 0.395956	train's average_precision: 0.877524	valid's binary_logloss: 0.412486	valid's average_precision: 0.863922
[4244]	train's binary_logloss: 0.39595	train's average_precision: 0.87753	valid's binary_logloss: 0.412482	valid's average_precision: 

[4298]	train's binary_logloss: 0.395643	train's average_precision: 0.877743	valid's binary_logloss: 0.412389	valid's average_precision: 0.863973
[4299]	train's binary_logloss: 0.395637	train's average_precision: 0.877748	valid's binary_logloss: 0.412385	valid's average_precision: 0.863975
[4300]	train's binary_logloss: 0.395633	train's average_precision: 0.87775	valid's binary_logloss: 0.412383	valid's average_precision: 0.863977
[4301]	train's binary_logloss: 0.395628	train's average_precision: 0.877753	valid's binary_logloss: 0.412386	valid's average_precision: 0.863973
[4302]	train's binary_logloss: 0.39562	train's average_precision: 0.87776	valid's binary_logloss: 0.412386	valid's average_precision: 0.863973
[4303]	train's binary_logloss: 0.395616	train's average_precision: 0.877763	valid's binary_logloss: 0.412387	valid's average_precision: 0.863973
[4304]	train's binary_logloss: 0.395608	train's average_precision: 0.877769	valid's binary_logloss: 0.412387	valid's average_precisio

[4356]	train's binary_logloss: 0.395309	train's average_precision: 0.877985	valid's binary_logloss: 0.412277	valid's average_precision: 0.864021
[4357]	train's binary_logloss: 0.395304	train's average_precision: 0.877989	valid's binary_logloss: 0.412273	valid's average_precision: 0.864024
[4358]	train's binary_logloss: 0.395296	train's average_precision: 0.877993	valid's binary_logloss: 0.412271	valid's average_precision: 0.864024
[4359]	train's binary_logloss: 0.395291	train's average_precision: 0.877996	valid's binary_logloss: 0.412274	valid's average_precision: 0.864023
[4360]	train's binary_logloss: 0.395287	train's average_precision: 0.877998	valid's binary_logloss: 0.412275	valid's average_precision: 0.864021
[4361]	train's binary_logloss: 0.395281	train's average_precision: 0.878004	valid's binary_logloss: 0.412278	valid's average_precision: 0.864018
[4362]	train's binary_logloss: 0.395274	train's average_precision: 0.87801	valid's binary_logloss: 0.412276	valid's average_precis

[4413]	train's binary_logloss: 0.394999	train's average_precision: 0.878188	valid's binary_logloss: 0.412211	valid's average_precision: 0.864053
[4414]	train's binary_logloss: 0.394994	train's average_precision: 0.878191	valid's binary_logloss: 0.412208	valid's average_precision: 0.864055
[4415]	train's binary_logloss: 0.394989	train's average_precision: 0.878197	valid's binary_logloss: 0.412202	valid's average_precision: 0.864059
[4416]	train's binary_logloss: 0.394983	train's average_precision: 0.878201	valid's binary_logloss: 0.4122	valid's average_precision: 0.86406
[4417]	train's binary_logloss: 0.394977	train's average_precision: 0.878204	valid's binary_logloss: 0.4122	valid's average_precision: 0.864059
[4418]	train's binary_logloss: 0.394973	train's average_precision: 0.878207	valid's binary_logloss: 0.412202	valid's average_precision: 0.864059
[4419]	train's binary_logloss: 0.394966	train's average_precision: 0.878213	valid's binary_logloss: 0.412199	valid's average_precision:

[4470]	train's binary_logloss: 0.39469	train's average_precision: 0.878403	valid's binary_logloss: 0.412148	valid's average_precision: 0.864067
[4471]	train's binary_logloss: 0.394685	train's average_precision: 0.878406	valid's binary_logloss: 0.412148	valid's average_precision: 0.864066
[4472]	train's binary_logloss: 0.39468	train's average_precision: 0.878408	valid's binary_logloss: 0.412146	valid's average_precision: 0.864067
[4473]	train's binary_logloss: 0.394673	train's average_precision: 0.878414	valid's binary_logloss: 0.412144	valid's average_precision: 0.86407
[4474]	train's binary_logloss: 0.394667	train's average_precision: 0.878419	valid's binary_logloss: 0.412144	valid's average_precision: 0.864069
[4475]	train's binary_logloss: 0.394664	train's average_precision: 0.878421	valid's binary_logloss: 0.412145	valid's average_precision: 0.86407
[4476]	train's binary_logloss: 0.394659	train's average_precision: 0.878424	valid's binary_logloss: 0.412143	valid's average_precision

[4528]	train's binary_logloss: 0.39437	train's average_precision: 0.878629	valid's binary_logloss: 0.412056	valid's average_precision: 0.864105
[4529]	train's binary_logloss: 0.394364	train's average_precision: 0.878633	valid's binary_logloss: 0.412058	valid's average_precision: 0.864103
[4530]	train's binary_logloss: 0.394355	train's average_precision: 0.878639	valid's binary_logloss: 0.412052	valid's average_precision: 0.864107
[4531]	train's binary_logloss: 0.394349	train's average_precision: 0.878643	valid's binary_logloss: 0.412046	valid's average_precision: 0.86411
[4532]	train's binary_logloss: 0.394341	train's average_precision: 0.87865	valid's binary_logloss: 0.412044	valid's average_precision: 0.86411
[4533]	train's binary_logloss: 0.394335	train's average_precision: 0.878653	valid's binary_logloss: 0.412045	valid's average_precision: 0.864108
[4534]	train's binary_logloss: 0.39433	train's average_precision: 0.878657	valid's binary_logloss: 0.412046	valid's average_precision:

[4585]	train's binary_logloss: 0.39405	train's average_precision: 0.878843	valid's binary_logloss: 0.411949	valid's average_precision: 0.864146
[4586]	train's binary_logloss: 0.394045	train's average_precision: 0.878846	valid's binary_logloss: 0.411947	valid's average_precision: 0.864147
[4587]	train's binary_logloss: 0.394041	train's average_precision: 0.878849	valid's binary_logloss: 0.411945	valid's average_precision: 0.864149
[4588]	train's binary_logloss: 0.394034	train's average_precision: 0.878854	valid's binary_logloss: 0.41195	valid's average_precision: 0.864147
[4589]	train's binary_logloss: 0.39403	train's average_precision: 0.878857	valid's binary_logloss: 0.411951	valid's average_precision: 0.864145
[4590]	train's binary_logloss: 0.394024	train's average_precision: 0.87886	valid's binary_logloss: 0.411951	valid's average_precision: 0.864145
[4591]	train's binary_logloss: 0.39402	train's average_precision: 0.878864	valid's binary_logloss: 0.41195	valid's average_precision: 

[4644]	train's binary_logloss: 0.393724	train's average_precision: 0.879064	valid's binary_logloss: 0.411872	valid's average_precision: 0.864182
[4645]	train's binary_logloss: 0.393719	train's average_precision: 0.879068	valid's binary_logloss: 0.411869	valid's average_precision: 0.864183
[4646]	train's binary_logloss: 0.393715	train's average_precision: 0.879071	valid's binary_logloss: 0.411866	valid's average_precision: 0.864187
[4647]	train's binary_logloss: 0.393708	train's average_precision: 0.879076	valid's binary_logloss: 0.411859	valid's average_precision: 0.864188
[4648]	train's binary_logloss: 0.3937	train's average_precision: 0.879082	valid's binary_logloss: 0.411858	valid's average_precision: 0.864188
[4649]	train's binary_logloss: 0.393695	train's average_precision: 0.879085	valid's binary_logloss: 0.411854	valid's average_precision: 0.86419
[4650]	train's binary_logloss: 0.393689	train's average_precision: 0.87909	valid's binary_logloss: 0.411849	valid's average_precision

[4701]	train's binary_logloss: 0.393447	train's average_precision: 0.879255	valid's binary_logloss: 0.411775	valid's average_precision: 0.864222
[4702]	train's binary_logloss: 0.393442	train's average_precision: 0.879259	valid's binary_logloss: 0.411773	valid's average_precision: 0.864222
[4703]	train's binary_logloss: 0.393436	train's average_precision: 0.879262	valid's binary_logloss: 0.411775	valid's average_precision: 0.86422
[4704]	train's binary_logloss: 0.393429	train's average_precision: 0.879265	valid's binary_logloss: 0.411773	valid's average_precision: 0.864222
[4705]	train's binary_logloss: 0.393424	train's average_precision: 0.879268	valid's binary_logloss: 0.411772	valid's average_precision: 0.864221
[4706]	train's binary_logloss: 0.393417	train's average_precision: 0.879273	valid's binary_logloss: 0.411763	valid's average_precision: 0.864227
[4707]	train's binary_logloss: 0.39341	train's average_precision: 0.879279	valid's binary_logloss: 0.411759	valid's average_precisi

[4761]	train's binary_logloss: 0.393148	train's average_precision: 0.879453	valid's binary_logloss: 0.411682	valid's average_precision: 0.864256
[4762]	train's binary_logloss: 0.393141	train's average_precision: 0.879459	valid's binary_logloss: 0.411673	valid's average_precision: 0.864262
[4763]	train's binary_logloss: 0.393136	train's average_precision: 0.879461	valid's binary_logloss: 0.411673	valid's average_precision: 0.864262
[4764]	train's binary_logloss: 0.393132	train's average_precision: 0.879465	valid's binary_logloss: 0.41167	valid's average_precision: 0.864266
[4765]	train's binary_logloss: 0.393127	train's average_precision: 0.879471	valid's binary_logloss: 0.411664	valid's average_precision: 0.864269
[4766]	train's binary_logloss: 0.393121	train's average_precision: 0.879475	valid's binary_logloss: 0.411667	valid's average_precision: 0.864268
[4767]	train's binary_logloss: 0.393115	train's average_precision: 0.879478	valid's binary_logloss: 0.411667	valid's average_precis

[4818]	train's binary_logloss: 0.392812	train's average_precision: 0.879699	valid's binary_logloss: 0.411558	valid's average_precision: 0.864314
[4819]	train's binary_logloss: 0.392807	train's average_precision: 0.879703	valid's binary_logloss: 0.411562	valid's average_precision: 0.864309
[4820]	train's binary_logloss: 0.392801	train's average_precision: 0.879707	valid's binary_logloss: 0.411568	valid's average_precision: 0.864302
[4821]	train's binary_logloss: 0.392795	train's average_precision: 0.879712	valid's binary_logloss: 0.411567	valid's average_precision: 0.864302
[4822]	train's binary_logloss: 0.392789	train's average_precision: 0.879716	valid's binary_logloss: 0.411567	valid's average_precision: 0.864301
[4823]	train's binary_logloss: 0.392785	train's average_precision: 0.879718	valid's binary_logloss: 0.411563	valid's average_precision: 0.864304
[4824]	train's binary_logloss: 0.392779	train's average_precision: 0.879723	valid's binary_logloss: 0.411557	valid's average_preci

Build Predictions

In [82]:
preds_prob_train2 = bst.predict(train_df.drop(["is_duplicate"], axis=1), num_iteration=bst.best_iteration)
preds_prob_val2 = bst.predict(val_df.drop(["is_duplicate"], axis=1), num_iteration=bst.best_iteration)
print("The train log loss is:", log_loss(y_train, preds_prob_train2))
print("The validation log loss is:", log_loss(y_val, preds_prob_val2))


preds_prob_test2 = bst.predict(test_df.drop(["is_duplicate"], axis=1), num_iteration=bst.best_iteration)

The train log loss is: 0.3926922301868105
The validation log loss is: 0.41152569558955826


### We can see that based on this model, the log loss and precision performs slightly worse than the previous vanilla model.
- We will proceed to perform bagging; taking the bagged of these two boosted trees.
<br>Bagged Boosted Trees:

    1. Predict with each N
    2. Average the predictions to get the final prediction


#### We first evaluate the bagged boosted trees with validation.

In [91]:
preds_prob_val_final = (0.5 * preds_prob_val[:, 1]) + (0.5 * preds_prob_val2)
print("The bagged validation log loss is:", log_loss(y_val, preds_prob_val_final))

preds_val_final = [1 if n >= 0.5 else 0 for n in preds_prob_val_final]
print("The bagged validation precision is:", precision_score(y_val, preds_val_final ))

The bagged validation log loss is: 0.20539651589152416
The bagged validation precision is: 0.9539632584351899


#### We can see that there is a slight increase in log loss but a much significant gain in precision.

### We now proceed to build the features for the final model
#### We standardise all predicted probabilities to be the predicted probabilities of class 0, since these values are the probabilities of class 1 (duplicated), we subtract them from 1 and output as the feature for the final model.

In [93]:
preds_prob_train = 1 - preds_prob_train[:, 1] # First Boosted Tree
preds_prob_train2 = 1 - preds_prob_train2 # Second Boosted Tree


preds_prob_test = 1 - preds_prob_test # First Boosted Tree
preds_prob_test2 = 1 - preds_prob_test2# Second Boosted Tree

In [94]:
preds_train_final = (0.5 * preds_prob_train) + (0.5 * preds_prob_train2)
preds_train_final


preds_test_final = (0.5 * preds_prob_test) + (0.5 * preds_prob_test2)
preds_test_final

array([0.85745791, 0.43762612, 0.34171523, ..., 0.85739481, 0.93460334,
       0.99927077])

array([0.97370239, 0.79672051, 0.99710103, ..., 0.94932581, 0.56722549,
       0.33803203])

#### Output predicted probabilities into pickle for final model

In [96]:
pickle.dump(preds_test_final, open("../data/processed/predictions_full_LGBM_test.p", "wb" ) )
np.savetxt("../data/processed/predictions_full_LGBM_train.csv", preds_train_final, delimiter=",")
np.savetxt("../data/processed/predictions_full_LGBM_test.csv", preds_test_final, delimiter=",")