In [30]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import lightgbm as lgb
from sklearn.preprocessing import LabelEncoder

import pandas_profiling
from tqdm import tqdm_notebook
import numpy as np
%matplotlib inline

In [31]:
df_train = pd.read_csv('onetwotrip_challenge_train.csv')
df_test = pd.read_csv('onetwotrip_challenge_test.csv')

In [32]:
ltr = len(df_train)
df_train = df_train.sort_values(by=['userid', 'field4'])
df_test = df_test.sort_values(by=['userid', 'field4'])
df_test['orderid'] += ltr

In [33]:
data = pd.concat([df_train, df_test], axis=0).reset_index(drop=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [5]:
data['userid'] = LabelEncoder().fit_transform(data['userid'])

In [6]:
data['order_diff'] = data['orderid'] - data.groupby('userid')['orderid'].shift()
data['order_pct_change'] = data.groupby('userid')['orderid'].pct_change() 
data['order_diff_shift'] = data.groupby('userid')['order_diff'].shift(-1)
data['diff_order_cumsum'] = data.groupby('userid')['order_diff'].cumsum()
data['num_orders'] = data.groupby('userid')['userid'].transform('count')
data['num_orders_bin'] = np.where(data['num_orders'] > 1, 1, 0)
data['adult_pct'] = data['field24'] / data['field15']
data['child_pct'] = data['field28'] / data['field15']
data['baby_pct'] = data['field9'] / data['field15']
data['is_first'] = np.where(data['field4'] == 1, 1, 0)
data['first_order_diff'] = data.groupby('userid')['field0'].cumsum()
data['field0_adj'] = data.groupby('userid')['field0'].apply(lambda x: x.replace(0, method='ffill'))

In [7]:
target_cols = list(set(data.columns) - {'goal1', 'goal21', 'goal22', 'goal23', 'goal24', 'goal25', 'orderid'})
for col in tqdm_notebook(target_cols):
    data['vc_' + col] = data.groupby(col)[col].transform("count")
    data['mean_' + col] = data.groupby(col)[col].transform("mean")
    data['std_' + col] = data.groupby(col)[col].transform("std")
    data['prev_' + col] = data.groupby('userid')[col].shift(1)
    data['next_' + col] = data.groupby('userid')[col].shift(-1)
    data['prev2_' + col] = data.groupby('userid')[col].shift(2)
    data['next2_' + col] = data.groupby('userid')[col].shift(-2)
    data['prev3_' + col] = data.groupby('userid')[col].shift(3)
    data['next3_' + col] = data.groupby('userid')[col].shift(-3)
    data['cnt_userid_' + col] = data['userid'].map(data.groupby('userid')[col].apply(lambda x: x.unique().size).to_dict())
    data['ratio_userid_' + col] = data['userid'].map(data.groupby('userid')[col].apply(lambda x: x.unique().size / len(x)).to_dict())

HBox(children=(IntProgress(value=0, max=48), HTML(value='')))




In [8]:
categorical_cols = ['field2', 'field3', 'field5', 'field7', 'field8', 'field9', 'field10', 'field18', 'field19', 
                    'field20', 'field21', 'field24', 'field27', 'field28', 'field29', 'indicator_goal21', 
                    'indicator_goal22', 'indicator_goal23', 'indicator_goal24', 'indicator_goal25']

In [9]:
def process_column(data, df_train, enc_col, col):
    df_group = df_train.groupby(col)[enc_col]
    building_mean = df_group.mean().astype(np.float16)
    building_median = df_group.median().astype(np.float16)
    building_min = df_group.min().astype(np.float16)
    building_max = df_group.max().astype(np.float16)
    building_std = df_group.std().astype(np.float16)

    data[f'mean_{enc_col}_{col}'] = data[col].map(building_mean)
#         data[f'median_enc_{col}'] = data[col].map(building_median)
#         data[f'min_enc_{col}'] = data[col].map(building_min)
#         data[f'max_enc_{col}'] = data[col].map(building_max)
    data[f'std_{enc_col}_{col}'] = data[col].map(building_std)

def feature_engineering(data, df_train):
    for enc_col in ['goal1', 'goal21', 'goal22', 'goal23', 'goal24', 'goal25']:
        for col in tqdm_notebook(categorical_cols):
            process_column(data, df_train, enc_col, col)
    return data

In [10]:
excluded_cols = {'goal1', 'goal21', 'goal22', 'goal23', 'goal24', 'goal25'}
train_cols = list(set(data.columns) - excluded_cols)

In [11]:
data_single = data[data['num_orders_bin'] == 0]
train_idx_single = data_single[~data_single['goal1'].isna()].index
test_idx_single = data_single[data_single['goal1'].isna()].index

In [12]:
param_lgb = {
    'bagging_fraction': 0.8,
    'bagging_freq': 1,
    'boost': 'gbdt',
    'feature_fraction': 0.8,
    'learning_rate': 0.01,
    'metric':'auc',
    'num_leaves': 31,
    'num_threads': 8,
    'objective': 'binary',
}


kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)


pred_single = pd.DataFrame(index=data.loc[test_idx_single, 'orderid'])
score = []

for i , (train_index, test_index) in enumerate(kf.split(data.loc[train_idx_single, :], 
                                                        data.loc[train_idx_single, 'goal1'])):
    data = feature_engineering(data, data.iloc[train_index])
    X_train, y_train = data.loc[train_index, train_cols], data.loc[train_index, 'goal1']
    X_test, y_test = data.loc[test_index, train_cols], data.loc[test_index, 'goal1']
    tr = lgb.Dataset(np.array(X_train), np.array(y_train))
    te = lgb.Dataset(np.array(X_test), np.array(y_test), reference=tr)
    bst = lgb.train(param_lgb, tr, num_boost_round=10000, 
            valid_sets=te, early_stopping_rounds=int(5 / param_lgb['learning_rate']), verbose_eval=100)
    score.append(bst.best_score['valid_0']['auc'])
    
    pred_single[str(i)] = bst.predict(np.array(data[train_cols])[test_idx_single, :])

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.694391
[200]	valid_0's auc: 0.705248
[300]	valid_0's auc: 0.709845
[400]	valid_0's auc: 0.711579
[500]	valid_0's auc: 0.708383
[600]	valid_0's auc: 0.707868
[700]	valid_0's auc: 0.706854
[800]	valid_0's auc: 0.703439
Early stopping, best iteration is:
[328]	valid_0's auc: 0.712717


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.722788
[200]	valid_0's auc: 0.728564
[300]	valid_0's auc: 0.736723
[400]	valid_0's auc: 0.738736
[500]	valid_0's auc: 0.740045
[600]	valid_0's auc: 0.741584
[700]	valid_0's auc: 0.744103
[800]	valid_0's auc: 0.745288
[900]	valid_0's auc: 0.745494
[1000]	valid_0's auc: 0.744695
[1100]	valid_0's auc: 0.7457
[1200]	valid_0's auc: 0.745405
[1300]	valid_0's auc: 0.744214
Early stopping, best iteration is:
[836]	valid_0's auc: 0.746969


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.71839
[200]	valid_0's auc: 0.72696
[300]	valid_0's auc: 0.730747
[400]	valid_0's auc: 0.728847
[500]	valid_0's auc: 0.726704
[600]	valid_0's auc: 0.724021
[700]	valid_0's auc: 0.721825
[800]	valid_0's auc: 0.720302
Early stopping, best iteration is:
[329]	valid_0's auc: 0.73138


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.67883
[200]	valid_0's auc: 0.686655
[300]	valid_0's auc: 0.685863
[400]	valid_0's auc: 0.682831
[500]	valid_0's auc: 0.682296
[600]	valid_0's auc: 0.681917
Early stopping, best iteration is:
[196]	valid_0's auc: 0.687442


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.706254
[200]	valid_0's auc: 0.719044
[300]	valid_0's auc: 0.721556
[400]	valid_0's auc: 0.721763
[500]	valid_0's auc: 0.721753
[600]	valid_0's auc: 0.721326
[700]	valid_0's auc: 0.719631
[800]	valid_0's auc: 0.717477
Early stopping, best iteration is:
[381]	valid_0's auc: 0.722938


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.70473
[200]	valid_0's auc: 0.718773
[300]	valid_0's auc: 0.727127
[400]	valid_0's auc: 0.730513
[500]	valid_0's auc: 0.731212
[600]	valid_0's auc: 0.730154
[700]	valid_0's auc: 0.730384
[800]	valid_0's auc: 0.729086
[900]	valid_0's auc: 0.727661
Early stopping, best iteration is:
[484]	valid_0's auc: 0.731772


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.683168
[200]	valid_0's auc: 0.69557
[300]	valid_0's auc: 0.698666
[400]	valid_0's auc: 0.697744
[500]	valid_0's auc: 0.698197
[600]	valid_0's auc: 0.699558
[700]	valid_0's auc: 0.700084
[800]	valid_0's auc: 0.698019
[900]	valid_0's auc: 0.701301
[1000]	valid_0's auc: 0.700148
[1100]	valid_0's auc: 0.700392
[1200]	valid_0's auc: 0.700819
[1300]	valid_0's auc: 0.70011
[1400]	valid_0's auc: 0.700135
Early stopping, best iteration is:
[903]	valid_0's auc: 0.701552


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.666283
[200]	valid_0's auc: 0.66892
[300]	valid_0's auc: 0.67435
[400]	valid_0's auc: 0.670231
[500]	valid_0's auc: 0.671668
Early stopping, best iteration is:
[30]	valid_0's auc: 0.675983


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.678091
[200]	valid_0's auc: 0.684402
[300]	valid_0's auc: 0.689142
[400]	valid_0's auc: 0.690927
[500]	valid_0's auc: 0.692667
[600]	valid_0's auc: 0.694607
[700]	valid_0's auc: 0.693703
[800]	valid_0's auc: 0.695357
[900]	valid_0's auc: 0.695551
[1000]	valid_0's auc: 0.692092
[1100]	valid_0's auc: 0.691
[1200]	valid_0's auc: 0.691765
[1300]	valid_0's auc: 0.691463
Early stopping, best iteration is:
[890]	valid_0's auc: 0.696103


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.655955
[200]	valid_0's auc: 0.665783
[300]	valid_0's auc: 0.673457
[400]	valid_0's auc: 0.674794
[500]	valid_0's auc: 0.673761
[600]	valid_0's auc: 0.674327
[700]	valid_0's auc: 0.673513
[800]	valid_0's auc: 0.671807
Early stopping, best iteration is:
[368]	valid_0's auc: 0.675784


In [13]:
scores_df = pd.read_csv('val_scores.csv')
scores_df = pd.concat([scores_df, pd.Series(score).to_csv('val_scores.csv')], axis=1)
scores_df.to_csv('val_scores.csv')

  


In [14]:
data_multi = data[data['num_orders_bin'] == 1]
train_idx_multi = data_multi[~data_multi['goal1'].isna()].index
test_idx_multi= data_multi[data_multi['goal1'].isna()].index

In [15]:
param_lgb = {
    'bagging_fraction': 0.8,
    'bagging_freq': 1,
    'boost': 'gbdt',
    'feature_fraction': 0.8,
    'learning_rate': 0.01,
    'metric':'auc',
    'num_leaves': 31,
    'num_threads': 8,
    'objective': 'binary',
}


kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)


pred_multi = pd.DataFrame(index=data.loc[test_idx_multi, 'orderid'])
score = []

for i , (train_index, test_index) in enumerate(kf.split(data.loc[train_idx_multi, :], 
                                                        data.loc[train_idx_multi, 'goal1'])):
    data = feature_engineering(data, data.iloc[train_index])
    X_train, y_train = data.loc[train_index, train_cols], data.loc[train_index, 'goal1']
    X_test, y_test = data.loc[test_index, train_cols], data.loc[test_index, 'goal1']
    tr = lgb.Dataset(np.array(X_train), np.array(y_train))
    te = lgb.Dataset(np.array(X_test), np.array(y_test), reference=tr)
    bst = lgb.train(param_lgb, tr, num_boost_round=10000, 
            valid_sets=te, early_stopping_rounds=int(5 / param_lgb['learning_rate']), verbose_eval=100)
    score.append(bst.best_score['valid_0']['auc'])
    
    pred_multi[str(i)] = bst.predict(np.array(data[train_cols])[test_idx_multi, :])

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.68932
[200]	valid_0's auc: 0.695576
[300]	valid_0's auc: 0.699149
[400]	valid_0's auc: 0.700781
[500]	valid_0's auc: 0.701956
[600]	valid_0's auc: 0.703209
[700]	valid_0's auc: 0.703347
[800]	valid_0's auc: 0.702201
[900]	valid_0's auc: 0.700932
[1000]	valid_0's auc: 0.700457
[1100]	valid_0's auc: 0.700129
Early stopping, best iteration is:
[645]	valid_0's auc: 0.704106


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.696868
[200]	valid_0's auc: 0.699579
[300]	valid_0's auc: 0.701343
[400]	valid_0's auc: 0.700232
[500]	valid_0's auc: 0.699645
[600]	valid_0's auc: 0.699657
[700]	valid_0's auc: 0.698936
Early stopping, best iteration is:
[280]	valid_0's auc: 0.702225


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.686971
[200]	valid_0's auc: 0.695237
[300]	valid_0's auc: 0.699163
[400]	valid_0's auc: 0.700634
[500]	valid_0's auc: 0.703422
[600]	valid_0's auc: 0.704755
[700]	valid_0's auc: 0.704123
[800]	valid_0's auc: 0.703743
[900]	valid_0's auc: 0.703453
[1000]	valid_0's auc: 0.703281
[1100]	valid_0's auc: 0.704277
Early stopping, best iteration is:
[605]	valid_0's auc: 0.704883


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.686291
[200]	valid_0's auc: 0.69318
[300]	valid_0's auc: 0.695076
[400]	valid_0's auc: 0.698654
[500]	valid_0's auc: 0.702978
[600]	valid_0's auc: 0.704336
[700]	valid_0's auc: 0.706333
[800]	valid_0's auc: 0.706859
[900]	valid_0's auc: 0.704774
[1000]	valid_0's auc: 0.704105
[1100]	valid_0's auc: 0.703626
[1200]	valid_0's auc: 0.702403
[1300]	valid_0's auc: 0.701782
Early stopping, best iteration is:
[806]	valid_0's auc: 0.706865


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.7164
[200]	valid_0's auc: 0.719033
[300]	valid_0's auc: 0.722747
[400]	valid_0's auc: 0.721664
[500]	valid_0's auc: 0.719485
[600]	valid_0's auc: 0.718084
[700]	valid_0's auc: 0.716986
[800]	valid_0's auc: 0.714463
Early stopping, best iteration is:
[330]	valid_0's auc: 0.723734


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.691911
[200]	valid_0's auc: 0.700237
[300]	valid_0's auc: 0.702479
[400]	valid_0's auc: 0.703752
[500]	valid_0's auc: 0.702325
[600]	valid_0's auc: 0.70174
[700]	valid_0's auc: 0.701987
[800]	valid_0's auc: 0.70134
[900]	valid_0's auc: 0.700603
Early stopping, best iteration is:
[428]	valid_0's auc: 0.704009


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.698898
[200]	valid_0's auc: 0.701359
[300]	valid_0's auc: 0.702956
[400]	valid_0's auc: 0.700836
[500]	valid_0's auc: 0.701298
[600]	valid_0's auc: 0.702681
[700]	valid_0's auc: 0.7025
[800]	valid_0's auc: 0.70133
Early stopping, best iteration is:
[302]	valid_0's auc: 0.703227


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.667785
[200]	valid_0's auc: 0.675055
[300]	valid_0's auc: 0.68124
[400]	valid_0's auc: 0.685399
[500]	valid_0's auc: 0.687623
[600]	valid_0's auc: 0.690038
[700]	valid_0's auc: 0.690469
[800]	valid_0's auc: 0.691066
[900]	valid_0's auc: 0.692318
[1000]	valid_0's auc: 0.693453
[1100]	valid_0's auc: 0.693889
[1200]	valid_0's auc: 0.695059
[1300]	valid_0's auc: 0.695327
[1400]	valid_0's auc: 0.696011
[1500]	valid_0's auc: 0.695199
[1600]	valid_0's auc: 0.695652
[1700]	valid_0's auc: 0.696384
[1800]	valid_0's auc: 0.69705
[1900]	valid_0's auc: 0.696073
[2000]	valid_0's auc: 0.696187
[2100]	valid_0's auc: 0.696228
[2200]	valid_0's auc: 0.696984
Early stopping, best iteration is:
[1768]	valid_0's auc: 0.69728


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.706828
[200]	valid_0's auc: 0.71474
[300]	valid_0's auc: 0.720892
[400]	valid_0's auc: 0.723161
[500]	valid_0's auc: 0.725989
[600]	valid_0's auc: 0.726512
[700]	valid_0's auc: 0.727257
[800]	valid_0's auc: 0.726228
[900]	valid_0's auc: 0.727484
[1000]	valid_0's auc: 0.728004
[1100]	valid_0's auc: 0.726853
[1200]	valid_0's auc: 0.725534
[1300]	valid_0's auc: 0.723663
Early stopping, best iteration is:
[841]	valid_0's auc: 0.728549


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Training until validation scores don't improve for 500 rounds.
[100]	valid_0's auc: 0.710308
[200]	valid_0's auc: 0.713827
[300]	valid_0's auc: 0.718182
[400]	valid_0's auc: 0.719505
[500]	valid_0's auc: 0.720645
[600]	valid_0's auc: 0.719485
[700]	valid_0's auc: 0.719263
[800]	valid_0's auc: 0.717778
[900]	valid_0's auc: 0.714613
[1000]	valid_0's auc: 0.7131
Early stopping, best iteration is:
[522]	valid_0's auc: 0.72085


In [16]:
scores_df = pd.read_csv('val_scores.csv')
scores_df = pd.concat([scores_df, pd.Series(score).to_csv('val_scores.csv')], axis=1)
scores_df.to_csv('val_scores.csv')

  


In [17]:
pred = pd.concat([pred_single, pred_multi], axis=0)

In [28]:
pred = pred.sort_index()
pred.index = pred.index - ltr

ans = pd.DataFrame(index=pred.index)
ans['proba'] = pred.mean(axis=1)
ans.to_csv('mean.csv')