In [1]:
import catboost
from catboost import CatBoostClassifier, Pool, cv
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import gc
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer
from sklearn.decomposition import TruncatedSVD

from sklearn.preprocessing import scale
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Stack the labelled (train) and unlabelled (test) user ids into one frame.
# Test rows carry no `age_group`, so that column is NaN for them after the concat.
train_labels = pd.read_csv('../age_train.csv', header=None, names=['uId', 'age_group'])
test_ids = pd.read_csv('../age_test.csv', header=None, names=['uId'])
data = pd.concat([train_labels, test_ids], axis=0, sort=True).reset_index(drop=True)
del test_ids, train_labels

In [3]:
# Attach the basic-info attributes and the behaviour counters to every user.
# Both merges rely on pandas' defaults (inner join on the columns the two frames share).
basic_info = pd.read_csv(
    '../user_basic_info.csv',
    header=None,
    names=['uId', 'gender', 'city', 'prodName', 'ramCapacity', 'ramLeftRation', 'romCapacity',
           'romLeftRation', 'color', 'fontSize', 'ct', 'carrier', 'os'],
)
data = data.merge(basic_info)
del basic_info

behavior_info = pd.read_csv(
    '../user_behavior_info.csv',
    header=None,
    names=['uId', 'bootTimes', 'AFuncTimes', 'BFuncTimes', 'CFuncTimes', 'DFuncTimes',
           'EFuncTimes', 'FFuncTimes', 'FFuncSum'],
)
data = data.merge(behavior_info)
del behavior_info

In [4]:
# Treat left-space ratios greater than 1 as missing (presumably bad readings — TODO confirm).
# A single .loc assignment writes into `data` itself; chained indexing
# (`data[col][mask] = ...`) may write to a temporary copy and is a no-op under
# pandas Copy-on-Write.
for ratio_col in ('romLeftRation', 'ramLeftRation'):
    data.loc[data[ratio_col] > 1, ratio_col] = np.nan

In [16]:
# Absolute remaining RAM / ROM: total capacity multiplied by the remaining ratio.
for storage in ('ram', 'rom'):
    data[storage + 'LeftCapacity'] = data[storage + 'Capacity'] * data[storage + 'LeftRation']

In [5]:
# Row-wise total of the six A–F function counters.
func_time_cols = ['{}FuncTimes'.format(letter) for letter in 'ABCDEF']
data['FuncTimes_Sum'] = data[func_time_cols].sum(axis=1)

In [6]:
%%time
# Share of each function counter in the A–F total computed above.
for letter in 'ABCDEF':
    data[letter + 'FuncTimes_rate'] = data[letter + 'FuncTimes'].div(data['FuncTimes_Sum'])

CPU times: user 113 ms, sys: 2.56 ms, total: 116 ms
Wall time: 43 ms


In [7]:
# For every prodName, compute the mean bootTimes and the mean fontSize and
# left-join them back onto each row as `<column>_mean`.
for source_col in ('bootTimes', 'fontSize'):
    per_prod_mean = (
        data.groupby(['prodName'])[source_col]
        .mean()
        .rename(source_col + '_mean')
        .reset_index()
    )
    data = data.merge(per_prod_mean, on='prodName', how='left')
del per_prod_mean
gc.collect()

105

In [8]:
%%time
# Left-join the precomputed per-weekday totals onto `data` by user id, and
# print the column names of the loaded frame for reference.
weekday_pv = pd.read_hdf('../../../Competition/huawei/age/zero/feature/user_cate_stat_weekday_pv.h5', key='data')
print(weekday_pv.columns.tolist())
data = pd.merge(data, weekday_pv, how='left', on='uId')
del weekday_pv

['uId', 'weekday_0_total_duration', 'weekday_1_total_duration', 'weekday_2_total_duration', 'weekday_3_total_duration', 'weekday_4_total_duration', 'weekday_5_total_duration', 'weekday_6_total_duration', 'weekday_0_total_times', 'weekday_1_total_times', 'weekday_2_total_times', 'weekday_3_total_times', 'weekday_4_total_times', 'weekday_5_total_times', 'weekday_6_total_times']
CPU times: user 1.66 s, sys: 753 ms, total: 2.41 s
Wall time: 2.41 s


In [9]:
%%time
# Left-join the precomputed app-count statistics onto `data` by user id, and
# print the column names of the loaded frame for reference.
app_counts = pd.read_hdf('../../../Competition/huawei/age/zero/feature/user_all_app_count.h5', key='data')
print(app_counts.columns.tolist())
data = pd.merge(data, app_counts, how='left', on='uId')
del app_counts

['uId', 'all_app_cnt', 'actived_app_cnt', 'other_app_cnt', 'actived_app_cnt_rate', 'other_app_cnt_rate', 'app_sum', 'actived_not_used_cnt', 'actived_used_rate']
CPU times: user 1.64 s, sys: 632 ms, total: 2.27 s
Wall time: 2.27 s


In [10]:
%%time
# Left-join the precomputed per-category rate features onto `data` by user id,
# and print the column names of the loaded frame for reference.
cate_rates = pd.read_hdf('../../../Competition/huawei/age/zero/feature/user_cate_stat_rate.h5', key='data')
print(cate_rates.columns.tolist())
data = pd.merge(data, cate_rates, how='left', on='uId')
del cate_rates
gc.collect()

['主题个性_total_duration_rate', '主题铃声_total_duration_rate', '休闲娱乐_total_duration_rate', '休闲游戏_total_duration_rate', '休闲益智_total_duration_rate', '体育射击_total_duration_rate', '体育竞速_total_duration_rate', '便捷生活_total_duration_rate', '儿童_total_duration_rate', '出行导航_total_duration_rate', '动作冒险_total_duration_rate', '动作射击_total_duration_rate', '医疗健康_total_duration_rate', '合作壁纸*_total_duration_rate', '商务_total_duration_rate', '图书阅读_total_duration_rate', '学习办公_total_duration_rate', '实用工具_total_duration_rate', '影音娱乐_total_duration_rate', '拍摄美化_total_duration_rate', '教育_total_duration_rate', '新闻阅读_total_duration_rate', '旅游住宿_total_duration_rate', '棋牌天地_total_duration_rate', '棋牌桌游_total_duration_rate', '模拟游戏_total_duration_rate', '汽车_total_duration_rate', '电子书籍_total_duration_rate', '益智棋牌_total_duration_rate', '社交通讯_total_duration_rate', '策略游戏_total_duration_rate', '经营策略_total_duration_rate', '网络游戏_total_duration_rate', '美食_total_duration_rate', '表盘个性_total_duration_rate', '角色扮演_total_duration_rate', 

In [11]:
%%time
# Left-join the precomputed X_times feature block onto `data` by user id.
X_times = pd.read_hdf('../../../Competition/huawei/age/zero/feature/X_times.h5', key='data')
data = data.merge(X_times, how='left', on='uId')
# Free the frame that was just merged; X_times is the only frame this cell creates.
del X_times
gc.collect()

CPU times: user 6.36 s, sys: 6.16 s, total: 12.5 s
Wall time: 30.7 s


In [12]:
%%time
# Left-join the precomputed X_duration feature block onto `data` by user id.
X_duration = pd.read_hdf('../../../Competition/huawei/age/zero/feature/X_duration.h5', key='data')
data = data.merge(X_duration, how='left', on='uId')
# Free the frame that was just merged; X_duration is the only frame this cell creates.
del X_duration
gc.collect()

CPU times: user 6.5 s, sys: 6.82 s, total: 13.3 s
Wall time: 31.6 s


In [13]:
%%time
# Left-join the precomputed per-weekday rate features onto `data` by user id,
# and print the column names of the loaded frame for reference.
weekday_rates = pd.read_hdf('../../../Competition/huawei/age/zero/feature/user_cate_stat_weekday_rate.h5', key='data')
print(weekday_rates.columns.tolist())
data = pd.merge(data, weekday_rates, how='left', on='uId')
del weekday_rates

['weekday_0_total_duration_rate', 'weekday_1_total_duration_rate', 'weekday_2_total_duration_rate', 'weekday_3_total_duration_rate', 'weekday_4_total_duration_rate', 'weekday_5_total_duration_rate', 'weekday_6_total_duration_rate', 'weekday_0_total_times_rate', 'weekday_1_total_times_rate', 'weekday_2_total_times_rate', 'weekday_3_total_times_rate', 'weekday_4_total_times_rate', 'weekday_5_total_times_rate', 'weekday_6_total_times_rate', 'uId', 'weekday_duration_std', 'weekday_times_std']
CPU times: user 3.13 s, sys: 3.5 s, total: 6.63 s
Wall time: 8.44 s


In [17]:
%%time
# Add two per-user app-list columns: `appId` (activated apps, from the raw CSV)
# and `usage_appId` (from the precomputed app_usage5000 feature file).
# Both are left joins, so users missing from either source get NaN.
activated_apps = pd.read_csv('../user_app_actived.csv', header=None, names=['uId', 'appId'])
data = data.merge(activated_apps, on='uId', how='left')
del activated_apps

usage_apps = pd.read_hdf('../../../Competition/huawei/age/zero/feature/app_usage5000.h5', key='data')
usage_apps.columns = ['uId', 'usage_appId']
data = data.merge(usage_apps, on='uId', how='left')
del usage_apps
gc.collect()

CPU times: user 21.9 s, sys: 10.7 s, total: 32.6 s
Wall time: 37.6 s


In [18]:
%%time
# Turn each user's activated-app list into dense features by pushing its binary
# bag-of-apps vector through three stored weight/bias pairs (dense1..dense3,
# presumably exported from a separately trained network — TODO confirm).
# Each layer is applied as a plain affine map (x.dot(W) + b); no activation is
# applied between layers, and the output of every layer is kept as features.
#
# `appId` comes from a left merge, so it can be NaN; fill it the same way the
# usage-app cell does ('-1' matches no token and yields an all-zero row),
# because CountVectorizer rejects NaN documents. The token pattern is a raw
# string so that `\d` reaches the regex engine without an invalid-escape warning.
app_bag = CountVectorizer(token_pattern=r'a\d+', binary=True).fit_transform(data['appId'].fillna('-1'))

layer_input = app_bag
layer_frames = []
for layer_no, col_prefix in enumerate(['actived_app_', 'actived_app_II_', 'actived_app_III_'], start=1):
    weight = np.load('./weight_bias/Xapp_weight2_dense{}.npy'.format(layer_no))
    bias = np.load('./weight_bias/Xapp_bias2_dense{}.npy'.format(layer_no))
    layer_output = layer_input.dot(weight) + bias
    layer_frames.append(
        pd.DataFrame(layer_output, columns=[col_prefix + str(k) for k in range(layer_output.shape[1])])
    )
    # The next layer consumes this layer's output.
    layer_input = layer_output

# Column-wise concat aligns on the index, so `data` must still carry the default
# 0..n-1 index produced by the preceding merges.
data = pd.concat([data] + layer_frames, axis=1)

del app_bag, layer_input, layer_output, layer_frames, weight, bias
gc.collect()

CPU times: user 1min 40s, sys: 27.7 s, total: 2min 8s
Wall time: 1min 58s


In [None]:
%%time
# Turn each user's usage-app list into dense features by pushing its binary
# bag-of-apps vector through three stored weight/bias pairs (dense1..dense3,
# presumably exported from a separately trained network — TODO confirm).
# Each layer is applied as a plain affine map (x.dot(W) + b); no activation is
# applied between layers, and the output of every layer is kept as features.
#
# Users without a usage list get '-1', which matches no token and yields an
# all-zero row. The token pattern is a raw string so that `\d` reaches the
# regex engine without an invalid-escape warning.
usage_bag = CountVectorizer(token_pattern=r'a\d+', binary=True).fit_transform(data['usage_appId'].fillna('-1'))

layer_input = usage_bag
layer_frames = []
for layer_no, col_prefix in enumerate(['X_usage_I_', 'X_usage_II_', 'X_usage_III_'], start=1):
    X_usage_weight = np.load('./weight_bias/X_usage_weight2_dense{}.npy'.format(layer_no))
    X_usage_bias = np.load('./weight_bias/X_usage_bias2_dense{}.npy'.format(layer_no))
    layer_output = layer_input.dot(X_usage_weight) + X_usage_bias
    layer_frames.append(
        pd.DataFrame(layer_output, columns=[col_prefix + str(k) for k in range(layer_output.shape[1])])
    )
    # The next layer consumes this layer's output.
    layer_input = layer_output

# Column-wise concat aligns on the index, so `data` must still carry the default
# 0..n-1 index produced by the preceding merges.
data = pd.concat([data] + layer_frames, axis=1)

del usage_bag, layer_input, layer_output, layer_frames, X_usage_weight, X_usage_bias
gc.collect()

CPU times: user 1min 8s, sys: 25.9 s, total: 1min 34s
Wall time: 1min 25s


In [None]:
%%time
from tqdm import tqdm_notebook

# Replace every categorical column by integer codes. Values are stringified
# first (missing -> '-1') so that LabelEncoder sees a single, sortable type.
cate_features = ['city', 'prodName', 'color', 'ct', 'carrier', 'gender', 'fontSize', 'os']
for feat in tqdm_notebook(cate_features):
    as_text = data[feat].fillna('-1').map(str)
    data[feat] = LabelEncoder().fit_transform(as_text)

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))


CPU times: user 25.6 s, sys: 416 ms, total: 26 s
Wall time: 26 s


In [None]:
# Column-name groups that are later concatenated into the model's feature list.
# Repetitive names are generated instead of being spelled out one by one.

# Per-weekday duration / launch-count shares plus their spread.
feat_I = (
    ['weekday_{}_total_duration_rate'.format(day) for day in range(7)]
    + ['weekday_{}_total_times_rate'.format(day) for day in range(7)]
    + ['weekday_duration_std', 'weekday_times_std']
)

# App categories, in the order the rate columns are listed.
app_categories = [
    '主题个性', '主题铃声', '休闲娱乐', '休闲游戏', '休闲益智', '体育射击', '体育竞速', '便捷生活',
    '儿童', '出行导航', '动作冒险', '动作射击', '医疗健康', '合作壁纸*', '商务', '图书阅读',
    '学习办公', '实用工具', '影音娱乐', '拍摄美化', '教育', '新闻阅读', '旅游住宿', '棋牌天地',
    '棋牌桌游', '模拟游戏', '汽车', '电子书籍', '益智棋牌', '社交通讯', '策略游戏', '经营策略',
    '网络游戏', '美食', '表盘个性', '角色扮演', '角色游戏', '购物比价', '运动健康', '金融理财',
]
rate_suffixes = ['_total_duration_rate', '_total_times_rate', '_used_days_rate']
# Suffix-major order: all duration rates, then all times rates, then all used-days rates.
feat_II = [category + suffix for suffix in rate_suffixes for category in app_categories]

feat_III = ['all_app_cnt', 'actived_app_cnt', 'other_app_cnt', 'actived_app_cnt_rate', 'other_app_cnt_rate']
feat_IV = ['weekday_{}_total_duration'.format(day) for day in range(7)]
feat_V = ['X_times_' + str(k) for k in range(100)]
feat_VI = ['X_duration_' + str(k) for k in range(100)]

# Names of the dense activated-app features (32 + 16 + 8 columns).
X_app_I = ['actived_app_' + str(k) for k in range(32)]
X_app_II = ['actived_app_II_' + str(k) for k in range(16)]
X_app_III = ['actived_app_III_' + str(k) for k in range(8)]
X_app = X_app_I + X_app_II + X_app_III

# Names of the dense usage-app features (32 + 16 + 8 columns).
X_usage_I = ['X_usage_I_' + str(k) for k in range(32)]
X_usage_II = ['X_usage_II_' + str(k) for k in range(16)]
X_usage_III = ['X_usage_III_' + str(k) for k in range(8)]
X_usage = X_usage_I + X_usage_II + X_usage_III

# Raw / lightly engineered columns from the basic-info and behaviour tables.
origin_num_feature = (
    ['ramLeftCapacity', 'romLeftCapacity', 'city', 'prodName', 'color', 'ct', 'carrier', 'gender',
     'ramCapacity', 'ramLeftRation', 'romCapacity', 'romLeftRation', 'fontSize', 'os', 'bootTimes']
    + [letter + 'FuncTimes' for letter in 'ABCDEF']
    + ['bootTimes_mean', 'fontSize_mean']
)


# Concatenate all column-name groups into the final list of model inputs and
# report how many columns that is.
# NOTE(review): `total_app_count_feat`, `total_perday_feat` and `test_feat_I` are
# not defined in any cell shown in this notebook, so this statement raises
# NameError on a fresh kernel — restore the cell(s) that build those lists.
feature = origin_num_feature+feat_I+feat_II+feat_III+feat_IV+feat_V+feat_VI+X_app+X_usage+total_app_count_feat\
          +total_perday_feat+test_feat_I

print(len(feature))


735


In [None]:
import random

# Bagging schedule: round k keeps frac_axis1[k] of the feature columns (axis 1)
# and frac_axis0[k] of the training rows (axis 0).
frac_axis1 = [0.7,0.75,0.8,0.85,0.9]
frac_axis0 = [0.9,0.85,0.8,0.75,0.7]

# Rows without an age_group label form the test set. The masks are identical in
# every round, so they are computed once up front.
test_index = data['age_group'].isna()
train_index = ~test_index

for i, j in zip(frac_axis1, frac_axis0):
    # Random feature subset for this round. `.loc[rows, cols]` selects rows and
    # columns in one step instead of first copying every column of `data`.
    rand = random.randint(0,2019)
    train_x = data.loc[train_index, feature].sample(frac=i, replace=False, random_state=rand, axis=1)
    train_y = data.loc[train_index, 'age_group']
    # Take exactly the sampled columns for the test rows, so train and test
    # match by construction rather than through a second seeded sample.
    test_x = data.loc[test_index, train_x.columns]

    # Hold out 2% of the labelled rows with a fixed seed for early stopping
    # and for the saved validation predictions.
    X_train, X_validation, y_train, y_validation = train_test_split(
        train_x, train_y, test_size=0.02, random_state=42)
    del train_x
    gc.collect()

    # Random row subset for this round. Labels are looked up through the sampled
    # rows' index (assumes `data` has a unique index), which keeps X and y
    # aligned without depending on two sample() calls drawing identically.
    rand = random.randint(0,2019)
    X_train = X_train.sample(frac=j, replace=False, random_state=rand, axis=0)
    y_train = y_train.loc[X_train.index]

    train_pool = Pool(X_train, y_train)
    eval_pool = Pool(X_validation, y_validation)
    # X_validation is intentionally kept: predict_proba below still needs it.
    del X_train
    del y_train
    del y_validation
    gc.collect()

    model = CatBoostClassifier(
        iterations=300000,
        learning_rate=0.01,
        eval_metric='Accuracy',
        use_best_model=True,
        random_seed=2019,
        logging_level='Verbose',
        task_type='GPU',
        devices='2',
        early_stopping_rounds=5000,
        loss_function='MultiClass',
        depth=8,
    )
    model.fit(train_pool, eval_set=eval_pool, verbose=100) #0.6486

    # Output files are tagged with this round's best validation accuracy.
    best_accuracy = round(model.best_score_['validation']['Accuracy'],6)
    pred_val = model.predict_proba(X_validation)
    pred_test = model.predict_proba(test_x)
    np.save("./out/proba_val_{}.npy".format(best_accuracy), pred_val)
    np.save("./out/proba_test_{}.npy".format(best_accuracy), pred_test)

0:	learn: 0.6352580	test: 0.6126866	best: 0.6126866 (0)	total: 156ms	remaining: 12h 57m 51s
100:	learn: 0.6576009	test: 0.6324627	best: 0.6325373 (99)	total: 13.1s	remaining: 10h 48m 11s
200:	learn: 0.6633871	test: 0.6364925	best: 0.6364925 (200)	total: 27.2s	remaining: 11h 16m 1s
300:	learn: 0.6669448	test: 0.6397512	best: 0.6397512 (300)	total: 41.3s	remaining: 11h 25m 37s
400:	learn: 0.6695654	test: 0.6415920	best: 0.6416418 (398)	total: 54.9s	remaining: 11h 24m 1s
500:	learn: 0.6715403	test: 0.6417910	best: 0.6419900 (476)	total: 1m 8s	remaining: 11h 18m 43s
600:	learn: 0.6733588	test: 0.6428109	best: 0.6428109 (600)	total: 1m 22s	remaining: 11h 21m 2s
700:	learn: 0.6747741	test: 0.6429851	best: 0.6430100 (696)	total: 1m 35s	remaining: 11h 21m 46s
800:	learn: 0.6759784	test: 0.6430846	best: 0.6436318 (774)	total: 1m 49s	remaining: 11h 22m 8s
900:	learn: 0.6770687	test: 0.6435572	best: 0.6437313 (882)	total: 2m 3s	remaining: 11h 22m 26s
1000:	learn: 0.6780903	test: 0.6437065	best: 0

8400:	learn: 0.7030003	test: 0.6455970	best: 0.6458706 (6254)	total: 18m 53s	remaining: 10h 55m 52s
8500:	learn: 0.7031966	test: 0.6455224	best: 0.6458706 (6254)	total: 19m 6s	remaining: 10h 55m 26s
8600:	learn: 0.7034183	test: 0.6455224	best: 0.6458706 (6254)	total: 19m 20s	remaining: 10h 55m 24s
8700:	learn: 0.7036197	test: 0.6454229	best: 0.6458706 (6254)	total: 19m 33s	remaining: 10h 54m 59s
8800:	learn: 0.7038272	test: 0.6457214	best: 0.6458706 (6254)	total: 19m 48s	remaining: 10h 55m 26s
8900:	learn: 0.7040247	test: 0.6457711	best: 0.6458706 (6254)	total: 20m 1s	remaining: 10h 54m 57s
9000:	learn: 0.7042249	test: 0.6456716	best: 0.6458706 (6254)	total: 20m 14s	remaining: 10h 54m 26s
9100:	learn: 0.7044189	test: 0.6458706	best: 0.6459204 (9098)	total: 20m 27s	remaining: 10h 53m 57s
9200:	learn: 0.7046265	test: 0.6457463	best: 0.6459453 (9120)	total: 20m 40s	remaining: 10h 53m 39s
9300:	learn: 0.7048223	test: 0.6458209	best: 0.6459453 (9120)	total: 20m 53s	remaining: 10h 53m 10s
94

16500:	learn: 0.7179996	test: 0.6457214	best: 0.6465920 (13193)	total: 36m 38s	remaining: 10h 29m 33s
16600:	learn: 0.7181744	test: 0.6459701	best: 0.6465920 (13193)	total: 36m 51s	remaining: 10h 29m 16s
16700:	learn: 0.7183549	test: 0.6458706	best: 0.6465920 (13193)	total: 37m 4s	remaining: 10h 28m 59s
16800:	learn: 0.7185039	test: 0.6459453	best: 0.6465920 (13193)	total: 37m 17s	remaining: 10h 28m 39s
16900:	learn: 0.7186663	test: 0.6458955	best: 0.6465920 (13193)	total: 37m 30s	remaining: 10h 28m 20s
17000:	learn: 0.7188310	test: 0.6457711	best: 0.6465920 (13193)	total: 37m 43s	remaining: 10h 28m
17100:	learn: 0.7189720	test: 0.6456965	best: 0.6465920 (13193)	total: 37m 56s	remaining: 10h 27m 43s
17200:	learn: 0.7191164	test: 0.6457214	best: 0.6465920 (13193)	total: 38m 9s	remaining: 10h 27m 23s
17300:	learn: 0.7193015	test: 0.6457960	best: 0.6465920 (13193)	total: 38m 22s	remaining: 10h 27m 9s
17400:	learn: 0.7194825	test: 0.6457463	best: 0.6465920 (13193)	total: 38m 35s	remaining:

300:	learn: 0.6673768	test: 0.6388060	best: 0.6390299 (296)	total: 41.5s	remaining: 11h 28m 17s
400:	learn: 0.6699449	test: 0.6406468	best: 0.6406965 (395)	total: 54.7s	remaining: 11h 21m 11s
500:	learn: 0.6719121	test: 0.6422637	best: 0.6422637 (500)	total: 1m 7s	remaining: 11h 12m 43s
600:	learn: 0.6736420	test: 0.6433333	best: 0.6433333 (589)	total: 1m 21s	remaining: 11h 13m 4s
700:	learn: 0.6752361	test: 0.6437065	best: 0.6438557 (692)	total: 1m 34s	remaining: 11h 13m 58s
800:	learn: 0.6764532	test: 0.6439552	best: 0.6440547 (791)	total: 1m 48s	remaining: 11h 16m 24s
900:	learn: 0.6775764	test: 0.6436318	best: 0.6441045 (844)	total: 2m 2s	remaining: 11h 16m 28s
1000:	learn: 0.6784198	test: 0.6436318	best: 0.6441045 (844)	total: 2m 15s	remaining: 11h 16m 1s
1100:	learn: 0.6792320	test: 0.6436567	best: 0.6441045 (844)	total: 2m 29s	remaining: 11h 16m 15s
1200:	learn: 0.6801166	test: 0.6436070	best: 0.6441045 (844)	total: 2m 43s	remaining: 11h 16m 20s
1300:	learn: 0.6807963	test: 0.64

In [41]:
# Save the validation and test probabilities of the most recently fitted `model`
# again, this time tagging the file names with the accuracy rounded to 5 decimals.
# Relies on `model`, `X_validation` and `test_x` left over from the training loop.
best_accuracy_5dp = round(model.best_score_['validation']['Accuracy'],5)
pred_val = model.predict_proba(X_validation)
np.save("./out/proba_val_{}.npy".format(best_accuracy_5dp), pred_val)
pred_test = model.predict_proba(test_x)
np.save("./out/proba_test_{}.npy".format(best_accuracy_5dp), pred_test)