## Set the path of the top-level project folder below
Folder name: SUBMISSION MODEL

In [1]:
# Top-level project folder. The other directories are built from it by plain
# string concatenation, so the value must keep its trailing '/'.
dir_ = 'C:/Users/jekim/OneDrive/바탕 화면/m5-forecasting-accuracy/' # the only path that has to be edited

#### Set the other directories (derived from the top-level folder)

In [2]:
# Sub-folders of the project, all derived from the top-level folder `dir_`.
# Every path keeps its trailing '/' so file names can be appended directly.
raw_data_dir = dir_ + '2. data/'
processed_data_dir = raw_data_dir + 'processed/'
log_dir, model_dir = dir_ + '4. logs/', dir_ + '5. models/'

In [3]:
####################################################################################
####################### 1-1. recursive model by item cluster #######################
####################################################################################

In [4]:
# Run configuration.
#   ver : label of this run (not referenced in the visible part of the file).
#   KKK : number of 28-day blocks cut off the end of the training range;
#         END_TRAIN is later computed as 1941 - 28*KKK.
ver = 'priv'
KKK = 0

# Identifiers of the ten stores, grouped by state prefix.
STORES_IDS = [
    'CA_1', 'CA_2', 'CA_3', 'CA_4',
    'TX_1', 'TX_2', 'TX_3',
    'WI_1', 'WI_2', 'WI_3',
]

# Identifiers of the 14 item clusters (0..13); one model is trained per cluster.
CLUSTERS_IDS = list(range(14))

In [5]:
import numpy as np
import pandas as pd
import os, sys, gc, time, warnings, pickle, psutil, random

from multiprocessing import Pool

warnings.filterwarnings('ignore')

In [6]:
########################### Helpers
#################################################################################
## Seeder
def seed_everything(seed=0):
    """Seed both Python's ``random`` module and NumPy's global generator.

    Args:
        seed: value handed to ``random.seed`` and ``np.random.seed``
            (defaults to 0).
    """
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

    
## Multiprocess Runs
def df_parallelize_run(func, t_split):
    """Apply *func* to every element of *t_split* in worker processes.

    The per-element results are concatenated column-wise (``axis=1``) into a
    single DataFrame.

    Args:
        func: callable taking one element of *t_split* and returning an
            object ``pd.concat`` accepts. It is sent to worker processes by
            ``multiprocessing.Pool``.
        t_split: sized sequence of work items.

    Returns:
        The column-wise concatenation of all results, or an empty DataFrame
        when *t_split* is empty.
    """
    # Nothing to do: Pool(0) and pd.concat([]) would both raise ValueError.
    if len(t_split) == 0:
        return pd.DataFrame()

    # Never start more workers than there are work items.
    num_cores = int(min(N_CORES, len(t_split)))
    pool = Pool(num_cores)
    try:
        results = pool.map(func, t_split)
    finally:
        # Release the worker processes even when a task raised.
        pool.close()
        pool.join()
    return pd.concat(results, axis=1)

In [7]:
########################### Helper to load data by cluster ID
#################################################################################
# Read data
def get_data_by_cluster(cluster):
    """Assemble the feature grid for one item cluster.

    Reads the three grid-part pickles (BASE, PRICE, CALENDAR), keeps the rows
    with ``d >= START_TRAIN`` whose ``tskm_10`` value equals *cluster*, then
    appends the selected mean-encoding columns and the stored lag columns for
    those same rows.

    Args:
        cluster: value compared against the ``tskm_10`` column.

    Returns:
        ``(df, features)`` where ``df`` holds ``id``, ``d``, TARGET and the
        feature columns on a fresh 0..n-1 index, and ``features`` lists the
        column names that are not in ``remove_features``.
    """
    # Read and concatenate the basic features; the first two columns of the
    # price and calendar parts are skipped.
    # NOTE(review): presumably those columns repeat keys already in BASE - confirm.
    grid_parts = [
        pd.read_pickle(BASE),
        pd.read_pickle(PRICE).iloc[:, 2:],
        pd.read_pickle(CALENDAR).iloc[:, 2:],
    ]
    df = pd.concat(grid_parts, axis=1)
    del grid_parts

    # Keep only the training period and the requested cluster.
    wanted_rows = (df['d'] >= START_TRAIN) & (df['tskm_10'] == cluster)
    df = df[wanted_rows]

    # Mean encodings, restricted to the rows that survived the filter.
    mean_enc = pd.read_pickle(MEAN_ENC)[mean_features]
    mean_enc = mean_enc.loc[mean_enc.index.isin(df.index)]

    # Stored lag features; the first three columns are skipped.
    lag_feats = pd.read_pickle(LAGS).iloc[:, 3:]
    lag_feats = lag_feats.loc[lag_feats.index.isin(df.index)]

    df = pd.concat([df, mean_enc], axis=1)
    del mean_enc

    df = pd.concat([df, lag_feats], axis=1)
    del lag_feats

    # Everything that is not an identifier / date / target column is a feature.
    features = [name for name in df.columns if name not in remove_features]
    df = df[['id', 'd', TARGET] + features].reset_index(drop=True)

    return df, features

# Recombine Test set after training
def get_base_test():
    """Recombine the per-cluster test grids into one DataFrame.

    For every id in CLUSTERS_IDS the pickle
    ``processed_data_dir + 'test_item_cluster14_<id>.pkl'`` is loaded and
    tagged with a ``cluster_id`` column. The frames are stacked in
    CLUSTERS_IDS order on a fresh 0..n-1 index.

    Returns:
        The stacked DataFrame, or an empty DataFrame when CLUSTERS_IDS is
        empty.
    """
    cluster_frames = []
    for cluster_id in CLUSTERS_IDS:
        temp_df = pd.read_pickle(processed_data_dir+'test_item_cluster14_'+str(cluster_id)+'.pkl')
        temp_df['cluster_id'] = cluster_id
        cluster_frames.append(temp_df)

    # pd.concat raises on an empty list, so handle "no clusters" explicitly.
    if not cluster_frames:
        return pd.DataFrame()

    # Concatenate once: growing the frame inside the loop re-copies all
    # previously loaded rows on every iteration.
    return pd.concat(cluster_frames, ignore_index=True)


########################### Helper to make dynamic rolling lags
#################################################################################
def make_lag(LAG_DAY):
    """Build one per-id lag column of TARGET from the global ``base_test``.

    Args:
        LAG_DAY: number of rows to shift TARGET by within each ``id`` group.

    Returns:
        A one-column DataFrame named ``'sales_lag_<LAG_DAY>'`` (float16),
        indexed like ``base_test``.
    """
    col_name = 'sales_lag_'+str(LAG_DAY)
    # Compute the column directly instead of assigning it into a column
    # subset of base_test (a chained assignment pandas warns about); the
    # built-in grouped shift replaces the per-group Python lambda.
    lagged = base_test.groupby(['id'])[TARGET].shift(LAG_DAY).astype(np.float16)
    return lagged.to_frame(col_name)


def make_lag_roll(LAG_DAY):
    """Build one per-id shifted rolling-mean column of TARGET from ``base_test``.

    Args:
        LAG_DAY: indexable pair ``[shift_day, roll_wind]``. TARGET is shifted
            by ``shift_day`` rows within each ``id`` group, then averaged over
            a rolling window of ``roll_wind`` rows.

    Returns:
        A one-column DataFrame named
        ``'rolling_mean_tmp_<shift_day>_<roll_wind>'``, indexed like
        ``base_test``.
    """
    shift_day = LAG_DAY[0]
    roll_wind = LAG_DAY[1]
    col_name = 'rolling_mean_tmp_'+str(shift_day)+'_'+str(roll_wind)
    # Shift and roll inside the same per-id transform so the window never
    # spans two different ids. The column is built directly rather than
    # assigned into a column subset of base_test (chained assignment).
    rolled = base_test.groupby(['id'])[TARGET].transform(
        lambda x: x.shift(shift_day).rolling(roll_wind).mean())
    return rolled.to_frame(col_name)

In [8]:
########################### Model params
#################################################################################
import lightgbm as lgb
# LightGBM settings shared by every per-cluster model.
# A 'seed' entry is added to this dict further down in the notebook.
lgb_params = {
    # Objective and reported metric.
    'boosting_type': 'gbdt',
    'objective': 'tweedie',
    'tweedie_variance_power': 1.1,
    'metric': 'rmse',
    # Row sub-sampling.
    'subsample': 0.5,
    'subsample_freq': 1,
    # Tree shape and learning schedule.
    'learning_rate': 0.015,
    'num_leaves': 2047,           # 2**11 - 1
    'min_data_in_leaf': 4095,     # 2**12 - 1
    'feature_fraction': 0.5,
    'max_bin': 100,
    'n_estimators': 3000,
    'boost_from_average': False,
    'verbose': -1,
}


In [9]:
########################### Vars
#################################################################################
# Version number embedded in the saved model file names.
VER = 1

# One seed for Python's `random`, NumPy and LightGBM.
SEED = 42
lgb_params['seed'] = SEED
seed_everything(SEED)

# CPU count reported by psutil; caps the worker pool in df_parallelize_run.
N_CORES = psutil.cpu_count()


#LIMITS and const
TARGET = 'sales'               # column the models are trained to predict
START_TRAIN = 0                # rows with d below this are dropped on load
P_HORIZON = 28                 # width (in d units) of the validation and forecast windows
END_TRAIN = 1941 - 28*KKK      # last d value that belongs to the training range
USE_AUX = False                # not referenced in the visible part of this file

# Columns that must never be used as model features (keys, dates, target).
remove_features = [
    'id', 'state_id', 'store_id', 'cat_id', 'dept_id', 'item_id',
    'date', 'wm_yr_wk', 'd', TARGET,
]

# Mean-encoding columns to load; the cat/dept encodings are switched off.
mean_features = [
    #'enc_cat_id_mean','enc_cat_id_std',
    #'enc_dept_id_mean','enc_dept_id_std',
    'enc_item_id_mean', 'enc_item_id_std',
]

# Input locations: raw data folder and the preprocessed pickles.
ORIGINAL = raw_data_dir
BASE, PRICE, CALENDAR, LAGS, MEAN_ENC = (
    processed_data_dir + file_name
    for file_name in (
        'grid_part_1.pkl',
        'grid_part_2.pkl',
        'grid_part_3.pkl',
        'lags_df_28.pkl',
        'mean_encoding_df.pkl',
    )
)


#SPLITS for lags creation
SHIFT_DAY = 28
N_LAGS = 15

# Consecutive lag offsets SHIFT_DAY .. SHIFT_DAY + N_LAGS - 1.
LAGS_SPLIT = list(range(SHIFT_DAY, SHIFT_DAY + N_LAGS))

# Every [shift, window] pair used for the rolling-mean features,
# ordered by shift first and window second.
ROLS_SPLIT = [
    [shift, window]
    for shift in (1, 7, 14)
    for window in (7, 14, 30, 60)
]

In [10]:
########################### Train Models
#################################################################################
# Train one LightGBM model per item cluster. For each cluster the loop
#   1. loads the feature grid,
#   2. saves the slice needed later for prediction (target blanked after END_TRAIN),
#   3. trains the model and shows its 25 most-used features,
#   4. pickles the trained model into model_dir.
for cluster_id in CLUSTERS_IDS:
    print('Train cluster', str(cluster_id))

    grid_df, features_columns = get_data_by_cluster(cluster_id)

    # train: every row up to END_TRAIN.
    # valid: the last P_HORIZON days of that same range, so it overlaps the
    #        training rows and only monitors the fit.
    # preds: the last 100 training days plus the P_HORIZON days after END_TRAIN.
    train_mask = grid_df['d']<=END_TRAIN
    valid_mask = train_mask&(grid_df['d']>(END_TRAIN-P_HORIZON))
    preds_mask = (grid_df['d']>(END_TRAIN-100)) & (grid_df['d'] <= END_TRAIN+P_HORIZON)

    # Select rows and columns in one step instead of filtering the rows twice.
    train_data = lgb.Dataset(grid_df.loc[train_mask, features_columns],
                             label=grid_df.loc[train_mask, TARGET])

    valid_data = lgb.Dataset(grid_df.loc[valid_mask, features_columns],
                             label=grid_df.loc[valid_mask, TARGET])

    # Save the prediction slice without the '_tmp_' columns.
    keep_cols = [col for col in list(grid_df) if '_tmp_' not in col]
    grid_df = grid_df.loc[preds_mask, keep_cols].reset_index(drop=True)

    # Blank the target after END_TRAIN so no future value is stored with
    # the test grid.
    grid_df.loc[grid_df['d'] > END_TRAIN, TARGET] = np.nan

    grid_df.to_pickle(processed_data_dir+'test_item_cluster14_'+str(cluster_id)+'.pkl')
    del grid_df

    # Re-seed before every fit so each cluster starts from the same RNG state.
    seed_everything(SEED)
    # NOTE(review): `verbose_eval` was removed in lightgbm 4.0 - confirm the
    # installed version before upgrading (replacement: lgb.log_evaluation).
    estimator = lgb.train(lgb_params,
                          train_data,
                          valid_sets = [valid_data],
                          verbose_eval = 100,
                          )

    display(pd.DataFrame({'name':estimator.feature_name(),
                          'imp':estimator.feature_importance()}).sort_values('imp',ascending=False).head(25))

    # NOTE(review): there is no separator between 'cluster14' and the cluster
    # id in this file name; kept as is because the loading code must match it.
    model_name = model_dir+'lgb_model_item_cluster14'+str(cluster_id)+'_v'+str(VER)+'.bin'
    # Use a context manager so the file handle is closed (and flushed)
    # right after the model is written.
    with open(model_name, 'wb') as model_file:
        pickle.dump(estimator, model_file)

    del train_data, valid_data, estimator
    gc.collect()

    # Feature list of the last trained cluster, kept as a module-level name.
    MODEL_FEATURES = features_columns

Train cluster_ 0
[100]	valid_0's rmse: 1.20587
[200]	valid_0's rmse: 1.1912
[300]	valid_0's rmse: 1.18688
[400]	valid_0's rmse: 1.18412
[500]	valid_0's rmse: 1.18196
[600]	valid_0's rmse: 1.18014
[700]	valid_0's rmse: 1.17843
[800]	valid_0's rmse: 1.17689
[900]	valid_0's rmse: 1.17542
[1000]	valid_0's rmse: 1.17402
[1100]	valid_0's rmse: 1.17266
[1200]	valid_0's rmse: 1.17139
[1300]	valid_0's rmse: 1.17009
[1400]	valid_0's rmse: 1.16879
[1500]	valid_0's rmse: 1.16757
[1600]	valid_0's rmse: 1.16643
[1700]	valid_0's rmse: 1.16525
[1800]	valid_0's rmse: 1.16414
[1900]	valid_0's rmse: 1.16294
[2000]	valid_0's rmse: 1.16183
[2100]	valid_0's rmse: 1.16065
[2200]	valid_0's rmse: 1.15962
[2300]	valid_0's rmse: 1.15853
[2400]	valid_0's rmse: 1.15747
[2500]	valid_0's rmse: 1.15633
[2600]	valid_0's rmse: 1.15522
[2700]	valid_0's rmse: 1.15412
[2800]	valid_0's rmse: 1.15304
[2900]	valid_0's rmse: 1.15202
[3000]	valid_0's rmse: 1.15093


Unnamed: 0,name,imp
9,item_nunique,30809
53,rolling_std_180,29798
52,rolling_mean_180,29578
28,enc_item_id_std,26617
1,release,26531
27,enc_item_id_mean,26369
21,tm_w,26044
51,rolling_std_60,25004
20,tm_d,24228
50,rolling_mean_60,21886


Train cluster_ 1
[100]	valid_0's rmse: 6.63439
[200]	valid_0's rmse: 5.84097
[300]	valid_0's rmse: 5.81717
[400]	valid_0's rmse: 5.8012
[500]	valid_0's rmse: 5.78451
[600]	valid_0's rmse: 5.76553
[700]	valid_0's rmse: 5.75211
[800]	valid_0's rmse: 5.73903
[900]	valid_0's rmse: 5.73166
[1000]	valid_0's rmse: 5.72036
[1100]	valid_0's rmse: 5.70604
[1200]	valid_0's rmse: 5.70029
[1300]	valid_0's rmse: 5.69106
[1400]	valid_0's rmse: 5.68456
[1500]	valid_0's rmse: 5.67663
[1600]	valid_0's rmse: 5.66984
[1700]	valid_0's rmse: 5.66362
[1800]	valid_0's rmse: 5.65997
[1900]	valid_0's rmse: 5.65516
[2000]	valid_0's rmse: 5.6499
[2100]	valid_0's rmse: 5.64529
[2200]	valid_0's rmse: 5.64024
[2300]	valid_0's rmse: 5.63607
[2400]	valid_0's rmse: 5.63248
[2500]	valid_0's rmse: 5.62622
[2600]	valid_0's rmse: 5.62035
[2700]	valid_0's rmse: 5.61602
[2800]	valid_0's rmse: 5.61149
[2900]	valid_0's rmse: 5.60918
[3000]	valid_0's rmse: 5.60457


Unnamed: 0,name,imp
54,rolling_mean_tmp_1_7,1858
58,rolling_mean_tmp_7_7,1347
21,tm_w,1329
20,tm_d,1248
9,item_nunique,1184
25,tm_dw,955
52,rolling_mean_180,784
51,rolling_std_60,771
53,rolling_std_180,764
55,rolling_mean_tmp_1_14,757


Train cluster_ 2
[100]	valid_0's rmse: 11.4091
[200]	valid_0's rmse: 8.16169
[300]	valid_0's rmse: 7.98256
[400]	valid_0's rmse: 7.99111
[500]	valid_0's rmse: 8.0086
[600]	valid_0's rmse: 8.01278
[700]	valid_0's rmse: 8.02045
[800]	valid_0's rmse: 8.02724
[900]	valid_0's rmse: 8.03126
[1000]	valid_0's rmse: 8.03512
[1100]	valid_0's rmse: 8.03282
[1200]	valid_0's rmse: 8.03613
[1300]	valid_0's rmse: 8.03594
[1400]	valid_0's rmse: 8.03428
[1500]	valid_0's rmse: 8.03261
[1600]	valid_0's rmse: 8.03163
[1700]	valid_0's rmse: 8.03012
[1800]	valid_0's rmse: 8.02625
[1900]	valid_0's rmse: 8.02542
[2000]	valid_0's rmse: 8.02202
[2100]	valid_0's rmse: 8.02514
[2200]	valid_0's rmse: 8.02272
[2300]	valid_0's rmse: 8.02019
[2400]	valid_0's rmse: 8.01531
[2500]	valid_0's rmse: 8.01585
[2600]	valid_0's rmse: 8.01552
[2700]	valid_0's rmse: 8.0084
[2800]	valid_0's rmse: 8.0091
[2900]	valid_0's rmse: 8.00721
[3000]	valid_0's rmse: 8.00491


Unnamed: 0,name,imp
21,tm_w,812
25,tm_dw,796
20,tm_d,719
54,rolling_mean_tmp_1_7,710
11,price_momentum_m,673
52,rolling_mean_180,575
22,tm_m,533
53,rolling_std_180,516
29,sales_lag_28,478
9,item_nunique,383


Train cluster_ 3
[100]	valid_0's rmse: 9.65545
[200]	valid_0's rmse: 8.06413
[300]	valid_0's rmse: 7.90401
[400]	valid_0's rmse: 7.85147
[500]	valid_0's rmse: 7.81536
[600]	valid_0's rmse: 7.78861
[700]	valid_0's rmse: 7.77229
[800]	valid_0's rmse: 7.75845
[900]	valid_0's rmse: 7.74557
[1000]	valid_0's rmse: 7.73347
[1100]	valid_0's rmse: 7.7196
[1200]	valid_0's rmse: 7.70986
[1300]	valid_0's rmse: 7.69881
[1400]	valid_0's rmse: 7.69243
[1500]	valid_0's rmse: 7.68598
[1600]	valid_0's rmse: 7.68006
[1700]	valid_0's rmse: 7.67258
[1800]	valid_0's rmse: 7.66575
[1900]	valid_0's rmse: 7.65703
[2000]	valid_0's rmse: 7.64661
[2100]	valid_0's rmse: 7.64058
[2200]	valid_0's rmse: 7.63142
[2300]	valid_0's rmse: 7.62263
[2400]	valid_0's rmse: 7.61872
[2500]	valid_0's rmse: 7.61252
[2600]	valid_0's rmse: 7.60659
[2700]	valid_0's rmse: 7.59543
[2800]	valid_0's rmse: 7.59025
[2900]	valid_0's rmse: 7.58593
[3000]	valid_0's rmse: 7.57675


Unnamed: 0,name,imp
54,rolling_mean_tmp_1_7,4382
21,tm_w,3327
53,rolling_std_180,3264
20,tm_d,3155
9,item_nunique,2974
58,rolling_mean_tmp_7_7,2719
25,tm_dw,2549
51,rolling_std_60,2539
5,price_std,2428
6,price_mean,2303


Train cluster_ 4
[100]	valid_0's rmse: 2.11757
[200]	valid_0's rmse: 2.02547
[300]	valid_0's rmse: 2.01071
[400]	valid_0's rmse: 2.00426
[500]	valid_0's rmse: 1.99935
[600]	valid_0's rmse: 1.9957
[700]	valid_0's rmse: 1.99245
[800]	valid_0's rmse: 1.98923
[900]	valid_0's rmse: 1.98624
[1000]	valid_0's rmse: 1.98347
[1100]	valid_0's rmse: 1.98092
[1200]	valid_0's rmse: 1.97839
[1300]	valid_0's rmse: 1.97587
[1400]	valid_0's rmse: 1.97333
[1500]	valid_0's rmse: 1.9709
[1600]	valid_0's rmse: 1.96861
[1700]	valid_0's rmse: 1.96639
[1800]	valid_0's rmse: 1.96416
[1900]	valid_0's rmse: 1.96188
[2000]	valid_0's rmse: 1.95981
[2100]	valid_0's rmse: 1.95763
[2200]	valid_0's rmse: 1.95534
[2300]	valid_0's rmse: 1.9533
[2400]	valid_0's rmse: 1.95117
[2500]	valid_0's rmse: 1.94905
[2600]	valid_0's rmse: 1.94698
[2700]	valid_0's rmse: 1.94494
[2800]	valid_0's rmse: 1.94301
[2900]	valid_0's rmse: 1.94095
[3000]	valid_0's rmse: 1.93905


Unnamed: 0,name,imp
9,item_nunique,111642
53,rolling_std_180,109380
28,enc_item_id_std,105503
52,rolling_mean_180,103552
27,enc_item_id_mean,99846
5,price_std,95387
21,tm_w,94731
20,tm_d,89630
51,rolling_std_60,87588
4,price_min,77436


Train cluster_ 5
[100]	valid_0's rmse: 19.2199
[200]	valid_0's rmse: 13.226
[300]	valid_0's rmse: 13.1151
[400]	valid_0's rmse: 13.1619
[500]	valid_0's rmse: 13.1516
[600]	valid_0's rmse: 13.1239
[700]	valid_0's rmse: 13.0903
[800]	valid_0's rmse: 13.0562
[900]	valid_0's rmse: 13.0329
[1000]	valid_0's rmse: 13.0037
[1100]	valid_0's rmse: 12.9815
[1200]	valid_0's rmse: 12.9541
[1300]	valid_0's rmse: 12.9491
[1400]	valid_0's rmse: 12.9122
[1500]	valid_0's rmse: 12.9031
[1600]	valid_0's rmse: 12.881
[1700]	valid_0's rmse: 12.8604
[1800]	valid_0's rmse: 12.8579
[1900]	valid_0's rmse: 12.82
[2000]	valid_0's rmse: 12.8001
[2100]	valid_0's rmse: 12.7856
[2200]	valid_0's rmse: 12.7588
[2300]	valid_0's rmse: 12.7367
[2400]	valid_0's rmse: 12.7254
[2500]	valid_0's rmse: 12.714
[2600]	valid_0's rmse: 12.6871
[2700]	valid_0's rmse: 12.6641
[2800]	valid_0's rmse: 12.6429
[2900]	valid_0's rmse: 12.6378
[3000]	valid_0's rmse: 12.6197


Unnamed: 0,name,imp
9,item_nunique,648
46,rolling_mean_14,303
25,tm_dw,197
53,rolling_std_180,187
44,rolling_mean_7,156
52,rolling_mean_180,152
54,rolling_mean_tmp_1_7,118
31,sales_lag_30,107
33,sales_lag_32,97
40,sales_lag_39,97


Train cluster_ 6
[100]	valid_0's rmse: 3.94895
[200]	valid_0's rmse: 3.65255
[300]	valid_0's rmse: 3.61042
[400]	valid_0's rmse: 3.59335
[500]	valid_0's rmse: 3.58201
[600]	valid_0's rmse: 3.57349
[700]	valid_0's rmse: 3.56701
[800]	valid_0's rmse: 3.56049
[900]	valid_0's rmse: 3.55469
[1000]	valid_0's rmse: 3.54927
[1100]	valid_0's rmse: 3.54459
[1200]	valid_0's rmse: 3.54003
[1300]	valid_0's rmse: 3.53487
[1400]	valid_0's rmse: 3.53003
[1500]	valid_0's rmse: 3.52578
[1600]	valid_0's rmse: 3.52135
[1700]	valid_0's rmse: 3.51733
[1800]	valid_0's rmse: 3.51331
[1900]	valid_0's rmse: 3.50944
[2000]	valid_0's rmse: 3.50597
[2100]	valid_0's rmse: 3.50174
[2200]	valid_0's rmse: 3.4981
[2300]	valid_0's rmse: 3.49477
[2400]	valid_0's rmse: 3.49062
[2500]	valid_0's rmse: 3.4868
[2600]	valid_0's rmse: 3.48298
[2700]	valid_0's rmse: 3.4796
[2800]	valid_0's rmse: 3.47592
[2900]	valid_0's rmse: 3.47264
[3000]	valid_0's rmse: 3.46913


Unnamed: 0,name,imp
53,rolling_std_180,34551
54,rolling_mean_tmp_1_7,31954
9,item_nunique,30902
28,enc_item_id_std,30384
52,rolling_mean_180,30248
21,tm_w,30079
20,tm_d,28715
51,rolling_std_60,27683
5,price_std,27006
27,enc_item_id_mean,25708


Train cluster_ 7
[100]	valid_0's rmse: 32.4492
[200]	valid_0's rmse: 25.95
[300]	valid_0's rmse: 25.5281
[400]	valid_0's rmse: 25.3621
[500]	valid_0's rmse: 25.3091
[600]	valid_0's rmse: 25.2612
[700]	valid_0's rmse: 25.1956
[800]	valid_0's rmse: 25.1628
[900]	valid_0's rmse: 25.0608
[1000]	valid_0's rmse: 25.0669
[1100]	valid_0's rmse: 25.0403
[1200]	valid_0's rmse: 24.9867
[1300]	valid_0's rmse: 24.9779
[1400]	valid_0's rmse: 24.8307
[1500]	valid_0's rmse: 24.7657
[1600]	valid_0's rmse: 24.7235
[1700]	valid_0's rmse: 24.7075
[1800]	valid_0's rmse: 24.6956
[1900]	valid_0's rmse: 24.6339
[2000]	valid_0's rmse: 24.6024
[2100]	valid_0's rmse: 24.5107
[2200]	valid_0's rmse: 24.5153
[2300]	valid_0's rmse: 24.4343
[2400]	valid_0's rmse: 24.333
[2500]	valid_0's rmse: 24.3269
[2600]	valid_0's rmse: 24.2787
[2700]	valid_0's rmse: 24.2449
[2800]	valid_0's rmse: 24.2487
[2900]	valid_0's rmse: 24.1618
[3000]	valid_0's rmse: 24.199


Unnamed: 0,name,imp
52,rolling_mean_180,843
53,rolling_std_180,783
54,rolling_mean_tmp_1_7,178
46,rolling_mean_14,150
25,tm_dw,124
55,rolling_mean_tmp_1_14,116
5,price_std,95
44,rolling_mean_7,93
51,rolling_std_60,69
30,sales_lag_29,61


Train cluster_ 8
[100]	valid_0's rmse: 7.00383
[200]	valid_0's rmse: 6.51695
[300]	valid_0's rmse: 6.4587
[400]	valid_0's rmse: 6.436
[500]	valid_0's rmse: 6.42299
[600]	valid_0's rmse: 6.41532
[700]	valid_0's rmse: 6.40579
[800]	valid_0's rmse: 6.39831
[900]	valid_0's rmse: 6.38973
[1000]	valid_0's rmse: 6.38136
[1100]	valid_0's rmse: 6.37408
[1200]	valid_0's rmse: 6.36846
[1300]	valid_0's rmse: 6.36184
[1400]	valid_0's rmse: 6.35503
[1500]	valid_0's rmse: 6.34804
[1600]	valid_0's rmse: 6.34212
[1700]	valid_0's rmse: 6.33559
[1800]	valid_0's rmse: 6.32871
[1900]	valid_0's rmse: 6.323
[2000]	valid_0's rmse: 6.31644
[2100]	valid_0's rmse: 6.31146
[2200]	valid_0's rmse: 6.30712
[2300]	valid_0's rmse: 6.29841
[2400]	valid_0's rmse: 6.29392
[2500]	valid_0's rmse: 6.28951
[2600]	valid_0's rmse: 6.28257
[2700]	valid_0's rmse: 6.277
[2800]	valid_0's rmse: 6.27142
[2900]	valid_0's rmse: 6.26519
[3000]	valid_0's rmse: 6.25885


Unnamed: 0,name,imp
54,rolling_mean_tmp_1_7,8622
53,rolling_std_180,7681
51,rolling_std_60,6120
21,tm_w,5825
20,tm_d,5757
52,rolling_mean_180,5633
58,rolling_mean_tmp_7_7,5605
9,item_nunique,5512
28,enc_item_id_std,5091
49,rolling_std_30,5084


Train cluster_ 9
[100]	valid_0's rmse: 6.88222
[200]	valid_0's rmse: 6.11499
[300]	valid_0's rmse: 5.99407
[400]	valid_0's rmse: 5.94578
[500]	valid_0's rmse: 5.91732
[600]	valid_0's rmse: 5.89554
[700]	valid_0's rmse: 5.87493
[800]	valid_0's rmse: 5.85623
[900]	valid_0's rmse: 5.83987
[1000]	valid_0's rmse: 5.82565
[1100]	valid_0's rmse: 5.81395
[1200]	valid_0's rmse: 5.80028
[1300]	valid_0's rmse: 5.78816
[1400]	valid_0's rmse: 5.77944
[1500]	valid_0's rmse: 5.76875
[1600]	valid_0's rmse: 5.75894
[1700]	valid_0's rmse: 5.75203
[1800]	valid_0's rmse: 5.74413
[1900]	valid_0's rmse: 5.73698
[2000]	valid_0's rmse: 5.72787
[2100]	valid_0's rmse: 5.72038
[2200]	valid_0's rmse: 5.7126
[2300]	valid_0's rmse: 5.70673
[2400]	valid_0's rmse: 5.69724
[2500]	valid_0's rmse: 5.69025
[2600]	valid_0's rmse: 5.68581
[2700]	valid_0's rmse: 5.679
[2800]	valid_0's rmse: 5.67275
[2900]	valid_0's rmse: 5.66639
[3000]	valid_0's rmse: 5.65969


Unnamed: 0,name,imp
54,rolling_mean_tmp_1_7,6474
21,tm_w,6207
53,rolling_std_180,5318
20,tm_d,5145
58,rolling_mean_tmp_7_7,4866
51,rolling_std_60,4233
9,item_nunique,4117
52,rolling_mean_180,4077
55,rolling_mean_tmp_1_14,3907
5,price_std,3665


Train cluster_ 10
[100]	valid_0's rmse: 0.911823
[200]	valid_0's rmse: 0.901847
[300]	valid_0's rmse: 0.8997
[400]	valid_0's rmse: 0.898054
[500]	valid_0's rmse: 0.89656
[600]	valid_0's rmse: 0.895196
[700]	valid_0's rmse: 0.893935
[800]	valid_0's rmse: 0.892752
[900]	valid_0's rmse: 0.891601
[1000]	valid_0's rmse: 0.890578
[1100]	valid_0's rmse: 0.889521
[1200]	valid_0's rmse: 0.888518
[1300]	valid_0's rmse: 0.887496
[1400]	valid_0's rmse: 0.886542
[1500]	valid_0's rmse: 0.885621
[1600]	valid_0's rmse: 0.884665
[1700]	valid_0's rmse: 0.88375
[1800]	valid_0's rmse: 0.882843
[1900]	valid_0's rmse: 0.881939
[2000]	valid_0's rmse: 0.881074
[2100]	valid_0's rmse: 0.880167
[2200]	valid_0's rmse: 0.879294
[2300]	valid_0's rmse: 0.878433
[2400]	valid_0's rmse: 0.877549
[2500]	valid_0's rmse: 0.876645
[2600]	valid_0's rmse: 0.875783
[2700]	valid_0's rmse: 0.874973
[2800]	valid_0's rmse: 0.874137
[2900]	valid_0's rmse: 0.87325
[3000]	valid_0's rmse: 0.8724


Unnamed: 0,name,imp
53,rolling_std_180,101617
52,rolling_mean_180,99475
9,item_nunique,98675
28,enc_item_id_std,94109
27,enc_item_id_mean,91796
5,price_std,88166
21,tm_w,86653
51,rolling_std_60,81864
20,tm_d,79807
4,price_min,73885


Train cluster_ 11
[100]	valid_0's rmse: 0.640753
[200]	valid_0's rmse: 0.624797
[300]	valid_0's rmse: 0.623618
[400]	valid_0's rmse: 0.622767
[500]	valid_0's rmse: 0.621888
[600]	valid_0's rmse: 0.621065
[700]	valid_0's rmse: 0.620281
[800]	valid_0's rmse: 0.619539
[900]	valid_0's rmse: 0.61882
[1000]	valid_0's rmse: 0.618124
[1100]	valid_0's rmse: 0.617455
[1200]	valid_0's rmse: 0.616776
[1300]	valid_0's rmse: 0.616114
[1400]	valid_0's rmse: 0.615459
[1500]	valid_0's rmse: 0.614815
[1600]	valid_0's rmse: 0.61416
[1700]	valid_0's rmse: 0.61354
[1800]	valid_0's rmse: 0.612901
[1900]	valid_0's rmse: 0.612256
[2000]	valid_0's rmse: 0.611634
[2100]	valid_0's rmse: 0.611012
[2200]	valid_0's rmse: 0.610386
[2300]	valid_0's rmse: 0.609779
[2400]	valid_0's rmse: 0.609149
[2500]	valid_0's rmse: 0.60855
[2600]	valid_0's rmse: 0.607919
[2700]	valid_0's rmse: 0.607316
[2800]	valid_0's rmse: 0.606697
[2900]	valid_0's rmse: 0.606087
[3000]	valid_0's rmse: 0.605488


Unnamed: 0,name,imp
53,rolling_std_180,151932
52,rolling_mean_180,145513
9,item_nunique,145161
28,enc_item_id_std,135153
27,enc_item_id_mean,133405
21,tm_w,133059
1,release,127549
51,rolling_std_60,119118
20,tm_d,118282
5,price_std,104890


Train cluster_ 12
[100]	valid_0's rmse: 1.5694
[200]	valid_0's rmse: 1.53042
[300]	valid_0's rmse: 1.51816
[400]	valid_0's rmse: 1.51196
[500]	valid_0's rmse: 1.50776
[600]	valid_0's rmse: 1.50407
[700]	valid_0's rmse: 1.50095
[800]	valid_0's rmse: 1.4981
[900]	valid_0's rmse: 1.49571
[1000]	valid_0's rmse: 1.49352
[1100]	valid_0's rmse: 1.49139
[1200]	valid_0's rmse: 1.48948
[1300]	valid_0's rmse: 1.4876
[1400]	valid_0's rmse: 1.48578
[1500]	valid_0's rmse: 1.48401
[1600]	valid_0's rmse: 1.4824
[1700]	valid_0's rmse: 1.48081
[1800]	valid_0's rmse: 1.47923
[1900]	valid_0's rmse: 1.47779
[2000]	valid_0's rmse: 1.4763
[2100]	valid_0's rmse: 1.47492
[2200]	valid_0's rmse: 1.47353
[2300]	valid_0's rmse: 1.47198
[2400]	valid_0's rmse: 1.47062
[2500]	valid_0's rmse: 1.46921
[2600]	valid_0's rmse: 1.46785
[2700]	valid_0's rmse: 1.46638
[2800]	valid_0's rmse: 1.46507
[2900]	valid_0's rmse: 1.46375
[3000]	valid_0's rmse: 1.46252


Unnamed: 0,name,imp
9,item_nunique,19098
53,rolling_std_180,19020
52,rolling_mean_180,18325
28,enc_item_id_std,17223
1,release,16562
21,tm_w,16488
27,enc_item_id_mean,15335
20,tm_d,15228
51,rolling_std_60,15227
54,rolling_mean_tmp_1_7,14397


Train cluster_ 13
[100]	valid_0's rmse: 1.23263
[200]	valid_0's rmse: 1.22213
[300]	valid_0's rmse: 1.21906
[400]	valid_0's rmse: 1.21663
[500]	valid_0's rmse: 1.21445
[600]	valid_0's rmse: 1.21258
[700]	valid_0's rmse: 1.21087
[800]	valid_0's rmse: 1.20936
[900]	valid_0's rmse: 1.20787
[1000]	valid_0's rmse: 1.20644
[1100]	valid_0's rmse: 1.20508
[1200]	valid_0's rmse: 1.2037
[1300]	valid_0's rmse: 1.20233
[1400]	valid_0's rmse: 1.20103
[1500]	valid_0's rmse: 1.19978
[1600]	valid_0's rmse: 1.19857
[1700]	valid_0's rmse: 1.19732
[1800]	valid_0's rmse: 1.19611
[1900]	valid_0's rmse: 1.19486
[2000]	valid_0's rmse: 1.19363
[2100]	valid_0's rmse: 1.19241
[2200]	valid_0's rmse: 1.19121
[2300]	valid_0's rmse: 1.19002
[2400]	valid_0's rmse: 1.18883
[2500]	valid_0's rmse: 1.1877
[2600]	valid_0's rmse: 1.18653
[2700]	valid_0's rmse: 1.1853
[2800]	valid_0's rmse: 1.18414
[2900]	valid_0's rmse: 1.18297
[3000]	valid_0's rmse: 1.18189


Unnamed: 0,name,imp
53,rolling_std_180,69170
9,item_nunique,66761
52,rolling_mean_180,66323
28,enc_item_id_std,62673
27,enc_item_id_mean,60254
5,price_std,60098
21,tm_w,58817
20,tm_d,55703
51,rolling_std_60,54347
11,price_momentum_m,50969
