# Lgbm CV

In [None]:
import pandas as pd 
import lightgbm as lgb
import numpy as np
from sklearn.metrics import mean_squared_error
import gc

In [None]:
with pd.HDFStore('../data/feat/data.h5') as store:
    print(store.keys())
    X_train = store['X_train']
    X_cv = store['X_cv']
    y_train = store['y_train']
    y_cv = store['y_cv']
    X_test = store['X_test']

In [None]:
y_train = y_train.clip(0,20)
y_cv = y_cv.clip(0,20)

In [None]:
params = {
    'bossting_type':'gbdt',
    'objective': 'regression',
    'learning_rate' : 0.1,
    'metric': 'rmse',    
    'max_depth' : 12,
    'num_leaves': 64,
    'feature_fraction': 0.6203,
    'bagging_fraction' : 0.9567,
    'reg_lambda' : 0.3886,
    'gamma' : 0.3962
    
}

In [None]:
lgb_train = lgb.Dataset(X_train,y_train)
lgb_cv = lgb.Dataset(X_cv,y_cv,reference=lgb_train)
gbm = lgb.train(params,lgb_train,valid_sets=lgb_cv,early_stopping_rounds = 5,num_boost_round=150)

In [13]:
# specify your configurations as a dict
# params = {
#     'task': 'train',
#     'boosting_type': 'gbdt',
#     'objective': 'regression',
#     'metric': {'l2', 'rmse'},
#     'num_leaves': 31,
#     'learning_rate': 0.05,
#     'feature_fraction': 0.9,
#     'bagging_fraction': 0.8,
#     'bagging_freq': 5,
#     'verbose': 0
# }

## Novice use of CV

In [10]:
lgb_train = lgb.Dataset(X_train,y_train)
# lgb_cv = lgb.Dataset(X_cv,y_cv,reference=lgb_train)
gbm_cv = lgb.cv(params,lgb_train,early_stopping_rounds=5,verbose_eval=True)
# lgb_cv = lgb.Dataset(X_cv,y_cv,reference=lgb_train)
# lgb.cv(params,lgb_train,nfold=5,early_stopping_rounds=5)

[1]	cv_agg's rmse: 1.03234 + 0.00143198
[2]	cv_agg's rmse: 0.969869 + 0.00217628
[3]	cv_agg's rmse: 0.94568 + 0.00212529
[4]	cv_agg's rmse: 0.932161 + 0.00219261
[5]	cv_agg's rmse: 0.92496 + 0.00248139
[6]	cv_agg's rmse: 0.91999 + 0.00288447
[7]	cv_agg's rmse: 0.91606 + 0.00292988
[8]	cv_agg's rmse: 0.912338 + 0.00339231
[9]	cv_agg's rmse: 0.909737 + 0.00295088
[10]	cv_agg's rmse: 0.907743 + 0.00264986
[11]	cv_agg's rmse: 0.905281 + 0.00271093
[12]	cv_agg's rmse: 0.903528 + 0.00251146
[13]	cv_agg's rmse: 0.900599 + 0.00274028
[14]	cv_agg's rmse: 0.898606 + 0.00281831
[15]	cv_agg's rmse: 0.896049 + 0.00251786
[16]	cv_agg's rmse: 0.894117 + 0.00200885
[17]	cv_agg's rmse: 0.891856 + 0.00192826
[18]	cv_agg's rmse: 0.889589 + 0.00151304
[19]	cv_agg's rmse: 0.886479 + 0.00324279
[20]	cv_agg's rmse: 0.885476 + 0.00329226
[21]	cv_agg's rmse: 0.884688 + 0.00351945
[22]	cv_agg's rmse: 0.884 + 0.0033044
[23]	cv_agg's rmse: 0.882579 + 0.0033812
[24]	cv_agg's rmse: 0.882037 + 0.0031244
[25]	cv_agg'

In [12]:
gc.collect()

0

In [13]:
gbm_cv

{'rmse-mean': [1.0323381721608214,
  0.96986926719416322,
  0.94568036585486515,
  0.93216103030699848,
  0.92496029162308024,
  0.91999034231293031,
  0.91606042745827043,
  0.91233804952695652,
  0.90973680437232818,
  0.90774318265118903,
  0.90528102143619105,
  0.90352834988947561,
  0.90059925149454811,
  0.89860582904161357,
  0.89604894591197615,
  0.89411724656026281,
  0.89185592138487579,
  0.88958898904249417,
  0.88647949168224061,
  0.88547550760346749,
  0.88468781195352442,
  0.88399973334494741,
  0.88257942989701699,
  0.88203680904850668,
  0.88075385169054576,
  0.87974546278586607,
  0.87861323981317929,
  0.87763999667357484,
  0.87685597770751955,
  0.87574756411099808,
  0.87487981170824691,
  0.87450659036740785,
  0.87366356665092071,
  0.87326665483544252,
  0.87218910670782113,
  0.87071539408735588,
  0.86994109757767146,
  0.86955280108706978,
  0.86906921304653584,
  0.86837614740944091,
  0.86772251466890249,
  0.86717486832851731,
  0.86678444189720238,

In [15]:
gbm_cv['rmse-mean'][-1]

0.84225748317093974

In [4]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import mean_squared_error

In [35]:
def lgbm_objective(params):
    ## stolen from :https://github.com/hyperopt/hyperopt/issues/357
    lgb_train = lgb.Dataset(X_train,y_train)
#     lgb_cv = lgb.Dataset(X_cv,y_cv,reference=lgb_train)
    params_set = {
        'bossting_type':'gbdt',
        'objective': 'regression',
        'metric': 'rmse',
        'learning_rate' : 0.2,
        'max_depth' : int(params['max_depth']),
        'num_leaves': int(params['num_leaves']),
        'feature_fraction': '{:.3f}'.format(params['feature_fraction']),
#         'bagging_fraction' : '{:.3f}'.format(params['bagging_fraction']),
        'reg_lambda' : '{:.3f}'.format(params['reg_lambda']),
        'gamma' : '{:.3f}'.format(params['gamma'])        
    }
    cv = lgb.cv(params_set,
                lgb_train,
                early_stopping_rounds=5,
                verbose_eval=True)

    print('params:{}'.format(params))
    rmse_best_mean = cv['rmse-mean'][-1]
#     rmse_best_std = cv['rmse-std'][-1]
    print("rmse_mean_best:{:.5f}".format(rmse_best_mean))
    return {'loss':rmse_best_mean, 'status': STATUS_OK }

In [36]:
lgbm_space = { 
    'max_depth' : hp.quniform("max_depth", 4, 16, 1),
    'num_leaves': hp.quniform('num_leaves', 8, 128, 2),
    'feature_fraction': hp.uniform('feature_fraction', 0.3, 1.0),
#     'bagging_fraction': hp.uniform ('bagging_fraction', 0.7, 1),
    'reg_lambda': hp.uniform('reg_lambda',0,1),
    'gamma' : hp.uniform('gamma', 0.1,0.5),
}

trials = Trials()
lgbm_best = fmin(fn=lgbm_objective,
            space=lgbm_space,
            algo=tpe.suggest,
            trials = trials,
            max_evals=10)

[1]	cv_agg's rmse: 1.13876 + 0.000676979
[2]	cv_agg's rmse: 1.08564 + 0.00115226
[3]	cv_agg's rmse: 1.04814 + 0.00157905
[4]	cv_agg's rmse: 1.01471 + 0.00176372
[5]	cv_agg's rmse: 0.991152 + 0.00189205
[6]	cv_agg's rmse: 0.976679 + 0.00197484
[7]	cv_agg's rmse: 0.963949 + 0.0020115
[8]	cv_agg's rmse: 0.956396 + 0.00203034
[9]	cv_agg's rmse: 0.949293 + 0.00221109
[10]	cv_agg's rmse: 0.942661 + 0.00234467
[11]	cv_agg's rmse: 0.938496 + 0.00258566
[12]	cv_agg's rmse: 0.93519 + 0.00252524
[13]	cv_agg's rmse: 0.932616 + 0.00238383
[14]	cv_agg's rmse: 0.928105 + 0.00221575
[15]	cv_agg's rmse: 0.925668 + 0.0022476
[16]	cv_agg's rmse: 0.922782 + 0.00214559
[17]	cv_agg's rmse: 0.919523 + 0.00216259
[18]	cv_agg's rmse: 0.917129 + 0.00214855
[19]	cv_agg's rmse: 0.914898 + 0.00243131
[20]	cv_agg's rmse: 0.91365 + 0.00228592
[21]	cv_agg's rmse: 0.912266 + 0.00275061
[22]	cv_agg's rmse: 0.911001 + 0.00285008
[23]	cv_agg's rmse: 0.909703 + 0.00291307
[24]	cv_agg's rmse: 0.908728 + 0.00287276
[25]	cv_

[94]	cv_agg's rmse: 0.849143 + 0.00207078
[95]	cv_agg's rmse: 0.849016 + 0.00201142
[96]	cv_agg's rmse: 0.848733 + 0.0022129
[97]	cv_agg's rmse: 0.848509 + 0.00210078
[98]	cv_agg's rmse: 0.847845 + 0.00213655
[99]	cv_agg's rmse: 0.847366 + 0.00229821
[100]	cv_agg's rmse: 0.84709 + 0.00224333
params:{'feature_fraction': 0.8978024800556195, 'gamma': 0.4421742402175145, 'max_depth': 10.0, 'num_leaves': 120.0, 'reg_lambda': 0.2741790624991707}
rmse_mean_best:0.84709
[1]	cv_agg's rmse: 1.14015 + 0.000603661
[2]	cv_agg's rmse: 1.0844 + 0.00101019
[3]	cv_agg's rmse: 1.04511 + 0.00141809
[4]	cv_agg's rmse: 1.02052 + 0.00145112
[5]	cv_agg's rmse: 0.994775 + 0.00170994
[6]	cv_agg's rmse: 0.981066 + 0.00182888
[7]	cv_agg's rmse: 0.963231 + 0.00197232
[8]	cv_agg's rmse: 0.956099 + 0.0021206
[9]	cv_agg's rmse: 0.94569 + 0.00214409
[10]	cv_agg's rmse: 0.941714 + 0.00221676
[11]	cv_agg's rmse: 0.935787 + 0.0022997
[12]	cv_agg's rmse: 0.931413 + 0.00221757
[13]	cv_agg's rmse: 0.928059 + 0.00227857
[14

KeyboardInterrupt: 

In [14]:
lgbm_best['num_leaves'] = int(lgbm_best['num_leaves'])
lgbm_best['max_depth'] = int(lgbm_best['max_depth'])

In [15]:
lgbm_best

{'bagging_fraction': 0.87143992494405,
 'feature_fraction': 0.9515360120106497,
 'gamma': 0.49541466113602695,
 'max_depth': 16,
 'num_leaves': 110,
 'reg_lambda': 0.1649307254568939}

In [16]:
trials.best_trial

{'book_time': datetime.datetime(2018, 2, 13, 7, 11, 32, 9000),
 'exp_key': None,
 'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'idxs': {'bagging_fraction': [95],
   'feature_fraction': [95],
   'gamma': [95],
   'max_depth': [95],
   'num_leaves': [95],
   'reg_lambda': [95]},
  'tid': 95,
  'vals': {'bagging_fraction': [0.87143992494405],
   'feature_fraction': [0.9515360120106497],
   'gamma': [0.49541466113602695],
   'max_depth': [16.0],
   'num_leaves': [110.0],
   'reg_lambda': [0.1649307254568939]},
  'workdir': None},
 'owner': None,
 'refresh_time': datetime.datetime(2018, 2, 13, 7, 14, 18, 477000),
 'result': {'loss': 0.8149392926296526, 'status': 'ok'},
 'spec': None,
 'state': 2,
 'tid': 95,
 'version': 0}

# Time series CV

In [39]:
import numpy as np 

In [4]:
from sklearn.model_selection import TimeSeriesSplit
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4])
tscv = TimeSeriesSplit(n_splits=3)
print(tscv)  

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
#     X_train, X_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]

TimeSeriesSplit(max_train_size=None, n_splits=3)
TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]


In [29]:
temp_df = X_train.head(1000)
tscv = TimeSeriesSplit(n_splits=5)

In [30]:
for train_index, test_index in tscv.split(temp_df,groups=temp_df.date_block_num):
    print('train shape:',temp_df.iloc[train_index].shape)
#     print('test shape:',temp_df.iloc[test_index].shape)

train shape: (170, 54)
train shape: (336, 54)
train shape: (502, 54)
train shape: (668, 54)
train shape: (834, 54)


In [52]:
# feature_name = list(X_train.columns)
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',    
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5
#     'verbose': 0
#     'feature_name' : feature_name
}

In [88]:

# lgb_train = lgb.Dataset(X_train,y_train)
def train_lgbm(X_train,y_train,params,cv_splits=5):
    tscv = TimeSeriesSplit(n_splits = cv_splits)
    lgbm = lgb.LGBMRegressor(**params)
    model_eval = []
    ii = 0
    for train_idx, test_idx in tscv.split(X_train):
        ii += 1
        lgbm.fit(X_train.iloc[train_idx], y_train.iloc[train_idx],                
                 eval_metric='l2',eval_set=[(X_train.iloc[test_idx],y_train.iloc[test_idx])],verbose=False)
        print('{} folds, loss:{:.4f}'.format(ii, lgbm.best_score_['valid_0']['l2']))
        model_eval.append(lgbm.best_score_['valid_0']['l2'])
    loss_mean, loss_std = np.mean(model_eval), np.std(model_eval)
    
    print('mean:{:.4f} + {:.4f}'.format(loss_mean,loss_std))
#     print('====================')
    return model_eval

In [89]:
model_eval = train_lgbm(X_train,y_train,params)

1 folds, loss:0.8325
2 folds, loss:1.0198
3 folds, loss:0.6495
4 folds, loss:0.9993
5 folds, loss:0.6753
mean:0.8353 + 0.1556


In [57]:
model_eval

[0.83252872914602716,
 1.0197854446450194,
 0.64945661679877431,
 0.9993193574787691,
 0.67527629594972116]

# Hyperopt

In [61]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.metrics import mean_squared_error

In [None]:
import pprintint

In [90]:
def lgbm_objective(params):
    ## stolen from :https://github.com/hyperopt/hyperopt/issues/357    
    params_set = {
        'bossting_type':'gbdt',
        'objective': 'regression',
        'metric': 'rmse',
        'learning_rate' : 0.2,         
        'max_depth' : int(params['max_depth']),
        'num_leaves': int(params['num_leaves']),
        'feature_fraction': '{:.3f}'.format(params['feature_fraction']),
#         'bagging_fraction' : '{:.3f}'.format(params['bagging_fraction']),
        'reg_lambda' : '{:.3f}'.format(params['reg_lambda']),
        'gamma' : '{:.3f}'.format(params['gamma'])        
    }
    
    evals = train_lgbm(X_train,y_train,params_set,cv_splits=5) # time series cv l2 loss 
    
    print('params:{}'.format(params))
    print('====================')
    rmse_mean = np.mean(evals)
    std = np.std(evals)    
    return {'loss':rmse_mean, 'status': STATUS_OK }

In [91]:
lgbm_space = { 
    'max_depth' : hp.quniform("max_depth", 4, 16, 1),
    'num_leaves': hp.quniform('num_leaves', 8, 128, 2),
    'feature_fraction': hp.uniform('feature_fraction', 0.3, 1.0),
#     'bagging_fraction': hp.uniform ('bagging_fraction', 0.7, 1),
    'reg_lambda': hp.uniform('reg_lambda',0,1),
    'gamma' : hp.uniform('gamma', 0.1,0.5)
}

trials = Trials()
lgbm_best = fmin(fn=lgbm_objective,
            space=lgbm_space,
            algo=tpe.suggest,
            trials = trials,
            max_evals=100)

1 folds, loss:0.8175
2 folds, loss:1.0248
3 folds, loss:0.6379
4 folds, loss:1.0050
5 folds, loss:0.6633
mean:0.8297 + 0.1633
params:{'feature_fraction': 0.8699990722356254, 'gamma': 0.22151693358752042, 'max_depth': 7.0, 'num_leaves': 48.0, 'reg_lambda': 0.9105037577316176}
1 folds, loss:0.8234
2 folds, loss:1.0235
3 folds, loss:0.6416
4 folds, loss:0.9985
5 folds, loss:0.6655
mean:0.8305 + 0.1603
params:{'feature_fraction': 0.4904258447845349, 'gamma': 0.3400007853551891, 'max_depth': 6.0, 'num_leaves': 110.0, 'reg_lambda': 0.5789469135590799}
1 folds, loss:0.8180
2 folds, loss:1.0155
3 folds, loss:0.6375
4 folds, loss:0.9929
5 folds, loss:0.6607
mean:0.8249 + 0.1591
params:{'feature_fraction': 0.8316086619931005, 'gamma': 0.4010901097955395, 'max_depth': 7.0, 'num_leaves': 102.0, 'reg_lambda': 0.41849126195620734}
1 folds, loss:0.8382
2 folds, loss:1.0169
3 folds, loss:0.6417
4 folds, loss:0.9907
5 folds, loss:0.6608
mean:0.8297 + 0.1581
params:{'feature_fraction': 0.963283379777262

2 folds, loss:1.0303
3 folds, loss:0.6401
4 folds, loss:0.9955
5 folds, loss:0.6703
mean:0.8313 + 0.1607
params:{'feature_fraction': 0.8789156750134186, 'gamma': 0.4576720076877544, 'max_depth': 6.0, 'num_leaves': 58.0, 'reg_lambda': 0.4710828735730814}
1 folds, loss:0.8201
2 folds, loss:1.0212
3 folds, loss:0.6437
4 folds, loss:0.9999
5 folds, loss:0.6651
mean:0.8300 + 0.1596
params:{'feature_fraction': 0.9819168881328044, 'gamma': 0.36299824444784956, 'max_depth': 8.0, 'num_leaves': 42.0, 'reg_lambda': 0.2837953157818128}
1 folds, loss:0.8333
2 folds, loss:1.0214
3 folds, loss:0.6418
4 folds, loss:1.0065
5 folds, loss:0.6642
mean:0.8335 + 0.1616
params:{'feature_fraction': 0.6732018390114148, 'gamma': 0.3967449115682945, 'max_depth': 10.0, 'num_leaves': 72.0, 'reg_lambda': 0.0023809007427819107}
1 folds, loss:0.8237
2 folds, loss:1.0270
3 folds, loss:0.6486
4 folds, loss:1.0057
5 folds, loss:0.6728
mean:0.8356 + 0.1595
params:{'feature_fraction': 0.8059873350339093, 'gamma': 0.482704

2 folds, loss:1.0139
3 folds, loss:0.6442
4 folds, loss:0.9842
5 folds, loss:0.6620
mean:0.8252 + 0.1551
params:{'feature_fraction': 0.5702107711802988, 'gamma': 0.1977217187134886, 'max_depth': 14.0, 'num_leaves': 30.0, 'reg_lambda': 0.02937906663770139}
1 folds, loss:0.8296
2 folds, loss:1.0143
3 folds, loss:0.6444
4 folds, loss:0.9814
5 folds, loss:0.6572
mean:0.8254 + 0.1556
params:{'feature_fraction': 0.5119207507861615, 'gamma': 0.49807953564281005, 'max_depth': 11.0, 'num_leaves': 96.0, 'reg_lambda': 0.1126619327305996}
1 folds, loss:0.8399
2 folds, loss:1.0281
3 folds, loss:0.6458
4 folds, loss:0.9970
5 folds, loss:0.6593
mean:0.8340 + 0.1614
params:{'feature_fraction': 0.305393418797462, 'gamma': 0.14338348125776423, 'max_depth': 14.0, 'num_leaves': 76.0, 'reg_lambda': 0.18276435742637714}
1 folds, loss:0.8136
2 folds, loss:1.0056
3 folds, loss:0.6479
4 folds, loss:0.9798
5 folds, loss:0.6582
mean:0.8210 + 0.1522
params:{'feature_fraction': 0.5018619206441843, 'gamma': 0.10443

2 folds, loss:1.0216
3 folds, loss:0.6468
4 folds, loss:0.9866
5 folds, loss:0.6632
mean:0.8269 + 0.1567
params:{'feature_fraction': 0.5561435644754202, 'gamma': 0.2725608152076332, 'max_depth': 9.0, 'num_leaves': 98.0, 'reg_lambda': 0.1594656456659238}
1 folds, loss:0.8255
2 folds, loss:1.0075
3 folds, loss:0.6462
4 folds, loss:0.9832
5 folds, loss:0.6536
mean:0.8232 + 0.1547
params:{'feature_fraction': 0.6779731555891023, 'gamma': 0.1954428732598678, 'max_depth': 15.0, 'num_leaves': 80.0, 'reg_lambda': 0.5677892623033521}
1 folds, loss:0.8259
2 folds, loss:1.0234
3 folds, loss:0.6391
4 folds, loss:0.9757
5 folds, loss:0.6610
mean:0.8250 + 0.1572
params:{'feature_fraction': 0.708540602187811, 'gamma': 0.29923849927385104, 'max_depth': 12.0, 'num_leaves': 52.0, 'reg_lambda': 0.34523848260998247}
1 folds, loss:0.8293
2 folds, loss:1.0263
3 folds, loss:0.6416
4 folds, loss:0.9937
5 folds, loss:0.6668
mean:0.8315 + 0.1597
params:{'feature_fraction': 0.4565244202752325, 'gamma': 0.38543726

In [92]:
trials.best_trial

{'book_time': datetime.datetime(2018, 2, 13, 12, 51, 25, 302000),
 'exp_key': None,
 'misc': {'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'idxs': {'feature_fraction': [55],
   'gamma': [55],
   'max_depth': [55],
   'num_leaves': [55],
   'reg_lambda': [55]},
  'tid': 55,
  'vals': {'feature_fraction': [0.5768298970544862],
   'gamma': [0.4959732457252676],
   'max_depth': [14.0],
   'num_leaves': [64.0],
   'reg_lambda': [0.041991163528273576]},
  'workdir': None},
 'owner': None,
 'refresh_time': datetime.datetime(2018, 2, 13, 12, 54, 28, 579000),
 'result': {'loss': 0.8195726997605574, 'status': 'ok'},
 'spec': None,
 'state': 2,
 'tid': 55,
 'version': 0}

In [106]:
np.argsort(trials.losses())

array([55, 65, 71, 59, 66, 52, 24, 72, 81, 63, 92, 50, 76, 77, 85, 69, 48,
       88, 73, 82, 67, 74, 70,  2, 97, 86, 56, 57, 64, 35, 21, 42, 12, 62,
       78, 17, 84, 37, 79, 95, 83, 53, 47, 51, 14, 19, 15, 68, 91, 93, 98,
       90, 96, 89, 23, 44, 75, 94, 46, 27, 54, 45, 61, 38,  3, 20,  0, 11,
       29, 40,  1,  7, 10, 28, 87, 33, 18, 13, 26, 60, 41,  9, 30,  5, 58,
       43, 99, 49, 34, 16,  4, 80, 31, 25,  8, 39, 36, 22,  6, 32], dtype=int64)

# Submission

In [94]:
lgbm_best = {
    'feature_fraction': 0.5768298970544862,
    'gamma': 0.4959732457252676,
    'max_depth': 14,
    'num_leaves': 64,
    'reg_lambda': 0.041991163528273576}

In [95]:
defalut_params = {
    'bossting_type':'gbdt',
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate' : 0.1,
    'num_iterations' : 200
}


In [96]:
params_best = {**defalut_params,**lgbm_best}
params_best

{'bossting_type': 'gbdt',
 'feature_fraction': 0.5768298970544862,
 'gamma': 0.4959732457252676,
 'learning_rate': 0.1,
 'max_depth': 14,
 'metric': 'rmse',
 'num_iterations': 200,
 'num_leaves': 64,
 'objective': 'regression',
 'reg_lambda': 0.041991163528273576}

In [97]:
lgb_train = lgb.Dataset(X_train,y_train)
lgb_cv = lgb.Dataset(X_cv,y_cv,reference=lgb_train) # cv : 2015 Oct data
reg = lgb.train(params_best,
                    early_stopping_rounds = 5,
                    train_set = lgb_train,
                    valid_sets=lgb_cv,
                    verbose_eval = True)



[1]	valid_0's rmse: 1.09983
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's rmse: 1.07576
[3]	valid_0's rmse: 1.05453
[4]	valid_0's rmse: 1.03348
[5]	valid_0's rmse: 1.0161
[6]	valid_0's rmse: 1.00196
[7]	valid_0's rmse: 0.98993
[8]	valid_0's rmse: 0.98148
[9]	valid_0's rmse: 0.972278
[10]	valid_0's rmse: 0.967235
[11]	valid_0's rmse: 0.960818
[12]	valid_0's rmse: 0.955609
[13]	valid_0's rmse: 0.952638
[14]	valid_0's rmse: 0.948014
[15]	valid_0's rmse: 0.944631
[16]	valid_0's rmse: 0.943705
[17]	valid_0's rmse: 0.942057
[18]	valid_0's rmse: 0.941555
[19]	valid_0's rmse: 0.939173
[20]	valid_0's rmse: 0.938168
[21]	valid_0's rmse: 0.939607
[22]	valid_0's rmse: 0.938534
[23]	valid_0's rmse: 0.937674
[24]	valid_0's rmse: 0.936796
[25]	valid_0's rmse: 0.937291
[26]	valid_0's rmse: 0.936779
[27]	valid_0's rmse: 0.93563
[28]	valid_0's rmse: 0.936219
[29]	valid_0's rmse: 0.935689
[30]	valid_0's rmse: 0.935922
[31]	valid_0's rmse: 0.935828
[32]	valid_0's rmse: 0.93532

In [107]:
gc.collect()

3334