In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
import xgboost as xgb
from time import time
from hyperopt import hp
from hyperopt import fmin, tpe

In [2]:
date_2016 = pd.date_range(start = '2016-01-01',end='2016-12-31',freq='D')
date_2017 = pd.date_range(start = '2017-01-01',end='2017-12-31',freq='D')

In [3]:
date_2016_df = pd.DataFrame({'quarter':date_2016.quarter,'ones':np.ones(len(date_2016)),'date':pd.Series(date_2016.date).apply(str)})
date_2017_df = pd.DataFrame({'quarter':date_2017.quarter,'ones':np.ones(len(date_2017)),'date':pd.Series(date_2017.date).apply(str)})

In [4]:
date_2017_df.head()

Unnamed: 0,date,ones,quarter
0,2017-01-01,1.0,1
1,2017-01-02,1.0,1
2,2017-01-03,1.0,1
3,2017-01-04,1.0,1
4,2017-01-05,1.0,1


In [5]:
date_2016_df['dayofquarter']=date_2016_df.groupby('quarter',as_index=False)['ones'].cumsum()
date_2017_df['dayofquarter']=date_2017_df.groupby('quarter',as_index=False)['ones'].cumsum()

In [6]:
quarter_info = pd.concat([date_2016_df[['date','dayofquarter']],date_2017_df[['date','dayofquarter']]],axis=0)

In [7]:
quarter_info.head()

Unnamed: 0,date,dayofquarter
0,2016-01-01,1.0
1,2016-01-02,2.0
2,2016-01-03,3.0
3,2016-01-04,4.0
4,2016-01-05,5.0


In [8]:
quarter_info['quarter'] = pd.to_datetime(quarter_info.date).dt.quarter

In [9]:
quarter_info['day'] = pd.to_datetime(quarter_info.date).dt.day
quarter_info['dayofyear'] = pd.to_datetime(quarter_info.date).dt.dayofyear

In [10]:
quarter_info.head()

Unnamed: 0,date,dayofquarter,quarter,day,dayofyear
0,2016-01-01,1.0,1,1,1
1,2016-01-02,2.0,1,2,2
2,2016-01-03,3.0,1,3,3
3,2016-01-04,4.0,1,4,4
4,2016-01-05,5.0,1,5,5


In [11]:
quarter_info.shape

(731, 5)

## ideas from kernel

In [12]:
kernel_train = pd.read_csv('kernel_train.csv')
kernel_test = pd.read_csv('kernel_test.csv')

In [13]:
kernel_train.head()

Unnamed: 0,air_store_id,visit_date,visitors,dow,year,month,day_of_week,holiday_flg,min_visitors,mean_visitors,...,rv2_y,id,total_reserv_sum,total_reserv_mean,total_reserv_dt_diff_mean,date_int,var_max_lat,var_max_long,lon_plus_lat,air_store_id2
0,air_ba937bf13d40fb24,2016-01-13,25,2,2016,1,6,0,7.0,23.84375,...,-1.0,air_ba937bf13d40fb24_2016-01-13,-1.0,-1.0,-1.0,20160113,8.362564,4.521799,175.409667,599
1,air_ba937bf13d40fb24,2016-01-20,31,2,2016,1,6,0,7.0,23.84375,...,-1.0,air_ba937bf13d40fb24_2016-01-20,-1.0,-1.0,-1.0,20160120,8.362564,4.521799,175.409667,599
2,air_ba937bf13d40fb24,2016-01-27,24,2,2016,1,6,0,7.0,23.84375,...,-1.0,air_ba937bf13d40fb24_2016-01-27,-1.0,-1.0,-1.0,20160127,8.362564,4.521799,175.409667,599
3,air_ba937bf13d40fb24,2016-02-03,18,2,2016,2,6,0,7.0,23.84375,...,-1.0,air_ba937bf13d40fb24_2016-02-03,-1.0,-1.0,-1.0,20160203,8.362564,4.521799,175.409667,599
4,air_ba937bf13d40fb24,2016-02-10,32,2,2016,2,6,0,7.0,23.84375,...,-1.0,air_ba937bf13d40fb24_2016-02-10,-1.0,-1.0,-1.0,20160210,8.362564,4.521799,175.409667,599


In [14]:
len(train_data.visit_date)

NameError: name 'train_data' is not defined

In [None]:
len(kernel_train.visit_date)

In [None]:
test_data.shape,kernel_test.shape

In [15]:
kernel_train.columns

Index(['air_store_id', 'visit_date', 'visitors', 'dow', 'year', 'month',
       'day_of_week', 'holiday_flg', 'min_visitors', 'mean_visitors',
       'median_visitors', 'max_visitors', 'count_observations',
       'air_genre_name', 'air_area_name', 'latitude', 'longitude',
       'air_genre_name0', 'air_area_name0', 'air_genre_name1',
       'air_area_name1', 'air_genre_name2', 'air_area_name2',
       'air_genre_name3', 'air_area_name3', 'air_genre_name4',
       'air_area_name4', 'air_genre_name5', 'air_area_name5',
       'air_genre_name6', 'air_area_name6', 'air_genre_name7',
       'air_area_name7', 'air_genre_name8', 'air_area_name8',
       'air_genre_name9', 'air_area_name9', 'rs1_x', 'rv1_x', 'rs2_x', 'rv2_x',
       'rs1_y', 'rv1_y', 'rs2_y', 'rv2_y', 'id', 'total_reserv_sum',
       'total_reserv_mean', 'total_reserv_dt_diff_mean', 'date_int',
       'var_max_lat', 'var_max_long', 'lon_plus_lat', 'air_store_id2'],
      dtype='object')

In [16]:
train_data = pd.merge(left=kernel_train,right=quarter_info,how='left',
                     left_on='visit_date',right_on='date')

In [17]:
test_data = pd.merge(left=kernel_test,right=quarter_info,how='left',
                    left_on='visit_date',right_on='date')

In [32]:
test_data.head()

Unnamed: 0,id,visitors,visit_date,air_store_id,dow,year,month,day_of_week,holiday_flg,min_visitors,...,date_int,var_max_lat,var_max_long,lon_plus_lat,air_store_id2,date,dayofquarter,quarter,day,dayofyear
0,air_00a91d42b08b08d9_2017-04-23,0,2017-04-23,air_00a91d42b08b08d9,6,2017,4,3,0,2.0,...,20170423,8.326629,4.519803,175.447598,0,2017-04-23,23.0,2,23,113
1,air_00a91d42b08b08d9_2017-04-24,0,2017-04-24,air_00a91d42b08b08d9,0,2017,4,1,0,1.0,...,20170424,8.326629,4.519803,175.447598,0,2017-04-24,24.0,2,24,114
2,air_00a91d42b08b08d9_2017-04-25,0,2017-04-25,air_00a91d42b08b08d9,1,2017,4,5,0,1.0,...,20170425,8.326629,4.519803,175.447598,0,2017-04-25,25.0,2,25,115
3,air_00a91d42b08b08d9_2017-04-26,0,2017-04-26,air_00a91d42b08b08d9,2,2017,4,6,0,15.0,...,20170426,8.326629,4.519803,175.447598,0,2017-04-26,26.0,2,26,116
4,air_00a91d42b08b08d9_2017-04-27,0,2017-04-27,air_00a91d42b08b08d9,3,2017,4,4,0,15.0,...,20170427,8.326629,4.519803,175.447598,0,2017-04-27,27.0,2,27,117


In [18]:
train_data.shape,test_data.shape

((250468, 59), (32019, 59))

In [19]:
len(set(test_data.visit_date)),len(set(train_data.visit_date))

(39, 478)

In [20]:
train_date = pd.DataFrame(list(set(train_data.visit_date))).rename(columns={0:'visit_date'})

In [21]:
train_date.sort_values('visit_date',inplace=True)

In [22]:
train_date['range'] = np.arange(train_date.shape[0])

In [23]:
train_date['train'] = train_date.range.apply(lambda x:x<448)
train_date['test'] = train_date.range.apply(lambda x:x>=448)

In [24]:
train_test = pd.merge(left=train_data,right=train_date[['visit_date','train','test']],
                     how='left',left_on='visit_date',right_on='visit_date')

In [25]:
train_set = train_test[train_test.train]
val_set = train_test[train_test.test]

In [26]:
train_data.shape,train_test.shape,train_set.shape,val_set.shape

((250468, 59), (250468, 61), (228882, 61), (21586, 61))

In [27]:
train_set.columns

Index(['air_store_id', 'visit_date', 'visitors', 'dow', 'year', 'month',
       'day_of_week', 'holiday_flg', 'min_visitors', 'mean_visitors',
       'median_visitors', 'max_visitors', 'count_observations',
       'air_genre_name', 'air_area_name', 'latitude', 'longitude',
       'air_genre_name0', 'air_area_name0', 'air_genre_name1',
       'air_area_name1', 'air_genre_name2', 'air_area_name2',
       'air_genre_name3', 'air_area_name3', 'air_genre_name4',
       'air_area_name4', 'air_genre_name5', 'air_area_name5',
       'air_genre_name6', 'air_area_name6', 'air_genre_name7',
       'air_area_name7', 'air_genre_name8', 'air_area_name8',
       'air_genre_name9', 'air_area_name9', 'rs1_x', 'rv1_x', 'rs2_x', 'rv2_x',
       'rs1_y', 'rv1_y', 'rs2_y', 'rv2_y', 'id', 'total_reserv_sum',
       'total_reserv_mean', 'total_reserv_dt_diff_mean', 'date_int',
       'var_max_lat', 'var_max_long', 'lon_plus_lat', 'air_store_id2', 'date',
       'dayofquarter', 'quarter', 'day', 'dayofyear', 

In [33]:
test_data.columns

Index(['id', 'visitors', 'visit_date', 'air_store_id', 'dow', 'year', 'month',
       'day_of_week', 'holiday_flg', 'min_visitors', 'mean_visitors',
       'median_visitors', 'max_visitors', 'count_observations',
       'air_genre_name', 'air_area_name', 'latitude', 'longitude',
       'air_genre_name0', 'air_area_name0', 'air_genre_name1',
       'air_area_name1', 'air_genre_name2', 'air_area_name2',
       'air_genre_name3', 'air_area_name3', 'air_genre_name4',
       'air_area_name4', 'air_genre_name5', 'air_area_name5',
       'air_genre_name6', 'air_area_name6', 'air_genre_name7',
       'air_area_name7', 'air_genre_name8', 'air_area_name8',
       'air_genre_name9', 'air_area_name9', 'rs1_x', 'rv1_x', 'rs2_x', 'rv2_x',
       'rs1_y', 'rv1_y', 'rs2_y', 'rv2_y', 'total_reserv_sum',
       'total_reserv_mean', 'total_reserv_dt_diff_mean', 'date_int',
       'var_max_lat', 'var_max_long', 'lon_plus_lat', 'air_store_id2', 'date',
       'dayofquarter', 'quarter', 'day', 'dayofyear'],

In [41]:
train_x = train_set.drop(['air_store_id','visit_date','visitors','train','test','date','id','air_genre_name3',
                         'air_genre_name4','air_genre_name5','air_genre_name6','air_genre_name7','air_genre_name8',
                         'air_genre_name9','air_area_name7','air_area_name8','air_area_name9','day_of_week',
                         'air_genre_name', 'air_area_name', 'latitude', 'longitude'],axis=1)
train_y = np.log1p(train_set.visitors.values)
val_x = val_set.drop(['air_store_id','visit_date','visitors','train','test','date','id','air_genre_name3',
                         'air_genre_name4','air_genre_name5','air_genre_name6','air_genre_name7','air_genre_name8',
                         'air_genre_name9','air_area_name7','air_area_name8','air_area_name9','day_of_week',
                     'air_genre_name', 'air_area_name', 'latitude', 'longitude',],axis=1)
val_y = np.log1p(val_set.visitors.values)
test_set = test_data.drop(['air_store_id','visit_date','visitors','date','id','air_genre_name3',
                         'air_genre_name4','air_genre_name5','air_genre_name6','air_genre_name7','air_genre_name8',
                         'air_genre_name9','air_area_name7','air_area_name8','air_area_name9','day_of_week',
                          'air_genre_name', 'air_area_name', 'latitude', 'longitude',],axis=1)

In [None]:
holiday_flg, min_visitors, mean_visitors, median_visitors, max_visitors, count_observations,
        rs1_x, rv1_x, rs2_x, rv2_x, rs1_y, rv1_y, rs2_y, rv2_y, total_reserv_sum, total_reserv_mean,
        total_reserv_dt_diff_mean, date_int, var_max_lat, var_max_long, lon_plus_lat,
        dow, year, month, air_store_id, air_area_code, air_genre_code

In [43]:
train_x.columns

Index(['dow', 'year', 'month', 'holiday_flg', 'min_visitors', 'mean_visitors',
       'median_visitors', 'max_visitors', 'count_observations',
       'air_genre_name0', 'air_area_name0', 'air_genre_name1',
       'air_area_name1', 'air_genre_name2', 'air_area_name2', 'air_area_name3',
       'air_area_name4', 'air_area_name5', 'air_area_name6', 'rs1_x', 'rv1_x',
       'rs2_x', 'rv2_x', 'rs1_y', 'rv1_y', 'rs2_y', 'rv2_y',
       'total_reserv_sum', 'total_reserv_mean', 'total_reserv_dt_diff_mean',
       'date_int', 'var_max_lat', 'var_max_long', 'lon_plus_lat',
       'air_store_id2', 'dayofquarter', 'quarter', 'day', 'dayofyear'],
      dtype='object')

train_data.to_csv('full_train_set.csv',index=False)
train_set.to_csv('train_set.csv',index=False)
val_set.to_csv('val_set.csv',index=False)
test_data.to_csv('test_set.csv',index=False)

In [44]:
train_x.head()

Unnamed: 0,dow,year,month,holiday_flg,min_visitors,mean_visitors,median_visitors,max_visitors,count_observations,air_genre_name0,...,total_reserv_dt_diff_mean,date_int,var_max_lat,var_max_long,lon_plus_lat,air_store_id2,dayofquarter,quarter,day,dayofyear
0,2,2016,1,0,7.0,23.84375,25.0,57.0,64.0,4,...,-1.0,20160113,8.362564,4.521799,175.409667,599,13.0,1,13,13
1,2,2016,1,0,7.0,23.84375,25.0,57.0,64.0,4,...,-1.0,20160120,8.362564,4.521799,175.409667,599,20.0,1,20,20
2,2,2016,1,0,7.0,23.84375,25.0,57.0,64.0,4,...,-1.0,20160127,8.362564,4.521799,175.409667,599,27.0,1,27,27
3,2,2016,2,0,7.0,23.84375,25.0,57.0,64.0,4,...,-1.0,20160203,8.362564,4.521799,175.409667,599,34.0,1,3,34
4,2,2016,2,0,7.0,23.84375,25.0,57.0,64.0,4,...,-1.0,20160210,8.362564,4.521799,175.409667,599,41.0,1,10,41


## build xgboost model

In [45]:
def retrive_y(data):
    return np.exp(data)-1

In [46]:
dtrain = xgb.DMatrix(train_x,train_y)
dvalid = xgb.DMatrix(val_x,val_y)

In [47]:
param = {
        'eta':0.35,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':3,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':150
}

In [48]:
start = time()
bst = xgb.train(param,dtrain,30)
preds = bst.predict(dvalid)
print(mean_squared_error(val_y,preds))
time()-start

0.289008006136


13.048494815826416

In [49]:
param = {
        'eta':0.15,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':3,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':150
}
start = time()
hist = xgb.cv(param,dtrain,30,metrics='rmse')
print(hist['test-rmse-mean'].iloc[-1])
time()-start

0.500548666667


27.412065267562866

In [50]:
for i in np.arange(0.01,0.2,0.01):
    param = {
        'eta':i,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':3,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':150
    }
    hist = xgb.cv(param,dtrain,30,metrics='rmse')
    print(i,hist['test-rmse-mean'].iloc[-1])

0.01 1.835412
0.02 1.39757633333
0.03 1.08464233333
0.04 0.867315666667
0.05 0.722148333333
0.06 0.629477333333
0.07 0.572941
0.08 0.539917666667
0.09 0.521863666667
0.1 0.511361666667
0.11 0.505778666667
0.12 0.502884
0.13 0.501483
0.14 0.500795333333
0.15 0.500548666667
0.16 0.501091666667
0.17 0.501071
0.18 0.501957
0.19 0.502389666667


In [51]:
for i in np.arange(0.2,0.36,0.01):
    param = {
        'eta':i,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':3,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':150
    }
    hist = xgb.cv(param,dtrain,30,metrics='rmse')
    print(i,hist['test-rmse-mean'].iloc[-1])

0.2 0.502664
0.21 0.504044333333
0.22 0.504632333333
0.23 0.505299333333
0.24 0.506541333333
0.25 0.507012333333
0.26 0.507979666667
0.27 0.508825333333
0.28 0.510170666667
0.29 0.511608
0.3 0.512655333333
0.31 0.514708333333
0.32 0.515088333333
0.33 0.517419
0.34 0.518488
0.35 0.520437333333


In [52]:
for i in np.arange(1,20,1):
    param = {
        'eta':0.15,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':i,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':150
    }
    hist = xgb.cv(param,dtrain,30,metrics='rmse')
    print(i,hist['test-rmse-mean'].iloc[-1])

1 0.501207
2 0.501058666667
3 0.500548666667
4 0.500074333333
5 0.500173666667
6 0.499984666667
7 0.500018
8 0.499570666667
9 0.499462
10 0.498860666667
11 0.499178666667
12 0.498834
13 0.498709
14 0.498713
15 0.498436666667
16 0.49877
17 0.498582666667
18 0.498371333333
19 0.498542


In [53]:
for i in np.arange(20,30,1):
    param = {
        'eta':0.15,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':i,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':150
    }
    hist = xgb.cv(param,dtrain,30,metrics='rmse')
    print(i,hist['test-rmse-mean'].iloc[-1])

20 0.498064333333
21 0.498297333333
22 0.497943666667
23 0.498256333333
24 0.498246333333
25 0.498523
26 0.498208333333
27 0.497913333333
28 0.497941333333
29 0.497657333333


In [54]:
for i in np.arange(30,40,1):
    param = {
        'eta':0.15,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':i,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':150
    }
    hist = xgb.cv(param,dtrain,30,metrics='rmse')
    print(i,hist['test-rmse-mean'].iloc[-1])

30 0.498411333333
31 0.497942666667
32 0.498162333333
33 0.497960333333
34 0.497981333333
35 0.497881666667
36 0.497764
37 0.498094
38 0.497864333333
39 0.498269666667


In [55]:
for i in np.arange(80,200,10):
    param = {
        'eta':0.15,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':29,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':i
    }
    hist = xgb.cv(param,dtrain,30,metrics='rmse')
    print(i,hist['test-rmse-mean'].iloc[-1])

80 0.498008666667
90 0.498147666667
100 0.498273
110 0.497862333333
120 0.49797
130 0.497962666667
140 0.498017666667
150 0.497657333333
160 0.498506333333
170 0.498372333333
180 0.497745
190 0.497954


In [56]:
for i in np.arange(145,155,1):
    param = {
        'eta':0.15,'max_depth':16,'subsample':0.9,'colsample_bytree':0.9,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':29,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':i
    }
    hist = xgb.cv(param,dtrain,30,metrics='rmse')
    print(i,hist['test-rmse-mean'].iloc[-1])

145 0.498095333333
146 0.498026666667
147 0.497887666667
148 0.497805666667
149 0.498251333333
150 0.497657333333
151 0.498152666667
152 0.498289
153 0.497934
154 0.498155


In [61]:
for i in np.arange(0.7,1,0.01):
    for j in np.arange(0.7,1,0.01):
        param = {
            'eta':0.15,'max_depth':16,'subsample':i,'colsample_bytree':j,'lamdba':0.3,'alpha':0.8,\
            'min_child_weight':29,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
            'tree_method':'hist','max_bin':150
        }
        hist = xgb.cv(param,dtrain,30,metrics='rmse')
        print(i,j,hist['test-rmse-mean'].iloc[-1])

0.7 0.7 0.498728666667
0.7 0.71 0.498728666667
0.7 0.72 0.498836333333
0.7 0.73 0.498836333333
0.7 0.74 0.498836333333
0.7 0.75 0.498727333333
0.7 0.76 0.498727333333
0.7 0.77 0.498846666667
0.7 0.78 0.498846666667
0.7 0.79 0.498846666667
0.7 0.8 0.499339333333
0.7 0.81 0.499339333333
0.7 0.82 0.499339333333
0.7 0.83 0.499025333333
0.7 0.84 0.499025333333
0.7 0.85 0.499224
0.7 0.86 0.499224
0.7 0.87 0.499224
0.7 0.88 0.499051333333
0.7 0.89 0.499051333333
0.7 0.9 0.499205666667
0.7 0.91 0.499205666667
0.7 0.92 0.499205666667
0.7 0.93 0.499164666667
0.7 0.94 0.499164666667
0.7 0.95 0.499572333333
0.7 0.96 0.499572333333
0.7 0.97 0.499572333333
0.7 0.98 0.499502
0.7 0.99 0.499502
0.7 1.0 0.499409
0.71 0.7 0.498825333333
0.71 0.71 0.498825333333
0.71 0.72 0.498814333333
0.71 0.73 0.498814333333
0.71 0.74 0.498814333333
0.71 0.75 0.498910666667
0.71 0.76 0.498910666667
0.71 0.77 0.499175
0.71 0.78 0.499175
0.71 0.79 0.499175
0.71 0.8 0.499117
0.71 0.81 0.499117
0.71 0.82 0.499117
0.71 0.83

0.81 0.89 0.498282
0.81 0.9 0.498748333333
0.81 0.91 0.498748333333
0.81 0.92 0.498748333333
0.81 0.93 0.498760333333
0.81 0.94 0.498760333333
0.81 0.95 0.498445
0.81 0.96 0.498445
0.81 0.97 0.498445
0.81 0.98 0.498883
0.81 0.99 0.498883
0.81 1.0 0.499022666667
0.82 0.7 0.497844666667
0.82 0.71 0.497844666667
0.82 0.72 0.498543666667
0.82 0.73 0.498543666667
0.82 0.74 0.498543666667
0.82 0.75 0.498583333333
0.82 0.76 0.498583333333
0.82 0.77 0.497931
0.82 0.78 0.497931
0.82 0.79 0.497931
0.82 0.8 0.497802
0.82 0.81 0.497802
0.82 0.82 0.497802
0.82 0.83 0.498153666667
0.82 0.84 0.498153666667
0.82 0.85 0.498553
0.82 0.86 0.498553
0.82 0.87 0.498553
0.82 0.88 0.498589666667
0.82 0.89 0.498589666667
0.82 0.9 0.498627
0.82 0.91 0.498627
0.82 0.92 0.498627
0.82 0.93 0.498762666667
0.82 0.94 0.498762666667
0.82 0.95 0.498856666667
0.82 0.96 0.498856666667
0.82 0.97 0.498856666667
0.82 0.98 0.498507
0.82 0.99 0.498507
0.82 1.0 0.498662
0.83 0.7 0.49771
0.83 0.71 0.49771
0.83 0.72 0.4977253333

0.93 0.84 0.497934333333
0.93 0.85 0.497807666667
0.93 0.86 0.497807666667
0.93 0.87 0.497807666667
0.93 0.88 0.497849333333
0.93 0.89 0.497849333333
0.93 0.9 0.497789333333
0.93 0.91 0.497789333333
0.93 0.92 0.497789333333
0.93 0.93 0.497775666667
0.93 0.94 0.497775666667
0.93 0.95 0.497950666667
0.93 0.96 0.497950666667
0.93 0.97 0.497950666667
0.93 0.98 0.498083
0.93 0.99 0.498083
0.93 1.0 0.498413
0.94 0.7 0.4978
0.94 0.71 0.4978
0.94 0.72 0.497210333333
0.94 0.73 0.497210333333
0.94 0.74 0.497210333333
0.94 0.75 0.497776
0.94 0.76 0.497776
0.94 0.77 0.497845
0.94 0.78 0.497845
0.94 0.79 0.497845
0.94 0.8 0.497796666667
0.94 0.81 0.497796666667
0.94 0.82 0.497796666667
0.94 0.83 0.498109333333
0.94 0.84 0.498109333333
0.94 0.85 0.498097333333
0.94 0.86 0.498097333333
0.94 0.87 0.498097333333
0.94 0.88 0.498174666667
0.94 0.89 0.498174666667
0.94 0.9 0.498153333333
0.94 0.91 0.498153333333
0.94 0.92 0.498153333333
0.94 0.93 0.498271333333
0.94 0.94 0.498271333333
0.94 0.95 0.4981846

In [None]:
for i in np.arange(145,155,1):
    param = {
        'eta':0.15,'max_depth':16,'subsample':0.94,'colsample_bytree':0.73,'lamdba':0.3,'alpha':0.8,\
        'min_child_weight':29,'num_boost_round':100,'objective':'reg:linear','booster':'gbtree',\
        'tree_method':'hist','max_bin':150
    }
    hist = xgb.cv(param,dtrain,30,metrics='rmse')
    print(i,hist['test-rmse-mean'].iloc[-1])