In [1]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import joblib
import prince
from mlens.ensemble import SuperLearner
from sklearn.base import BaseEstimator, TransformerMixin

[MLENS] backend: threading


In [2]:
# Labelled training file; the sale price column is the regression target.
dat = pd.read_csv('training_ZILLOW_CONFIDENTIAL.csv')
target = dat['SaleDollarCnt']

# Feature frames: 'SaleDollarCnt' is dropped from both the training and the test file.
dat_train = dat.drop(columns='SaleDollarCnt')
dat_test = pd.read_csv('test_ZILLOW_CONFIDENTIAL.csv').drop(columns='SaleDollarCnt')

In [30]:
# PropertyID of every training row whose GarageSquareFeet is missing.
dat.loc[dat.GarageSquareFeet.isna(), 'PropertyID']

3         48649040
4         48649057
6         48649122
7         48649137
8         48649324
           ...    
11417    122181248
11432    122182157
11479    122962619
11564    124003199
11580    124393049
Name: PropertyID, Length: 2841, dtype: int64

In [3]:
# (rows, columns) of the training and test feature frames.
dat_train.shape, dat_test.shape

((11588, 23), (4402, 23))

In [4]:
# Number of missing values in each training column.
# DataFrame.sum() is called directly: np.sum(df) forwards axis=None to pandas,
# and the per-column result for axis=None is deprecated in pandas 2.x.
dat_train.isna().sum()

PropertyID               0
TransDate                0
censusblockgroup         0
ZoneCodeCounty           0
Usecode                  0
BedroomCnt               0
BathroomCnt              0
FinishedSquareFeet       0
GarageSquareFeet      2841
LotSizeSquareFeet        0
StoryCnt                 0
BuiltYear                0
ViewType              8956
Latitude                 0
Longitude                0
BGMedHomeValue           6
BGMedRent             2631
BGMedYearBuilt         247
BGPctOwn                 0
BGPctVacant              0
BGMedIncome              0
BGPctKids                0
BGMedAge                 0
dtype: int64

## Preprocessing (option 1): Using one-hot encoding to encode categorical data

- PropertyID: remove;
- Usecode: remove;
- TransDate: convert it to the number of days elapsed since '01/01/2015', then divide it by 366;
- ViewType: Treat 'NA' as another category
- censusblockgroup: derive the FIPS code by truncating the rightmost digit
- BGMedIncome: make a log transformation
- BGMedHomeValue: make a log transformation

Apply one-hot encoding to deal with the categorical variables.


In [6]:
## Preprocessing
# PropertyID: remove
# Usecode: remove
# TransDate: days elapsed since '01/01/2015', divided by 366
# ViewType: fill NA with 0, then cast to object so it is handled as a category
# censusblockgroup: drop the rightmost character of the code
# BGMedIncome / BGMedHomeValue: natural-log transform
# Train and test rows are stacked so each column edit is applied to both at once.
dat_all = pd.concat([dat_train, dat_test])
dat_all = dat_all.drop(['PropertyID','Usecode'], axis = 1)
dat_all['TransDate'] = (pd.to_datetime(dat_all['TransDate']) - pd.to_datetime('01/01/2015')).dt.days/366
dat_all['ViewType'] = (dat_all['ViewType'].fillna(0)).astype(object)
dat_all['censusblockgroup'] = dat_all['censusblockgroup'].map(lambda x: str(x)[:-1])
dat_all['BGMedIncome'] = np.log(dat_all['BGMedIncome'])
dat_all['BGMedHomeValue'] = np.log(dat_all['BGMedHomeValue'])

# Columns routed through the numeric pipeline (KNN imputation + scaling).
con_var = ['FinishedSquareFeet','GarageSquareFeet', 'LotSizeSquareFeet',
           'BGMedHomeValue', 'BGMedRent', 'BGMedYearBuilt','BGMedIncome', 
           'BGMedAge', 'BuiltYear', 'BGPctOwn', 'BGPctVacant', 'BGPctKids', 
           'TransDate','BedroomCnt', 'BathroomCnt', 'StoryCnt','Latitude', 
           'Longitude']

# Columns that are one-hot encoded.
cat_var = ['censusblockgroup','ZoneCodeCounty', 'ViewType']

# Undo the stacking by position: the first dat_train.shape[0] rows are the training rows.
dat_train_after = dat_all.iloc[:dat_train.shape[0],]
dat_test_after = dat_all.iloc[dat_train.shape[0]:, ]

# random_state is fixed so the hold-out split (reused later as the early-stopping
# set) is identical on every run of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    dat_train_after, target, train_size=0.8, random_state=42
)


class IdentityTransformer(BaseEstimator, TransformerMixin):
    """Pass-through step: hands its input back multiplied by 1."""

    def __init__(self):
        """There is nothing to configure."""

    def fit(self, input_array, y=None):
        """Learn nothing; return self so the step can be chained."""
        return self

    def transform(self, input_array, y=None):
        """Return ``input_array * 1``; ``y`` is ignored."""
        unchanged = input_array * 1
        return unchanged

class KNNimputer_dat(BaseEstimator, TransformerMixin):
    """KNN imputation that returns a DataFrame instead of a bare array.

    Wraps sklearn's ``KNNImputer`` and re-attaches the input's column labels
    and index to the imputed values, so later steps can still select columns
    by name.

    Parameters
    ----------
    n_neighbors : int, default=5
        Forwarded to ``KNNImputer``.
    """

    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors
        # Kept so the attribute exists before fit(); fit() rebuilds it.
        self.KNNimputer = KNNImputer(n_neighbors = n_neighbors)

    def fit(self, X, y=None):
        """Fit a fresh imputer on ``X`` using the current ``n_neighbors``."""
        # set_params(n_neighbors=...) only updates self.n_neighbors, so the
        # imputer built in __init__ could be stale; rebuild it here.
        self.KNNimputer = KNNImputer(n_neighbors=self.n_neighbors)
        self.KNNimputer.fit(X)
        return self

    def transform(self, X, y=None):
        """Impute ``X`` and return a DataFrame with X's columns and index.

        ``X`` must expose ``.columns`` and ``.index`` (i.e. be a DataFrame).
        """
        col_names = X.columns
        index = X.index
        X_imputed = self.KNNimputer.transform(X)
        X_imputed = pd.DataFrame(X_imputed, columns=col_names, index = index)
        return X_imputed


# Numeric columns are split three ways according to how they are rescaled.
num_ColumnTrans = ColumnTransformer(transformers=[
    (
        'std_scale',
        StandardScaler(),
        [
            'FinishedSquareFeet', 'GarageSquareFeet', 'LotSizeSquareFeet',
            'BGMedHomeValue', 'BGMedRent', 'BGMedYearBuilt', 'BGMedIncome',
            'BGMedAge', 'BuiltYear',
        ],
    ),
    (
        'Identity',
        IdentityTransformer(),
        ['BGPctOwn', 'BGPctVacant', 'BGPctKids', 'TransDate'],
    ),
    (
        'Minmax_scale',
        MinMaxScaler(),
        ['BedroomCnt', 'BathroomCnt', 'StoryCnt', 'Latitude', 'Longitude'],
    ),
])

# Impute first (keeping column labels), then rescale by column name.
num_transformer = Pipeline([
    ('KNNimputer', KNNimputer_dat(n_neighbors=5)),
    ('scaler', num_ColumnTrans),
])

# Full feature matrix: numeric pipeline plus one one-hot encoder per categorical column.
mypreprocessor = ColumnTransformer(
    [('num', num_transformer, con_var)]
    + [
        (step_name, OneHotEncoder(handle_unknown='ignore'), [column])
        for step_name, column in (
            ('Ohot2blockgroup', 'censusblockgroup'),
            ('Ohot2ZoneCode', 'ZoneCodeCounty'),
            ('Ohot2ViewType', 'ViewType'),
        )
    ]
)


## Preprocessing (option 2): Using MCA to encode categorical data

- PropertyID: remove;
- Usecode: remove;
- TransDate: convert it to the number of days elapsed since '01/01/2015', then divide it by 366;
- ViewType: Treat 'NA' as another category
- censusblockgroup: derive the FIPS code by truncating the rightmost digit
- BGMedIncome: make a log transformation
- BGMedHomeValue: make a log transformation

Apply MCA to the variables 'censusblockgroup' and 'ZoneCodeCounty', because each of them has a large number of categories.
Apply a one-hot encoder to 'ViewType'.

In [80]:
## Preprocessing
# PropertyID: remove
# Usecode: remove
# TransDate: days elapsed since '01/01/2015', divided by 366
# ViewType: fill NA with 0, then cast to object so it is handled as a category
# censusblockgroup: drop the rightmost character of the code
# NOTE(review): the markdown for this option also lists log transforms of
# BGMedIncome and BGMedHomeValue, but this cell does not apply them -- confirm
# which of the two is intended.
dat_all = pd.concat([dat_train, dat_test]).drop(['PropertyID', 'Usecode'], axis=1)
dat_all['TransDate'] = (
    pd.to_datetime(dat_all['TransDate']) - pd.to_datetime('01/01/2015')
).dt.days / 366
dat_all['ViewType'] = dat_all['ViewType'].fillna(0).astype(object)
dat_all['censusblockgroup'] = dat_all['censusblockgroup'].map(lambda x: str(x)[:-1])

# Columns routed through the numeric pipeline.
con_var = [
    'BedroomCnt', 'BathroomCnt', 'StoryCnt', 'FinishedSquareFeet',
    'GarageSquareFeet', 'LotSizeSquareFeet', 'Latitude', 'Longitude',
    'BGMedHomeValue', 'BGMedRent', 'BGMedYearBuilt',
    'BGPctOwn', 'BGPctVacant', 'BGMedIncome', 'BGPctKids', 'BGMedAge',
    'BuiltYear', 'TransDate',
]
cat_var = ['censusblockgroup', 'ZoneCodeCounty', 'ViewType']

# MCA replaces the two many-category columns with n_component numeric columns.
# NOTE(review): it is fitted on the train and test rows together.
n_component = 5
MCA_columns = [f'rep_{i}' for i in range(n_component)]
mac = prince.MCA(n_components=n_component)
num_reprsent = mac.fit_transform(dat_all[['censusblockgroup', 'ZoneCodeCounty']])
num_reprsent.columns = MCA_columns
dat_all = pd.concat([dat_all[con_var], dat_all['ViewType'], num_reprsent], axis=1)

# Undo the stacking by position: the first len(dat_train) rows are the training rows.
dat_train_after = dat_all.iloc[:len(dat_train)]
dat_test_after = dat_all.iloc[len(dat_train):]


# ## deal 'censusblockgroup'
# min_num = 5
# category_blockgroup = []
# for cat in dat_train_after['censusblockgroup'].unique():
#     if len(dat_train_after[dat_train_after['censusblockgroup'] == cat]) >= min_num:
#         category_blockgroup.append(cat)
# dat_train_after['censusblockgroup'] = dat_train_after['censusblockgroup'].map(lambda x: x if x in category_blockgroup else 'other')
# dat_test_after['censusblockgroup'] = dat_test_after['censusblockgroup'].map(lambda x: x if x in category_blockgroup else 'other')        

# ## deal 'ZoneCodeCounty'
# min_num = 5
# category_ZoneCode= []
# for cat in dat_train_after['ZoneCodeCounty'].unique():
#     if len(dat_train_after[dat_train_after['ZoneCodeCounty'] == cat]) >= min_num:
#         category_ZoneCode.append(cat)
# dat_train_after['ZoneCodeCounty'] = dat_train_after['ZoneCodeCounty'].map(lambda x: x if x in category_ZoneCode else 'other')
# dat_test_after['ZoneCodeCounty'] = dat_test_after['ZoneCodeCounty'].map(lambda x: x if x in category_ZoneCode else 'other')        



# random_state is fixed so the hold-out split (reused later as the early-stopping
# set) is identical on every run of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    dat_train_after, target, train_size=0.85, random_state=42
)

class IdentityTransformer(BaseEstimator, TransformerMixin):
    """Pass-through step: hands its input back multiplied by 1."""

    def __init__(self):
        """There is nothing to configure."""

    def fit(self, input_array, y=None):
        """Learn nothing; return self so the step can be chained."""
        return self

    def transform(self, input_array, y=None):
        """Return ``input_array * 1``; ``y`` is ignored."""
        unchanged = input_array * 1
        return unchanged

class KNNimputer_dat(BaseEstimator, TransformerMixin):
    """KNN imputation that returns a DataFrame instead of a bare array.

    Wraps sklearn's ``KNNImputer`` and re-attaches the input's column labels
    and index to the imputed values, so later steps can still select columns
    by name.

    Parameters
    ----------
    n_neighbors : int, default=5
        Forwarded to ``KNNImputer``.
    """

    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors
        # Kept so the attribute exists before fit(); fit() rebuilds it.
        self.KNNimputer = KNNImputer(n_neighbors = n_neighbors)

    def fit(self, X, y=None):
        """Fit a fresh imputer on ``X`` using the current ``n_neighbors``."""
        # set_params(n_neighbors=...) only updates self.n_neighbors, so the
        # imputer built in __init__ could be stale; rebuild it here.
        self.KNNimputer = KNNImputer(n_neighbors=self.n_neighbors)
        self.KNNimputer.fit(X)
        return self

    def transform(self, X, y=None):
        """Impute ``X`` and return a DataFrame with X's columns and index.

        ``X`` must expose ``.columns`` and ``.index`` (i.e. be a DataFrame).
        """
        col_names = X.columns
        index = X.index
        X_imputed = self.KNNimputer.transform(X)
        X_imputed = pd.DataFrame(X_imputed, columns=col_names, index = index)
        return X_imputed


# Numeric columns are split three ways according to how they are rescaled.
num_ColumnTrans = ColumnTransformer(transformers=[
    (
        'std_scale',
        StandardScaler(),
        [
            'FinishedSquareFeet', 'GarageSquareFeet', 'LotSizeSquareFeet',
            'BGMedHomeValue', 'BGMedRent', 'BGMedYearBuilt', 'BGMedIncome',
            'BGMedAge', 'BuiltYear',
        ],
    ),
    (
        'Identity',
        IdentityTransformer(),
        ['BGPctOwn', 'BGPctVacant', 'BGPctKids', 'TransDate'],
    ),
    (
        'Minmax_scale',
        MinMaxScaler(),
        ['BedroomCnt', 'BathroomCnt', 'StoryCnt', 'Latitude', 'Longitude'],
    ),
])

# Impute first (keeping column labels), then rescale by column name.
num_transformer = Pipeline([
    ('KNNimputer', KNNimputer_dat(n_neighbors=5)),
    ('scaler', num_ColumnTrans),
])



# class category_MAC(BaseEstimator, TransformerMixin):
#     def __init__(self, n_compent, min_num = 5):
#         self.mac = prince.MCA(n_components=n_component)
#         self.category_blockgroup = []
#         self.category_ZoneCode= []
#         self.min_num = 5
    
#     def fit(self, X, y=None):
#         for cat in X['censusblockgroup'].unique():
#             if len(X[X['censusblockgroup'] == cat]) >= self.min_num:
#                 self.category_blockgroup.append(cat)
#         X['censusblockgroup'] = X['censusblockgroup'].map(lambda x: x if x in self.category_blockgroup else 'other')
#         for cat in X['ZoneCodeCounty'].unique():
#             if len(X[X['ZoneCodeCounty'] == cat]) >= self.min_num:
#                 self.category_ZoneCode.append(cat)
#         X['ZoneCodeCounty'] = X['ZoneCodeCounty'].map(lambda x: x if x in self.category_ZoneCode else 'other')
#         self.mac.fit(X)
#         return self
    
#     def transform(self, X, y =None):
#         X['censusblockgroup'] = X['censusblockgroup'].map(lambda x: x if x in self.category_blockgroup else 'other')
#         X['ZoneCodeCounty'] = X['ZoneCodeCounty'].map(lambda x: x if x in self.category_ZoneCode else 'other')
#         return self.mac.transform(X)


# Feature matrix: numeric pipeline + MCA components (passed through) + one-hot ViewType.
mypreprocessor = ColumnTransformer([
    ('num', num_transformer, con_var),
    ('Identidy', IdentityTransformer(), MCA_columns),
    # handle_unknown='ignore': a ViewType level that is absent from the rows the
    # encoder was fitted on (e.g. one cross-validation training fold) is encoded
    # as all zeros instead of raising at transform time.
    ('Onehotencoder', OneHotEncoder(handle_unknown='ignore'), ['ViewType'])
])

## Define a custom metric function

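I define a custom metric function because the model is fitted with log('SaleDollarCnt') as the response; the metric exponentiates both the true and the predicted values so that the error is measured on the original scale.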

In [4]:
## define a metric function 
def myloss(y_true, y_pred):
    """Mean absolute percentage error on the back-transformed scale.

    Both arguments are exponentiated first, so they are expected to be on the
    log scale; the result is mean(|exp(y_true) - exp(y_pred)| / exp(y_true)).
    """
    actual = np.exp(y_true)
    predicted = np.exp(y_pred)
    relative_error = np.abs(actual - predicted) / actual
    return np.mean(relative_error)

# Scorer wrapper around myloss; greater_is_better=False marks it as a loss.
myscore = make_scorer(
    myloss,
    greater_is_better=False,
)


# Modeling
## XGBRegressor

In [None]:
## fix a large learning rate 0.1, 
## then find a n_estimator for future parameter tuning

# The preprocessor is fitted on the training split only, then applied to the hold-out split.
X_train_processed = mypreprocessor.fit_transform(X_train)
X_test_processed = mypreprocessor.transform(X_test)

bestXGB = XGBRegressor(
    # boosting schedule: a very large cap on trees, cut short by early stopping
    learning_rate=0.1,
    n_estimators=50000,
    early_stopping_rounds=10,
    eval_metric=myloss,
    # tree shape
    max_depth=5,
    min_child_weight=1,
    # row / column sampling
    subsample=0.6,
    colsample_bytree=0.8,
    # regularisation
    reg_alpha=0.1,
    reg_lambda=0.1,
    seed=42,
)
# The model is trained on log(price); the hold-out split is the evaluation set.
bestXGB.fit(
    X_train_processed,
    np.log(y_train),
    eval_set=[(X_test_processed, np.log(y_test))],
)

In [None]:
## Step1 Tune max_depth and min_child_weight with n_estimators = 250
target_log = np.log(target)

# Preprocessing lives inside the pipeline, so it is refitted on each CV training fold.
pipe1 = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', XGBRegressor(
        learning_rate=0.1,
        n_estimators=250,
        eval_metric=myloss,
        max_depth=5,
        min_child_weight=1,
        subsample=0.5,
        colsample_bytree=0.8,
        reg_alpha=0.1,
        reg_lambda=0.1,
        seed=42,
    )),
])

# Keys carry the 'model__' prefix so they reach the XGBRegressor step.
param_grid1 = {
    'model__max_depth': [3, 5, 6, 8, 10],
    'model__min_child_weight': [1, 2, 4, 6, 8, 10],
}

grid_search1 = GridSearchCV(
    estimator=pipe1,
    param_grid=param_grid1,
    scoring=myscore,
    n_jobs=-1,
)
grid_search1.fit(dat_train_after, target_log)

In [130]:
# Best cross-validated score and the parameters that achieved it.
grid_search1.best_score_, grid_search1.best_params_

(-0.12787269280796282, {'model__max_depth': 6, 'model__min_child_weight': 2})

In [None]:
## Step 2: Tune gamma
target_log = np.log(target)

# max_depth and min_child_weight are held at 6 and 2 while gamma is searched.
pipe = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', XGBRegressor(
        learning_rate=0.1,
        n_estimators=250,
        eval_metric=myloss,
        max_depth=6,
        min_child_weight=2,
        subsample=0.5,
        colsample_bytree=0.8,
        reg_alpha=0.1,
        reg_lambda=0.1,
        seed=42,
    )),
])

# gamma from 0.0 to 1.0 in steps of 0.1
param_grid2 = {
    'model__gamma': [i / 10.0 for i in range(11)],
}

grid_search2 = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid2,
    scoring=myscore,
    n_jobs=-1,
)
grid_search2.fit(dat_train_after, target_log)


In [138]:
# Best cross-validated score and the gamma value that achieved it.
grid_search2.best_score_, grid_search2.best_params_,

(-0.12787269280796282, {'model__gamma': 0.0})

In [None]:
# Step 3: Tune subsample and colsample_bytree
target_log = np.log(target)

# gamma, max_depth and min_child_weight are held at 0, 6 and 2 for this search.
pipe3 = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', XGBRegressor(
        learning_rate=0.1,
        n_estimators=250,
        eval_metric=myloss,
        gamma=0,
        max_depth=6,
        min_child_weight=2,
        subsample=0.5,
        colsample_bytree=0.8,
        reg_alpha=0.1,
        reg_lambda=0.1,
        seed=42,
    )),
])

# subsample: 0.3 .. 1.0, colsample_bytree: 0.5 .. 1.0, both in steps of 0.1
param_grid3 = {
    'model__subsample': [i / 10.0 for i in range(3, 11)],
    'model__colsample_bytree': [i / 10.0 for i in range(5, 11)],
}

grid_search3 = GridSearchCV(
    estimator=pipe3,
    param_grid=param_grid3,
    scoring=myscore,
    n_jobs=-1,
)
grid_search3.fit(dat_train_after, target_log)

In [143]:
# Winning subsample / colsample_bytree pair and its cross-validated score.
grid_search3.best_params_, grid_search3.best_score_

({'model__colsample_bytree': 0.6, 'model__subsample': 0.9},
 -0.12634547312715258)

In [None]:
## Step 4: Tuning Regularization Parameters
target_log = np.log(target)

# subsample and colsample_bytree are held at 0.9 and 0.6 for this search.
pipe4 = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', XGBRegressor(
        learning_rate=0.1,
        n_estimators=250,
        eval_metric=myloss,
        gamma=0,
        max_depth=6,
        min_child_weight=2,
        subsample=0.9,
        colsample_bytree=0.6,
        reg_alpha=0.1,
        reg_lambda=0.1,
        seed=42,
    )),
])

# The same seven candidate values are tried for both penalties.
param_grid4 = {
    'model__reg_lambda': [0, 1e-3, 1e-2, 0.1, 1, 10, 100],
    'model__reg_alpha': [0, 1e-3, 1e-2, 0.1, 1, 10, 100],
}

grid_search4 = GridSearchCV(
    estimator=pipe4,
    param_grid=param_grid4,
    scoring=myscore,
    n_jobs=-1,
)
grid_search4.fit(dat_train_after, target_log)

In [150]:
# Winning reg_alpha / reg_lambda pair and its cross-validated score.
grid_search4.best_params_, grid_search4.best_score_

({'model__reg_alpha': 1, 'model__reg_lambda': 0.01}, -0.12499748148609904)

In [9]:
## Step 5: Reducing Learning Rate and refit the model
# The preprocessor is fitted on the training split only, then applied to the hold-out split.
X_train_processed = mypreprocessor.fit_transform(X_train)
X_test_processed = mypreprocessor.transform(X_test)

bestXGB = XGBRegressor(
    # boosting schedule: small learning rate, large tree cap, early stopping
    learning_rate=0.01,
    n_estimators=30000,
    early_stopping_rounds=50,
    eval_metric=myloss,
    # tree shape
    gamma=0,
    max_depth=6,
    min_child_weight=2,
    # row / column sampling
    subsample=0.9,
    colsample_bytree=0.6,
    # regularisation
    reg_alpha=1,
    reg_lambda=0.01,
    seed=42,
)
# The model is trained on log(price); the hold-out split is the evaluation set.
bestXGB.fit(
    X_train_processed,
    np.log(y_train),
    eval_set=[(X_test_processed, np.log(y_test))],
)

[0]	validation_0-rmse:12.57090	validation_0-myloss:1.00000
[1]	validation_0-rmse:12.44534	validation_0-myloss:1.00000
[2]	validation_0-rmse:12.32100	validation_0-myloss:0.99999
[3]	validation_0-rmse:12.19791	validation_0-myloss:0.99999
[4]	validation_0-rmse:12.07607	validation_0-myloss:0.99999
[5]	validation_0-rmse:11.95537	validation_0-myloss:0.99999
[6]	validation_0-rmse:11.83592	validation_0-myloss:0.99999
[7]	validation_0-rmse:11.71771	validation_0-myloss:0.99999
[8]	validation_0-rmse:11.60068	validation_0-myloss:0.99999
[9]	validation_0-rmse:11.48482	validation_0-myloss:0.99999
[10]	validation_0-rmse:11.37008	validation_0-myloss:0.99999
[11]	validation_0-rmse:11.25653	validation_0-myloss:0.99999
[12]	validation_0-rmse:11.14408	validation_0-myloss:0.99998
[13]	validation_0-rmse:11.03279	validation_0-myloss:0.99998
[14]	validation_0-rmse:10.92258	validation_0-myloss:0.99998
[15]	validation_0-rmse:10.81348	validation_0-myloss:0.99998
[16]	validation_0-rmse:10.70551	validation_0-mylos

[138]	validation_0-rmse:3.15375	validation_0-myloss:0.95511
[139]	validation_0-rmse:3.12243	validation_0-myloss:0.95369
[140]	validation_0-rmse:3.09150	validation_0-myloss:0.95224
[141]	validation_0-rmse:3.06079	validation_0-myloss:0.95076
[142]	validation_0-rmse:3.03037	validation_0-myloss:0.94925
[143]	validation_0-rmse:3.00021	validation_0-myloss:0.94770
[144]	validation_0-rmse:2.97040	validation_0-myloss:0.94613
[145]	validation_0-rmse:2.94088	validation_0-myloss:0.94452
[146]	validation_0-rmse:2.91167	validation_0-myloss:0.94288
[147]	validation_0-rmse:2.88276	validation_0-myloss:0.94122
[148]	validation_0-rmse:2.85416	validation_0-myloss:0.93952
[149]	validation_0-rmse:2.82588	validation_0-myloss:0.93779
[150]	validation_0-rmse:2.79785	validation_0-myloss:0.93603
[151]	validation_0-rmse:2.77007	validation_0-myloss:0.93424
[152]	validation_0-rmse:2.74264	validation_0-myloss:0.93241
[153]	validation_0-rmse:2.71544	validation_0-myloss:0.93056
[154]	validation_0-rmse:2.68848	validati

[275]	validation_0-rmse:0.82307	validation_0-myloss:0.54029
[276]	validation_0-rmse:0.81539	validation_0-myloss:0.53666
[277]	validation_0-rmse:0.80778	validation_0-myloss:0.53304
[278]	validation_0-rmse:0.80021	validation_0-myloss:0.52942
[279]	validation_0-rmse:0.79273	validation_0-myloss:0.52582
[280]	validation_0-rmse:0.78532	validation_0-myloss:0.52223
[281]	validation_0-rmse:0.77797	validation_0-myloss:0.51864
[282]	validation_0-rmse:0.77076	validation_0-myloss:0.51507
[283]	validation_0-rmse:0.76358	validation_0-myloss:0.51150
[284]	validation_0-rmse:0.75649	validation_0-myloss:0.50795
[285]	validation_0-rmse:0.74948	validation_0-myloss:0.50440
[286]	validation_0-rmse:0.74255	validation_0-myloss:0.50088
[287]	validation_0-rmse:0.73567	validation_0-myloss:0.49735
[288]	validation_0-rmse:0.72887	validation_0-myloss:0.49383
[289]	validation_0-rmse:0.72214	validation_0-myloss:0.49033
[290]	validation_0-rmse:0.71548	validation_0-myloss:0.48683
[291]	validation_0-rmse:0.70889	validati

[412]	validation_0-rmse:0.27905	validation_0-myloss:0.19583
[413]	validation_0-rmse:0.27755	validation_0-myloss:0.19464
[414]	validation_0-rmse:0.27606	validation_0-myloss:0.19347
[415]	validation_0-rmse:0.27460	validation_0-myloss:0.19234
[416]	validation_0-rmse:0.27317	validation_0-myloss:0.19120
[417]	validation_0-rmse:0.27177	validation_0-myloss:0.19009
[418]	validation_0-rmse:0.27036	validation_0-myloss:0.18898
[419]	validation_0-rmse:0.26896	validation_0-myloss:0.18790
[420]	validation_0-rmse:0.26755	validation_0-myloss:0.18682
[421]	validation_0-rmse:0.26618	validation_0-myloss:0.18575
[422]	validation_0-rmse:0.26486	validation_0-myloss:0.18472
[423]	validation_0-rmse:0.26354	validation_0-myloss:0.18369
[424]	validation_0-rmse:0.26223	validation_0-myloss:0.18269
[425]	validation_0-rmse:0.26096	validation_0-myloss:0.18170
[426]	validation_0-rmse:0.25973	validation_0-myloss:0.18073
[427]	validation_0-rmse:0.25846	validation_0-myloss:0.17975
[428]	validation_0-rmse:0.25722	validati

[549]	validation_0-rmse:0.19001	validation_0-myloss:0.12951
[550]	validation_0-rmse:0.18979	validation_0-myloss:0.12939
[551]	validation_0-rmse:0.18958	validation_0-myloss:0.12928
[552]	validation_0-rmse:0.18938	validation_0-myloss:0.12918
[553]	validation_0-rmse:0.18921	validation_0-myloss:0.12909
[554]	validation_0-rmse:0.18903	validation_0-myloss:0.12901
[555]	validation_0-rmse:0.18886	validation_0-myloss:0.12891
[556]	validation_0-rmse:0.18867	validation_0-myloss:0.12881
[557]	validation_0-rmse:0.18850	validation_0-myloss:0.12871
[558]	validation_0-rmse:0.18833	validation_0-myloss:0.12863
[559]	validation_0-rmse:0.18816	validation_0-myloss:0.12854
[560]	validation_0-rmse:0.18800	validation_0-myloss:0.12845
[561]	validation_0-rmse:0.18781	validation_0-myloss:0.12835
[562]	validation_0-rmse:0.18765	validation_0-myloss:0.12828
[563]	validation_0-rmse:0.18750	validation_0-myloss:0.12821
[564]	validation_0-rmse:0.18736	validation_0-myloss:0.12814
[565]	validation_0-rmse:0.18720	validati

[686]	validation_0-rmse:0.17824	validation_0-myloss:0.12453
[687]	validation_0-rmse:0.17820	validation_0-myloss:0.12452
[688]	validation_0-rmse:0.17815	validation_0-myloss:0.12450
[689]	validation_0-rmse:0.17811	validation_0-myloss:0.12448
[690]	validation_0-rmse:0.17806	validation_0-myloss:0.12446
[691]	validation_0-rmse:0.17801	validation_0-myloss:0.12444
[692]	validation_0-rmse:0.17796	validation_0-myloss:0.12441
[693]	validation_0-rmse:0.17793	validation_0-myloss:0.12440
[694]	validation_0-rmse:0.17791	validation_0-myloss:0.12439
[695]	validation_0-rmse:0.17787	validation_0-myloss:0.12438
[696]	validation_0-rmse:0.17784	validation_0-myloss:0.12436
[697]	validation_0-rmse:0.17780	validation_0-myloss:0.12435
[698]	validation_0-rmse:0.17776	validation_0-myloss:0.12433
[699]	validation_0-rmse:0.17772	validation_0-myloss:0.12431
[700]	validation_0-rmse:0.17770	validation_0-myloss:0.12431
[701]	validation_0-rmse:0.17768	validation_0-myloss:0.12430
[702]	validation_0-rmse:0.17763	validati

[823]	validation_0-rmse:0.17448	validation_0-myloss:0.12287
[824]	validation_0-rmse:0.17447	validation_0-myloss:0.12286
[825]	validation_0-rmse:0.17445	validation_0-myloss:0.12286
[826]	validation_0-rmse:0.17444	validation_0-myloss:0.12286
[827]	validation_0-rmse:0.17442	validation_0-myloss:0.12284
[828]	validation_0-rmse:0.17440	validation_0-myloss:0.12283
[829]	validation_0-rmse:0.17438	validation_0-myloss:0.12282
[830]	validation_0-rmse:0.17437	validation_0-myloss:0.12281
[831]	validation_0-rmse:0.17434	validation_0-myloss:0.12280
[832]	validation_0-rmse:0.17432	validation_0-myloss:0.12280
[833]	validation_0-rmse:0.17430	validation_0-myloss:0.12278
[834]	validation_0-rmse:0.17428	validation_0-myloss:0.12277
[835]	validation_0-rmse:0.17427	validation_0-myloss:0.12276
[836]	validation_0-rmse:0.17426	validation_0-myloss:0.12276
[837]	validation_0-rmse:0.17423	validation_0-myloss:0.12274
[838]	validation_0-rmse:0.17421	validation_0-myloss:0.12273
[839]	validation_0-rmse:0.17421	validati

[960]	validation_0-rmse:0.17205	validation_0-myloss:0.12125
[961]	validation_0-rmse:0.17203	validation_0-myloss:0.12123
[962]	validation_0-rmse:0.17201	validation_0-myloss:0.12121
[963]	validation_0-rmse:0.17199	validation_0-myloss:0.12120
[964]	validation_0-rmse:0.17198	validation_0-myloss:0.12119
[965]	validation_0-rmse:0.17196	validation_0-myloss:0.12117
[966]	validation_0-rmse:0.17195	validation_0-myloss:0.12117
[967]	validation_0-rmse:0.17195	validation_0-myloss:0.12117
[968]	validation_0-rmse:0.17193	validation_0-myloss:0.12115
[969]	validation_0-rmse:0.17192	validation_0-myloss:0.12114
[970]	validation_0-rmse:0.17191	validation_0-myloss:0.12114
[971]	validation_0-rmse:0.17190	validation_0-myloss:0.12112
[972]	validation_0-rmse:0.17188	validation_0-myloss:0.12111
[973]	validation_0-rmse:0.17186	validation_0-myloss:0.12109
[974]	validation_0-rmse:0.17185	validation_0-myloss:0.12109
[975]	validation_0-rmse:0.17184	validation_0-myloss:0.12108
[976]	validation_0-rmse:0.17182	validati

[1095]	validation_0-rmse:0.17024	validation_0-myloss:0.11982
[1096]	validation_0-rmse:0.17022	validation_0-myloss:0.11980
[1097]	validation_0-rmse:0.17023	validation_0-myloss:0.11981
[1098]	validation_0-rmse:0.17022	validation_0-myloss:0.11979
[1099]	validation_0-rmse:0.17021	validation_0-myloss:0.11979
[1100]	validation_0-rmse:0.17019	validation_0-myloss:0.11978
[1101]	validation_0-rmse:0.17018	validation_0-myloss:0.11977
[1102]	validation_0-rmse:0.17017	validation_0-myloss:0.11976
[1103]	validation_0-rmse:0.17014	validation_0-myloss:0.11975
[1104]	validation_0-rmse:0.17012	validation_0-myloss:0.11973
[1105]	validation_0-rmse:0.17011	validation_0-myloss:0.11972
[1106]	validation_0-rmse:0.17009	validation_0-myloss:0.11971
[1107]	validation_0-rmse:0.17007	validation_0-myloss:0.11968
[1108]	validation_0-rmse:0.17006	validation_0-myloss:0.11967
[1109]	validation_0-rmse:0.17004	validation_0-myloss:0.11966
[1110]	validation_0-rmse:0.17003	validation_0-myloss:0.11965
[1111]	validation_0-rmse

[1230]	validation_0-rmse:0.16886	validation_0-myloss:0.11872
[1231]	validation_0-rmse:0.16884	validation_0-myloss:0.11871
[1232]	validation_0-rmse:0.16883	validation_0-myloss:0.11871
[1233]	validation_0-rmse:0.16882	validation_0-myloss:0.11869
[1234]	validation_0-rmse:0.16881	validation_0-myloss:0.11868
[1235]	validation_0-rmse:0.16880	validation_0-myloss:0.11868
[1236]	validation_0-rmse:0.16879	validation_0-myloss:0.11867
[1237]	validation_0-rmse:0.16878	validation_0-myloss:0.11867
[1238]	validation_0-rmse:0.16877	validation_0-myloss:0.11866
[1239]	validation_0-rmse:0.16876	validation_0-myloss:0.11866
[1240]	validation_0-rmse:0.16874	validation_0-myloss:0.11865
[1241]	validation_0-rmse:0.16874	validation_0-myloss:0.11864
[1242]	validation_0-rmse:0.16873	validation_0-myloss:0.11863
[1243]	validation_0-rmse:0.16872	validation_0-myloss:0.11863
[1244]	validation_0-rmse:0.16870	validation_0-myloss:0.11861
[1245]	validation_0-rmse:0.16869	validation_0-myloss:0.11859
[1246]	validation_0-rmse

[1365]	validation_0-rmse:0.16791	validation_0-myloss:0.11789
[1366]	validation_0-rmse:0.16790	validation_0-myloss:0.11788
[1367]	validation_0-rmse:0.16789	validation_0-myloss:0.11787
[1368]	validation_0-rmse:0.16789	validation_0-myloss:0.11787
[1369]	validation_0-rmse:0.16788	validation_0-myloss:0.11787
[1370]	validation_0-rmse:0.16788	validation_0-myloss:0.11788
[1371]	validation_0-rmse:0.16787	validation_0-myloss:0.11787
[1372]	validation_0-rmse:0.16786	validation_0-myloss:0.11786
[1373]	validation_0-rmse:0.16786	validation_0-myloss:0.11786
[1374]	validation_0-rmse:0.16786	validation_0-myloss:0.11786
[1375]	validation_0-rmse:0.16785	validation_0-myloss:0.11785
[1376]	validation_0-rmse:0.16785	validation_0-myloss:0.11785
[1377]	validation_0-rmse:0.16783	validation_0-myloss:0.11784
[1378]	validation_0-rmse:0.16782	validation_0-myloss:0.11783
[1379]	validation_0-rmse:0.16781	validation_0-myloss:0.11782
[1380]	validation_0-rmse:0.16781	validation_0-myloss:0.11782
[1381]	validation_0-rmse

[1500]	validation_0-rmse:0.16714	validation_0-myloss:0.11718
[1501]	validation_0-rmse:0.16714	validation_0-myloss:0.11717
[1502]	validation_0-rmse:0.16713	validation_0-myloss:0.11717
[1503]	validation_0-rmse:0.16713	validation_0-myloss:0.11717
[1504]	validation_0-rmse:0.16712	validation_0-myloss:0.11717
[1505]	validation_0-rmse:0.16711	validation_0-myloss:0.11716
[1506]	validation_0-rmse:0.16710	validation_0-myloss:0.11715
[1507]	validation_0-rmse:0.16709	validation_0-myloss:0.11714
[1508]	validation_0-rmse:0.16708	validation_0-myloss:0.11714
[1509]	validation_0-rmse:0.16708	validation_0-myloss:0.11713
[1510]	validation_0-rmse:0.16707	validation_0-myloss:0.11713
[1511]	validation_0-rmse:0.16707	validation_0-myloss:0.11712
[1512]	validation_0-rmse:0.16706	validation_0-myloss:0.11711
[1513]	validation_0-rmse:0.16706	validation_0-myloss:0.11711
[1514]	validation_0-rmse:0.16704	validation_0-myloss:0.11709
[1515]	validation_0-rmse:0.16704	validation_0-myloss:0.11709
[1516]	validation_0-rmse

[1635]	validation_0-rmse:0.16650	validation_0-myloss:0.11659
[1636]	validation_0-rmse:0.16649	validation_0-myloss:0.11658
[1637]	validation_0-rmse:0.16649	validation_0-myloss:0.11658
[1638]	validation_0-rmse:0.16649	validation_0-myloss:0.11657
[1639]	validation_0-rmse:0.16648	validation_0-myloss:0.11657
[1640]	validation_0-rmse:0.16647	validation_0-myloss:0.11657
[1641]	validation_0-rmse:0.16646	validation_0-myloss:0.11657
[1642]	validation_0-rmse:0.16647	validation_0-myloss:0.11657
[1643]	validation_0-rmse:0.16646	validation_0-myloss:0.11656
[1644]	validation_0-rmse:0.16645	validation_0-myloss:0.11655
[1645]	validation_0-rmse:0.16645	validation_0-myloss:0.11655
[1646]	validation_0-rmse:0.16644	validation_0-myloss:0.11654
[1647]	validation_0-rmse:0.16645	validation_0-myloss:0.11655
[1648]	validation_0-rmse:0.16645	validation_0-myloss:0.11655
[1649]	validation_0-rmse:0.16643	validation_0-myloss:0.11654
[1650]	validation_0-rmse:0.16643	validation_0-myloss:0.11654
[1651]	validation_0-rmse

[1770]	validation_0-rmse:0.16589	validation_0-myloss:0.11605
[1771]	validation_0-rmse:0.16588	validation_0-myloss:0.11605
[1772]	validation_0-rmse:0.16588	validation_0-myloss:0.11604
[1773]	validation_0-rmse:0.16588	validation_0-myloss:0.11604
[1774]	validation_0-rmse:0.16587	validation_0-myloss:0.11604
[1775]	validation_0-rmse:0.16587	validation_0-myloss:0.11603
[1776]	validation_0-rmse:0.16586	validation_0-myloss:0.11603
[1777]	validation_0-rmse:0.16586	validation_0-myloss:0.11603
[1778]	validation_0-rmse:0.16586	validation_0-myloss:0.11603
[1779]	validation_0-rmse:0.16585	validation_0-myloss:0.11603
[1780]	validation_0-rmse:0.16584	validation_0-myloss:0.11602
[1781]	validation_0-rmse:0.16584	validation_0-myloss:0.11602
[1782]	validation_0-rmse:0.16584	validation_0-myloss:0.11602
[1783]	validation_0-rmse:0.16583	validation_0-myloss:0.11601
[1784]	validation_0-rmse:0.16583	validation_0-myloss:0.11602
[1785]	validation_0-rmse:0.16583	validation_0-myloss:0.11601
[1786]	validation_0-rmse

[1905]	validation_0-rmse:0.16544	validation_0-myloss:0.11562
[1906]	validation_0-rmse:0.16543	validation_0-myloss:0.11562
[1907]	validation_0-rmse:0.16543	validation_0-myloss:0.11562
[1908]	validation_0-rmse:0.16543	validation_0-myloss:0.11562
[1909]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1910]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1911]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1912]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1913]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1914]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1915]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1916]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1917]	validation_0-rmse:0.16542	validation_0-myloss:0.11562
[1918]	validation_0-rmse:0.16541	validation_0-myloss:0.11562
[1919]	validation_0-rmse:0.16541	validation_0-myloss:0.11562
[1920]	validation_0-rmse:0.16540	validation_0-myloss:0.11561
[1921]	validation_0-rmse

[2040]	validation_0-rmse:0.16506	validation_0-myloss:0.11524
[2041]	validation_0-rmse:0.16506	validation_0-myloss:0.11523
[2042]	validation_0-rmse:0.16506	validation_0-myloss:0.11524
[2043]	validation_0-rmse:0.16505	validation_0-myloss:0.11524
[2044]	validation_0-rmse:0.16505	validation_0-myloss:0.11524
[2045]	validation_0-rmse:0.16506	validation_0-myloss:0.11524
[2046]	validation_0-rmse:0.16506	validation_0-myloss:0.11524
[2047]	validation_0-rmse:0.16506	validation_0-myloss:0.11524
[2048]	validation_0-rmse:0.16506	validation_0-myloss:0.11524
[2049]	validation_0-rmse:0.16505	validation_0-myloss:0.11523
[2050]	validation_0-rmse:0.16505	validation_0-myloss:0.11523
[2051]	validation_0-rmse:0.16505	validation_0-myloss:0.11522
[2052]	validation_0-rmse:0.16505	validation_0-myloss:0.11522
[2053]	validation_0-rmse:0.16505	validation_0-myloss:0.11522
[2054]	validation_0-rmse:0.16504	validation_0-myloss:0.11521
[2055]	validation_0-rmse:0.16504	validation_0-myloss:0.11521
[2056]	validation_0-rmse

[2175]	validation_0-rmse:0.16472	validation_0-myloss:0.11488
[2176]	validation_0-rmse:0.16471	validation_0-myloss:0.11488
[2177]	validation_0-rmse:0.16471	validation_0-myloss:0.11487
[2178]	validation_0-rmse:0.16470	validation_0-myloss:0.11486
[2179]	validation_0-rmse:0.16469	validation_0-myloss:0.11486
[2180]	validation_0-rmse:0.16469	validation_0-myloss:0.11485
[2181]	validation_0-rmse:0.16469	validation_0-myloss:0.11485
[2182]	validation_0-rmse:0.16468	validation_0-myloss:0.11485
[2183]	validation_0-rmse:0.16468	validation_0-myloss:0.11485
[2184]	validation_0-rmse:0.16468	validation_0-myloss:0.11485
[2185]	validation_0-rmse:0.16468	validation_0-myloss:0.11485
[2186]	validation_0-rmse:0.16468	validation_0-myloss:0.11485
[2187]	validation_0-rmse:0.16468	validation_0-myloss:0.11485
[2188]	validation_0-rmse:0.16467	validation_0-myloss:0.11484
[2189]	validation_0-rmse:0.16467	validation_0-myloss:0.11484
[2190]	validation_0-rmse:0.16466	validation_0-myloss:0.11483
[2191]	validation_0-rmse

[2310]	validation_0-rmse:0.16435	validation_0-myloss:0.11453
[2311]	validation_0-rmse:0.16435	validation_0-myloss:0.11453
[2312]	validation_0-rmse:0.16435	validation_0-myloss:0.11452
[2313]	validation_0-rmse:0.16435	validation_0-myloss:0.11452
[2314]	validation_0-rmse:0.16434	validation_0-myloss:0.11452
[2315]	validation_0-rmse:0.16434	validation_0-myloss:0.11452
[2316]	validation_0-rmse:0.16434	validation_0-myloss:0.11451
[2317]	validation_0-rmse:0.16434	validation_0-myloss:0.11451
[2318]	validation_0-rmse:0.16434	validation_0-myloss:0.11451
[2319]	validation_0-rmse:0.16434	validation_0-myloss:0.11451
[2320]	validation_0-rmse:0.16434	validation_0-myloss:0.11451
[2321]	validation_0-rmse:0.16433	validation_0-myloss:0.11450
[2322]	validation_0-rmse:0.16432	validation_0-myloss:0.11450
[2323]	validation_0-rmse:0.16433	validation_0-myloss:0.11450
[2324]	validation_0-rmse:0.16433	validation_0-myloss:0.11450
[2325]	validation_0-rmse:0.16432	validation_0-myloss:0.11450
[2326]	validation_0-rmse

[2445]	validation_0-rmse:0.16413	validation_0-myloss:0.11429
[2446]	validation_0-rmse:0.16413	validation_0-myloss:0.11429
[2447]	validation_0-rmse:0.16413	validation_0-myloss:0.11428
[2448]	validation_0-rmse:0.16413	validation_0-myloss:0.11428
[2449]	validation_0-rmse:0.16412	validation_0-myloss:0.11428
[2450]	validation_0-rmse:0.16412	validation_0-myloss:0.11428
[2451]	validation_0-rmse:0.16412	validation_0-myloss:0.11428
[2452]	validation_0-rmse:0.16412	validation_0-myloss:0.11427
[2453]	validation_0-rmse:0.16412	validation_0-myloss:0.11427
[2454]	validation_0-rmse:0.16412	validation_0-myloss:0.11427
[2455]	validation_0-rmse:0.16412	validation_0-myloss:0.11427
[2456]	validation_0-rmse:0.16412	validation_0-myloss:0.11427
[2457]	validation_0-rmse:0.16412	validation_0-myloss:0.11427
[2458]	validation_0-rmse:0.16412	validation_0-myloss:0.11427
[2459]	validation_0-rmse:0.16412	validation_0-myloss:0.11428
[2460]	validation_0-rmse:0.16412	validation_0-myloss:0.11428
[2461]	validation_0-rmse

[2580]	validation_0-rmse:0.16401	validation_0-myloss:0.11410
[2581]	validation_0-rmse:0.16401	validation_0-myloss:0.11410
[2582]	validation_0-rmse:0.16401	validation_0-myloss:0.11410
[2583]	validation_0-rmse:0.16400	validation_0-myloss:0.11409
[2584]	validation_0-rmse:0.16400	validation_0-myloss:0.11409
[2585]	validation_0-rmse:0.16400	validation_0-myloss:0.11409
[2586]	validation_0-rmse:0.16400	validation_0-myloss:0.11408
[2587]	validation_0-rmse:0.16400	validation_0-myloss:0.11408
[2588]	validation_0-rmse:0.16399	validation_0-myloss:0.11408
[2589]	validation_0-rmse:0.16399	validation_0-myloss:0.11407
[2590]	validation_0-rmse:0.16399	validation_0-myloss:0.11407
[2591]	validation_0-rmse:0.16398	validation_0-myloss:0.11407
[2592]	validation_0-rmse:0.16398	validation_0-myloss:0.11407
[2593]	validation_0-rmse:0.16398	validation_0-myloss:0.11406
[2594]	validation_0-rmse:0.16398	validation_0-myloss:0.11406
[2595]	validation_0-rmse:0.16397	validation_0-myloss:0.11406
[2596]	validation_0-rmse

[2715]	validation_0-rmse:0.16384	validation_0-myloss:0.11388
[2716]	validation_0-rmse:0.16384	validation_0-myloss:0.11388
[2717]	validation_0-rmse:0.16384	validation_0-myloss:0.11388
[2718]	validation_0-rmse:0.16384	validation_0-myloss:0.11388
[2719]	validation_0-rmse:0.16383	validation_0-myloss:0.11387
[2720]	validation_0-rmse:0.16383	validation_0-myloss:0.11388
[2721]	validation_0-rmse:0.16383	validation_0-myloss:0.11387
[2722]	validation_0-rmse:0.16383	validation_0-myloss:0.11387
[2723]	validation_0-rmse:0.16383	validation_0-myloss:0.11387
[2724]	validation_0-rmse:0.16383	validation_0-myloss:0.11387
[2725]	validation_0-rmse:0.16383	validation_0-myloss:0.11387
[2726]	validation_0-rmse:0.16382	validation_0-myloss:0.11386
[2727]	validation_0-rmse:0.16382	validation_0-myloss:0.11386
[2728]	validation_0-rmse:0.16382	validation_0-myloss:0.11386
[2729]	validation_0-rmse:0.16382	validation_0-myloss:0.11386
[2730]	validation_0-rmse:0.16382	validation_0-myloss:0.11386
[2731]	validation_0-rmse

[2850]	validation_0-rmse:0.16368	validation_0-myloss:0.11373
[2851]	validation_0-rmse:0.16368	validation_0-myloss:0.11373
[2852]	validation_0-rmse:0.16367	validation_0-myloss:0.11372
[2853]	validation_0-rmse:0.16367	validation_0-myloss:0.11372
[2854]	validation_0-rmse:0.16368	validation_0-myloss:0.11371
[2855]	validation_0-rmse:0.16367	validation_0-myloss:0.11371
[2856]	validation_0-rmse:0.16368	validation_0-myloss:0.11371
[2857]	validation_0-rmse:0.16367	validation_0-myloss:0.11371
[2858]	validation_0-rmse:0.16367	validation_0-myloss:0.11371
[2859]	validation_0-rmse:0.16368	validation_0-myloss:0.11371
[2860]	validation_0-rmse:0.16367	validation_0-myloss:0.11371
[2861]	validation_0-rmse:0.16367	validation_0-myloss:0.11371
[2862]	validation_0-rmse:0.16367	validation_0-myloss:0.11370
[2863]	validation_0-rmse:0.16367	validation_0-myloss:0.11371
[2864]	validation_0-rmse:0.16367	validation_0-myloss:0.11371
[2865]	validation_0-rmse:0.16367	validation_0-myloss:0.11370
[2866]	validation_0-rmse

[2985]	validation_0-rmse:0.16359	validation_0-myloss:0.11360
[2986]	validation_0-rmse:0.16359	validation_0-myloss:0.11360
[2987]	validation_0-rmse:0.16359	validation_0-myloss:0.11360
[2988]	validation_0-rmse:0.16359	validation_0-myloss:0.11361
[2989]	validation_0-rmse:0.16359	validation_0-myloss:0.11360
[2990]	validation_0-rmse:0.16359	validation_0-myloss:0.11360
[2991]	validation_0-rmse:0.16359	validation_0-myloss:0.11360
[2992]	validation_0-rmse:0.16359	validation_0-myloss:0.11360
[2993]	validation_0-rmse:0.16359	validation_0-myloss:0.11360
[2994]	validation_0-rmse:0.16358	validation_0-myloss:0.11360
[2995]	validation_0-rmse:0.16358	validation_0-myloss:0.11359
[2996]	validation_0-rmse:0.16357	validation_0-myloss:0.11360
[2997]	validation_0-rmse:0.16357	validation_0-myloss:0.11359
[2998]	validation_0-rmse:0.16356	validation_0-myloss:0.11359
[2999]	validation_0-rmse:0.16356	validation_0-myloss:0.11359
[3000]	validation_0-rmse:0.16356	validation_0-myloss:0.11359
[3001]	validation_0-rmse

[3120]	validation_0-rmse:0.16351	validation_0-myloss:0.11350
[3121]	validation_0-rmse:0.16351	validation_0-myloss:0.11350
[3122]	validation_0-rmse:0.16352	validation_0-myloss:0.11351
[3123]	validation_0-rmse:0.16352	validation_0-myloss:0.11351
[3124]	validation_0-rmse:0.16352	validation_0-myloss:0.11351
[3125]	validation_0-rmse:0.16352	validation_0-myloss:0.11351
[3126]	validation_0-rmse:0.16352	validation_0-myloss:0.11351
[3127]	validation_0-rmse:0.16352	validation_0-myloss:0.11350
[3128]	validation_0-rmse:0.16352	validation_0-myloss:0.11351
[3129]	validation_0-rmse:0.16351	validation_0-myloss:0.11350
[3130]	validation_0-rmse:0.16351	validation_0-myloss:0.11350
[3131]	validation_0-rmse:0.16351	validation_0-myloss:0.11349
[3132]	validation_0-rmse:0.16351	validation_0-myloss:0.11349
[3133]	validation_0-rmse:0.16351	validation_0-myloss:0.11349
[3134]	validation_0-rmse:0.16351	validation_0-myloss:0.11349
[3135]	validation_0-rmse:0.16351	validation_0-myloss:0.11349
[3136]	validation_0-rmse

[3255]	validation_0-rmse:0.16344	validation_0-myloss:0.11338
[3256]	validation_0-rmse:0.16344	validation_0-myloss:0.11338
[3257]	validation_0-rmse:0.16344	validation_0-myloss:0.11338
[3258]	validation_0-rmse:0.16345	validation_0-myloss:0.11338
[3259]	validation_0-rmse:0.16345	validation_0-myloss:0.11338
[3260]	validation_0-rmse:0.16345	validation_0-myloss:0.11338
[3261]	validation_0-rmse:0.16346	validation_0-myloss:0.11338
[3262]	validation_0-rmse:0.16345	validation_0-myloss:0.11338
[3263]	validation_0-rmse:0.16345	validation_0-myloss:0.11338
[3264]	validation_0-rmse:0.16345	validation_0-myloss:0.11338
[3265]	validation_0-rmse:0.16345	validation_0-myloss:0.11338
[3266]	validation_0-rmse:0.16344	validation_0-myloss:0.11338
[3267]	validation_0-rmse:0.16344	validation_0-myloss:0.11338
[3268]	validation_0-rmse:0.16344	validation_0-myloss:0.11338
[3269]	validation_0-rmse:0.16344	validation_0-myloss:0.11338
[3270]	validation_0-rmse:0.16344	validation_0-myloss:0.11338
[3271]	validation_0-rmse

[3390]	validation_0-rmse:0.16337	validation_0-myloss:0.11329
[3391]	validation_0-rmse:0.16337	validation_0-myloss:0.11329
[3392]	validation_0-rmse:0.16337	validation_0-myloss:0.11329
[3393]	validation_0-rmse:0.16337	validation_0-myloss:0.11329
[3394]	validation_0-rmse:0.16337	validation_0-myloss:0.11329
[3395]	validation_0-rmse:0.16337	validation_0-myloss:0.11330
[3396]	validation_0-rmse:0.16337	validation_0-myloss:0.11330
[3397]	validation_0-rmse:0.16337	validation_0-myloss:0.11330
[3398]	validation_0-rmse:0.16337	validation_0-myloss:0.11330
[3399]	validation_0-rmse:0.16338	validation_0-myloss:0.11330
[3400]	validation_0-rmse:0.16338	validation_0-myloss:0.11330
[3401]	validation_0-rmse:0.16338	validation_0-myloss:0.11330
[3402]	validation_0-rmse:0.16338	validation_0-myloss:0.11330
[3403]	validation_0-rmse:0.16338	validation_0-myloss:0.11330
[3404]	validation_0-rmse:0.16338	validation_0-myloss:0.11330
[3405]	validation_0-rmse:0.16338	validation_0-myloss:0.11330
[3406]	validation_0-rmse

[3525]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3526]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3527]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3528]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3529]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3530]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3531]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3532]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3533]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3534]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3535]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3536]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3537]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3538]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3539]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3540]	validation_0-rmse:0.16336	validation_0-myloss:0.11328
[3541]	validation_0-rmse

[3660]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3661]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3662]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3663]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3664]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3665]	validation_0-rmse:0.16333	validation_0-myloss:0.11324
[3666]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3667]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3668]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3669]	validation_0-rmse:0.16332	validation_0-myloss:0.11324
[3670]	validation_0-rmse:0.16331	validation_0-myloss:0.11323
[3671]	validation_0-rmse:0.16331	validation_0-myloss:0.11323
[3672]	validation_0-rmse:0.16331	validation_0-myloss:0.11323
[3673]	validation_0-rmse:0.16331	validation_0-myloss:0.11323
[3674]	validation_0-rmse:0.16331	validation_0-myloss:0.11323
[3675]	validation_0-rmse:0.16331	validation_0-myloss:0.11323
[3676]	validation_0-rmse

[3795]	validation_0-rmse:0.16328	validation_0-myloss:0.11318
[3796]	validation_0-rmse:0.16328	validation_0-myloss:0.11318
[3797]	validation_0-rmse:0.16328	validation_0-myloss:0.11318
[3798]	validation_0-rmse:0.16328	validation_0-myloss:0.11318
[3799]	validation_0-rmse:0.16328	validation_0-myloss:0.11318
[3800]	validation_0-rmse:0.16328	validation_0-myloss:0.11318
[3801]	validation_0-rmse:0.16329	validation_0-myloss:0.11318
[3802]	validation_0-rmse:0.16329	validation_0-myloss:0.11318
[3803]	validation_0-rmse:0.16329	validation_0-myloss:0.11318
[3804]	validation_0-rmse:0.16329	validation_0-myloss:0.11319
[3805]	validation_0-rmse:0.16329	validation_0-myloss:0.11319
[3806]	validation_0-rmse:0.16328	validation_0-myloss:0.11318
[3807]	validation_0-rmse:0.16329	validation_0-myloss:0.11318
[3808]	validation_0-rmse:0.16329	validation_0-myloss:0.11319
[3809]	validation_0-rmse:0.16329	validation_0-myloss:0.11319
[3810]	validation_0-rmse:0.16329	validation_0-myloss:0.11319
[3811]	validation_0-rmse

In [None]:
# Gain-based importance scores taken from the underlying booster of `bestXGB`.
importance_Score = bestXGB.get_booster().get_score(importance_type="gain")
# Feature positions ordered from the largest to the smallest
# `feature_importances_` value.
sorted_idx = np.flip(np.argsort(bestXGB.feature_importances_))

In [10]:
# Final XGBoost pipeline: the shared preprocessor followed by a boosted-tree
# regressor, fitted on the full training frame.
pipe_final = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', XGBRegressor(colsample_bytree=0.6,
                           gamma=0,
                           learning_rate=0.01,
                           reg_lambda=0.01,
                           reg_alpha=1,
                           max_depth=6,
                           min_child_weight=2,
                           n_estimators=3000,
                           eval_metric=myloss,
                           subsample=0.9,
                           # `random_state` is the documented scikit-learn-API
                           # parameter for the seed (replaces the legacy
                           # `seed` spelling).
                           random_state=42))
])

# The model is trained on log(SaleDollarCnt); predictions therefore have to be
# passed through np.exp to get back to dollars.
target_log = np.log(target)
pipe_final.fit(dat_train_after, target_log)


In [18]:
# Persisted copy of the fitted pipeline; uncomment the dump line to regenerate
# 'XGBooster.pkl' after refitting `pipe_final`.
#joblib.dump(pipe_final, 'XGBooster.pkl')
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load an 'XGBooster.pkl' that was produced by this notebook.
XGBooster = joblib.load('XGBooster.pkl')

In [11]:
# Predict on the test features (log scale), convert back to dollars and pair
# each prediction with its PropertyID.
y_pred = pipe_final.predict(dat_test_after)
output = {
    'PropertyID': dat_test['PropertyID'],
    'SaleDollarCnt': np.round(np.exp(y_pred), 3),
}
output_dat = pd.DataFrame(data=output)
#output_dat.to_csv('Prediction_XGB.csv')

In [12]:
# Display the prediction frame (PropertyID, SaleDollarCnt).
output_dat

Unnamed: 0,PropertyID,SaleDollarCnt
0,48735321,2.375323e+06
1,48735471,1.025143e+06
2,49128764,5.301614e+05
3,48897535,4.060026e+05
4,49083957,1.080858e+06
...,...,...
4397,49034257,2.749728e+05
4398,49101515,5.930839e+04
4399,49092758,1.778608e+05
4400,49034232,3.390675e+05


In [13]:
# Display the test features loaded from 'test_ZILLOW_CONFIDENTIAL.csv'.
dat_test

Unnamed: 0,PropertyID,TransDate,censusblockgroup,ZoneCodeCounty,Usecode,BedroomCnt,BathroomCnt,FinishedSquareFeet,GarageSquareFeet,LotSizeSquareFeet,...,Latitude,Longitude,BGMedHomeValue,BGMedRent,BGMedYearBuilt,BGPctOwn,BGPctVacant,BGMedIncome,BGPctKids,BGMedAge
0,48735321,10/31/2015,530330001001,SF 9600,9,5.0,4.000000,5540,,25338,...,47725642,-122283771,527700.0,1750.0,1956.0,0.9134,0.1061,113450,0.2524,49.6
1,48735471,11/6/2015,530330001001,SF 9600,9,5.0,3.000000,2470,510.0,26006,...,47726993,-122281969,527700.0,1750.0,1956.0,0.9134,0.1061,113450,0.2524,49.6
2,49128764,10/17/2015,530330001001,SF 7200,9,4.0,2.000000,1680,,8743,...,47731749,-122289304,527700.0,1750.0,1956.0,0.9134,0.1061,113450,0.2524,49.6
3,48897535,11/19/2015,530330001001,SF 7200,9,2.0,1.000000,990,260.0,12219,...,47728810,-122289224,527700.0,1750.0,1956.0,0.9134,0.1061,113450,0.2524,49.6
4,49083957,12/15/2015,530330001001,SF 9600,9,4.0,3.000000,2960,550.0,23568,...,47731170,-122282684,527700.0,1750.0,1956.0,0.9134,0.1061,113450,0.2524,49.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4397,49034257,12/16/2015,530330328003,RA2.5,9,3.0,2.000000,1420,,13590,...,47758782,-121472451,180600.0,507.0,1971.0,0.6477,0.6384,26979,0.1317,48.5
4398,49101515,12/8/2015,530330328003,F,9,2.0,2.560000,730,,11758,...,47713245,-121171762,180600.0,507.0,1971.0,0.6477,0.6384,26979,0.1317,48.5
4399,49092758,10/6/2015,530330328003,RA2.5,9,2.0,1.000000,770,,15141,...,47713748,-121322193,180600.0,507.0,1971.0,0.6477,0.6384,26979,0.1317,48.5
4400,49034232,11/21/2015,530330328003,RA2.5,9,3.0,1.942308,2250,,15600,...,47760904,-121475524,180600.0,507.0,1971.0,0.6477,0.6384,26979,0.1317,48.5


## GradientBoosting

In [210]:
# Baseline gradient-boosting pipeline; the hyper-parameters given here are
# placeholders that the grid below overrides during the search.
pipe2 = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', GradientBoostingRegressor(learning_rate = 1e-3,
                                        n_estimators = 200,
                                        max_depth = 5,
                                        max_features = None,
                                        min_samples_leaf = 3,
                                        loss = 'squared_error'))
])

# Search space for GridSearchCV. Keys use the Pipeline convention
# '<step name>__<parameter>' (double underscore), so every entry targets the
# 'model' step.
# Fixed here: the subsample entry was written as `'model_subsample:' [...]`
# (misplaced colon, single underscore, missing trailing comma), which is a
# syntax error. Fractions are also written as floats: an integer 1 for
# `max_features` would mean "one feature per split", not "all features".
param_grid2 = {'model__learning_rate': [0.1],
               'model__subsample': [0.3, 0.6, 0.8, 1.0],
               'model__max_depth': [5, 7, 10],
               'model__n_estimators': [300],
               'model__max_features': [0.5, 0.6, 0.8, 1.0],
               'model__min_samples_leaf': [1, 3, 5]
              }



In [211]:
# Grid search over `param_grid2`, scored with `myscore`; the target is
# log-transformed before fitting.
target_log = np.log(y_train)
grid_search = GridSearchCV(estimator=pipe2,
                           param_grid=param_grid2,
                           scoring=myscore,
                           n_jobs=-1)
grid_search.fit(X_train, target_log)

In [14]:
#joblib.dump(grid_search, 'GradientBoosting2.pkl')
#gs = joblib.load('GradientBoosting2.pkl')

In [None]:
# Gradient-boosting pipeline with the chosen hyper-parameters, refitted on the
# full training frame against the log of the sale price.
best_GBoosting = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', GradientBoostingRegressor(learning_rate=0.01,
                                        subsample=0.8,
                                        n_estimators=3000,
                                        max_depth=7,
                                        max_features=0.8,
                                        min_samples_leaf=5,
                                        loss='squared_error',
                                        # subsample / max_features < 1 make the
                                        # fit stochastic; pin the seed so the
                                        # predictions are reproducible (same
                                        # seed as the XGBoost model).
                                        random_state=42))
])

best_GBoosting.fit(dat_train_after, np.log(target))
#y_pred = best_GBoosting.predict(X_test)
#mean_absolute_percentage_error(y_test, np.exp(y_pred))

In [242]:
# Log-scale predictions for the test features from the gradient-boosting pipeline.
y_pred = best_GBoosting.predict(dat_test_after)

In [266]:
# Convert the log-scale predictions back to dollars and pair them with the
# property ids.
output = {'PropertyID': dat_test['PropertyID'], 'SaleDollarCnt': np.exp(y_pred).round(3)}
# Rebuild the frame from the fresh `output`; without this the previously built
# `output_dat` (the XGBoost predictions) would be written to the GB file.
output_dat = pd.DataFrame(output)
output_dat.to_csv('Prediction1_GB.csv')

## Ensemble with SuperLearner

In [26]:
# Stacked ensemble: three tree-based base learners share the preprocessor, and
# an XGBoost model is used as the meta learner. Fitted on the log of the sale
# price of the training split.
superlearner = SuperLearner()

XGB = XGBRegressor(colsample_bytree=0.6,
                   gamma=0,
                   learning_rate=0.01,
                   reg_lambda=0.01,
                   reg_alpha=1,
                   max_depth=6,
                   min_child_weight=2,
                   n_estimators=3000,
                   eval_metric=myloss,
                   subsample=0.9,
                   # documented sklearn-API name for the seed (replaces the
                   # legacy `seed` spelling)
                   random_state=42)

# Both learners below sample rows/features, so they are seeded as well to keep
# the hold-out score reproducible.
GradB = GradientBoostingRegressor(learning_rate=0.01,
                                  subsample=0.8,
                                  n_estimators=3000,
                                  max_depth=7,
                                  max_features=0.8,
                                  min_samples_leaf=5,
                                  loss='squared_error',
                                  random_state=42)

RandomForest = RandomForestRegressor(max_samples=0.8,
                                     n_estimators=2000,
                                     max_depth=7,
                                     min_samples_split=2,
                                     max_features=0.8,
                                     min_samples_leaf=5,
                                     random_state=42)

superlearner.add([('XGB', XGB),
                  ('GradientBooster', GradB),
                  ('Random Forest', RandomForest)], preprocessing=[mypreprocessor])
superlearner.add_meta(XGB)
superlearner.fit(X_train, np.log(y_train))

SuperLearner(array_check=None, backend=None, folds=2,
       layers=[Layer(backend='threading', dtype=<class 'numpy.float32'>, n_jobs=-1,
   name='layer-1', propagate_features=None, raise_on_exception=True,
   random_state=None, shuffle=False,
   stack=[Group(backend='threading', dtype=<class 'numpy.float32'>,
   indexer=FoldIndex(X=None, folds=2, raise_on_ex...rer=None)],
   n_jobs=-1, name='group-7', raise_on_exception=True, transformers=[])],
   verbose=0)],
       model_selection=False, n_jobs=None, raise_on_exception=True,
       random_state=None, sample_size=20, scorer=None, shuffle=False,
       verbose=False)

In [27]:
# Hold-out MAPE of the stacked ensemble on the original price scale
# (predictions are exponentiated because the learner was fitted on log prices).
y_pred = superlearner.predict(X_test)
mean_absolute_percentage_error(y_true=y_test, y_pred=np.exp(y_pred))

0.13811785259455533

In [25]:
# Hold-out check of the XGBoost configuration: fit on the training split and
# report MAPE on the test split in the original price scale.
# NOTE(review): this rebinds `pipe_final` to a model fitted on X_train only;
# re-run the full-data fit before using `pipe_final` for the final test-set
# predictions.
pipe_final = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', XGBRegressor(colsample_bytree=0.6,
                           gamma=0,
                           learning_rate=0.01,
                           reg_lambda=0.01,
                           reg_alpha=1,
                           max_depth=6,
                           min_child_weight=2,
                           n_estimators=3000,
                           subsample=0.9,
                           # documented sklearn-API name for the seed
                           # (replaces the legacy `seed` spelling)
                           random_state=42))
])

pipe_final.fit(X_train, np.log(y_train))
y_pred = pipe_final.predict(X_test)
mean_absolute_percentage_error(y_test, np.exp(y_pred))

0.13138988603300247

In [33]:
# Hold-out check of a random-forest baseline: fit on the training split against
# log prices and report MAPE on the original price scale.
pipe = Pipeline([
    ('preprocessor', mypreprocessor),
    ('model', RandomForestRegressor(max_samples=0.9,
                                    n_estimators=2000,
                                    max_depth=10,
                                    min_samples_split=2,
                                    max_features=0.6,
                                    min_samples_leaf=5,
                                    # bootstrap rows and feature subsets are
                                    # drawn at random; seed them so the
                                    # reported score is reproducible
                                    random_state=42))
])

pipe.fit(X_train, np.log(y_train))
y_pred = pipe.predict(X_test)
mean_absolute_percentage_error(y_test, np.exp(y_pred))

0.14822342410187062

In [49]:
# Toy frame for the groupby examples: 10 rows with a random company label,
# a salary in [5, 50) and an age in [15, 50).
company = ["A", "B", "C"]

data = pd.DataFrame(
    {
        "company": list(map(company.__getitem__,
                            np.random.randint(0, len(company), size=10))),
        "salary": np.random.randint(5, 50, size=10),
        "age": np.random.randint(15, 50, size=10),
    }
)

In [38]:
# Display the toy company / salary / age frame.
data

Unnamed: 0,company,salary,age
0,C,13,46
1,B,30,48
2,C,31,44
3,C,16,15
4,B,23,46
5,A,42,28
6,B,15,43
7,A,22,28
8,A,48,23
9,A,24,16


In [46]:
# Mean salary per company, kept as a plain {company: mean} lookup.
avg_salary_dict = (
    data
    .groupby('company')['salary']
    .mean()
    .to_dict()
)
# Attach each company's mean salary to every row of that company.
data['avg_salary'] = data['company'].map(avg_salary_dict)

In [67]:
def get_ordest(x):
    """Return the largest value in the ``age`` column of ``x``.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame (typically one group from ``groupby``) with an ``age`` column.

    Returns
    -------
    scalar
        The maximum age; NaN when ``x`` has no rows.
    """
    # A direct reduction gives the same value as sorting descending and taking
    # the first row, without the O(n log n) sort and without raising
    # IndexError on an empty frame.
    return x['age'].max()

# Oldest age per company: get_ordest is applied to each company's sub-frame.
data.groupby('company').apply(get_ordest)


company
A    47
B    45
C    47
dtype: int32