# XGBoost Hyperparameter tuning

Hyperparameter tuning for this Kaggle competition: https://www.kaggle.com/c/bike-sharing-demand
See the main notebook for a full explanation

The notebook is organized into the following sections:
- XGBoost Model hyperparameter tuning


Data Notes: (see main notebook for details)
Notation notes: think of variable 'data' as the raw data table

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import xgboost as xgb
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)




# Data Preparation


In [2]:
data = pd.read_csv("train.csv")
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10886 entries, 0 to 10885
Data columns (total 12 columns):
datetime      10886 non-null object
season        10886 non-null int64
holiday       10886 non-null int64
workingday    10886 non-null int64
weather       10886 non-null int64
temp          10886 non-null float64
atemp         10886 non-null float64
humidity      10886 non-null int64
windspeed     10886 non-null float64
casual        10886 non-null int64
registered    10886 non-null int64
count         10886 non-null int64
dtypes: float64(3), int64(8), object(1)
memory usage: 1020.6+ KB



### Cleaning up the date format:
- convert to datetime
- change index
- add date col
- add y,m,d,hr cols
- drop datetime col

In [3]:
data['datetime'] = pd.to_datetime(data.datetime)
data.index = data.datetime
data['date'] = data['datetime'].dt.date

data['year'] = data['datetime'].dt.year
data['month'] = data['datetime'].dt.month
data['day'] = data['datetime'].dt.day
data['hour'] = data['datetime'].dt.hour
data = data.drop(["datetime"], axis = 1)

### Change to categorical variables

In [4]:
cat_vars = ["season","holiday","workingday","weather","hour","year","month","day"]
for var in cat_vars:
    data[var] = data[var].astype("category")

from sklearn.model_selection import train_test_split

reg_cat_vars = ['season','weather','hour','year','month','day']
cat_dummies = pd.get_dummies(dataDropped[reg_cat_vars], drop_first = True)
cat_dummies = cat_dummies.drop(['weather_4'], axis=1) #only 1 data point had category 4 weather

dataReg = pd.concat([dataDropped[['temp','humidity','holiday','workingday']], cat_dummies], axis=1)

X_train, X_test, y_train, y_test = train_test_split(dataReg, dataDropped['count'], test_size=0.2, random_state=0)

y_train = np.log1p(y_train)
y_test = np.log1p(y_test)

# XGBoost Model

Define Error Function

In [5]:
def rmsle(y, y_preds): #error function defined in the competition
    y = np.exp(y)
    y_preds = np.exp(y_preds)
    log1 = np.nan_to_num(np.array([np.log(v + 1) for v in y]))
    log2 = np.nan_to_num(np.array([np.log(v + 1) for v in y_preds]))
    calc = (log1 - log2) ** 2
    return np.sqrt(np.mean(calc))

Create training and test set with dummy and one-hot-encoding. XGBoost is very "robust to correlated variables" so no need to remove certain vars.


In [21]:
from sklearn.model_selection import train_test_split

dummy_vars = pd.get_dummies(data[cat_vars], drop_first = True)
dummy_vars = dummy_vars.drop(['weather_4'], axis=1) #only 1 data point had category 4 weather
dummyData = pd.concat([data[['temp','atemp','humidity','windspeed']], dummy_vars], axis=1)

one_hot_vars = pd.get_dummies(data[cat_vars], drop_first = False)
one_hot_vars = one_hot_vars.drop(['weather_4'], axis=1) #only 1 data point had category 4 weather
oneHotData = pd.concat([data[['temp','atemp','humidity','windspeed']], one_hot_vars], axis=1)


dummy_X_train, dummy_X_test, dummy_y_train, dummy_y_test = train_test_split(dummyData, np.log1p(data['count']), 
                                                                            test_size=0.2, random_state=0)
oh_X_train, oh_X_test, oh_y_train, oh_y_test = train_test_split(oneHotData, np.log1p(data['count']), 
                                                                test_size=0.2, random_state=0)

In [7]:
from xgboost.sklearn import XGBRegressor
from sklearn import metrics
from sklearn.grid_search import GridSearchCV

def trainModel(X_train, X_test, y_train, y_test, model, erounds):
    xtrain = X_train
    ytrain = y_train
    xtest = X_test
    ytest = y_test

    model.fit(xtrain, ytrain, eval_set=[(xtrain, ytrain),(xtest,ytest)], early_stopping_rounds=erounds)
    return model


def gridSearch(search_params, model, X_train, y_train):
    search = GridSearchCV(estimator = model, param_grid = search_params, 
                          scoring = 'neg_mean_squared_log_error', n_jobs=4, iid=False, cv=5)
    search.fit(X_train, y_train)
    print(search.grid_scores_, search.best_params_, search.best_score_)


Find optimal params and fit model for dummy encoding

In [8]:
model_init = XGBRegressor(learning_rate = 0.2,
               n_estimators=300,
               max_depth=8,
               min_child_weight=6,
               gamma=0.4,
               subsample=0.8,
               colsample_bytree=0.8,
               objective= 'reg:linear',
               scale_pos_weight=1,
               silent = True,
               seed=1)
m = trainModel(oh_X_train, oh_X_test, oh_y_train, oh_y_test, model_init, 50)

[0]	validation_0-rmse:3.51375	validation_1-rmse:3.53165
Multiple eval metrics have been passed: 'validation_1-rmse' will be used for early stopping.

Will train until validation_1-rmse hasn't improved in 50 rounds.
[1]	validation_0-rmse:2.87025	validation_1-rmse:2.88802
[2]	validation_0-rmse:2.32748	validation_1-rmse:2.3438
[3]	validation_0-rmse:1.92058	validation_1-rmse:1.94037
[4]	validation_0-rmse:1.60365	validation_1-rmse:1.62598
[5]	validation_0-rmse:1.35734	validation_1-rmse:1.38339
[6]	validation_0-rmse:1.15507	validation_1-rmse:1.18217
[7]	validation_0-rmse:1.00551	validation_1-rmse:1.03642
[8]	validation_0-rmse:0.852804	validation_1-rmse:0.884688
[9]	validation_0-rmse:0.739732	validation_1-rmse:0.774888
[10]	validation_0-rmse:0.657224	validation_1-rmse:0.694666
[11]	validation_0-rmse:0.6045	validation_1-rmse:0.644469
[12]	validation_0-rmse:0.547918	validation_1-rmse:0.589659
[13]	validation_0-rmse:0.507689	validation_1-rmse:0.550893
[14]	validation_0-rmse:0.47717	validation_1-

[137]	validation_0-rmse:0.219168	validation_1-rmse:0.33813
[138]	validation_0-rmse:0.218862	validation_1-rmse:0.338172
[139]	validation_0-rmse:0.21878	validation_1-rmse:0.338291
[140]	validation_0-rmse:0.21878	validation_1-rmse:0.338291
[141]	validation_0-rmse:0.218591	validation_1-rmse:0.338147
[142]	validation_0-rmse:0.218545	validation_1-rmse:0.338101
[143]	validation_0-rmse:0.218544	validation_1-rmse:0.338099
[144]	validation_0-rmse:0.218281	validation_1-rmse:0.338266
[145]	validation_0-rmse:0.218216	validation_1-rmse:0.338232
[146]	validation_0-rmse:0.218101	validation_1-rmse:0.33818
[147]	validation_0-rmse:0.217849	validation_1-rmse:0.338198
[148]	validation_0-rmse:0.217673	validation_1-rmse:0.338115
[149]	validation_0-rmse:0.217492	validation_1-rmse:0.338136
[150]	validation_0-rmse:0.217262	validation_1-rmse:0.338267
[151]	validation_0-rmse:0.217261	validation_1-rmse:0.338264
[152]	validation_0-rmse:0.217047	validation_1-rmse:0.3381
[153]	validation_0-rmse:0.216941	validation_1-

[275]	validation_0-rmse:0.206753	validation_1-rmse:0.337507
[276]	validation_0-rmse:0.206753	validation_1-rmse:0.337506
[277]	validation_0-rmse:0.206753	validation_1-rmse:0.337504
[278]	validation_0-rmse:0.206553	validation_1-rmse:0.337435
[279]	validation_0-rmse:0.206554	validation_1-rmse:0.337434
[280]	validation_0-rmse:0.206426	validation_1-rmse:0.337469
[281]	validation_0-rmse:0.206426	validation_1-rmse:0.337467
[282]	validation_0-rmse:0.206426	validation_1-rmse:0.337469
[283]	validation_0-rmse:0.206426	validation_1-rmse:0.337468
[284]	validation_0-rmse:0.206349	validation_1-rmse:0.337506
[285]	validation_0-rmse:0.206349	validation_1-rmse:0.337506
[286]	validation_0-rmse:0.206349	validation_1-rmse:0.337508
[287]	validation_0-rmse:0.206349	validation_1-rmse:0.337511
[288]	validation_0-rmse:0.206317	validation_1-rmse:0.337558
[289]	validation_0-rmse:0.206179	validation_1-rmse:0.337523
[290]	validation_0-rmse:0.206179	validation_1-rmse:0.337522
[291]	validation_0-rmse:0.206179	validat

Find optimal params and fit model for one hot encoding (one hot encoding has the best chance for max performance)

In [9]:
s1params = {
    'max_depth':range(3,10,2),
    'min_child_weight':range(2,10,2)
}

gridSearch(s1params, m, oh_X_train, oh_y_train)

([mean: -0.00718, std: 0.00074, params: {'max_depth': 3, 'min_child_weight': 2}, mean: -0.00716, std: 0.00068, params: {'max_depth': 3, 'min_child_weight': 4}, mean: -0.00721, std: 0.00069, params: {'max_depth': 3, 'min_child_weight': 6}, mean: -0.00723, std: 0.00075, params: {'max_depth': 3, 'min_child_weight': 8}, mean: -0.00717, std: 0.00083, params: {'max_depth': 5, 'min_child_weight': 2}, mean: -0.00704, std: 0.00077, params: {'max_depth': 5, 'min_child_weight': 4}, mean: -0.00701, std: 0.00087, params: {'max_depth': 5, 'min_child_weight': 6}, mean: -0.00707, std: 0.00084, params: {'max_depth': 5, 'min_child_weight': 8}, mean: -0.00725, std: 0.00084, params: {'max_depth': 7, 'min_child_weight': 2}, mean: -0.00706, std: 0.00086, params: {'max_depth': 7, 'min_child_weight': 4}, mean: -0.00714, std: 0.00073, params: {'max_depth': 7, 'min_child_weight': 6}, mean: -0.00710, std: 0.00069, params: {'max_depth': 7, 'min_child_weight': 8}, mean: -0.00745, std: 0.00085, params: {'max_depth'

In [10]:
s2params = {
    'max_depth':[4,5,6],
    'min_child_weight':[3,4,5,6,7]
}
gridSearch(s2params, m, oh_X_train, oh_y_train)


([mean: -0.00692, std: 0.00075, params: {'max_depth': 4, 'min_child_weight': 3}, mean: -0.00695, std: 0.00079, params: {'max_depth': 4, 'min_child_weight': 4}, mean: -0.00697, std: 0.00079, params: {'max_depth': 4, 'min_child_weight': 5}, mean: -0.00696, std: 0.00070, params: {'max_depth': 4, 'min_child_weight': 6}, mean: -0.00695, std: 0.00067, params: {'max_depth': 4, 'min_child_weight': 7}, mean: -0.00704, std: 0.00083, params: {'max_depth': 5, 'min_child_weight': 3}, mean: -0.00704, std: 0.00077, params: {'max_depth': 5, 'min_child_weight': 4}, mean: -0.00690, std: 0.00076, params: {'max_depth': 5, 'min_child_weight': 5}, mean: -0.00701, std: 0.00087, params: {'max_depth': 5, 'min_child_weight': 6}, mean: -0.00697, std: 0.00087, params: {'max_depth': 5, 'min_child_weight': 7}, mean: -0.00717, std: 0.00088, params: {'max_depth': 6, 'min_child_weight': 3}, mean: -0.00712, std: 0.00080, params: {'max_depth': 6, 'min_child_weight': 4}, mean: -0.00709, std: 0.00075, params: {'max_depth'

In [11]:
model_2 = XGBRegressor(learning_rate = 0.2,
               n_estimators=300,
               max_depth=5,
               min_child_weight=5,
               gamma=0,
               subsample=0.8,
               colsample_bytree=0.8,
               objective= 'reg:linear',
               scale_pos_weight=1,
               silent = True,
               seed=1)
m2 = trainModel(oh_X_train, oh_X_test, oh_y_train, oh_y_test, model_2, 50)

[0]	validation_0-rmse:3.51918	validation_1-rmse:3.53623
Multiple eval metrics have been passed: 'validation_1-rmse' will be used for early stopping.

Will train until validation_1-rmse hasn't improved in 50 rounds.
[1]	validation_0-rmse:2.88464	validation_1-rmse:2.89924
[2]	validation_0-rmse:2.35516	validation_1-rmse:2.3708
[3]	validation_0-rmse:1.95721	validation_1-rmse:1.97694
[4]	validation_0-rmse:1.65121	validation_1-rmse:1.67241
[5]	validation_0-rmse:1.41648	validation_1-rmse:1.43829
[6]	validation_0-rmse:1.23234	validation_1-rmse:1.25743
[7]	validation_0-rmse:1.08956	validation_1-rmse:1.11616
[8]	validation_0-rmse:0.952491	validation_1-rmse:0.979418
[9]	validation_0-rmse:0.849553	validation_1-rmse:0.876323
[10]	validation_0-rmse:0.776416	validation_1-rmse:0.80331
[11]	validation_0-rmse:0.728407	validation_1-rmse:0.757523
[12]	validation_0-rmse:0.679873	validation_1-rmse:0.710039
[13]	validation_0-rmse:0.645505	validation_1-rmse:0.676267
[14]	validation_0-rmse:0.618342	validation_

[137]	validation_0-rmse:0.242622	validation_1-rmse:0.340324
[138]	validation_0-rmse:0.242143	validation_1-rmse:0.339596
[139]	validation_0-rmse:0.241953	validation_1-rmse:0.339374
[140]	validation_0-rmse:0.241495	validation_1-rmse:0.339447
[141]	validation_0-rmse:0.240978	validation_1-rmse:0.33944
[142]	validation_0-rmse:0.240437	validation_1-rmse:0.339509
[143]	validation_0-rmse:0.239657	validation_1-rmse:0.339353
[144]	validation_0-rmse:0.239148	validation_1-rmse:0.339079
[145]	validation_0-rmse:0.238759	validation_1-rmse:0.338748
[146]	validation_0-rmse:0.23851	validation_1-rmse:0.338753
[147]	validation_0-rmse:0.23825	validation_1-rmse:0.338561
[148]	validation_0-rmse:0.237875	validation_1-rmse:0.338131
[149]	validation_0-rmse:0.237561	validation_1-rmse:0.337969
[150]	validation_0-rmse:0.237034	validation_1-rmse:0.33798
[151]	validation_0-rmse:0.236578	validation_1-rmse:0.337763
[152]	validation_0-rmse:0.235738	validation_1-rmse:0.337423
[153]	validation_0-rmse:0.23443	validation_1

In [12]:
s3params = {
    'gamma':[i/10.0 for i in range(0,5)]
}

gridSearch(s3params, m2, oh_X_train, oh_y_train)



([mean: -0.00714, std: 0.00074, params: {'gamma': 0.0}, mean: -0.00700, std: 0.00084, params: {'gamma': 0.1}, mean: -0.00698, std: 0.00075, params: {'gamma': 0.2}, mean: -0.00712, std: 0.00085, params: {'gamma': 0.3}, mean: -0.00690, std: 0.00076, params: {'gamma': 0.4}], {'gamma': 0.4}, -0.006896581693259014)


In [14]:
model_3 = XGBRegressor(learning_rate = 0.2,
               n_estimators=300,
               max_depth=5,
               min_child_weight=5,
               gamma=0.4,
               subsample=0.8,
               colsample_bytree=0.8,
               objective= 'reg:linear',
               scale_pos_weight=1,
               silent = True,
               seed=1)
m3 = trainModel(oh_X_train, oh_X_test, oh_y_train, oh_y_test, model_3, 50)

[0]	validation_0-rmse:3.51917	validation_1-rmse:3.53624
Multiple eval metrics have been passed: 'validation_1-rmse' will be used for early stopping.

Will train until validation_1-rmse hasn't improved in 50 rounds.
[1]	validation_0-rmse:2.88467	validation_1-rmse:2.89929
[2]	validation_0-rmse:2.35523	validation_1-rmse:2.37086
[3]	validation_0-rmse:1.9573	validation_1-rmse:1.97702
[4]	validation_0-rmse:1.65132	validation_1-rmse:1.67251
[5]	validation_0-rmse:1.41662	validation_1-rmse:1.43833
[6]	validation_0-rmse:1.23251	validation_1-rmse:1.25742
[7]	validation_0-rmse:1.08972	validation_1-rmse:1.11629
[8]	validation_0-rmse:0.952723	validation_1-rmse:0.979667
[9]	validation_0-rmse:0.849766	validation_1-rmse:0.876564
[10]	validation_0-rmse:0.776625	validation_1-rmse:0.803544
[11]	validation_0-rmse:0.728608	validation_1-rmse:0.757699
[12]	validation_0-rmse:0.680116	validation_1-rmse:0.710416
[13]	validation_0-rmse:0.645799	validation_1-rmse:0.676652
[14]	validation_0-rmse:0.61866	validation_

[137]	validation_0-rmse:0.253865	validation_1-rmse:0.337454
[138]	validation_0-rmse:0.253484	validation_1-rmse:0.337166
[139]	validation_0-rmse:0.253422	validation_1-rmse:0.337186
[140]	validation_0-rmse:0.253029	validation_1-rmse:0.336827
[141]	validation_0-rmse:0.25242	validation_1-rmse:0.33672
[142]	validation_0-rmse:0.252115	validation_1-rmse:0.336491
[143]	validation_0-rmse:0.251638	validation_1-rmse:0.336385
[144]	validation_0-rmse:0.251477	validation_1-rmse:0.336193
[145]	validation_0-rmse:0.251138	validation_1-rmse:0.336012
[146]	validation_0-rmse:0.250938	validation_1-rmse:0.335853
[147]	validation_0-rmse:0.250681	validation_1-rmse:0.335791
[148]	validation_0-rmse:0.250567	validation_1-rmse:0.335638
[149]	validation_0-rmse:0.250407	validation_1-rmse:0.335784
[150]	validation_0-rmse:0.250149	validation_1-rmse:0.335842
[151]	validation_0-rmse:0.250048	validation_1-rmse:0.335817
[152]	validation_0-rmse:0.24957	validation_1-rmse:0.335737
[153]	validation_0-rmse:0.24957	validation_

[274]	validation_0-rmse:0.229793	validation_1-rmse:0.32986
[275]	validation_0-rmse:0.229756	validation_1-rmse:0.329883
[276]	validation_0-rmse:0.229571	validation_1-rmse:0.329825
[277]	validation_0-rmse:0.229478	validation_1-rmse:0.329794
[278]	validation_0-rmse:0.229477	validation_1-rmse:0.329792
[279]	validation_0-rmse:0.229477	validation_1-rmse:0.329792
[280]	validation_0-rmse:0.229118	validation_1-rmse:0.329756
[281]	validation_0-rmse:0.229053	validation_1-rmse:0.329728
[282]	validation_0-rmse:0.229053	validation_1-rmse:0.329727
[283]	validation_0-rmse:0.229053	validation_1-rmse:0.329728
Stopping. Best iteration:
[233]	validation_0-rmse:0.233867	validation_1-rmse:0.329236



In [16]:
s4params = {
    'subsample':[i/10.0 for i in range(6,10)],
    'colsample_bytree':[i/10.0 for i in range(6,10)]
}
gridSearch(s4params, m3, oh_X_train, oh_y_train)

([mean: -0.00693, std: 0.00086, params: {'subsample': 0.6, 'colsample_bytree': 0.6}, mean: -0.00688, std: 0.00076, params: {'subsample': 0.7, 'colsample_bytree': 0.6}, mean: -0.00704, std: 0.00074, params: {'subsample': 0.8, 'colsample_bytree': 0.6}, mean: -0.00688, std: 0.00077, params: {'subsample': 0.9, 'colsample_bytree': 0.6}, mean: -0.00714, std: 0.00105, params: {'subsample': 0.6, 'colsample_bytree': 0.7}, mean: -0.00710, std: 0.00093, params: {'subsample': 0.7, 'colsample_bytree': 0.7}, mean: -0.00708, std: 0.00086, params: {'subsample': 0.8, 'colsample_bytree': 0.7}, mean: -0.00693, std: 0.00081, params: {'subsample': 0.9, 'colsample_bytree': 0.7}, mean: -0.00716, std: 0.00085, params: {'subsample': 0.6, 'colsample_bytree': 0.8}, mean: -0.00707, std: 0.00088, params: {'subsample': 0.7, 'colsample_bytree': 0.8}, mean: -0.00690, std: 0.00076, params: {'subsample': 0.8, 'colsample_bytree': 0.8}, mean: -0.00702, std: 0.00074, params: {'subsample': 0.9, 'colsample_bytree': 0.8}, me

In [22]:
model_4 = XGBRegressor(learning_rate = 0.2,
               n_estimators=300,
               max_depth=5,
               min_child_weight=5,
               gamma=0.4,
               subsample=0.7,
               colsample_bytree=0.6,
               objective= 'reg:linear',
               scale_pos_weight=1,
               silent = True,
               seed=1)
m4 = trainModel(oh_X_train, oh_X_test, oh_y_train, oh_y_test, model_4, 50)

[0]	validation_0-rmse:3.52667	validation_1-rmse:3.54247
Multiple eval metrics have been passed: 'validation_1-rmse' will be used for early stopping.

Will train until validation_1-rmse hasn't improved in 50 rounds.
[1]	validation_0-rmse:2.89046	validation_1-rmse:2.90552
[2]	validation_0-rmse:2.39159	validation_1-rmse:2.40449
[3]	validation_0-rmse:1.99659	validation_1-rmse:2.01022
[4]	validation_0-rmse:1.69981	validation_1-rmse:1.71309
[5]	validation_0-rmse:1.46786	validation_1-rmse:1.48216
[6]	validation_0-rmse:1.28053	validation_1-rmse:1.29619
[7]	validation_0-rmse:1.14946	validation_1-rmse:1.1656
[8]	validation_0-rmse:0.991363	validation_1-rmse:1.00916
[9]	validation_0-rmse:0.874204	validation_1-rmse:0.892656
[10]	validation_0-rmse:0.81468	validation_1-rmse:0.834148
[11]	validation_0-rmse:0.77152	validation_1-rmse:0.793919
[12]	validation_0-rmse:0.701103	validation_1-rmse:0.722284
[13]	validation_0-rmse:0.65286	validation_1-rmse:0.674132
[14]	validation_0-rmse:0.6118	validation_1-rms

[137]	validation_0-rmse:0.268589	validation_1-rmse:0.342087
[138]	validation_0-rmse:0.268219	validation_1-rmse:0.341736
[139]	validation_0-rmse:0.267989	validation_1-rmse:0.341909
[140]	validation_0-rmse:0.267435	validation_1-rmse:0.341773
[141]	validation_0-rmse:0.266296	validation_1-rmse:0.340911
[142]	validation_0-rmse:0.265811	validation_1-rmse:0.340786
[143]	validation_0-rmse:0.265198	validation_1-rmse:0.340658
[144]	validation_0-rmse:0.264969	validation_1-rmse:0.34052
[145]	validation_0-rmse:0.264894	validation_1-rmse:0.34046
[146]	validation_0-rmse:0.264639	validation_1-rmse:0.34032
[147]	validation_0-rmse:0.264563	validation_1-rmse:0.340145
[148]	validation_0-rmse:0.264325	validation_1-rmse:0.340056
[149]	validation_0-rmse:0.26414	validation_1-rmse:0.339847
[150]	validation_0-rmse:0.263801	validation_1-rmse:0.339798
[151]	validation_0-rmse:0.263516	validation_1-rmse:0.339553
[152]	validation_0-rmse:0.26313	validation_1-rmse:0.338955
[153]	validation_0-rmse:0.262741	validation_1

[275]	validation_0-rmse:0.239768	validation_1-rmse:0.333041
[276]	validation_0-rmse:0.239768	validation_1-rmse:0.333041
[277]	validation_0-rmse:0.239676	validation_1-rmse:0.333034
[278]	validation_0-rmse:0.239564	validation_1-rmse:0.332987
[279]	validation_0-rmse:0.239341	validation_1-rmse:0.333001
[280]	validation_0-rmse:0.239247	validation_1-rmse:0.332792
[281]	validation_0-rmse:0.23918	validation_1-rmse:0.332639
[282]	validation_0-rmse:0.23908	validation_1-rmse:0.332571
[283]	validation_0-rmse:0.238892	validation_1-rmse:0.332441
[284]	validation_0-rmse:0.238818	validation_1-rmse:0.332537
[285]	validation_0-rmse:0.238818	validation_1-rmse:0.332537
[286]	validation_0-rmse:0.238732	validation_1-rmse:0.332328
[287]	validation_0-rmse:0.238664	validation_1-rmse:0.332363
[288]	validation_0-rmse:0.238665	validation_1-rmse:0.332364
[289]	validation_0-rmse:0.238589	validation_1-rmse:0.332338
[290]	validation_0-rmse:0.238589	validation_1-rmse:0.332338
[291]	validation_0-rmse:0.238458	validatio

In [23]:
# Make predictions
preds = m4.predict(oh_X_test)
print ("RMSLE Value For Linear Regression: ", rmsle(oh_y_test,preds))

('RMSLE Value For Linear Regression: ', 0.31035530724431504)


In [28]:
oh_X_train.head()

Unnamed: 0_level_0,temp,atemp,humidity,windspeed,season_1,season_2,season_3,season_4,holiday_0,holiday_1,...,day_10,day_11,day_12,day_13,day_14,day_15,day_16,day_17,day_18,day_19
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-09-03 12:00:00,28.7,32.575,61,15.0013,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2012-08-02 17:00:00,33.62,38.635,52,16.9979,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2012-04-12 06:00:00,12.3,14.395,65,16.9979,0,1,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
2011-06-19 19:00:00,29.52,34.09,62,8.9981,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
2012-02-03 04:00:00,10.66,12.88,60,15.0013,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
