# Xgboost

## Importing Required Libraries

In [25]:
pip install xgboost



In [26]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import DatetimeTickFormatter
from bokeh.layouts import row, column
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

## Defining Required Functions for Visualization

In [27]:
def analysis_plot(x, y):
#     x = df['DATE']
#     y = df['MW']

    p = figure(title="Date vs MW", 
               sizing_mode="stretch_width",
               x_axis_type='datetime',
               x_axis_label='Date',
               y_axis_label='MW')

    p.line(x, y, legend_label="Visualisation", line_width=1)
    p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

    output_notebook()
    show(p)

In [28]:
def visualisation_plot(x, y1, y2):
#     x = test_df['DATE']
#     y1 = test_df['MW']
#     y2 = pred

    p = figure(title="Date vs MW", 
               sizing_mode="stretch_width",
               x_axis_type='datetime',
               x_axis_label='Date',
               y_axis_label='MW')

    p.line(x, y1, legend_label="Visualisation", line_width=1)
    p.line(x, y2, legend_label="Visualisation", line_width=1, color='red')
    p.xaxis[0].formatter = DatetimeTickFormatter(months="%b %Y")

    output_notebook()
    show(p)

# Dataset - 3

In [29]:
df = pd.read_excel('/content/3-hour-load-weather-data.xlsx')
df

Unnamed: 0,DATE,max-temp,min-temp,RH-0830,RH-1730,MW
0,2017-01-01 00:00:00,20.3,9.2,100,80,1815.571045
1,2017-01-01 01:00:00,20.3,9.2,100,80,1576.699585
2,2017-01-01 02:00:00,20.3,9.2,100,80,1428.967896
3,2017-01-01 03:00:00,20.3,9.2,100,80,1356.272705
4,2017-01-01 04:00:00,20.3,9.2,100,80,1354.029175
...,...,...,...,...,...,...
26275,2019-12-31 19:00:00,9.4,4.8,91,69,4157.812988
26276,2019-12-31 20:00:00,9.4,4.8,91,69,4008.450439
26277,2019-12-31 21:00:00,9.4,4.8,91,69,3757.650391
26278,2019-12-31 22:00:00,9.4,4.8,91,69,3556.840576


In [30]:
analysis_plot(df['DATE'], df['MW'])

In [31]:
train = df.loc[ df['DATE'].dt.year < 2019 ]
test = df.loc[ df['DATE'].dt.year >= 2019 ]

features = ['max-temp', 'min-temp', 'RH-0830', 'RH-1730']
label = 'MW'

X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

In [32]:
reg = XGBRegressor(n_estimators=500, learning_rate=0.01)
reg.fit(X_train, 
        y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_metric='mae')

[0]	validation_0-mae:3597.16	validation_1-mae:3739.63
[1]	validation_0-mae:3561.18	validation_1-mae:3704.31
[2]	validation_0-mae:3525.6	validation_1-mae:3669.35
[3]	validation_0-mae:3490.37	validation_1-mae:3634.73
[4]	validation_0-mae:3455.47	validation_1-mae:3600.46
[5]	validation_0-mae:3420.93	validation_1-mae:3566.53
[6]	validation_0-mae:3386.73	validation_1-mae:3532.98
[7]	validation_0-mae:3352.9	validation_1-mae:3499.76
[8]	validation_0-mae:3319.37	validation_1-mae:3466.84
[9]	validation_0-mae:3286.2	validation_1-mae:3434.26
[10]	validation_0-mae:3253.35	validation_1-mae:3401.86
[11]	validation_0-mae:3220.83	validation_1-mae:3369.92
[12]	validation_0-mae:3188.64	validation_1-mae:3338.14
[13]	validation_0-mae:3156.78	validation_1-mae:3306.84
[14]	validation_0-mae:3125.21	validation_1-mae:3275.69
[15]	validation_0-mae:3093.98	validation_1-mae:3244.86
[16]	validation_0-mae:3063.05	validation_1-mae:3214.34
[17]	validation_0-mae:3032.42	validation_1-mae:3184.27
[18]	validation_0-mae:3

XGBRegressor(learning_rate=0.01, n_estimators=500)

**NOTE:** We might see an slight increase in MAE. The XGBRegressor optimizes square loss by default. There isn't support for MAE since gradient boosting (may) non-zero second order derivative.

In [33]:
predictions = reg.predict(X_test)
predictions

array([2728.5364, 2728.5364, 2728.5364, ..., 2733.679 , 2733.679 ,
       2733.679 ], dtype=float32)

In [34]:
from sklearn.metrics import accuracy_score

acc_test = [round(value) for value in y_test]
acc_pred = [round(value) for value in predictions]

accuracy = accuracy_score(acc_test, acc_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 0.05%


In [35]:
visualisation_plot(test['DATE'], test['MW'], predictions)

## SCORE

In [36]:
mse = mean_squared_error(y_test,predictions)
print("MSE: %.2f" % mse)
print("RMSE: %.2f" % np.sqrt(mse))

MSE: 491416.96
RMSE: 701.01


In [37]:
from sklearn.metrics import explained_variance_score
explained_variance_score(y_test, predictions)

0.7045171275081742

In [38]:
from sklearn.metrics import r2_score 
r2_score(y_test, predictions)

0.6797409817666576

# Dataset - 4

In [39]:
df = pd.read_excel('/content/4-day-load-weather-data.xlsx')
df.rename({'date': 'DATE'}, axis=1, inplace=True)
df

Unnamed: 0,DATE,max-temp,min-temp,RH-0830,RH-1730,MW
0,2017-01-01,20.3,9.2,100,80,3536.238770
1,2017-01-02,23.2,9.3,100,82,3639.738770
2,2017-01-03,24.3,9.5,100,77,3673.321289
3,2017-01-04,24.0,8.9,97,66,3898.860840
4,2017-01-05,25.2,10.4,97,71,3547.965820
...,...,...,...,...,...,...
1090,2019-12-27,13.4,4.2,86,76,4976.180664
1091,2019-12-28,14.4,2.4,100,83,4708.879395
1092,2019-12-29,13.3,3.1,94,79,4831.750488
1093,2019-12-30,15.8,2.6,100,97,5298.331055


In [40]:
analysis_plot(df['DATE'], df['MW'])

In [41]:
train = df.loc[ df['DATE'].dt.year < 2019 ]
test = df.loc[ df['DATE'].dt.year >= 2019 ]

features = ['max-temp', 'min-temp', 'RH-0830', 'RH-1730']
label = 'MW'

X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

In [42]:
reg = XGBRegressor(n_estimators=500, learning_rate=0.01)
reg.fit(X_train, 
        y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_metric='mae')

[0]	validation_0-mae:4466.59	validation_1-mae:4695.11
[1]	validation_0-mae:4422.19	validation_1-mae:4650.95
[2]	validation_0-mae:4378.22	validation_1-mae:4607.22
[3]	validation_0-mae:4334.69	validation_1-mae:4563.93
[4]	validation_0-mae:4291.53	validation_1-mae:4521.38
[5]	validation_0-mae:4248.86	validation_1-mae:4478.95
[6]	validation_0-mae:4206.62	validation_1-mae:4436.94
[7]	validation_0-mae:4164.74	validation_1-mae:4395.65
[8]	validation_0-mae:4123.33	validation_1-mae:4354.47
[9]	validation_0-mae:4082.33	validation_1-mae:4313.69
[10]	validation_0-mae:4041.69	validation_1-mae:4273.62
[11]	validation_0-mae:4001.5	validation_1-mae:4233.66
[12]	validation_0-mae:3961.72	validation_1-mae:4194.1
[13]	validation_0-mae:3922.27	validation_1-mae:4155.17
[14]	validation_0-mae:3883.28	validation_1-mae:4116.39
[15]	validation_0-mae:3844.67	validation_1-mae:4078
[16]	validation_0-mae:3806.39	validation_1-mae:4040.23
[17]	validation_0-mae:3768.55	validation_1-mae:4002.59
[18]	validation_0-mae:373

XGBRegressor(learning_rate=0.01, n_estimators=500)

**NOTE:** We might see an slight increase in MAE. The XGBRegressor optimizes square loss by default. There isn't support for MAE since gradient boosting (may) non-zero second order derivative.

In [43]:
predictions = reg.predict(X_test)
predictions

array([3941.3098, 3941.3098, 3941.3098, 3950.7603, 3957.1272, 3779.6062,
       3950.7603, 3950.7603, 3871.6436, 3957.1272, 3957.1272, 3934.9429,
       3807.2786, 3761.0896, 3969.2817, 3957.1272, 3957.1272, 3941.3098,
       3969.2817, 3650.0347, 3483.4214, 3722.6626, 3859.489 , 3775.675 ,
       3775.675 , 3969.2817, 3957.1272, 3969.2817, 3969.2817, 3969.2817,
       3859.489 , 3738.48  , 3843.6716, 3775.675 , 3871.6436, 3759.8582,
       3591.9873, 3560.758 , 3962.9148, 3835.2505, 3957.1272, 3962.9148,
       3722.6626, 3632.8525, 3560.758 , 3722.6626, 3738.48  , 3750.6345,
       3750.6345, 3520.226 , 3560.758 , 3738.48  , 3484.4072, 3718.6492,
       3718.6492, 3670.0476, 3591.9873, 3787.8296, 3969.2817, 3969.2817,
       3613.3008, 3738.48  , 3722.6626, 3632.8525, 3718.6492, 3517.668 ,
       3484.2644, 3480.251 , 3358.5786, 3473.852 , 3480.251 , 3479.408 ,
       3587.9739, 3587.9739, 3516.2126, 3473.852 , 3478.127 , 3480.251 ,
       3344.372 , 3575.0703, 3336.6455, 3301.635 , 

In [44]:
visualisation_plot(test['DATE'], test['MW'], predictions)

## SCORE

In [45]:
mse = mean_squared_error(y_test,predictions)
print("MSE: %.2f" % mse)
print("RMSE: %.2f" % np.sqrt(mse))

MSE: 207466.76
RMSE: 455.49


In [46]:
from sklearn.metrics import explained_variance_score
explained_variance_score(y_test, predictions)

0.8830265205233399

In [47]:
from sklearn.metrics import r2_score 
r2_score(y_test, predictions)

0.8258921599899995

# Dataset - 5

In [48]:
df = pd.read_excel('/content/5-hour-load-holiday-data.xlsx')
df

Unnamed: 0,DATE,day,MW
0,2017-01-01 00:00:00,2,1815.571045
1,2017-01-01 01:00:00,2,1576.699585
2,2017-01-01 02:00:00,2,1428.967896
3,2017-01-01 03:00:00,2,1356.272705
4,2017-01-01 04:00:00,2,1354.029175
...,...,...,...
26275,2019-12-31 19:00:00,0,4157.812988
26276,2019-12-31 20:00:00,0,4008.450439
26277,2019-12-31 21:00:00,0,3757.650391
26278,2019-12-31 22:00:00,0,3556.840576


In [49]:
analysis_plot(df['DATE'], df['MW'])

In [50]:
train = df.loc[ df['DATE'].dt.year < 2019 ]
test = df.loc[ df['DATE'].dt.year >= 2019 ]

features = ['day']
label = 'MW'

X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

In [51]:
reg = XGBRegressor(n_estimators=500, learning_rate=0.01)
reg.fit(X_train, 
        y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_metric='mae')

[0]	validation_0-mae:3597.13	validation_1-mae:3738.94
[1]	validation_0-mae:3561.17	validation_1-mae:3702.95
[2]	validation_0-mae:3525.57	validation_1-mae:3667.3
[3]	validation_0-mae:3490.31	validation_1-mae:3632.03
[4]	validation_0-mae:3455.41	validation_1-mae:3597.1
[5]	validation_0-mae:3420.87	validation_1-mae:3562.52
[6]	validation_0-mae:3386.67	validation_1-mae:3528.29
[7]	validation_0-mae:3352.8	validation_1-mae:3494.4
[8]	validation_0-mae:3319.28	validation_1-mae:3460.86
[9]	validation_0-mae:3286.09	validation_1-mae:3427.65
[10]	validation_0-mae:3253.23	validation_1-mae:3394.76
[11]	validation_0-mae:3220.71	validation_1-mae:3362.2
[12]	validation_0-mae:3188.51	validation_1-mae:3329.96
[13]	validation_0-mae:3156.62	validation_1-mae:3298.07
[14]	validation_0-mae:3125.07	validation_1-mae:3266.48
[15]	validation_0-mae:3093.84	validation_1-mae:3235.21
[16]	validation_0-mae:3062.9	validation_1-mae:3204.25
[17]	validation_0-mae:3032.27	validation_1-mae:3173.6
[18]	validation_0-mae:3001.

XGBRegressor(learning_rate=0.01, n_estimators=500)

**NOTE:** We might see an slight increase in MAE. The XGBRegressor optimizes square loss by default. There isn't support for MAE since gradient boosting (may) non-zero second order derivative.

In [52]:
predictions = reg.predict(X_test)
predictions

array([3284.191, 3284.191, 3284.191, ..., 3720.386, 3720.386, 3720.386],
      dtype=float32)

In [53]:
visualisation_plot(test['DATE'], test['MW'], predictions)

## SCORE

In [54]:
mse = mean_squared_error(y_test,predictions)
print("MSE: %.2f" % mse)
print("RMSE: %.2f" % np.sqrt(mse))

MSE: 1543456.82
RMSE: 1242.36


In [55]:
from sklearn.metrics import explained_variance_score
explained_variance_score(y_test, predictions)

0.011374129134344768

In [56]:
from sklearn.metrics import r2_score 
r2_score(y_test, predictions)

-0.005878937600653389

# Dataset - 6

In [57]:
df = pd.read_excel('/content/6-day-load-holiday-data.xlsx')
df.rename({'date': 'DATE'}, axis=1, inplace=True)
df

Unnamed: 0,DATE,day,MW
0,2017-01-01,2,3536.238770
1,2017-01-02,0,3639.738770
2,2017-01-03,0,3673.321289
3,2017-01-04,0,3898.860840
4,2017-01-05,2,3547.965820
...,...,...,...
1090,2019-12-27,0,4976.180664
1091,2019-12-28,1,4708.879395
1092,2019-12-29,1,4831.750488
1093,2019-12-30,0,5298.331055


In [58]:
analysis_plot(df['DATE'], df['MW'])

In [59]:
train = df.loc[ df['DATE'].dt.year < 2019 ]
test = df.loc[ df['DATE'].dt.year >= 2019 ]

features = ['day']
label = 'MW'

X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

In [60]:
reg = XGBRegressor(n_estimators=500, learning_rate=0.01)
reg.fit(X_train, 
        y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_metric='mae')

[0]	validation_0-mae:4466.39	validation_1-mae:4694.67
[1]	validation_0-mae:4421.79	validation_1-mae:4650.07
[2]	validation_0-mae:4377.63	validation_1-mae:4605.91
[3]	validation_0-mae:4333.92	validation_1-mae:4562.19
[4]	validation_0-mae:4290.64	validation_1-mae:4518.91
[5]	validation_0-mae:4247.79	validation_1-mae:4476.07
[6]	validation_0-mae:4205.37	validation_1-mae:4433.64
[7]	validation_0-mae:4163.37	validation_1-mae:4391.65
[8]	validation_0-mae:4121.8	validation_1-mae:4350.07
[9]	validation_0-mae:4080.63	validation_1-mae:4308.91
[10]	validation_0-mae:4039.88	validation_1-mae:4268.16
[11]	validation_0-mae:3999.54	validation_1-mae:4227.82
[12]	validation_0-mae:3959.65	validation_1-mae:4187.9
[13]	validation_0-mae:3920.16	validation_1-mae:4148.38
[14]	validation_0-mae:3881.06	validation_1-mae:4109.26
[15]	validation_0-mae:3842.35	validation_1-mae:4070.53
[16]	validation_0-mae:3804.02	validation_1-mae:4032.18
[17]	validation_0-mae:3766.03	validation_1-mae:3994.19
[18]	validation_0-mae:

XGBRegressor(learning_rate=0.01, n_estimators=500)

**NOTE:** We might see an slight increase in MAE. The XGBRegressor optimizes square loss by default. There isn't support for MAE since gradient boosting (may) non-zero second order derivative.

In [61]:
predictions = reg.predict(X_test)
predictions

array([4122.5146, 4588.7695, 4588.7695, 4588.7695, 4388.1895, 4388.1895,
       4588.7695, 4588.7695, 4588.7695, 4588.7695, 4588.7695, 4388.1895,
       4122.5146, 4122.5146, 4122.5146, 4588.7695, 4588.7695, 4588.7695,
       4388.1895, 4388.1895, 4588.7695, 4588.7695, 4588.7695, 4588.7695,
       4588.7695, 4122.5146, 4388.1895, 4588.7695, 4588.7695, 4588.7695,
       4588.7695, 4588.7695, 4388.1895, 4388.1895, 4588.7695, 4588.7695,
       4588.7695, 4588.7695, 4588.7695, 4388.1895, 4122.5146, 4588.7695,
       4588.7695, 4588.7695, 4588.7695, 4588.7695, 4388.1895, 4388.1895,
       4588.7695, 4122.5146, 4588.7695, 4588.7695, 4588.7695, 4388.1895,
       4388.1895, 4588.7695, 4588.7695, 4588.7695, 4588.7695, 4122.5146,
       4388.1895, 4388.1895, 4122.5146, 4588.7695, 4588.7695, 4588.7695,
       4588.7695, 4388.1895, 4388.1895, 4588.7695, 4588.7695, 4588.7695,
       4588.7695, 4588.7695, 4388.1895, 4388.1895, 4588.7695, 4122.5146,
       4122.5146, 4122.5146, 4588.7695, 4388.1895, 

In [62]:
visualisation_plot(test['DATE'], test['MW'], predictions)

## SCORE

In [63]:
mse = mean_squared_error(y_test,predictions)
print("MSE: %.2f" % mse)
print("RMSE: %.2f" % np.sqrt(mse))

MSE: 1237493.56
RMSE: 1112.43


In [64]:
from sklearn.metrics import explained_variance_score
explained_variance_score(y_test, predictions)

0.016179045437779815

In [65]:
from sklearn.metrics import r2_score 
r2_score(y_test, predictions)

-0.03851495657000292

# Dataset - 7

In [66]:
df = pd.read_excel('/content/7-hour-load-weather-holiday-data.xlsx')
df

Unnamed: 0,DATE,max-temp,min-temp,RH-0830,RH-1730,day,MW
0,2017-01-01 00:00:00,20.3,9.2,100,80,2,1815.571045
1,2017-01-01 01:00:00,20.3,9.2,100,80,2,1576.699585
2,2017-01-01 02:00:00,20.3,9.2,100,80,2,1428.967896
3,2017-01-01 03:00:00,20.3,9.2,100,80,2,1356.272705
4,2017-01-01 04:00:00,20.3,9.2,100,80,2,1354.029175
...,...,...,...,...,...,...,...
26275,2019-12-31 19:00:00,9.4,4.8,91,69,0,4157.812988
26276,2019-12-31 20:00:00,9.4,4.8,91,69,0,4008.450439
26277,2019-12-31 21:00:00,9.4,4.8,91,69,0,3757.650391
26278,2019-12-31 22:00:00,9.4,4.8,91,69,0,3556.840576


In [67]:
analysis_plot(df['DATE'], df['MW'])

In [68]:
train = df.loc[ df['DATE'].dt.year < 2019 ]
test = df.loc[ df['DATE'].dt.year >= 2019 ]

features = ['max-temp', 'min-temp', 'RH-0830', 'RH-1730', 'day']
label = 'MW'

X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

In [69]:
reg = XGBRegressor(n_estimators=1000, learning_rate=0.01)
reg.fit(X_train, 
        y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_metric='mae')

[0]	validation_0-mae:3597.16	validation_1-mae:3739.63
[1]	validation_0-mae:3561.18	validation_1-mae:3704.31
[2]	validation_0-mae:3525.6	validation_1-mae:3669.35
[3]	validation_0-mae:3490.37	validation_1-mae:3634.7
[4]	validation_0-mae:3455.47	validation_1-mae:3600.43
[5]	validation_0-mae:3420.93	validation_1-mae:3566.48
[6]	validation_0-mae:3386.73	validation_1-mae:3532.93
[7]	validation_0-mae:3352.89	validation_1-mae:3499.7
[8]	validation_0-mae:3319.36	validation_1-mae:3466.76
[9]	validation_0-mae:3286.2	validation_1-mae:3434.18
[10]	validation_0-mae:3253.35	validation_1-mae:3401.74
[11]	validation_0-mae:3220.83	validation_1-mae:3369.77
[12]	validation_0-mae:3188.64	validation_1-mae:3338.01
[13]	validation_0-mae:3156.78	validation_1-mae:3306.68
[14]	validation_0-mae:3125.21	validation_1-mae:3275.54
[15]	validation_0-mae:3093.98	validation_1-mae:3244.7
[16]	validation_0-mae:3063.05	validation_1-mae:3214.18
[17]	validation_0-mae:3032.43	validation_1-mae:3184.07
[18]	validation_0-mae:300

XGBRegressor(learning_rate=0.01, n_estimators=1000)

**NOTE:** We might see an slight increase in MAE. The XGBRegressor optimizes square loss by default. There isn't support for MAE since gradient boosting (may) non-zero second order derivative.

In [70]:
predictions = reg.predict(X_test)
predictions

array([2688.0342, 2688.0342, 2688.0342, ..., 2817.31  , 2817.31  ,
       2817.31  ], dtype=float32)

In [71]:
visualisation_plot(test['DATE'], test['MW'], predictions)

## SCORE

In [72]:
mse = mean_squared_error(y_test,predictions)
print("MSE: %.2f" % mse)
print("RMSE: %.2f" % np.sqrt(mse))

MSE: 474123.16
RMSE: 688.57


In [73]:
from sklearn.metrics import explained_variance_score
explained_variance_score(y_test, predictions)

0.7101533352252797

In [74]:
from sklearn.metrics import r2_score 
r2_score(y_test, predictions)

0.6910114387492521

# Dataset - 8

In [75]:
df = pd.read_excel('/content/8-day-load-weather-holiday-data.xlsx')
df.rename({'date': 'DATE'}, axis=1, inplace=True)
df

Unnamed: 0,DATE,max-temp,min-temp,RH-0830,RH-1730,MW,day
0,2017-01-01,20.3,9.2,100,80,3536.238770,2
1,2017-01-02,23.2,9.3,100,82,3639.738770,0
2,2017-01-03,24.3,9.5,100,77,3673.321289,0
3,2017-01-04,24.0,8.9,97,66,3898.860840,0
4,2017-01-05,25.2,10.4,97,71,3547.965820,2
...,...,...,...,...,...,...,...
1090,2019-12-27,13.4,4.2,86,76,4976.180664,0
1091,2019-12-28,14.4,2.4,100,83,4708.879395,1
1092,2019-12-29,13.3,3.1,94,79,4831.750488,1
1093,2019-12-30,15.8,2.6,100,97,5298.331055,0


In [76]:
analysis_plot(df['DATE'], df['MW'])

In [77]:
train = df.loc[ df['DATE'].dt.year < 2019 ]
test = df.loc[ df['DATE'].dt.year >= 2019 ]

features = ['max-temp', 'min-temp', 'RH-0830', 'RH-1730', 'day']
label = 'MW'

X_train, y_train = train[features], train[label]
X_test, y_test = test[features], test[label]

In [78]:
reg = XGBRegressor(n_estimators=1000, learning_rate=0.01)
reg.fit(X_train, 
        y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        eval_metric='mae')

[0]	validation_0-mae:4466.59	validation_1-mae:4695.11
[1]	validation_0-mae:4422.19	validation_1-mae:4650.95
[2]	validation_0-mae:4378.22	validation_1-mae:4607.22
[3]	validation_0-mae:4334.69	validation_1-mae:4563.93
[4]	validation_0-mae:4291.53	validation_1-mae:4521.38
[5]	validation_0-mae:4248.86	validation_1-mae:4478.95
[6]	validation_0-mae:4206.62	validation_1-mae:4436.94
[7]	validation_0-mae:4164.74	validation_1-mae:4395.65
[8]	validation_0-mae:4123.33	validation_1-mae:4354.47
[9]	validation_0-mae:4082.33	validation_1-mae:4313.69
[10]	validation_0-mae:4041.69	validation_1-mae:4273.62
[11]	validation_0-mae:4001.5	validation_1-mae:4233.66
[12]	validation_0-mae:3961.72	validation_1-mae:4194.1
[13]	validation_0-mae:3922.27	validation_1-mae:4155.17
[14]	validation_0-mae:3883.28	validation_1-mae:4116.39
[15]	validation_0-mae:3844.67	validation_1-mae:4078
[16]	validation_0-mae:3806.39	validation_1-mae:4040.23
[17]	validation_0-mae:3768.55	validation_1-mae:4002.59
[18]	validation_0-mae:373

XGBRegressor(learning_rate=0.01, n_estimators=1000)

**NOTE:** We might see an slight increase in MAE. The XGBRegressor optimizes square loss by default. There isn't support for MAE since gradient boosting (may) non-zero second order derivative.

In [79]:
predictions = reg.predict(X_test)
predictions

array([3986.6826, 3978.1902, 4019.3638, 4037.597 , 3969.84  , 3728.8652,
       4028.563 , 4031.8293, 3993.9763, 4033.3738, 4043.7732, 3947.4868,
       3767.2131, 3759.66  , 4044.0747, 4045.8257, 4046.6843, 4022.7656,
       4070.393 , 3620.2441, 3522.722 , 3792.1553, 3963.4475, 3830.272 ,
       3808.8994, 4047.2612, 3979.46  , 4072.5164, 4115.202 , 4084.133 ,
       3933.8386, 3782.096 , 3816.3108, 3746.2612, 4000.641 , 3799.2424,
       3678.1646, 3592.7876, 4094.7788, 3893.0322, 3993.6497, 4098.409 ,
       3768.8716, 3705.0908, 3642.5142, 3757.3513, 3652.277 , 3744.486 ,
       3821.7056, 3440.2854, 3625.879 , 3793.2236, 3570.316 , 3678.9185,
       3676.4902, 3702.1675, 3668.5115, 3873.1816, 4110.0854, 4049.6113,
       3496.483 , 3720.3113, 3657.0305, 3700.4316, 3786.7283, 3584.001 ,
       3587.238 , 3416.3533, 3270.9949, 3519.552 , 3568.1553, 3550.9917,
       3643.3606, 3665.5823, 3467.6985, 3413.6804, 3548.1013, 3386.9802,
       3160.138 , 3427.5356, 3413.885 , 3207.2236, 

In [80]:
visualisation_plot(test['DATE'], test['MW'], predictions)

## SCORE

In [81]:
mse = mean_squared_error(y_test,predictions)
print("MSE: %.2f" % mse)
print("RMSE: %.2f" % np.sqrt(mse))

MSE: 184579.72
RMSE: 429.63


In [82]:
from sklearn.metrics import explained_variance_score
explained_variance_score(y_test, predictions)

0.8886511124894769

In [83]:
from sklearn.metrics import r2_score 
r2_score(y_test, predictions)

0.84509915699735