In [43]:
#import libraries
import xgboost as xgb
from fbprophet import Prophet
#import libraries
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [44]:
#get the data
data = pd.read_csv("../data/Daily Bike Sharing.csv")
data.head(1)

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,1/1/2011,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985


In [45]:
#select variables
dataset = data.loc[:, ["dteday","cnt", "holiday"]]
dataset.head(1) 

Unnamed: 0,dteday,cnt,holiday
0,1/1/2011,985,0


In [47]:
#Date variable
dataset.dteday = pd.to_datetime(dataset.dteday)
dataset.dteday

0     2011-01-01
1     2011-01-02
2     2011-01-03
3     2011-01-04
4     2011-01-05
         ...    
726   2012-12-27
727   2012-12-28
728   2012-12-29
729   2012-12-30
730   2012-12-31
Name: dteday, Length: 731, dtype: datetime64[ns]

In [48]:
#renaming variable
dataset = dataset.rename(columns = {'cnt' : 'y'})
dataset = dataset.rename(columns = {'dteday' : 'ds'})
dataset.head(1)

Unnamed: 0,ds,y,holiday
0,2011-01-01,985,0


In [49]:
holiday_dates = dataset[dataset.holiday == 1].ds
holidays = pd.DataFrame({'holiday' : 'holi',
                         'ds': pd.to_datetime(holiday_dates),
                         'lower_window': -3,
                         'upper_window': 1})
holidays.head()

Unnamed: 0,holiday,ds,lower_window,upper_window
16,holi,2011-01-17,-3,1
51,holi,2011-02-21,-3,1
104,holi,2011-04-15,-3,1
149,holi,2011-05-30,-3,1
184,holi,2011-07-04,-3,1


In [50]:
#removing holiday column
dataset = dataset.drop(columns = "holiday")
dataset.head(1)

Unnamed: 0,ds,y
0,2011-01-01,985


In [51]:
#Facebook Prophet model
m = Prophet(growth = "linear",
            yearly_seasonality = True,
            weekly_seasonality = True,
            daily_seasonality = False,
            holidays = holidays,
            seasonality_mode = "multiplicative",
            seasonality_prior_scale = 20,
            holidays_prior_scale = 20,
            changepoint_prior_scale = 0.01)
#m.add_regressor('workingday')
#m.add_regressor('weathersit')
#m.add_regressor('temp')
#m.add_regressor('atemp')
#m.add_regressor('hum')
#m.add_regressor('windspeed')
m.fit(dataset)

<fbprophet.forecaster.Prophet at 0x26fec5ec730>

In [52]:
#Create Future Dataframe
future = m.make_future_dataframe(periods = 7,
                                 freq = "D")
future.tail(1)

Unnamed: 0,ds
737,2013-01-07


In [53]:
#forecast
forecast = m.predict(future)
forecast.tail()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,holi,holi_lower,holi_upper,holidays,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
733,2013-01-03,6728.684258,2065.268481,4617.872757,6728.684258,6728.684258,0.0,0.0,0.0,0.0,...,0.038616,0.038616,0.038616,-0.551719,-0.551719,-0.551719,0.0,0.0,0.0,3276.179667
734,2013-01-04,6734.67336,2126.006794,4581.796781,6734.67336,6734.67336,0.0,0.0,0.0,0.0,...,0.036039,0.036039,0.036039,-0.54153,-0.54153,-0.54153,0.0,0.0,0.0,3330.352216
735,2013-01-05,6740.662462,2001.282319,4480.65797,6740.662462,6740.662462,0.0,0.0,0.0,0.0,...,0.009792,0.009792,0.009792,-0.529443,-0.529443,-0.529443,0.0,0.0,0.0,3237.872623
736,2013-01-06,6746.651564,1584.493768,4051.735554,6746.651564,6746.651564,0.0,0.0,0.0,0.0,...,-0.0705,-0.0705,-0.0705,-0.515682,-0.515682,-0.515682,0.0,0.0,0.0,2791.887375
737,2013-01-07,6752.640665,1864.667411,4338.783718,6752.640665,6752.640666,0.0,0.0,0.0,0.0,...,-0.035822,-0.035822,-0.035822,-0.500497,-0.500497,-0.500497,0.0,0.0,0.0,3131.074434


In [59]:
#xgboost prep
prophet_variables = forecast.loc[:, ["trend", "holi", "weekly", "yearly"]]
df_xgb = pd.concat([dataset, prophet_variables], axis = 1)
df_xgb.head()

Unnamed: 0,ds,y,trend,holi,weekly,yearly
0,2011-01-01,985.0,2338.670957,0.0,0.009792,-0.567654
1,2011-01-02,801.0,2344.660066,0.0,-0.0705,-0.563013
2,2011-01-03,1349.0,2350.649174,0.0,-0.035822,-0.556037
3,2011-01-04,1562.0,2356.638282,0.0,0.004841,-0.546875
4,2011-01-05,1600.0,2362.627391,0.0,0.017034,-0.53571


In [56]:
#Training and test set
test_days = 31
training_set = df_xgb.iloc[:-test_days, :]
test_set = df_xgb.iloc[-test_days:, :]
test_set.head(1)

Unnamed: 0,ds,y,trend,holi,weekly,yearly
707,2012-12-08,5582.0,6572.967612,0.0,0.009792,-0.233458


In [None]:
#isolate X and Y
y_train = training_set.y
y_test = test_set.y
X_train = training_set.iloc[:, 2:]
X_test = test_set.iloc[:, 2:]

In [None]:
#create XGBoost Matrices
Train = xgb.DMatrix(data = X_train, label = y_train)
Test = xgb.DMatrix(data = X_test, label = y_test)

In [None]:
#Set the parameters
parameters = {'learning_rate': 0.1,
              'max_depth': 3,
              'colsample_bytree': 1,
              'subsample': 1,
              'min_child_weight': 1,
              'gamma': 1,
              'random_state': 1502,
              'eval_metric': "rmse",
              'objective': "reg:squarederror"}

In [None]:
#XGBoost Model
model = xgb.train(params = parameters,
                  dtrain = Train,
                  num_boost_round = 100,
                  evals = [(Test, "y")],
                  verbose_eval = 15)

In [None]:
#Forecasting
predictions_xgb = pd.Series(model.predict(Test), name = "XGBoost")
predictions_xgb.index = test_set.ds
predictions_xgb[:2]

In [None]:
#set up index
training_set.index = training_set.ds
test_set.index = test_set.ds

In [None]:
#Viz
training_set.y['2012-07-01':].plot(figsize = (9,6), legend = True)
test_set.y.plot(legend = True)
predictions_xgb.plot(legend = True)

In [None]:
predictions_xgb.to_csv('../data/forcasts/predictions_xgb.csv', index = True)