# Using Fbprophet model for time series forecasting


In [60]:
#installing the library
!pip install pystan



In [61]:
#installing the fbprophet library
!pip install fbprophet



In [4]:
#mounting google drive 
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [62]:
#importing library for visualizing
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [63]:
#Importing library for data wrangling
import pandas as pd
df=pd.read_csv('drive/My Drive/analytics/train_E1GspfA.csv')

In [64]:
#creating copy of data
data=df.copy()

In [65]:
#Combine date and hour into one column
data['datetime'] = pd.to_datetime(data.date) + pd.to_timedelta(data.hour , unit= 'h')

data['ds']=data.datetime
data['y']=data.demand

In [66]:
#dropping the columns which are not neccessary in Fbprophet
data.drop(['demand','datetime','date','hour'],axis=1,inplace=True)

In [67]:
#checking the head of data
data.head()

Unnamed: 0,ds,y
0,2018-08-18 09:00:00,91
1,2018-08-18 10:00:00,21
2,2018-08-18 13:00:00,23
3,2018-08-18 14:00:00,104
4,2018-08-18 15:00:00,81


Unnamed: 0,ds,y
0,2018-08-18 09:00:00,91
1,2018-08-18 10:00:00,21
2,2018-08-18 13:00:00,23
3,2018-08-18 14:00:00,104
4,2018-08-18 15:00:00,81


In [68]:
#Checking the tail of data
data.tail()

Unnamed: 0,ds,y
18242,2021-02-28 19:00:00,95
18243,2021-02-28 20:00:00,88
18244,2021-02-28 21:00:00,39
18245,2021-02-28 22:00:00,104
18246,2021-02-28 23:00:00,85


Unnamed: 0,ds,y
18242,2021-02-28 19:00:00,95
18243,2021-02-28 20:00:00,88
18244,2021-02-28 21:00:00,39
18245,2021-02-28 22:00:00,104
18246,2021-02-28 23:00:00,85


In [69]:
#Checking the shape of the data
data.shape

(18247, 2)

(18247, 2)

In [71]:
#Using plotly you can se the demand graph
fig = go.Figure([go.Scatter(x=data['ds'], y=data['y'])])
fig.show()

In [72]:
#Using the data before 2020-11-27
train = data[data['ds']<'2020-11-27']

In [73]:
train

Unnamed: 0,ds,y
0,2018-08-18 09:00:00,91
1,2018-08-18 10:00:00,21
2,2018-08-18 13:00:00,23
3,2018-08-18 14:00:00,104
4,2018-08-18 15:00:00,81
...,...,...
16823,2020-11-26 19:00:00,94
16824,2020-11-26 20:00:00,77
16825,2020-11-26 21:00:00,35
16826,2020-11-26 22:00:00,15


Unnamed: 0,ds,y
0,2018-08-18 09:00:00,91
1,2018-08-18 10:00:00,21
2,2018-08-18 13:00:00,23
3,2018-08-18 14:00:00,104
4,2018-08-18 15:00:00,81
...,...,...
16823,2020-11-26 19:00:00,94
16824,2020-11-26 20:00:00,77
16825,2020-11-26 21:00:00,35
16826,2020-11-26 22:00:00,15


# Model Building

In [75]:
from fbprophet import Prophet

m = Prophet(yearly_seasonality = True, seasonality_prior_scale=0.1,changepoint_range=0.9)
m.fit(train)
future = m.make_future_dataframe(periods=720,freq='h')
forecast = m.predict(future)

In [77]:
#ckecking the predicted Values
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2018-08-18 09:00:00,55.641212,10.903112,104.005459,55.641212,55.641212,0.026922,0.026922,0.026922,-1.499171,...,12.568832,12.568832,12.568832,-11.042738,-11.042738,-11.042738,0.0,0.0,0.0,55.668134
1,2018-08-18 10:00:00,55.644651,15.313354,110.742216,55.644651,55.644651,6.855327,6.855327,6.855327,4.875428,...,12.985932,12.985932,12.985932,-11.006033,-11.006033,-11.006033,0.0,0.0,0.0,62.499978
2,2018-08-18 13:00:00,55.654970,32.019359,126.798009,55.654970,55.654970,25.158455,25.158455,25.158455,22.040230,...,14.014187,14.014187,14.014187,-10.895962,-10.895962,-10.895962,0.0,0.0,0.0,80.813425
3,2018-08-18 14:00:00,55.658410,29.088605,127.083194,55.658410,55.658410,27.684309,27.684309,27.684309,24.269061,...,14.274540,14.274540,14.274540,-10.859291,-10.859291,-10.859291,0.0,0.0,0.0,83.342719
4,2018-08-18 15:00:00,55.661850,37.365685,129.400630,55.661850,55.661850,28.529333,28.529333,28.529333,24.861724,...,14.490243,14.490243,14.490243,-10.822634,-10.822634,-10.822634,0.0,0.0,0.0,84.191183
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17543,2020-12-26 19:00:00,68.638942,54.260620,148.984831,68.472761,68.836637,32.369829,32.369829,32.369829,10.105141,...,14.877274,14.877274,14.877274,7.387414,7.387414,7.387414,0.0,0.0,0.0,101.008771
17544,2020-12-26 20:00:00,68.635732,48.712367,144.731014,68.468830,68.834007,24.791781,24.791781,24.791781,2.588877,...,14.850375,14.850375,14.850375,7.352529,7.352529,7.352529,0.0,0.0,0.0,93.427513
17545,2020-12-26 21:00:00,68.632522,43.067295,132.547852,68.464899,68.831378,18.326344,18.326344,18.326344,-3.764353,...,14.773231,14.773231,14.773231,7.317467,7.317467,7.317467,0.0,0.0,0.0,86.958866
17546,2020-12-26 22:00:00,68.629312,36.672885,131.482146,68.460590,68.828749,13.995894,13.995894,13.995894,-7.932351,...,14.646016,14.646016,14.646016,7.282230,7.282230,7.282230,0.0,0.0,0.0,82.625206


Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2018-08-18 09:00:00,55.641212,7.905818,103.329818,55.641212,55.641212,0.026922,0.026922,0.026922,-1.499171,...,12.568832,12.568832,12.568832,-11.042738,-11.042738,-11.042738,0.0,0.0,0.0,55.668134
1,2018-08-18 10:00:00,55.644651,11.307895,111.251660,55.644651,55.644651,6.855327,6.855327,6.855327,4.875428,...,12.985932,12.985932,12.985932,-11.006033,-11.006033,-11.006033,0.0,0.0,0.0,62.499978
2,2018-08-18 13:00:00,55.654970,34.825611,130.294975,55.654970,55.654970,25.158455,25.158455,25.158455,22.040230,...,14.014187,14.014187,14.014187,-10.895962,-10.895962,-10.895962,0.0,0.0,0.0,80.813425
3,2018-08-18 14:00:00,55.658410,35.368893,130.450513,55.658410,55.658410,27.684309,27.684309,27.684309,24.269061,...,14.274540,14.274540,14.274540,-10.859291,-10.859291,-10.859291,0.0,0.0,0.0,83.342719
4,2018-08-18 15:00:00,55.661850,38.052813,134.761495,55.661850,55.661850,28.529333,28.529333,28.529333,24.861724,...,14.490243,14.490243,14.490243,-10.822634,-10.822634,-10.822634,0.0,0.0,0.0,84.191183
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17543,2020-12-26 19:00:00,68.638942,55.620723,147.760428,68.461456,68.838713,32.369829,32.369829,32.369829,10.105141,...,14.877274,14.877274,14.877274,7.387414,7.387414,7.387414,0.0,0.0,0.0,101.008771
17544,2020-12-26 20:00:00,68.635732,42.136536,138.599639,68.457539,68.836381,24.791781,24.791781,24.791781,2.588877,...,14.850375,14.850375,14.850375,7.352529,7.352529,7.352529,0.0,0.0,0.0,93.427513
17545,2020-12-26 21:00:00,68.632522,39.351795,133.966033,68.453623,68.833992,18.326344,18.326344,18.326344,-3.764353,...,14.773231,14.773231,14.773231,7.317467,7.317467,7.317467,0.0,0.0,0.0,86.958866
17546,2020-12-26 22:00:00,68.629312,35.444298,131.055774,68.449706,68.831602,13.995894,13.995894,13.995894,-7.932351,...,14.646016,14.646016,14.646016,7.282230,7.282230,7.282230,0.0,0.0,0.0,82.625206


In [79]:
#Using the data before '2020-12-26
last=data[data['ds']>'2020-12-26']

In [80]:
#Missing 1 month data
middle=forecast[forecast['ds']>'2020-11-27']

In [81]:
#Selecting only Two columns
middle=middle[['ds','yhat']]

In [82]:
#Renaming the columns to concatenate
middle.rename(columns={'yhat':'y'},inplace=True)

In [83]:
middle

Unnamed: 0,ds,y
16829,2020-11-27 01:00:00,58.837195
16830,2020-11-27 02:00:00,54.751310
16831,2020-11-27 03:00:00,51.839849
16832,2020-11-27 04:00:00,51.700901
16833,2020-11-27 05:00:00,54.567471
...,...,...
17543,2020-12-26 19:00:00,101.008771
17544,2020-12-26 20:00:00,93.427513
17545,2020-12-26 21:00:00,86.958866
17546,2020-12-26 22:00:00,82.625206


Unnamed: 0,ds,y
16829,2020-11-27 01:00:00,58.837195
16830,2020-11-27 02:00:00,54.751310
16831,2020-11-27 03:00:00,51.839849
16832,2020-11-27 04:00:00,51.700901
16833,2020-11-27 05:00:00,54.567471
...,...,...
17543,2020-12-26 19:00:00,101.008771
17544,2020-12-26 20:00:00,93.427513
17545,2020-12-26 21:00:00,86.958866
17546,2020-12-26 22:00:00,82.625206


In [85]:
#Reading test data
test = pd.read_csv('drive/My Drive/analytics/test_6QvDdzb.csv')

In [86]:
#Checking the tail in test data
test.tail()

Unnamed: 0,date,hour
7645,2022-03-28,19
7646,2022-03-28,20
7647,2022-03-28,21
7648,2022-03-28,22
7649,2022-03-28,23


Unnamed: 0,date,hour
7645,2022-03-28,19
7646,2022-03-28,20
7647,2022-03-28,21
7648,2022-03-28,22
7649,2022-03-28,23


# Preparing the Test data for prediction

In [87]:
#merging the date  and the hour column 
test['datetime'] = pd.to_datetime(test.date) + pd.to_timedelta(test.hour , unit= 'h')

test['ds']=test.datetime

In [88]:
test.drop(['datetime','hour','date'],axis=1,inplace=True)

In [89]:
#Predicting on test data
forecast1 = m.predict(df=test)

In [90]:
forecast1

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2021-03-01 00:00:00,63.692074,6.890613,104.267088,62.237591,65.140095,-8.352906,-8.352906,-8.352906,-13.066116,...,0.469758,0.469758,0.469758,4.243453,4.243453,4.243453,0.0,0.0,0.0,55.339168
1,2021-03-01 01:00:00,63.688864,3.647442,96.140533,62.232798,65.137574,-12.559308,-12.559308,-12.559308,-16.711255,...,-0.116760,-0.116760,-0.116760,4.268706,4.268706,4.268706,0.0,0.0,0.0,51.129556
2,2021-03-01 02:00:00,63.685653,-0.372662,93.851596,62.228004,65.135054,-17.334642,-17.334642,-17.334642,-20.948394,...,-0.680039,-0.680039,-0.680039,4.293791,4.293791,4.293791,0.0,0.0,0.0,46.351012
3,2021-03-01 03:00:00,63.682443,-3.527559,90.815417,62.223211,65.132533,-20.930106,-20.930106,-20.930106,-24.030350,...,-1.218462,-1.218462,-1.218462,4.318706,4.318706,4.318706,0.0,0.0,0.0,42.752337
4,2021-03-01 05:00:00,63.676023,-1.087894,92.471608,62.213623,65.127491,-19.552817,-19.552817,-19.552817,-21.705282,...,-2.215556,-2.215556,-2.215556,4.368021,4.368021,4.368021,0.0,0.0,0.0,44.123206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7645,2022-03-28 19:00:00,33.429823,-18.303613,84.381649,13.658385,54.002784,-1.043690,-1.043690,-1.043690,10.105141,...,-6.103402,-6.103402,-6.103402,-5.045428,-5.045428,-5.045428,0.0,0.0,0.0,32.386133
7646,2022-03-28 20:00:00,33.426613,-29.480136,74.718140,13.653116,54.002475,-8.662294,-8.662294,-8.662294,2.588877,...,-6.205976,-6.205976,-6.205976,-5.045195,-5.045195,-5.045195,0.0,0.0,0.0,24.764319
7647,2022-03-28 21:00:00,33.423403,-30.618881,75.497522,13.647848,54.002166,-15.101859,-15.101859,-15.101859,-3.764353,...,-6.292662,-6.292662,-6.292662,-5.044844,-5.044844,-5.044844,0.0,0.0,0.0,18.321544
7648,2022-03-28 22:00:00,33.420192,-38.095977,65.020513,13.642579,54.001857,-19.341871,-19.341871,-19.341871,-7.932351,...,-6.365145,-6.365145,-6.365145,-5.044375,-5.044375,-5.044375,0.0,0.0,0.0,14.078321


Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2021-03-01 00:00:00,63.692074,7.396225,103.374371,62.213233,65.173446,-8.352906,-8.352906,-8.352906,-13.066116,...,0.469758,0.469758,0.469758,4.243453,4.243453,4.243453,0.0,0.0,0.0,55.339168
1,2021-03-01 01:00:00,63.688864,8.326847,99.106550,62.209202,65.171343,-12.559308,-12.559308,-12.559308,-16.711255,...,-0.116760,-0.116760,-0.116760,4.268706,4.268706,4.268706,0.0,0.0,0.0,51.129556
2,2021-03-01 02:00:00,63.685653,0.230011,92.980865,62.205172,65.169241,-17.334642,-17.334642,-17.334642,-20.948394,...,-0.680039,-0.680039,-0.680039,4.293791,4.293791,4.293791,0.0,0.0,0.0,46.351012
3,2021-03-01 03:00:00,63.682443,-4.349833,89.619637,62.201141,65.167138,-20.930106,-20.930106,-20.930106,-24.030350,...,-1.218462,-1.218462,-1.218462,4.318706,4.318706,4.318706,0.0,0.0,0.0,42.752337
4,2021-03-01 05:00:00,63.676023,-2.771027,93.222137,62.193080,65.162932,-19.552817,-19.552817,-19.552817,-21.705282,...,-2.215556,-2.215556,-2.215556,4.368021,4.368021,4.368021,0.0,0.0,0.0,44.123206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7645,2022-03-28 19:00:00,33.429823,-20.948292,83.779973,15.115815,52.160421,-1.043690,-1.043690,-1.043690,10.105141,...,-6.103402,-6.103402,-6.103402,-5.045428,-5.045428,-5.045428,0.0,0.0,0.0,32.386133
7646,2022-03-28 20:00:00,33.426613,-27.161765,72.371888,15.110194,52.157574,-8.662294,-8.662294,-8.662294,2.588877,...,-6.205976,-6.205976,-6.205976,-5.045195,-5.045195,-5.045195,0.0,0.0,0.0,24.764319
7647,2022-03-28 21:00:00,33.423403,-33.560549,67.052466,15.104572,52.154727,-15.101859,-15.101859,-15.101859,-3.764353,...,-6.292662,-6.292662,-6.292662,-5.044844,-5.044844,-5.044844,0.0,0.0,0.0,18.321544
7648,2022-03-28 22:00:00,33.420192,-39.190150,62.067807,15.098951,52.151880,-19.341871,-19.341871,-19.341871,-7.932351,...,-6.365145,-6.365145,-6.365145,-5.044375,-5.044375,-5.044375,0.0,0.0,0.0,14.078321


In [92]:
test2=pd.read_csv("/content/drive/MyDrive/analytics/test_6QvDdzb.csv")

In [93]:
test2['demand']=forecast1['yhat']

In [94]:
test2

Unnamed: 0,date,hour,demand
0,2021-03-01,0,55.339168
1,2021-03-01,1,51.129556
2,2021-03-01,2,46.351012
3,2021-03-01,3,42.752337
4,2021-03-01,5,44.123206
...,...,...,...
7645,2022-03-28,19,32.386133
7646,2022-03-28,20,24.764319
7647,2022-03-28,21,18.321544
7648,2022-03-28,22,14.078321


Unnamed: 0,date,hour,demand
0,2021-03-01,0,55.339168
1,2021-03-01,1,51.129556
2,2021-03-01,2,46.351012
3,2021-03-01,3,42.752337
4,2021-03-01,5,44.123206
...,...,...,...
7645,2022-03-28,19,32.386133
7646,2022-03-28,20,24.764319
7647,2022-03-28,21,18.321544
7648,2022-03-28,22,14.078321


In [95]:
#submission file
test2.to_csv("resultfb.csv",index=False)

In [None]:
#

In [96]:
#Concatenating the 3 dataset After doing prediction for the missing the data
final_df= pd.concat([train,middle,last])

In [97]:
#Checking the shape of the data
final_df.shape

(18966, 2)

(18966, 2)

In [98]:
#Checking how the dataframe Looks Like
final_df

Unnamed: 0,ds,y
0,2018-08-18 09:00:00,91.0
1,2018-08-18 10:00:00,21.0
2,2018-08-18 13:00:00,23.0
3,2018-08-18 14:00:00,104.0
4,2018-08-18 15:00:00,81.0
...,...,...
18242,2021-02-28 19:00:00,95.0
18243,2021-02-28 20:00:00,88.0
18244,2021-02-28 21:00:00,39.0
18245,2021-02-28 22:00:00,104.0


Unnamed: 0,ds,y
0,2018-08-18 09:00:00,91.0
1,2018-08-18 10:00:00,21.0
2,2018-08-18 13:00:00,23.0
3,2018-08-18 14:00:00,104.0
4,2018-08-18 15:00:00,81.0
...,...,...
18242,2021-02-28 19:00:00,95.0
18243,2021-02-28 20:00:00,88.0
18244,2021-02-28 21:00:00,39.0
18245,2021-02-28 22:00:00,104.0


In [99]:
#Checking the null value 
final_df.isnull().sum()

ds    0
y     0
dtype: int64

ds    0
y     0
dtype: int64

In [101]:
#Observing the Plot After prediction for missing data 
fig = go.Figure([go.Scatter(x=final_df['ds'], y=final_df['y'])])
fig.show()

In [102]:
#training on final dataset
fb = Prophet(yearly_seasonality = True, seasonality_prior_scale=0.1,changepoint_range=0.9)
fb.fit(final_df)



<fbprophet.forecaster.Prophet at 0x7f4320c59f50>

<fbprophet.forecaster.Prophet at 0x7f4320d27e50>

In [103]:
#Prediction on final dataset 
forecast3 = fb.predict(df=test)

In [104]:
forecast3

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2021-03-01 00:00:00,72.987685,19.914352,110.328869,72.987685,72.987685,-7.973516,-7.973516,-7.973516,-13.443892,...,0.905358,0.905358,0.905358,4.565018,4.565018,4.565018,0.0,0.0,0.0,65.014169
1,2021-03-01 01:00:00,72.987150,15.858506,108.082273,72.987150,72.987150,-11.703128,-11.703128,-11.703128,-16.602161,...,0.310232,0.310232,0.310232,4.588800,4.588800,4.588800,0.0,0.0,0.0,61.284022
2,2021-03-01 02:00:00,72.986616,10.629554,100.168590,72.986616,72.986616,-16.112821,-16.112821,-16.112821,-20.460450,...,-0.264786,-0.264786,-0.264786,4.612416,4.612416,4.612416,0.0,0.0,0.0,56.873795
3,2021-03-01 03:00:00,72.986082,8.385302,93.836649,72.986082,72.986082,-19.617344,-19.617344,-19.617344,-23.435211,...,-0.817996,-0.817996,-0.817996,4.635863,4.635863,4.635863,0.0,0.0,0.0,53.368738
4,2021-03-01 05:00:00,72.985013,8.535145,102.764548,72.985013,72.985013,-18.687316,-18.687316,-18.687316,-21.516227,...,-1.853338,-1.853338,-1.853338,4.682248,4.682248,4.682248,0.0,0.0,0.0,54.297697
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7645,2022-03-28 19:00:00,67.950099,20.365033,117.155238,59.312653,77.236259,-0.412894,-0.412894,-0.412894,10.221112,...,-6.197780,-6.197780,-6.197780,-4.436226,-4.436226,-4.436226,0.0,0.0,0.0,67.537205
7646,2022-03-28 20:00:00,67.949564,12.945318,108.320090,59.311379,77.236913,-8.281785,-8.281785,-8.281785,2.488222,...,-6.334379,-6.334379,-6.334379,-4.435627,-4.435627,-4.435627,0.0,0.0,0.0,59.667780
7647,2022-03-28 21:00:00,67.949030,8.637185,101.359011,59.309852,77.237567,-15.116456,-15.116456,-15.116456,-4.227688,...,-6.453849,-6.453849,-6.453849,-4.434919,-4.434919,-4.434919,0.0,0.0,0.0,52.832574
7648,2022-03-28 22:00:00,67.948495,4.609376,92.139676,59.308211,77.238222,-19.662275,-19.662275,-19.662275,-8.670523,...,-6.557651,-6.557651,-6.557651,-4.434100,-4.434100,-4.434100,0.0,0.0,0.0,48.286220


Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2021-03-01 00:00:00,72.987685,17.189183,111.846822,72.987685,72.987685,-7.973516,-7.973516,-7.973516,-13.443892,...,0.905358,0.905358,0.905358,4.565018,4.565018,4.565018,0.0,0.0,0.0,65.014169
1,2021-03-01 01:00:00,72.987150,16.422127,107.058728,72.987150,72.987150,-11.703128,-11.703128,-11.703128,-16.602161,...,0.310232,0.310232,0.310232,4.588800,4.588800,4.588800,0.0,0.0,0.0,61.284022
2,2021-03-01 02:00:00,72.986616,10.585165,101.167570,72.986616,72.986616,-16.112821,-16.112821,-16.112821,-20.460450,...,-0.264786,-0.264786,-0.264786,4.612416,4.612416,4.612416,0.0,0.0,0.0,56.873795
3,2021-03-01 03:00:00,72.986082,7.605611,101.881179,72.986082,72.986082,-19.617344,-19.617344,-19.617344,-23.435211,...,-0.817996,-0.817996,-0.817996,4.635863,4.635863,4.635863,0.0,0.0,0.0,53.368738
4,2021-03-01 05:00:00,72.985013,12.278355,102.739047,72.985013,72.985013,-18.687316,-18.687316,-18.687316,-21.516227,...,-1.853338,-1.853338,-1.853338,4.682248,4.682248,4.682248,0.0,0.0,0.0,54.297697
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7645,2022-03-28 19:00:00,67.950099,23.677368,114.267423,59.100574,76.569702,-0.412894,-0.412894,-0.412894,10.221112,...,-6.197780,-6.197780,-6.197780,-4.436226,-4.436226,-4.436226,0.0,0.0,0.0,67.537205
7646,2022-03-28 20:00:00,67.949564,11.047121,108.374600,59.098419,76.570705,-8.281785,-8.281785,-8.281785,2.488222,...,-6.334379,-6.334379,-6.334379,-4.435627,-4.435627,-4.435627,0.0,0.0,0.0,59.667780
7647,2022-03-28 21:00:00,67.949030,8.241638,98.659982,59.096264,76.571709,-15.116456,-15.116456,-15.116456,-4.227688,...,-6.453849,-6.453849,-6.453849,-4.434919,-4.434919,-4.434919,0.0,0.0,0.0,52.832574
7648,2022-03-28 22:00:00,67.948495,0.932901,96.304083,59.094109,76.572712,-19.662275,-19.662275,-19.662275,-8.670523,...,-6.557651,-6.557651,-6.557651,-4.434100,-4.434100,-4.434100,0.0,0.0,0.0,48.286220


In [105]:
#assigning  all the predict value in demand column
test2['demand']=forecast3['yhat']

In [106]:
test2

Unnamed: 0,date,hour,demand
0,2021-03-01,0,65.014169
1,2021-03-01,1,61.284022
2,2021-03-01,2,56.873795
3,2021-03-01,3,53.368738
4,2021-03-01,5,54.297697
...,...,...,...
7645,2022-03-28,19,67.537205
7646,2022-03-28,20,59.667780
7647,2022-03-28,21,52.832574
7648,2022-03-28,22,48.286220


Unnamed: 0,date,hour,demand
0,2021-03-01,0,65.014169
1,2021-03-01,1,61.284022
2,2021-03-01,2,56.873795
3,2021-03-01,3,53.368738
4,2021-03-01,5,54.297697
...,...,...,...
7645,2022-03-28,19,67.537205
7646,2022-03-28,20,59.667780
7647,2022-03-28,21,52.832574
7648,2022-03-28,22,48.286220


In [107]:
#Submission Final
test2.to_csv('final.csv',index=False)

# hyperparameter tuning on Fbprophet model


In [108]:
df = final_df.copy()

Dividing the dataset into train and test. The train has data from 2020-11-26 to 2021-02-28. Test Data has 3 months data

In [109]:
end_date = '2020-11-26'
mask1 = (df['ds'] <= end_date)
mask2 = (df['ds'] > end_date)

In [110]:
X_tr = df.loc[mask1]
X_tst = df.loc[mask2]
print("train shape",X_tr.shape)
print("test shape",X_tst.shape)

train shape (16807, 2)
test shape (2159, 2)
train shape (16807, 2)
test shape (2159, 2)


In [111]:
X_tst

Unnamed: 0,ds,y
16807,2020-11-26 01:00:00,21.0
16808,2020-11-26 02:00:00,82.0
16809,2020-11-26 04:00:00,14.0
16810,2020-11-26 06:00:00,39.0
16811,2020-11-26 07:00:00,81.0
...,...,...
18242,2021-02-28 19:00:00,95.0
18243,2021-02-28 20:00:00,88.0
18244,2021-02-28 21:00:00,39.0
18245,2021-02-28 22:00:00,104.0


Unnamed: 0,ds,y
16807,2020-11-26 01:00:00,21.0
16808,2020-11-26 02:00:00,82.0
16809,2020-11-26 04:00:00,14.0
16810,2020-11-26 06:00:00,39.0
16811,2020-11-26 07:00:00,81.0
...,...,...
18242,2021-02-28 19:00:00,95.0
18243,2021-02-28 20:00:00,88.0
18244,2021-02-28 21:00:00,39.0
18245,2021-02-28 22:00:00,104.0


n_changepoints is the number of change happen in the data. Prophet model detects them by its own. By default, its value is 25, which are uniformly placed in the first 80% of the time series. Changing n_changepoints can add value to the model.

changepoint_prior_scale to indicate how flexible the changepoints are allowed to be. In other words, how much can the changepoints fit to the data. If you make it high it will be more flexible, but you can end up overfitting. By default, this parameter is set to 0.05

seasonality_mode There are 2 types model seasonality mode. Additive & multiplicaticative. By default Prophet fits additive seasonalities, meaning the effect of the seasonality is added to the trend to get the forecast. Prophet can model multiplicative seasonality by setting seasonality_mode='multiplicative' in the model.

holiday_prior_scale just like changepoint_prior_scale, holiday_prior_scale is used to smoothning the effect of holidays. By default its value is 10, which provides very little regularization. Reducing this parameter dampens holiday effects

Seasonalities with fourier_order Prophet model, by default finds the seasonalities and adds the default parameters of the seasonality. We can modify the seasonalities effect by adding custom seasonalities as add_seasonality in the model with different fourier order.Yy default Prophet uses a Fourier order of 3 for weekly seasonality and 10 for yearly seasonality.

Now, we will do the hyperparameters tuning using parametergrid. parametergrid will create all the possible parameters combination and will test the model prediction using every combination. I am taking 4 parameters: n_changepoints, changepoint_prior_scale,seasonality_mode, holiday_prior_scale for tuning.

# HyperParameter Tuning using  ParameterGrid

In [112]:
#importing libraries for tuning
import pandas as pd
import random
from sklearn.metrics import mean_absolute_percentage_error

In [113]:
from sklearn.model_selection import ParameterGrid
params_grid = {'seasonality_mode':['additive'],
               'changepoint_prior_scale':[0.1,0.2,0.3,0.4,0.5],
               'n_changepoints' : [100,150,200]}
grid = ParameterGrid(params_grid)
cnt = 0
for p in grid:
    print(p)
    cnt = cnt+1

print('Total Possible Models',cnt)

{'changepoint_prior_scale': 0.1, 'n_changepoints': 100, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.1, 'n_changepoints': 150, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.1, 'n_changepoints': 200, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.2, 'n_changepoints': 100, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.2, 'n_changepoints': 150, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.2, 'n_changepoints': 200, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.3, 'n_changepoints': 100, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.3, 'n_changepoints': 150, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.3, 'n_changepoints': 200, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.4, 'n_changepoints': 100, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.4, 'n_changepoints': 150, 'seasonality_mode': 'additive'}
{'changepoint_prior_scale': 0.4,

# Prophet Model Tuning

In [114]:
# strt='2020-11-26'
# end='2021-03-01'
# model_parameters = pd.DataFrame(columns = ['MAPE','Parameters'])
# for p in grid:
#     test = pd.DataFrame()
#     print(p)
#     random.seed(0)
#     train_model =Prophet(changepoint_prior_scale = p['changepoint_prior_scale'],
#                          n_changepoints = p['n_changepoints'],
#                          seasonality_mode = p['seasonality_mode'],
#                          weekly_seasonality=True,
#                          daily_seasonality = True,
#                          yearly_seasonality = True,
#                          interval_width=0.95)
#     train_model.fit(X_tr)
#     #train_forecast = train_model.make_future_dataframe(periods=57, freq='D',include_history = False)
#     train_forecast = train_model.predict(X_tst)
#     test=train_forecast[['ds','yhat']]
#     Actual = df[(df['ds']>strt) & (df['ds']<=end)]
#     MAPE = mean_absolute_percentage_error(Actual['y'],abs(test['yhat']))
#     print('Mean Absolute Percentage Error(MAPE)------------------------------------',MAPE)
#     model_parameters = model_parameters.append({'MAPE':MAPE,'Parameters':p},ignore_index=True)

In [116]:
final_model = Prophet(changepoint_prior_scale= 0.2,
                      n_changepoints = 200,
                      seasonality_mode = 'additive',
                      weekly_seasonality=True,
                      daily_seasonality = True,
                      yearly_seasonality = True,
                      interval_width=0.95)
#final_model.add_country_holidays(country_name='US')
final_model.fit(df)

<fbprophet.forecaster.Prophet at 0x7f433a16cdd0>

In [117]:
forecast4 = final_model.predict(df=test)

In [118]:
forecast4.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2021-03-01 00:00:00,73.175953,-7.851168,136.198609,73.175953,73.175953,-6.93332,-6.93332,-6.93332,-13.46903,...,0.923363,0.923363,0.923363,5.612347,5.612347,5.612347,0.0,0.0,0.0,66.242634
1,2021-03-01 01:00:00,73.173835,-3.764582,128.709523,73.173835,73.173835,-10.65518,-10.65518,-10.65518,-16.622533,...,0.327926,0.327926,0.327926,5.639428,5.639428,5.639428,0.0,0.0,0.0,62.518655
2,2021-03-01 02:00:00,73.171717,-12.409202,128.124733,73.171717,73.171717,-15.053016,-15.053016,-15.053016,-20.471856,...,-0.247503,-0.247503,-0.247503,5.666343,5.666343,5.666343,0.0,0.0,0.0,58.1187
3,2021-03-01 03:00:00,73.169598,-19.024823,126.996549,73.169598,73.169598,-18.550997,-18.550997,-18.550997,-23.442871,...,-0.801217,-0.801217,-0.801217,5.693091,5.693091,5.693091,0.0,0.0,0.0,54.618601
4,2021-03-01 05:00:00,73.165361,-11.717936,124.690209,73.165361,73.165584,-17.6326,-17.6326,-17.6326,-21.540862,...,-1.837823,-1.837823,-1.837823,5.746085,5.746085,5.746085,0.0,0.0,0.0,55.532761


In [119]:
forecast4.shape

(7650, 22)