<a href="https://colab.research.google.com/github/jeremysb1/forecasting/blob/main/fbprophet_parameter_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries and Data

In [2]:
# change directory
# Make the project folder the working directory, so that relative paths used
# afterwards (such as 'nyc_data.csv') are resolved against it.
# NOTE(review): presumably Google Drive must already be mounted at
# /content/drive for this path to exist; no mount step is visible here. Confirm.
%cd /content/drive/MyDrive/Time Series Forecasting Product

/content/drive/MyDrive/Time Series Forecasting Product


In [18]:
# libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from prophet.plot import plot_cross_validation_metric
from sklearn.model_selection import ParameterGrid

In [5]:
# Read the raw demand data from the current working directory.
# The Date column arrives as M/D/YYYY text (e.g. 1/1/2015); it is converted
# to real datetimes in a later cell.
df = pd.read_csv(filepath_or_buffer='nyc_data.csv')
df.head(n=5)

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.7
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077


In [7]:
# Switch to the column names Prophet works with: 'ds' for the timestamp and
# 'y' for the value being forecast.
df = df.rename(columns=dict(Date='ds', Demand='y'))
# Zero rows on purpose: only the resulting column names are displayed.
df.iloc[:0]

Unnamed: 0,ds,y,Easter,Thanksgiving,Christmas,Temperature,Marketing


In [8]:
# Convert the month/day/year strings in 'ds' into datetime values.
df['ds'] = pd.to_datetime(df['ds'], format="%m/%d/%Y")
df['ds']

0      2015-01-01
1      2015-01-02
2      2015-01-03
3      2015-01-04
4      2015-01-05
          ...    
2187   2020-12-27
2188   2020-12-28
2189   2020-12-29
2190   2020-12-30
2191   2020-12-31
Name: ds, Length: 2192, dtype: datetime64[ns]

# Holidays

In [10]:
# Easter: take the dates of the rows flagged in the Easter column and wrap
# them in a holiday table. The window columns widen the holiday from 5 days
# before each date to 2 days after it.
dates = pd.to_datetime(df.loc[df['Easter'] == 1, 'ds'])
easter = pd.DataFrame(dict(holiday='easter',
                           ds=dates,
                           lower_window=-5,
                           upper_window=2))

In [11]:
# Thanksgiving: same construction as for Easter, with a window running from
# 3 days before each flagged date to 6 days after it.
dates = pd.to_datetime(df.loc[df['Thanksgiving'] == 1, 'ds'])
thanksgiving = pd.DataFrame(dict(holiday='thanksgiving',
                                 ds=dates,
                                 lower_window=-3,
                                 upper_window=6))
thanksgiving

Unnamed: 0,holiday,ds,lower_window,upper_window
329,thanksgiving,2015-11-26,-3,6
693,thanksgiving,2016-11-24,-3,6
1057,thanksgiving,2017-11-23,-3,6
1421,thanksgiving,2018-11-22,-3,6
1792,thanksgiving,2019-11-28,-3,6
2156,thanksgiving,2020-11-26,-3,6


In [12]:
# Stack the two holiday tables into the single frame handed to Prophet.
# Row labels are carried over from df unchanged (the index is not reset).
holidays = pd.concat((easter, thanksgiving), axis=0)
holidays

Unnamed: 0,holiday,ds,lower_window,upper_window
94,easter,2015-04-05,-5,2
451,easter,2016-03-27,-5,2
836,easter,2017-04-16,-5,2
1186,easter,2018-04-01,-5,2
1571,easter,2019-04-21,-5,2
1928,easter,2020-04-12,-5,2
329,thanksgiving,2015-11-26,-3,6
693,thanksgiving,2016-11-24,-3,6
1057,thanksgiving,2017-11-23,-3,6
1421,thanksgiving,2018-11-22,-3,6


In [13]:
# The Easter and Thanksgiving indicator columns have already been turned into
# the `holidays` table, so remove them from the modelling frame.
# Gotcha: re-running this cell raises KeyError because the columns are gone.
df = df.drop(['Easter', 'Thanksgiving'], axis='columns')
# Zero rows on purpose: only the remaining column names are displayed.
df.iloc[:0]

Unnamed: 0,ds,y,Christmas,Temperature,Marketing


# Facebook Prophet Model

In [16]:
# Build the Prophet model: multiplicative seasonality, the custom holiday
# table, and explicit prior scales for changepoints, seasonality and holidays.
m = Prophet(
    changepoint_prior_scale=0.05,
    seasonality_prior_scale=10,
    holidays_prior_scale=10,
    seasonality_mode='multiplicative',
    holidays=holidays,
)
# Register the remaining df columns as extra regressors. add_regressor
# returns the model itself, which is what makes the chaining possible.
(m.add_regressor('Christmas')
  .add_regressor('Temperature')
  .add_regressor('Marketing'))
# Fit on the full history; the fitted model is the cell's displayed value.
m.fit(df)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpuuh887ub/8tvsekrc.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpuuh887ub/vgjefdn9.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.9/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=89606', 'data', 'file=/tmp/tmpuuh887ub/8tvsekrc.json', 'init=/tmp/tmpuuh887ub/vgjefdn9.json', 'output', 'file=/tmp/tmpuuh887ub/prophet_model7caxim1y/prophet_model-20230409182208.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
18:22:08 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
18:22:09 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x7efc086cb940>

In [17]:
# Row count of df minus 180. The displayed result is the number of days used
# as the `initial` training window in the cross-validation call.
len(df) - 180

2012

In [19]:
# cross-validation
# Evaluate the fitted model `m` on a series of historical cutoffs:
#   initial - length of the first training window
#   period  - spacing between successive cutoffs
#   horizon - how far beyond each cutoff the forecasts are scored
# `initial` is computed from the data (all rows except the last 180) rather
# than hard-coding the 2012 that was read off an earlier cell's output, so it
# cannot drift out of sync if the input file changes.
# Treating rows as days assumes df holds exactly one row per calendar day.
holdout_rows = 180
initial_days = df.shape[0] - holdout_rows
df_cv = cross_validation(m,
                         horizon = '31 days',
                         period = '16 days',
                         initial = f'{initial_days} days',
                         parallel = 'processes')
df_cv.head()

INFO:prophet:Making 10 forecasts with cutoffs between 2020-07-09 00:00:00 and 2020-11-30 00:00:00
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7efc0b8e0130>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpuuh887ub/utq36lec.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpuuh887ub/0uwog2ei.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpuuh887ub/lc3_ovh5.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.9/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=33338', 'data', 'file=/tmp/tmpuuh887ub/utq36lec.json', 'init=/tmp/tmpuuh887ub/lc3_ovh5.json', 'output', 'file=/tmp/tmpuuh887ub/prophet_modelns0mn0xf/prophet_model-20230409182736.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
18:27:36 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpuuh887ub/3thz3bnl.json
DEB

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper,y,cutoff
0,2020-07-10,607.801474,542.377246,677.930652,571.069531,2020-07-09
1,2020-07-11,722.734395,655.646571,788.475468,677.106981,2020-07-09
2,2020-07-12,637.595488,573.663781,697.818605,619.572774,2020-07-09
3,2020-07-13,769.92248,702.407878,832.384737,808.932621,2020-07-09
4,2020-07-14,637.229498,572.404439,704.788959,661.791644,2020-07-09
