# Predict restaurant visitors using Facebook Prophet

## 1. Import dataset

> **We download our dataset from Kaggle's competition: https://www.kaggle.com/c/recruit-restaurant-visitor-forecasting**

In [None]:
import pandas as pd

# Load the zipped visit log and reshape it to one row per date and one column
# per restaurant, holding that restaurant's visitor count.
y0 = pd.read_csv(
    'https://drive.google.com/uc?id=17bLj5NmMBrUEUHrdFPaGGIDjPvIgbnZK',
    compression='zip',
)
y1 = y0.pivot(index='visit_date', columns='air_store_id', values='visitors')
# A restaurant with no record on a given date contributes zero visitors.
y2 = y1.fillna(0)
# Collapse across restaurants: total visitors per date, as a one-column frame.
y = y2.sum(axis=1).to_frame()


In [None]:
# Preview the first rows of the per-date visitor totals.
y.head()

## 2. Transform dataset to the required format

In [None]:
# Prophet expects a frame with two columns: `ds` (the date) and `y` (the value
# to forecast). Move the date index into a column and rename accordingly.
y = y.reset_index(drop=False)
y.columns = ['ds', 'y']
# dropna() returns a new frame rather than modifying in place, so the result
# must be assigned back. Re-index afterwards so row labels stay contiguous for
# the positional train/test split used later.
y = y.dropna().reset_index(drop=True)
y.head()

In [None]:
# Summarise the reshaped frame (columns, dtypes, non-null counts).
y.info()

## 3. Creating a train-test split

In [None]:
# Chronological hold-out: the first 450 rows train the models, the remaining
# rows are kept back for evaluation.
train, test = y.iloc[:450], y.iloc[450:]


## 4. Train Prophet model

In [None]:
# The package was renamed from `fbprophet` to `prophet` in its 1.0 release.
# Try the current name first and fall back to the legacy one, instead of
# guessing from the runtime (which also required IPython's `get_ipython`).
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

import os
# Define a context manager to suppress stdout and stderr.
class suppress_stdout_stderr(object):
    '''
    Context manager for a "deep suppression" of stdout and stderr.

    While the ``with`` block is active, the process-level file descriptors
    1 (stdout) and 2 (stderr) point at the null device, so output is discarded
    even when it is written by compiled C/Fortran code rather than by Python's
    ``print``. On exit the original descriptors are restored.

    Exceptions raised inside the block are not suppressed: ``__exit__``
    returns ``None``, so they propagate to the caller as usual.

    The null and backup descriptors are acquired in ``__enter__`` and released
    in ``__exit__``, so an instance that is never entered holds no OS
    resources and a single instance can be used for several ``with`` blocks.
    '''
    def __init__(self):
        # Filled in by __enter__; kept as attributes for backward compatibility.
        self.null_fds = []
        self.save_fds = []

    @staticmethod
    def _flush_python_streams():
        # Push out anything Python has buffered so it lands on the descriptor
        # that is current *now*, not on whichever one is installed later.
        import sys
        for stream in (sys.stdout, sys.stderr):
            if stream is None:
                continue
            try:
                stream.flush()
            except (ValueError, OSError):
                # Best effort only: a closed or broken stream must not
                # prevent the redirection or the restore from happening.
                pass

    def __enter__(self):
        self._flush_python_streams()
        # Open a pair of null files
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for _ in range(2)]
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = [os.dup(1), os.dup(2)]
        # Point stdout and stderr at the null files.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)
        return self

    def __exit__(self, *_):
        try:
            # Discard output buffered during the block while still redirected.
            self._flush_python_streams()
            # Re-assign the real stdout/stderr back to (1) and (2)
            os.dup2(self.save_fds[0], 1)
            os.dup2(self.save_fds[1], 2)
        finally:
            # Close all helper descriptors even if the restore failed.
            for fd in self.null_fds + self.save_fds:
                os.close(fd)
            self.null_fds = []
            self.save_fds = []

# Fit the default Prophet model on the training rows; the optimiser's console
# output is silenced for the duration of the fit.
with suppress_stdout_stderr():
    m = Prophet().fit(train)


## 5. Run prediction using Prophet model above

In [None]:
# Predict over every date in the dataset (training and hold-out rows alike) by
# handing Prophet the `ds` column only.
# Alternative: future = m.make_future_dataframe(periods=len(test))
future = y.drop(columns='y')
forecast = m.predict(future)


## 6. Calculate R2 score

In [None]:
from sklearn.metrics import r2_score

# Compare the held-out actuals with the forecast rows from label 450 onward.
print(r2_score(y_true=test['y'].tolist(),
               y_pred=forecast.loc[450:, 'yhat'].tolist()))


## 7. Plotting the fit of the model

In [None]:
import matplotlib.pyplot as plt

# Overlay held-out actuals (red) and the model's predictions (cyan).
plt.rcParams['figure.figsize'] = (20, 10)
plt.plot(test['y'].tolist(), color='red')
plt.plot(forecast.loc[450:, 'yhat'].tolist(), color='cyan')
plt.show()


## 8. Creating a Prophet forecast plot

In [None]:
# The package was renamed from `fbprophet` to `prophet` in its 1.0 release.
# Try the current name first and fall back to the legacy one, instead of
# guessing from the runtime.
try:
    from prophet.plot import add_changepoints_to_plot
except ImportError:
    from fbprophet.plot import add_changepoints_to_plot
# Interactive alternative (not used here): plot_plotly / plot_components_plotly
# from the same `plot` module.

# Draw the forecast of the base model and mark its detected trend changepoints.
fig1 = m.plot(forecast, figsize=(20, 12))
a = add_changepoints_to_plot(fig1.gca(), m, forecast)

#plt.show()


## 9. Creating a Prophet decomposition plot

In [None]:
# Plot the component breakdown of the base model `m` for its forecast.
fig2 = m.plot_components(forecast, figsize=(20, 12))
plt.show()


## 10. Add monthly seasonality to the model

In [None]:
# Same default model, plus a custom "monthly" seasonality with a 30.5-day
# period and Fourier order 5.
m2 = Prophet()
m2.add_seasonality(name='monthly', period=30.5, fourier_order=5)

with suppress_stdout_stderr():
    m2.fit(train)

# Build this cell's own prediction frame (dates only) and actually use it, so
# the cell no longer depends on `future` left over from an earlier cell.
future2 = y[['ds']].copy()
forecast2 = m2.predict(future2)
# Score on the hold-out rows only (label 450 onward).
print(r2_score(list(test['y']), list(forecast2.loc[450:, 'yhat'])))

fig2 = m2.plot_components(forecast2, figsize=(20, 12))
plt.show()


## 11. Prepare holidays data

In [None]:
# Build the holiday table Prophet expects: one row per holiday date, with the
# date in `ds` and a label in `holiday`. Only rows flagged as holidays
# (`holiday_flg == 1`) are kept, and every one gets the same label.
holidays = (
    pd.read_csv('https://drive.google.com/uc?id=1jw5V4noLicmWk-xu8qFzFCPxV5LaQRVB', compression='zip')
    .loc[lambda frame: frame['holiday_flg'] == 1, ['calendar_date']]
    .rename(columns={'calendar_date': 'ds'})
    .assign(holiday='holiday')
)

## 12. Add holidays to the model

In [None]:
# Default model extended with the holiday table prepared above.
m3 = Prophet(holidays=holidays)
with suppress_stdout_stderr():
    m3.fit(train)

# Build this cell's own prediction frame (dates only) and actually use it, so
# the cell no longer depends on `future` left over from an earlier cell.
future3 = y[['ds']].copy()
forecast3 = m3.predict(future3)

# Score on the hold-out rows only (label 450 onward).
print(r2_score(list(test['y']), list(forecast3.loc[450:, 'yhat'])))
# Interactive alternative: plot_components_plotly(m3, forecast3)
fig2 = m3.plot_components(forecast3, figsize=(20, 12))
plt.show()


## 13. Prepare reservations data

In [None]:
# Total reserved visitors per visit date, derived from the reservation log.
X_reservations = pd.read_csv(
    'https://drive.google.com/uc?id=1XG_wEiSnXhoNi-_NBuHpxJSjRXaZeH4n',
    compression='zip',
)
# Reduce the visit timestamp to its calendar date so it can be grouped by day.
X_reservations['visit_date'] = pd.to_datetime(X_reservations['visit_datetime']).dt.date
X_reservations = X_reservations.groupby('visit_date', as_index=False)['reserve_visitors'].sum()

# Training frame with the reservation total attached as an extra column.
# `ds` is converted to plain dates so it matches `visit_date` in the join;
# dates without any reservation get 0.
train4 = train.assign(ds=pd.to_datetime(train['ds']).dt.date)
train4 = train4.merge(X_reservations, how='left', left_on='ds', right_on='visit_date')
train4 = train4[['ds', 'y', 'reserve_visitors']].fillna(0)

## 14. Add reservations to the model

In [None]:
# Holiday-aware model with the daily reservation total as an extra regressor.
m4 = Prophet(holidays=holidays)
m4.add_regressor('reserve_visitors')
with suppress_stdout_stderr():
    m4.fit(train4)

# Extend the training dates by the length of the hold-out set, then attach
# the regressor values for every date (0 where no reservation exists).
future4 = m4.make_future_dataframe(periods=len(test))
future4['ds'] = pd.to_datetime(future4['ds']).dt.date
future4 = future4.merge(X_reservations, how='left', left_on='ds', right_on='visit_date')
future4 = future4[['ds', 'reserve_visitors']].fillna(0)

forecast4 = m4.predict(future4)
# Score on the hold-out rows only (label 450 onward).
print(r2_score(y_true=test['y'].tolist(),
               y_pred=forecast4.loc[450:, 'yhat'].tolist()))

# Held-out actuals (red) against this model's predictions (cyan).
plt.plot(test['y'].tolist(), color='red')
plt.plot(forecast4.loc[450:, 'yhat'].tolist(), color='cyan')

fig2 = m4.plot_components(forecast4, figsize=(20, 12))
plt.show()

In [None]:
# Plot m4's own forecast. The figure was previously drawn from `forecast`,
# which belongs to the base model `m`, while the changepoints came from
# `forecast4`.
fig4 = m4.plot(forecast4, figsize=(20, 12))
a = add_changepoints_to_plot(fig4.gca(), m4, forecast4)


## 15. Comparison of R2 scores 


<div>
<img src="https://drive.google.com/uc?id=19YMWQ_8pqOo3QHtf66gGWbtEQ3XUeO0u" width="400" height="200"/>
</div>

## 16. Grid Searching the Prophet

> **List of hyper-parameters that may or may not be tuned by the user:
https://facebook.github.io/prophet/docs/diagnostics.html**

In [None]:
def model_test(holidays, weekly_seasonality,yearly_seasonality, add_monthly, add_reserve, changepoint_prior_scale, holidays_prior_scale, month_fourier):
    """Fit one Prophet configuration and return its R2 score on the hold-out set.

    Reads the notebook-level frames ``train4`` (training data), ``test``
    (hold-out data) and ``X_reservations`` (reservation totals per date).

    Args:
        holidays: Holiday frame passed to ``Prophet(holidays=...)``, or None.
        weekly_seasonality: Value passed to ``Prophet(weekly_seasonality=...)``.
        yearly_seasonality: Value passed to ``Prophet(yearly_seasonality=...)``.
        add_monthly: If true, add a 30.5-day "monthly" seasonality.
        add_reserve: If true, add ``reserve_visitors`` as an extra regressor.
        changepoint_prior_scale: Passed through to ``Prophet``.
        holidays_prior_scale: Passed through to ``Prophet``.
        month_fourier: Fourier order of the monthly seasonality; only read
            when ``add_monthly`` is true.

    Returns:
        The R2 score of the predictions against ``test['y']``.
    """
    m4 = Prophet(
          yearly_seasonality=yearly_seasonality,
          weekly_seasonality=weekly_seasonality,
          holidays=holidays,
          changepoint_prior_scale=changepoint_prior_scale,
          holidays_prior_scale=holidays_prior_scale)

    if add_monthly:
        m4.add_seasonality(
        name='monthly',
        period=30.5,
        fourier_order=month_fourier)

    if add_reserve:
        m4.add_regressor('reserve_visitors')

    with suppress_stdout_stderr():
        m4.fit(train4)

    # Training dates followed by one extra date per hold-out row.
    future4 = m4.make_future_dataframe(periods=len(test))

    # Plain dates, to match `visit_date` in the reservations frame.
    future4['ds'] = pd.to_datetime(future4['ds']).dt.date

    if add_reserve:
        future4 = future4.merge(
            X_reservations,
            left_on = 'ds',
            right_on = 'visit_date',
            how = 'left')
        future4 = future4[['ds', 'reserve_visitors']]
        # Dates without any reservation contribute 0 reserved visitors.
        future4 = future4.fillna(0)

    forecast4 = m4.predict(future4)

    # The hold-out predictions are the rows after the training rows. Derive
    # that boundary from the training frame instead of hard-coding row 450.
    holdout_pred = forecast4['yhat'].iloc[len(train4):]

    return r2_score(
              list(test['y']),
              list(holdout_pred))
# Setting the grid
import itertools  # only this cell needs it

holidays_opt = [holidays, None]
weekly_seas = [5, 10, 30, 50]
yearly_seas = [5, 10, 30, 50]
add_monthly = [True, False]
add_reserve = [True, False]
changepoint_prior_scale = [0.1, 0.3, 0.5]
holidays_prior_scale = [0.1, 0.3, 0.5]
month_fourier = [5, 10, 30, 50]

# Looping through the grid.
# itertools.product walks the combinations in the same order as nested loops
# (rightmost list varies fastest). The loop variables use descriptive names so
# they do not overwrite the fitted base model stored in `m`.
grid_results = []
for (use_holidays, weekly, yearly, monthly_on, reserve_on,
     cp_scale, hol_scale, monthly_order) in itertools.product(
        holidays_opt, weekly_seas, yearly_seas, add_monthly, add_reserve,
        changepoint_prior_scale, holidays_prior_scale, month_fourier):
    # The monthly Fourier order is only read when the monthly seasonality is
    # added; without it every order yields the same model, so fit it once.
    if not monthly_on and monthly_order != month_fourier[0]:
        continue
    score = model_test(use_holidays, weekly, yearly, monthly_on, reserve_on,
                       cp_scale, hol_scale, monthly_order)
    print([weekly, yearly, monthly_on, reserve_on,
           cp_scale, hol_scale, monthly_order, score])
    grid_results.append([use_holidays, weekly, yearly, monthly_on, reserve_on,
                         cp_scale, hol_scale, monthly_order, score])

# adding it all to a dataframe and extract the best model
# (column 8 holds the R2 score; the best configuration ends up in row 0)
benchmark = pd.DataFrame(grid_results)
benchmark = benchmark.sort_values(8, ascending=False)

(best_holidays, best_weekly, best_yearly, best_monthly_on, best_reserve_on,
 best_cp_scale, best_hol_scale, best_monthly_order, best_r2) = benchmark.iloc[0].tolist()

# Fit the Prophet with those best hyperparameters
m4 = Prophet(
          yearly_seasonality=best_yearly,
          weekly_seasonality=best_weekly,
          holidays=best_holidays,
          changepoint_prior_scale=best_cp_scale,
          holidays_prior_scale=best_hol_scale)

if best_monthly_on:
    m4.add_seasonality(
        name='monthly',
        period=30.5,
        fourier_order=best_monthly_order)

if best_reserve_on:
    m4.add_regressor('reserve_visitors')

with suppress_stdout_stderr():
    m4.fit(train4)

# Training dates followed by one extra date per hold-out row.
future4 = m4.make_future_dataframe(periods=len(test))

# Plain dates, to match `visit_date` in the reservations frame.
future4['ds'] = pd.to_datetime(future4['ds']).dt.date

if best_reserve_on:
    future4 = future4.merge(
                X_reservations,
                left_on = 'ds',
                right_on = 'visit_date',
                how = 'left')
    future4 = future4[['ds', 'reserve_visitors']]
    # Dates without any reservation contribute 0 reserved visitors.
    future4 = future4.fillna(0)

forecast4 = m4.predict(future4)
