In [38]:
import pandas as pd
from prophet import Prophet
from prophet.make_holidays import make_holidays_df

import logging
# Quieten the 'cmdstanpy' logger: only CRITICAL records pass, nothing is
# propagated to parent loggers, and a no-op handler is attached.
logger = logging.getLogger('cmdstanpy')
logger.setLevel(logging.CRITICAL)
logger.propagate = False
logger.addHandler(logging.NullHandler())


# Load the data from the shared CSV and parse the `ds` column as datetimes.
DATA_URL = 'https://www.dropbox.com/scl/fi/ksf0nbmmiort5khbrgr61/allData.csv?rlkey=75e735fjk4ifttjt553ukxt3k&dl=1'
df = pd.read_csv(DATA_URL)
df['ds'] = pd.to_datetime(df['ds'])
df.tail()

Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,AMBVERT1,AMBVERTTBS,QTrack_TBS,Garage_TBS,RAZ_CONS_MORE2H,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM
34711,2024-12-19 19:00:00,4,109,5,94,9,203,2,32,0,...,18,15,1.0,0.0,5,0,0,2,7,5
34712,2024-12-19 20:00:00,6,115,4,98,10,213,1,33,0,...,9,8,0.0,0.0,7,1,0,5,7,5
34713,2024-12-19 21:00:00,6,121,3,101,9,222,3,36,1,...,8,8,1.0,0.0,7,2,1,5,7,6
34714,2024-12-19 22:00:00,9,130,2,103,11,233,2,38,0,...,8,6,0.0,0.0,9,2,2,2,7,6
34715,2024-12-19 23:00:00,2,132,0,103,2,235,1,39,0,...,1,1,0.0,0.0,9,1,1,3,6,6


In [12]:
# Build `dailyVisits` with the two columns Prophet expects:
#   ds - a calendar date
#   y  - the 'Inflow_Cum_Total' value taken from the *next* row of `df`
# Steps: keep the two source columns, push every timestamp forward by one day,
# pull y up from the following row, and drop rows that end up with a missing value
# (the last row has no successor).
dailyVisits = (
    df[['ds', 'Inflow_Cum_Total']]
    .rename(columns={'Inflow_Cum_Total': 'y'})
    .assign(
        ds=lambda frame: frame['ds'] + pd.DateOffset(days=1),
        y=lambda frame: frame['y'].shift(-1),
    )
    .dropna()
)

# Keep one row per day: the rows whose (shifted) timestamp falls on hour 23,
# then reduce the timestamp to a plain date.
# NOTE(review): because of the +1 day offset, each kept row is labelled with the day
# after the 23:00 timestamp it came from - confirm this is the intended date for the
# daily total.
is_last_hour = dailyVisits['ds'].dt.hour == 23
dailyVisits = dailyVisits.loc[is_last_hour]
dailyVisits = dailyVisits.assign(ds=dailyVisits['ds'].dt.date)

dailyVisits.tail()

Unnamed: 0,ds,y
34595,2024-12-15,216.0
34619,2024-12-16,194.0
34643,2024-12-17,291.0
34667,2024-12-18,260.0
34691,2024-12-19,253.0


In [23]:
# Holiday table for Canada / province QC covering the ten years 2019 through 2028.
holiday_years = list(range(2019, 2029))
qc_holidays = make_holidays_df(
    year_list=holiday_years,
    country='CA',
    province='QC',
)
qc_holidays

Unnamed: 0,ds,holiday
0,2019-01-01,New Year's Day
1,2019-04-19,Good Friday
2,2019-07-01,Canada Day
3,2019-09-02,Labor Day
4,2019-12-25,Christmas Day
...,...,...
81,2028-09-04,Labor Day
82,2028-12-25,Christmas Day
83,2028-05-22,National Patriots' Day
84,2028-06-24,St. John the Baptist Day


In [22]:
# Read the holiday list from the local CSV and parse its `ds` column as datetimes.
ramq_holidays = pd.read_csv('ramq_holidays.csv').assign(
    ds=lambda holidays: pd.to_datetime(holidays['ds'])
)
ramq_holidays

Unnamed: 0,ds,holiday
0,2024-05-20,Journée nationale des Patriotes
1,2024-06-24,Fête nationale du Québec
2,2024-07-01,Fête du Canada
3,2024-09-02,Fête du Travail
4,2024-10-14,Action de grâces
...,...,...
85,2019-12-31,Veille du jour de l'An
86,2020-01-01,Jour de l'An
87,2019-10-09,Lendemain du jour de l'An
88,2020-04-10,Vendredi saint


In [21]:
# Preview the two holiday tables stacked row-wise; each keeps its own index labels.
pd.concat([qc_holidays, ramq_holidays], axis=0)

Unnamed: 0,ds,holiday
0,2019-01-01,New Year's Day
1,2019-04-19,Good Friday
2,2019-07-01,Canada Day
3,2019-09-02,Labor Day
4,2019-12-25,Christmas Day
...,...,...
85,2019-12-31,Veille du jour de l'An
86,2020-01-01,Jour de l'An
87,2019-10-09,Lendemain du jour de l'An
88,2020-04-10,Vendredi saint


In [24]:
# Use Prophet to forecast the number of visits for the next 30 days.
# Custom holidays are the rows of qc_holidays followed by the rows of ramq_holidays.
# `Prophet` is the class imported at the top of the notebook; the bare module name
# `prophet` is never bound by `from prophet import Prophet`, so `prophet.Prophet`
# fails with NameError after a kernel restart.
m = Prophet(holidays=pd.concat([qc_holidays, ramq_holidays]))
# A model holds a single built-in country holiday set: a second
# add_country_holidays call replaces the first ("Changing country holidays from
# 'CA' to 'IL'."), so only the call that actually took effect is kept.
m.add_country_holidays(country_name='IL')

m.fit(dailyVisits)
# History plus 30 additional future periods.
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)
# Show the point forecast and its uncertainty bounds for the last few rows.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()


Changing country holidays from 'CA' to 'IL'.


21:49:03 - cmdstanpy - INFO - Chain [1] start processing
21:49:03 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
1473,2025-01-14,270.543126,225.642494,318.427067
1474,2025-01-15,273.030435,226.027107,320.155695
1475,2025-01-16,265.504483,219.830371,313.312045
1476,2025-01-17,259.208531,214.107795,310.113252
1477,2025-01-18,261.586749,214.714921,308.487256


In [27]:
#Calculate the RMSE of the forecast
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from prophet.plot import plot_cross_validation_metric

# Cross-validate the current model `m`: 1000-day initial training window,
# cutoffs spaced 180 days apart, one-day forecast horizon.
df_cv = cross_validation(
    m,
    horizon='1 day',
    period='180 days',
    initial='1000 days',
)
# Summarise the cross-validation errors per horizon.
df_p = performance_metrics(df_cv)
df_p

  0%|          | 0/3 [00:00<?, ?it/s]

21:51:18 - cmdstanpy - INFO - Chain [1] start processing
21:51:18 - cmdstanpy - INFO - Chain [1] done processing
21:51:19 - cmdstanpy - INFO - Chain [1] start processing
21:51:19 - cmdstanpy - INFO - Chain [1] done processing
21:51:19 - cmdstanpy - INFO - Chain [1] start processing
21:51:19 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,1 days,3461.6352,58.835663,51.18274,0.251877,0.131765,0.305856,0.666667


In [36]:
# Baseline: default Prophet model with no holiday information.
# Uses the imported `Prophet` class; the module name `prophet` is not bound by
# `from prophet import Prophet`, so `prophet.Prophet()` breaks on a fresh kernel.
m = Prophet()
m.fit(dailyVisits)
# One-day-ahead cross-validation: 1000-day initial window, cutoffs every 8 days.
df_cv = cross_validation(m, initial='1000 days', period='8 days', horizon='1 day')
df_p = performance_metrics(df_cv)
df_p

23:14:56 - cmdstanpy - INFO - Chain [1] start processing
23:14:56 - cmdstanpy - INFO - Chain [1] done processing


  0%|          | 0/56 [00:00<?, ?it/s]

23:14:56 - cmdstanpy - INFO - Chain [1] start processing
23:14:56 - cmdstanpy - INFO - Chain [1] done processing
23:14:56 - cmdstanpy - INFO - Chain [1] start processing
23:14:56 - cmdstanpy - INFO - Chain [1] done processing
23:14:56 - cmdstanpy - INFO - Chain [1] start processing
23:14:56 - cmdstanpy - INFO - Chain [1] done processing
23:14:57 - cmdstanpy - INFO - Chain [1] start processing
23:14:57 - cmdstanpy - INFO - Chain [1] done processing
23:14:57 - cmdstanpy - INFO - Chain [1] start processing
23:14:57 - cmdstanpy - INFO - Chain [1] done processing
23:14:57 - cmdstanpy - INFO - Chain [1] start processing
23:14:57 - cmdstanpy - INFO - Chain [1] done processing
23:14:57 - cmdstanpy - INFO - Chain [1] start processing
23:14:57 - cmdstanpy - INFO - Chain [1] done processing
23:14:57 - cmdstanpy - INFO - Chain [1] start processing
23:14:58 - cmdstanpy - INFO - Chain [1] done processing
23:14:58 - cmdstanpy - INFO - Chain [1] start processing
23:14:58 - cmdstanpy - INFO - Chain [1]

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,1 days,2913.46143,53.97649,36.811253,2.075661,0.106227,0.216036,0.767857


In [37]:
# Variant: default Prophet model plus the built-in 'IL' country holidays.
# Uses the imported `Prophet` class; the module name `prophet` is not bound by
# `from prophet import Prophet`, so `prophet.Prophet()` breaks on a fresh kernel.
m = Prophet()
m.add_country_holidays(country_name='IL')
m.fit(dailyVisits)
# One-day-ahead cross-validation: 1000-day initial window, cutoffs every 8 days.
df_cv = cross_validation(m, initial='1000 days', period='8 days', horizon='1 day')
df_p = performance_metrics(df_cv)
df_p

23:15:20 - cmdstanpy - INFO - Chain [1] start processing
23:15:20 - cmdstanpy - INFO - Chain [1] done processing


  0%|          | 0/56 [00:00<?, ?it/s]

23:15:21 - cmdstanpy - INFO - Chain [1] start processing
23:15:21 - cmdstanpy - INFO - Chain [1] done processing
23:15:21 - cmdstanpy - INFO - Chain [1] start processing
23:15:21 - cmdstanpy - INFO - Chain [1] done processing
23:15:21 - cmdstanpy - INFO - Chain [1] start processing
23:15:21 - cmdstanpy - INFO - Chain [1] done processing
23:15:21 - cmdstanpy - INFO - Chain [1] start processing
23:15:21 - cmdstanpy - INFO - Chain [1] done processing
23:15:21 - cmdstanpy - INFO - Chain [1] start processing
23:15:22 - cmdstanpy - INFO - Chain [1] done processing
23:15:22 - cmdstanpy - INFO - Chain [1] start processing
23:15:22 - cmdstanpy - INFO - Chain [1] done processing
23:15:22 - cmdstanpy - INFO - Chain [1] start processing
23:15:22 - cmdstanpy - INFO - Chain [1] done processing
23:15:22 - cmdstanpy - INFO - Chain [1] start processing
23:15:22 - cmdstanpy - INFO - Chain [1] done processing
23:15:23 - cmdstanpy - INFO - Chain [1] start processing
23:15:23 - cmdstanpy - INFO - Chain [1]

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,1 days,2829.957281,53.197343,35.754629,2.055438,0.105867,0.211877,0.803571


In [47]:
# Variant: default Prophet model plus the built-in 'CA' country holidays.
# Uses the imported `Prophet` class; the module name `prophet` is not bound by
# `from prophet import Prophet`, so `prophet.Prophet()` breaks on a fresh kernel.
m = Prophet()
m.add_country_holidays(country_name='CA')
m.fit(dailyVisits)
# One-day-ahead cross-validation: 1083-day initial window, a new cutoff every day.
df_cv = cross_validation(m, initial='1083 days', period='1 days', horizon='1 day')
df_p = performance_metrics(df_cv)
df_p

  0%|          | 0/364 [00:00<?, ?it/s]

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,1 days,1275.935974,35.720246,27.883966,0.107928,0.080801,0.113551,0.826923


In [44]:
# Variant: Prophet model whose holidays are the rows of `ramq_holidays`.
# Uses the imported `Prophet` class; the module name `prophet` is not bound by
# `from prophet import Prophet`, so `prophet.Prophet(...)` breaks on a fresh kernel.
m = Prophet(holidays=ramq_holidays)
m.fit(dailyVisits)
# One-day-ahead cross-validation: 1083-day initial window, a new cutoff every day.
df_cv = cross_validation(m, initial='1083 days', period='1 days', horizon='1 day')
df_p = performance_metrics(df_cv)
df_p

  0%|          | 0/364 [00:00<?, ?it/s]

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,1 days,1264.625418,35.561572,27.714735,0.106951,0.081812,0.112255,0.821429


In [45]:
# Baseline: default Prophet model with no holiday information.
# Uses the imported `Prophet` class; the module name `prophet` is not bound by
# `from prophet import Prophet`, so `prophet.Prophet()` breaks on a fresh kernel.
m = Prophet()
m.fit(dailyVisits)
# One-day-ahead cross-validation: 1083-day initial window, a new cutoff every day.
df_cv = cross_validation(m, initial='1083 days', period='1 days', horizon='1 day')
df_p = performance_metrics(df_cv)
df_p

  0%|          | 0/364 [00:00<?, ?it/s]

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,1 days,1264.863551,35.56492,27.741628,0.107021,0.081526,0.112245,0.835165


In [46]:
# Variant: Prophet model whose holidays are the rows of `qc_holidays`.
# Uses the imported `Prophet` class; the module name `prophet` is not bound by
# `from prophet import Prophet`, so `prophet.Prophet(...)` breaks on a fresh kernel.
m = Prophet(holidays=qc_holidays)
m.fit(dailyVisits)
# One-day-ahead cross-validation: 1083-day initial window, a new cutoff every day.
df_cv = cross_validation(m, initial='1083 days', period='1 days', horizon='1 day')
df_p = performance_metrics(df_cv)
df_p

  0%|          | 0/364 [00:00<?, ?it/s]

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,smape,coverage
0,1 days,1276.447748,35.727409,27.848062,0.107786,0.078402,0.113474,0.824176


In [None]:
# Variant: default Prophet model plus the built-in 'IL' country holidays.
# Uses the imported `Prophet` class; the module name `prophet` is not bound by
# `from prophet import Prophet`, so `prophet.Prophet()` breaks on a fresh kernel.
m = Prophet()
m.add_country_holidays(country_name='IL')
m.fit(dailyVisits)
# One-day-ahead cross-validation: 1083-day initial window, a new cutoff every day.
df_cv = cross_validation(m, initial='1083 days', period='1 days', horizon='1 day')
df_p = performance_metrics(df_cv)
df_p

  0%|          | 0/364 [00:00<?, ?it/s]