In [99]:
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.offline as py
# Prophet is instantiated further down but was never imported in this cell.
# NOTE(review): the recorded fit output shows `prophet.forecaster.Prophet`;
# if the environment uses the renamed `prophet` package, switch both
# fbprophet imports accordingly.
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, plot_components_plotly

In [67]:
# Load the daily time series. Values are per-day means, which avoids the
# effect of a non-like-for-like perimeter.
# NOTE(review): absolute local path; the notebook only runs on this machine.
data = pd.read_csv(
    r"/Users/VictoireRey/Downloads/Final_project_extract/Data_mean_time_series.csv",
)
data.head()

Unnamed: 0.1,Unnamed: 0,ds,y
0,0,2018-01-01,3.964286
1,1,2018-01-02,12.011905
2,2,2018-01-03,11.059524
3,3,2018-01-04,11.627976
4,4,2018-01-05,16.059524


In [68]:
# Remove the leftover 'Unnamed: 0' column (presumably an index written out by
# an earlier to_csv; TODO confirm) so that only 'ds' and 'y' remain.
# Rebinding instead of inplace=True, together with errors='ignore', keeps the
# cell idempotent: running it a second time no longer raises KeyError.
data = data.drop(columns='Unnamed: 0', errors='ignore')
data

Unnamed: 0,ds,y
0,2018-01-01,3.964286
1,2018-01-02,12.011905
2,2018-01-03,11.059524
3,2018-01-04,11.627976
4,2018-01-05,16.059524
...,...,...
1551,2022-04-01,41.400834
1552,2022-04-02,46.744316
1553,2022-04-03,45.516969
1554,2022-04-04,70.457674


In [86]:
# Model the lockdown effect (Mar20-May20) through Prophet's holidays module:
# one row per calendar day of the period, every row labelled 'lockdown'.
holidays = pd.DataFrame(
    dict(
        holiday='lockdown',
        ds=pd.date_range(start='03/17/2020', end='05/03/2020', freq='D'),
    )
)
holidays

Unnamed: 0,holiday,ds
0,lockdown,2020-03-17
1,lockdown,2020-03-18
2,lockdown,2020-03-19
3,lockdown,2020-03-20
4,lockdown,2020-03-21
5,lockdown,2020-03-22
6,lockdown,2020-03-23
7,lockdown,2020-03-24
8,lockdown,2020-03-25
9,lockdown,2020-03-26


In [87]:
# Initialise the Prophet model with the lockdown window as a custom holiday,
# add the French country holidays on top, then fit it on the daily series.
# NOTE(review): the series has one row per day; confirm that
# daily_seasonality=True is intended for data without within-day observations.
m = Prophet(
    daily_seasonality=True,
    holidays=holidays,
)
m.add_country_holidays(country_name='FR')
m.fit(df=data)

<prophet.forecaster.Prophet at 0x7fda7f445b80>

Initial log joint probability = -30.9091
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       2723.11    0.00665692       342.613      0.3774           1      121   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       2726.39     0.0021024       190.994           1           1      245   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     229       2728.49   0.000223456       117.439   2.121e-06       0.001      326  LS failed, Hessian reset 
     286       2729.61   0.000348433       128.352   4.453e-06       0.001      438  LS failed, Hessian reset 
     299       2729.65   2.09465e-05       77.3051      0.2968           1      456   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     333       2729.65   1.32861e-07       64.4485           1           1      509   
Optimization terminat

In [102]:
# Show the holiday names the fitted model was trained with: the custom
# 'lockdown' entry plus the country holidays added for 'FR'.
m.train_holiday_names

0               lockdown
1           Jour de l'an
2        Fête du Travail
3         Armistice 1945
4         Fête nationale
5         Armistice 1918
6        Lundi de Pâques
7     Lundi de Pentecôte
8              Ascension
9             Assomption
10             Toussaint
11                  Noël
dtype: object

In [92]:
# Build the prediction frame: the training dates followed by 730 further
# daily periods, then peek at its last rows.
future = m.make_future_dataframe(periods=730, freq='D')
future.tail()

Unnamed: 0,ds
2281,2024-03-31
2282,2024-04-01
2283,2024-04-02
2284,2024-04-03
2285,2024-04-04


In [93]:
# Predict over every date in `future` (history plus the appended horizon).
forecast = m.predict(df=future)

In [94]:
# Interactive Plotly figure of the forecast, rendered inline in the notebook.
fig = plot_plotly(m=m, fcst=forecast)
py.iplot(fig)

In [95]:
# Interactive Plotly figure of the fitted model components, rendered inline.
fig2 = plot_components_plotly(m=m, fcst=forecast)
py.iplot(fig2)

In [96]:
# In-sample RMSE: compare the fitted values with the observed 'y' on the
# training dates only. `forecast` also holds the future rows appended by
# make_future_dataframe, which have no observation. Pairing the two frames on
# the 'ds' date (instead of on their positional row index) keeps every
# prediction matched to its own day even if `data` is unsorted or has gaps,
# and drops the future rows explicitly rather than through NaN-skipping.
# NOTE(review): this measures goodness of fit on the training data, not
# out-of-sample forecast accuracy.
evaluated = data.assign(ds=pd.to_datetime(data['ds'])).merge(
    forecast[['ds', 'yhat']], on='ds', how='inner'
)
se = np.square(evaluated['yhat'] - evaluated['y'])
mse = np.mean(se)
rmse = np.sqrt(mse)
rmse

11.823313926475919

In [100]:
# Export both figures as standalone HTML files. auto_open=True also opens
# each file in the browser once it has been written.
pio.write_html(fig=fig, file="Forecast.html", auto_open=True)
pio.write_html(fig=fig2, file="Components.html", auto_open=True)