# Forecasting - Facebook Prophet

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [21]:
# The library is published as `prophet` in current releases and as `fbprophet`
# in older ones (the legacy name was not importable when this cell last ran).
# Try the current name first and fall back to the legacy one.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

ModuleNotFoundError: No module named 'fbprophet'

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use the 'fivethirtyeight' style sheet for the figures that follow.
plt.style.use('fivethirtyeight')

# Trivial Forecasting (rolling mean)

In [None]:
# Toy input: a single column 'X' holding the integers 0..9.
df = pd.DataFrame({'X': np.arange(0, 10)})
# 3-row rolling average of 'X'; the first two rows have no complete window
# and therefore stay NaN.
df['y'] = df['X'].rolling(window=3).mean()

In [None]:
# Display the toy frame: 'X' next to its 3-row rolling mean 'y'.
df

# Small data set

In [None]:
# Read the semicolon-separated flat table, then keep only the date and the
# 'Germany' series under the 'ds' / 'y' column names used by the model cells.
df_all = pd.read_csv('../data/processed/COVID_small_flat_table.csv', sep=';')
df = (
    df_all
    .loc[:, ['date', 'Germany']]
    .rename(columns={'date': 'ds', 'Germany': 'y'})
)


In [None]:
# Plot 'y' against 'ds' on a logarithmic y-axis.
ax = df.set_index('ds').plot(logy=True, figsize=(12, 8))
ax.set(xlabel='Date', ylabel='Daily number of confirmed cases')

plt.show()

In [None]:
# Prophet model with logistic growth; the frames passed to fit/predict in the
# following cells carry a matching 'cap' column.
# Alternative kept for reference: default growth with a 95% uncertainty interval.
#my_model = Prophet(interval_width = 0.95)
my_model = Prophet(growth = 'logistic')

In [None]:
# Add a constant 'cap' column (10,000,000) to the training frame; the same
# value is set on the future frame used for prediction.
df['cap'] = 10000000
# Fit the model on the frame holding 'ds', 'y' and 'cap'.
my_model.fit(df)


In [None]:
# Build the frame of dates to predict on, requesting 7 periods at daily frequency.
future_dates = my_model.make_future_dataframe(periods = 7, freq = 'D')
# Same constant 'cap' as on the training frame.
future_dates['cap'] = 10000000
# Show the last rows of the frame.
future_dates.tail()

In [None]:
# Run the fitted model on the future frame; the result is plotted and
# inspected in the following cells.
forecast = my_model.predict(future_dates)

In [None]:
# Plot the forecast with uncertainty enabled; the trailing ';' suppresses
# the repr of the call's return value in the cell output.
my_model.plot(forecast,
             uncertainty = True);  

In [None]:
import plotly.offline as py

# The library is published as `prophet` in current releases and as `fbprophet`
# in older ones; try the current name first and fall back to the legacy one.
try:
    from prophet.plot import plot_plotly
except ImportError:
    from fbprophet.plot import plot_plotly

# Interactive plotly figure of the fitted model together with its forecast.
fig = plot_plotly(my_model, forecast)

fig.update_layout(
    width = 1024,
    height = 900,
    xaxis_title = "Time",
    yaxis_title = "confirmed infected people",
)
# Logarithmic y-axis with a fixed display range.
fig.update_yaxes(type = "log", range = [1.1, 5.5])
py.iplot(fig)

In [None]:
# First rows of the forecast, ordered by date.
forecast.sort_values(by = 'ds').head()

In [None]:
# Plot the components of the forecast; ';' hides the repr of the return value.
my_model.plot_components(forecast);

In [None]:
# 'trend' column of the forecast over 'ds', on a logarithmic y-axis.
forecast[['ds', 'trend']].set_index('ds').plot(figsize=(12,8), logy = True)

In [None]:
# The library is published as `prophet` in current releases and as `fbprophet`
# in older ones; try the current name first and fall back to the legacy one.
try:
    from prophet.diagnostics import cross_validation
except ImportError:
    from fbprophet.diagnostics import cross_validation

# Cross-validate the fitted model with a 30-day initial window, a 1-day
# period and a 7-day forecast horizon.
df_cv = cross_validation(my_model,
                        initial = '30 days',
                        period = '1 days',
                        horizon = '7 days')
df_cv.head()

In [None]:
# First 14 cross-validation rows, ordered by cutoff and then by date.
df_cv.sort_values(by = ['cutoff','ds'])[0:14]

In [None]:
# 'ds' / 'y' columns of the training frame from positional row 30 onward.
df[['ds','y']][30:]

In [None]:
# The library is published as `prophet` in current releases and as `fbprophet`
# in older ones; try the current name first and fall back to the legacy one.
try:
    from prophet.diagnostics import performance_metrics
except ImportError:
    from fbprophet.diagnostics import performance_metrics

# Compute the performance-metrics table from the cross-validation results.
df_p = performance_metrics(df_cv)

In [None]:
# Display the performance-metrics table computed from the cross-validation results.
df_p

In [None]:
# The library is published as `prophet` in current releases and as `fbprophet`
# in older ones; try the current name first and fall back to the legacy one.
try:
    from prophet.plot import plot_cross_validation_metric
except ImportError:
    from fbprophet.plot import plot_cross_validation_metric

# Plot the 'mape' metric of the cross-validation results.
fig = plot_cross_validation_metric(df_cv, metric = 'mape')

# Diagonal plot

In [None]:
# Forecast distance to pick out of the cross-validation results.
horizon = '7 days'
# Distance between each forecasted date and the cutoff it was forecast from.
df_cv['horizon'] = df_cv.ds-df_cv.cutoff

# Dates, predictions and observations of the rows forecast exactly `horizon` ahead.
date_vec, y_hat, y = (
    df_cv.loc[df_cv['horizon'] == horizon, column]
    for column in ('ds', 'yhat', 'y')
)

In [None]:
# Cross-validation rows whose 'horizon' equals the selected horizon.
df_cv_7 = df_cv[df_cv['horizon'] == horizon]
# Show the last of those rows.
df_cv_7.tail()

In [None]:
# Inspect the Python type of the 'horizon' entry stored under label 0.
type(df_cv['horizon'][0])

In [None]:
fig, ax = plt.subplots(1,1)

# Largest observed value: end of the reference line and upper limit of both axes.
y_max = max(y)
# Identity line (prediction == truth); computed once and used for both coordinates.
identity = np.arange(y_max)
ax.plot(identity, identity, '--', label = 'diagonal')
# Predictions against observations for the selected horizon.
ax.plot(y, y_hat, '-', label = horizon)

ax.set_title('Diagonal Plot')
ax.set_xlabel('truth: y')
ax.set_ylabel('Prediction: y_hat')

# Same limits and logarithmic scaling on both axes.
ax.set_xlim(10, y_max)
ax.set_ylim(10, y_max)
ax.set_xscale('log')
ax.set_yscale('log')

# Trailing ';' keeps the Legend repr out of the cell output.
ax.legend(loc = 'best',
         prop = {'size': 16});

# Trivial forecast

In [None]:
def mean_absolute_percentage_error(y_true, y_pred): 
    ''' Mean absolute percentage error (MAPE), in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values; each error is taken relative to these.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over all entries whose
        observed value is non-zero. Entries with ``y_true == 0`` have no
        defined percentage error and are left out instead of turning the
        result into inf/NaN. NaN is returned when no entry can be evaluated
        (empty input or all observed values zero). NaN values in the inputs
        still propagate to the result.
    '''
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    abs_error = np.abs(y_true - y_pred)
    # Expand the denominator to the broadcast shape so that one mask fits both arrays.
    abs_truth = np.broadcast_to(np.abs(y_true), abs_error.shape)

    # Only entries with a non-zero observed value have a percentage error.
    defined = abs_truth != 0
    if not defined.any():
        return np.nan
    return np.mean(abs_error[defined] / abs_truth[defined]) * 100

In [None]:
# Read the same flat table again, this time parsing the 'date' column while
# loading, and keep the date and the 'Germany' series as 'ds' / 'y'.
parse_dates = ['date']
df_all = pd.read_csv(
    '../data/processed/COVID_small_flat_table.csv',
    sep=';',
    parse_dates=parse_dates,
)
df_trivial = (
    df_all
    .loc[:, ['date', 'Germany']]
    .rename(columns={'date': 'ds', 'Germany': 'y'})
)

In [None]:
# 3-row rolling mean of the observed series 'y'.
df_trivial['y_mean_r3'] = df_trivial.y.rolling(3).mean()

In [None]:
# Trivial forecast: the prediction for each row is the 3-row rolling mean
# taken 7 rows earlier; 'cutoff' is the date of that earlier row.
df_trivial['cutoff'] = df_trivial['ds'].shift(7)
df_trivial['y_hat'] = df_trivial['y_mean_r3'].shift(7)
df_trivial['horizon'] = df_trivial['ds']-df_trivial['cutoff']
# Evaluate from row 12 onward, which skips the leading rows where 'y_hat' is
# NaN (rolling window plus shift). The observed series 'y' is the truth
# (first argument) and 'y_hat' the prediction, so the error is normalised by
# the observed value.
print('MAPE:   '+str(mean_absolute_percentage_error(df_trivial['y'].iloc[12:], df_trivial['y_hat'].iloc[12:])))
df_trivial