<a href="https://colab.research.google.com/github/female-coders-linz/workshops/blob/master/time_series/forecasting_prophet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Time Series Forecasting with Prophet

In [0]:
# Install mwviews (needed to access Wikipedia article views data)
from IPython.display import clear_output

!pip install mwviews

clear_output()

In [0]:
import pandas as pd
import matplotlib.pyplot as plt

from prophet import Prophet
from mwviews.api import PageviewsClient
from sklearn.metrics import mean_absolute_error, mean_squared_error

## Get input data

In [0]:
# Load data: monthly page views of English Wikipedia articles
# (the client is created with a descriptive user_agent string)
p = PageviewsClient(user_agent="info@female-coders.at Prophet time series forecasting example")

# TODO: add a wikipedia topic you want to load data for, and a start date
#   - put the article title(s) into the (still empty) list; a later cell reads the
#     column 'Wolfgang_Amadeus_Mozart', so that title has to be part of the list
#   - start is presumably a 'YYYYMMDD' string like `end` -- confirm in the mwviews docs
article_views = p.article_views('en.wikipedia', [] , granularity='monthly', start='', end='20200430' )
# Transpose and sort by index: the following cells use the index as time axis
# and the columns as article names.
df_article_views = pd.DataFrame(article_views).T.sort_index()
df_article_views.head()

In [0]:
# First look at the data: one line per loaded article
fig, ax = plt.subplots(figsize=(30, 6))
ax.plot(df_article_views)
ax.legend(df_article_views.columns)
ax.set_title('Wikipedia article views')
plt.show()

## Prepare dataframe

In [0]:
# Let's use Wolfgang Amadeus Mozart's page views.
# The input data needs the columns ds (time) and y (the values to forecast,
# here: the page views). The following cells access train['ds'] and train['y'],
# so the column names have to match exactly.

# TODO: provide correct column names
#       (first entry: the dates from the index, second entry: the page views)
df_input = pd.DataFrame({ 
    '': df_article_views.index, 
    '': df_article_views['Wolfgang_Amadeus_Mozart']
})


In [0]:
# Define what data we want to use for training and test.
# The last test_len rows (months) are held out as test set,
# all rows before them are used for training.

# TODO: Add a proper test length
#       (a positive integer smaller than len(df_input); with 0 the slice
#        df_input[:-0] would be empty and the training set would contain no rows)
test_len =  # months
train, test = df_input[:-test_len], df_input[-test_len:]

print(f'Training size: {len(train)}')
print(f'Test size: {test_len}')

## Train model

In [0]:
# Create a Prophet model with default settings ...
model = Prophet()

# ... and fit it on the training data only (the test data stays unseen)
model.fit(train)

# if you are interested in more details, have a look at the fitted parameters
# model.params

In [0]:
# if you have daily data, you might want to include holidays, 
#    e.g. birthday, Salzburger Festspiele,...

# mozart_birthday = pd.DataFrame({
#   'holiday': 'mozart_birthday',
#   'ds': pd.to_datetime(['2018-01-27', '2019-01-27', '2020-01-27']),
#   'lower_window': 0,
#   'upper_window': 1,
# })
# model = Prophet( 
#       interval_width=0.95, 
#       seasonality_prior_scale=0.025,
#       holidays=mozart_birthday,
# )

In [0]:
# Create a 'future' dataframe 

# TODO add the number of periods you want to predict for (=test length)
df_future = model.make_future_dataframe(periods=, freq = 'M')
df_future.tail()

## Predict the future ✨

In [0]:
# Predict the future.
# The resulting dataframe contains the prediction in the column 'yhat',
# which is what the error metrics at the end of the notebook read.

# TODO: provide your future dataframe (the one created in the previous cell)
df_forecast = model.predict()
df_forecast.tail()

## Inspect your prediction

In [0]:
# Have a look at your forecast.
# plt.show() renders the figure created by model.plot(); calling .show() on the
# returned figure instead triggers a "non-GUI backend" warning in notebooks.
model.plot(df_forecast)
plt.show()

In [0]:
# Inspect the model components (trend, seasonalities).
# plt.show() renders the figure created by model.plot_components(); calling .show()
# on the returned figure instead triggers a "non-GUI backend" warning in notebooks.
model.plot_components(df_forecast)
plt.show()

In [0]:
# What does the forecast dataframe look like? What information can you get out of it?

# TODO: look only at the timeframe you predicted
#       (i.e. select only the rows of df_forecast that belong to the forecasted months)
predicted = df_forecast
predicted.head()

In [0]:
# Show the training data and the held-out test data in one figure
fig, ax = plt.subplots(figsize=(30, 5))
ax.plot(train['ds'], train['y'], label='train')
ax.plot(test['ds'], test['y'], label='test')

# TODO: add your prediction to the plot (also include the confidence interval)

ax.legend()
plt.show()

## Calculate Error Metrics

In [0]:
# Compare the in-sample (train) and the out-of-sample (test) forecast error.
# Assumes df_forecast was predicted on the future dataframe, i.e. it contains the
# training dates followed by the test_len forecasted months, so that everything
# except the last test_len rows belongs to the training period.
# RMSE is computed as the square root of the MSE: the `squared=False` shortcut of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6.
yhat_train = df_forecast[:-test_len]['yhat']

print('Error')
print('---')
print('Train')
print('MAE', mean_absolute_error(y_true=train['y'], y_pred=yhat_train))
print('RMSE', mean_squared_error(y_true=train['y'], y_pred=yhat_train) ** 0.5)
print('-')
print('Test')

# TODO: calculate the error metrics for the test set
#       (same functions as above; RMSE = mean_squared_error(...) ** 0.5)
print('MAE', )
print('RMSE', )
print('---')

In [0]:
# TODO: can we improve the model?