In [2]:
import numpy as np
import pandas as pd
from fbprophet import Prophet
import os

import matplotlib.pyplot as plt
import seaborn as sns

  'Matplotlib is building the font cache using fc-list. '


RuntimeError: Python is not installed as a framework. The Mac OS X backend will not be able to function correctly if Python is not installed as a framework. See the Python documentation for more information on installing Python as a framework on Mac OS X. Please either reinstall Python as a framework, or try one of the other backends. If you are using (Ana)Conda please install python.app and replace the use of 'python' with 'pythonw'. See 'Working with Matplotlib on OSX' in the Matplotlib FAQ for more information.

In [None]:
# Load the 'Data' sheet of the TfL daily cycle-hire workbook.
# `sheet_name` is the supported keyword: the older `sheetname` spelling was
# deprecated in pandas 0.21 and is rejected by later releases.
dat = pd.read_excel('tfl-daily-cycle-hires.xls', sheet_name='Data')

# we are only interested in the first two columns, drop the rest.
# .copy() makes `dat` an independent frame, so later column assignments act on
# this frame rather than on a selection of the full sheet.
dat = dat[[u'Day', u'Number of Bicycle Hires']].copy()
dat.head()

In [None]:
# Line chart of the raw series: hire counts against their dates.
ax = plt.gca()
ax.plot(dat['Day'], dat['Number of Bicycle Hires'])
ax.set_xlabel('Date')
ax.set_ylabel('Number of cycle hires')
plt.show()

In [None]:
#things to note from plot
#Seasonality: There is a drop during the winter months and a rise during the summer months. This seasonality is easy to understand but needs to be accounted for in order to obtain reliable forecasts.
#Trends: There is a slight, overall upward trend. This means that the number of cycles hired has increased over time.
#Outliers: There are obvious outliers, notably during the summer of 2015. This coincides with the tube strike, which forced commuters to find alternative routes. The histogram plotted below highlights the presence of these large positive outliers.

In [None]:
# Distribution of the hire counts in 40 bins.
hire_counts = dat['Number of Bicycle Hires']
plt.hist(hire_counts, bins=40)
plt.show()

In [None]:
# The prophet package expects input as a dataframe with the first column
# indicating time and the second indicating the time series we wish to
# forecast, under the column names 'ds' and 'y'.
#
# Rename first, then convert: `rename` skips labels that are not present, so
# re-running this cell after the columns were already renamed is a no-op
# instead of a KeyError on the missing 'Day' column.
dat = dat.rename(columns={'Day': 'ds', 'Number of Bicycle Hires': 'y'})

# Store the time column as datetimes. `assign` returns a new frame rather than
# writing into the existing one in place; column order ('ds', 'y') is kept.
dat = dat.assign(ds=pd.DatetimeIndex(dat['ds']))
dat.head()

In [None]:
# Special days handed to the Prophet model as "holidays": one frame for bank
# holidays and one for tube strikes, each with a `holiday` label column and a
# `ds` date column, stacked into a single frame at the end.

# Bank holiday dates come from the 'Date' column of this csv file.
bank_holiday_dates = pd.read_csv('BankHolidayLists.csv')['Date']
bank_holidays = pd.DataFrame({
    'holiday': 'BankHoliday',
    'ds': pd.to_datetime(list(bank_holiday_dates)),
})

# Tube strike dates are listed by hand.
# NOTE(review): dates are kept exactly as given; worth confirming them against
# the actual strike calendar (the plot notes point at summer 2015).
strike_dates = [
    '2017-08-05',
    '2017-08-06',
    '2017-02-06',
    '2015-07-09',
    '2015-07-08',
    '2015-03-08',
]
strike_days = pd.DataFrame({
    'holiday': 'strike',
    'ds': pd.to_datetime(strike_dates),
})

# Stack both frames (original row labels are kept) and show the last ten rows.
all_holidays_strikes = pd.concat([bank_holidays, strike_days])
all_holidays_strikes.tail(n=10)

In [None]:
# Build the Prophet model with a linear growth trend, the weekly and yearly
# seasonality settings below, and the combined bank-holiday/strike frame, then
# fit it on the prepared ('ds', 'y') data.
prophet_settings = dict(
    growth='linear',
    weekly_seasonality=3,
    yearly_seasonality=3,
    holidays=all_holidays_strikes,
)
forecast_model = Prophet(**prophet_settings)
forecast_model.fit(dat)

In [None]:
# To study the fit we need to create another dataframe of dates to predict on:
# the training history plus 365 further periods.
df_dates = forecast_model.make_future_dataframe(
    periods=365,
    include_history=True,
)

# Predict over those dates and draw the model's forecast plot.
model_predictions = forecast_model.predict(df_dates)
plot_pred = forecast_model.plot(model_predictions)

legend_options = {'loc': 'best', 'fontsize': 20}
plt.legend(**legend_options)
plot_pred