## Timeseries forecasting using FB Prophet model

A straightforward notebook showing the minimal steps required for forecasting.

### Importing required libraries

In [32]:
#Exploration
import numpy as np
import pandas as pd
import plotly.express as px
#Training
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, plot_components_plotly
# Evaluation
from statsmodels.tools.eval_measures import rmse

In [24]:
# Load the AirPassengers CSV, discard rows containing missing values and
# renumber the remaining rows from 0 so the index is contiguous.
df = (
    pd.read_csv(r'C:\STORAGE\Studies\Time Series Analysis\AirPassengers.csv')
    .dropna()
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,Month,#Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


### Changing column names
The fbprophet model requires the date column to be named 'ds' and the column of values to be predicted to be named 'y', so the columns are renamed accordingly.

In [25]:
# Prophet looks for the dates under 'ds' and the target values under 'y'.
prophet_columns = ['ds', 'y']
df.columns = prophet_columns

In [26]:
# Parse the 'ds' column into datetime values, then preview the last rows
# to confirm the conversion and the end of the series.
df = df.assign(ds=pd.to_datetime(df['ds']))
df.tail()

Unnamed: 0,ds,y
139,1960-08-01,606
140,1960-09-01,508
141,1960-10-01,461
142,1960-11-01,390
143,1960-12-01,432


### Visualizing original series

In [33]:
# Line chart of the original series: dates on the x-axis, observed values on the y-axis.
fig = px.line(data_frame=df, x='ds', y='y')
fig.show()

### Splitting data into training and testing dataset

In [38]:
# Hold out the final 24 rows as the test set; every earlier row is training data.
split_at = len(df) - 24
train = df.iloc[:split_at]
test = df.iloc[split_at:]

### Building and training model

In [45]:
# Fit a default Prophet model on the training rows only.
m = Prophet()
m.fit(df=train)

# Extend the training dates by 36 monthly ('MS') periods and predict over
# both the training history and those added months.
future = m.make_future_dataframe(freq='MS', periods=36)
forecast = m.predict(df=future)

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


**Note:** `freq='MS'` indicates a monthly frequency, and `periods=36` adds 36 months to the end of the training dataset (the 24 months held out for testing plus 12 further months).

### Results table

In [46]:
# Preview the last five rows of the full forecast table (all Prophet output columns).
forecast.tail(n=5)

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
151,1961-08-01,501.312471,531.276249,579.264035,498.94371,504.001915,55.074386,55.074386,55.074386,55.074386,55.074386,55.074386,0.0,0.0,0.0,556.386857
152,1961-09-01,504.296878,493.368554,542.257604,501.798196,507.100185,13.138146,13.138146,13.138146,13.138146,13.138146,13.138146,0.0,0.0,0.0,517.435024
153,1961-10-01,507.185014,462.438924,510.194975,504.526473,510.106144,-22.059006,-22.059006,-22.059006,-22.059006,-22.059006,-22.059006,0.0,0.0,0.0,485.126008
154,1961-11-01,510.169422,433.933404,481.062276,507.384753,513.212674,-53.471416,-53.471416,-53.471416,-53.471416,-53.471416,-53.471416,0.0,0.0,0.0,456.698006
155,1961-12-01,513.057558,459.05159,507.076393,510.205642,516.230454,-29.821452,-29.821452,-29.821452,-29.821452,-29.821452,-29.821452,0.0,0.0,0.0,483.236106


In [55]:
# Same preview, restricted to the date and the point forecast.
forecast.loc[:, ['ds', 'yhat']].tail(n=5)

Unnamed: 0,ds,yhat
151,1961-08-01,556.386857
152,1961-09-01,517.435024
153,1961-10-01,485.126008
154,1961-11-01,456.698006
155,1961-12-01,483.236106


### Plot of whole modeled data

In [48]:
# Interactive Plotly figure built from the fitted model and its forecast.
forecast_fig = plot_plotly(m, forecast)
forecast_fig

### Evaluation of the model

In [73]:
# `forecast` holds the training history followed by the 36 added months.
# The held-out test set is the first 24 of those months, so take the last
# 36 rows and drop the final 12, which lie beyond the observed data.
test_predictions = forecast.iloc[-36:-12]['yhat']

# Fix: the original passed an undefined name `predictions` to rmse(), which
# raises NameError on a fresh kernel; the series computed above is what is meant.
print("Compared to the mean of values in the dataset which is " , test['y'].mean(),", the error is around ", round(rmse(test_predictions,test['y']),2))

Compared to the mean of values in the dataset which is  452.25 , the error is around  40.38
