In [None]:
import numpy as np
import pandas as pd
import pickle
from tqdm.notebook import tqdm
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from prophet import Prophet
import sys
sys.path.append('../utils')
from utils import load_processed_data, cv, get_test_metrics

# Prophet Model

Uses the Facebook Prophet package to forecast the speed at a single sensor with a decomposable time series model that accounts for several types of seasonality.

Read more: https://facebook.github.io/prophet/docs/quick_start.html#python-api



### Setup

In [None]:
# Read the station metadata table that sits alongside the processed data.
# NOTE(review): this path points at 'fwy_405_n_ds' while the speed data in the
# next cell is loaded from 'rdp_ds' — confirm the two directories belong together.
station_meta = pd.read_csv('../data/processed/fwy_405_n_ds/meta.csv')

In [None]:
# load_processed_data yields three values; only the third is kept.
# Later cells index it by timestamp (index.month) and select one column per
# station ID — presumably a time-indexed DataFrame; verify against utils.
# NOTE(review): unpacking into `_` also shadows IPython's last-output variable.
_, _, station_data = load_processed_data('../data/processed/rdp_ds')
# Preview the first rows as the cell output.
station_data.head()

In [None]:
# Restore the shared notebook settings object; the cells below read its
# 'station_id' entry.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load an env.dat that was produced by a trusted source.
with open('./env.dat', 'rb') as env_file:
    ENV = pickle.load(env_file)

### Select Sensor

Choose sensor ID and split into train/test

In [None]:
# Good example stations to try:
#   717711  (difficult)
#   716659  (many random outliers)
#   761455  (periodic)
#   772455  (very periodic)

# Station to model, taken from the shared notebook settings.
STATION = ENV['station_id']

# Keep only the May-July readings for the chosen station, then split by month:
# May and June form the training set, July the test set.
# NOTE(review): station_data is rebound here from the full table to the
# selected station only, so re-running this cell on its own operates on the
# already-sliced data — re-run the load cell first.
station_data = station_data.loc[station_data.index.month.isin([5, 6, 7]), STATION]

months = station_data.index.month
train_data = station_data[months.isin([5, 6])]
test_data = station_data[months == 7]

### Model

In [None]:
from time import time

In [None]:
# Convert the training series into the two-column frame Prophet expects:
# 'ds' holds the timestamps and 'y' the observed values.
# (A bare df.head() used to sit here; mid-cell expressions are never rendered,
# so it has been dropped. Put it on the last line of a cell to preview df.)
df = pd.DataFrame({'ds': train_data.index, 'y': train_data.values})

# Initialize the model with yearly seasonality disabled — the training window
# spans only two months.
m = Prophet(yearly_seasonality=False)

# Time the fit so the training cost (in seconds) is available as train_time.
start = time()
m.fit(df)
end = time()
train_time = end - start

In [None]:
# with open('./trained/prophet/prophet.dat', 'wb') as f:
#     pickle.dump(m, f)

### Evaluate

In [None]:
# # use to load model for evaluation instead of training above
# import pickle
# with open('./trained/prophet/prophet.dat', 'rb') as f:
#     m = pickle.load(f)

In [None]:
# Forecast over every timestamp in station_data (training and test months
# alike); the per-split predictions are separated out in the next cell.
timestamps = station_data.index
future = pd.DataFrame({'ds': timestamps})
forecast = m.predict(future)

In [None]:
# Row masks picking out the forecast rows whose timestamp belongs to each split.
in_train = forecast['ds'].isin(train_data.index)
in_test = forecast['ds'].isin(test_data.index)

# Columns holding the lower/upper edges of Prophet's uncertainty interval.
interval_cols = ['yhat_lower', 'yhat_upper']

# Point forecasts (1-D) and interval bounds (column 0 = lower, 1 = upper).
train_preds = forecast.loc[in_train, 'yhat'].values
train_conf = forecast.loc[in_train, interval_cols].values

test_preds = forecast.loc[in_test, 'yhat'].values
test_conf = forecast.loc[in_test, interval_cols].values

In [None]:
import plotly.graph_objects as go

In [None]:
# results: error metrics on the held-out test month
y_true = test_data.values
mae = mean_absolute_error(y_true, test_preds)
mse = mean_squared_error(y_true, test_preds)
# RMSE is derived from MSE rather than via mean_squared_error(squared=False),
# which newer scikit-learn releases deprecate and then remove.
rmse = mse ** 0.5
print('MAE:  %.3f' % mae)
print('RMSE: %.3f' % rmse)
print('MSE: %.3f' % mse)

# Label the shaded bands with the interval width the fitted model actually
# uses (Prophet's default is 0.8, i.e. 80%), not a hard-coded percentage.
interval_label = '%g%% CI' % (m.interval_width * 100)


def add_interval_band(figure, x, bounds, name, fillcolor):
    """Shade the region between the lower and upper interval bounds.

    figure:    plotly Figure to draw on (modified in place).
    x:         x-axis values shared by both bounds.
    bounds:    2-D array; column 0 is the lower bound, column 1 the upper.
    name:      legend entry for the shaded band.
    fillcolor: plotly color string for the fill.
    """
    invisible = 'rgba(0,0,0,0)'
    figure.add_traces([
        # The upper edge is drawn first so the lower edge can fill up to it.
        go.Scatter(x=x, y=bounds[:, 1],
                   mode='lines', line_color=invisible,
                   showlegend=False),
        go.Scatter(x=x, y=bounds[:, 0],
                   mode='lines', line_color=invisible,
                   name=name,
                   fill='tonexty', fillcolor=fillcolor)])


fig = go.Figure()
# go.Scatter(mode='lines') replaces the deprecated go.Line class.
fig.add_trace(go.Scatter(x=station_data.index, y=station_data,
                         mode='lines', name='True Values'))
fig.add_trace(go.Scatter(x=train_data.index, y=train_preds,
                         mode='lines', name='Predicted Values (Train)',
                         line=dict(color='rgb(255, 0, 0)')))
fig.add_trace(go.Scatter(x=test_data.index, y=test_preds,
                         mode='lines', name='Predicted Values (Test)',
                         line=dict(color='rgb(44, 160, 44)')))
fig.update_layout(
    title="Prophet Forecast Results",
    xaxis_title="Time",
    yaxis_title="Forecast")

add_interval_band(fig, train_data.index, train_conf,
                  interval_label + ' (Train)', 'rgba(255, 0, 0, 0.2)')
add_interval_band(fig, test_data.index, test_conf,
                  interval_label + ' (Test)', 'rgba(44, 160, 44, 0.2)')

# Last expression of the cell: render the finished figure.
fig


In [None]:
# fig.write_html('../plots/prophet.html')

Compute cross-validation and test-set metrics:

In [None]:
# Cross-validate a fresh, unfitted Prophet model with the `cv` helper imported
# from utils (the name `cv1` is not defined anywhere in this notebook and
# raised a NameError).
# NOTE(review): cv's signature is not visible here — confirm it accepts
# (model, data, metrics=...).
cv_metrics = cv(Prophet(yearly_seasonality=False), station_data, metrics=['mse', 'mae', 'rmse', 'r2'])

# Score the July forecast produced by the model fitted above.
test_metrics = get_test_metrics(test_data, test_preds)

# Bundle both result sets into a single dict.
metrics = {'cv': cv_metrics, 'test': test_metrics}

In [None]:
# with open('./trained/prophet/metrics_prophet.dat', 'wb') as f:
#     pickle.dump(metrics, f)

In [None]:
# Plot the fitted model's components for the forecast computed above, using
# Prophet's plot_components at a 10x8 figure size.
# The return value is bound to `p` so it is not echoed as the cell's output value.
p = m.plot_components(forecast, figsize=(10, 8))