In [113]:
import pandas as pd
import numpy as np
from prophet import Prophet
from prophet.serialize import model_to_json, model_from_json
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from datetime import datetime
import os
import warnings
import json
# Blanket filter: every Python warning raised after this point is suppressed,
# not only the ones coming from the forecasting libraries.
warnings.filterwarnings('ignore')

In [114]:
# Read the three pre-split CSV files (training, validation, February 2020 test)
# in one pass; the order of the paths matches the order of the target names.
train_df, val_df, feb_2020_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/train/train_data.csv',
        'data/train/val_data.csv',
        'data/test/feb_2020_data.csv',
    )
)

# One summary line per split: row count plus smallest and largest 'Date' value.
print("\n".join(
    f"  {label}: {len(frame)} (from {frame['Date'].min()} to {frame['Date'].max()})"
    for label, frame in (
        ("Training samples", train_df),
        ("Validation samples", val_df),
        ("Test samples (Feb 2020)", feb_2020_df),
    )
))

  Training samples: 1277 (from 2012-01-01 to 2015-06-30)
  Validation samples: 200 (from 2015-07-01 to 2016-01-16)
  Test samples (Feb 2020): 29 (from 2020-02-01 to 2020-02-29)


In [115]:
# Build the two-column frame handed to Prophet.fit below:
# 'ds' holds the parsed dates, 'y' holds the prices.
prophet_train = (
    train_df[['Date', 'Price']]
    .rename(columns={'Date': 'ds', 'Price': 'y'})
    .assign(ds=pd.to_datetime(train_df['Date']))
)

In [116]:
# Parse the 'Date' column of the validation split and of the February 2020
# split into datetimes; both are used later as prediction horizons.
val_dates, feb_2020_dates = (
    pd.to_datetime(split['Date']) for split in (val_df, feb_2020_df)
)

In [117]:
# Configure the Prophet forecaster (no fitting happens here).
model = Prophet(
    # Seasonal components: yearly and weekly on, daily off.
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode='multiplicative',
    # Prior scales for trend changepoints and for the seasonal terms.
    changepoint_prior_scale=0.05,
    seasonality_prior_scale=10.0,
    # Width of the yhat_lower / yhat_upper interval.
    interval_width=0.95,
)

In [118]:
# Fit the configured model on the training frame; the returned model object
# is the cell's displayed value.
model.fit(df=prophet_train)

03:31:24 - cmdstanpy - INFO - Chain [1] start processing
03:31:25 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x210190a0fe0>

In [119]:
# Observed validation prices, as a NumPy array
val_actual = val_df['Price'].to_numpy()

# Forecast over the validation dates: wrap them in a single-column 'ds' frame
# and let the fitted model predict on it.
val_future = val_dates.to_frame(name='ds')
val_forecast = model.predict(val_future)

# Point forecasts (yhat) as a NumPy array, aligned with val_actual
val_predictions = val_forecast['yhat'].to_numpy()

In [120]:
# Score the validation forecast against the observed prices.
# mean_squared_error yields the MSE; taking its square root gives the RMSE,
# which is expressed in the same units as the prices.
rmse = np.sqrt(mean_squared_error(val_actual, val_predictions))
mae = mean_absolute_error(val_actual, val_predictions)
r2 = r2_score(val_actual, val_predictions)

# The first label names the quantity actually printed: the *root* mean squared error.
print(f"Root mean squared error: {rmse:.2f}")
print(f"Mean absolute error: {mae:.2f}")
print(f"R2 score: {r2:.2f}")

Mean squared error: 8.64
Mean absolute error: 6.65
R2 score: 0.14


In [121]:
# Table comparing observed and forecast validation prices. Every numeric
# column is rounded to one decimal; the error column is the difference of the
# two already-rounded price columns (actual minus predicted).
val_predictions_df = pd.DataFrame({
    'Date': val_df['Date'].values,
    'Actual_Price': np.round(val_actual, 1),
    'Predicted_Price': np.round(val_predictions, 1),
    'Lower_Bound': np.round(val_forecast['yhat_lower'].values, 1),
    'Upper_Bound': np.round(val_forecast['yhat_upper'].values, 1),
    'Prediction_Error': np.round(val_actual, 1) - np.round(val_predictions, 1),
})

# Show the first ten rows
val_predictions_df.head(10)

Unnamed: 0,Date,Actual_Price,Predicted_Price,Lower_Bound,Upper_Bound,Prediction_Error
0,2015-07-01,124.0,125.0,111.9,138.0,-1.0
1,2015-07-02,132.0,125.2,112.4,138.1,6.8
2,2015-07-03,128.0,127.5,115.1,140.7,0.5
3,2015-07-04,126.0,125.7,113.6,138.2,0.3
4,2015-07-05,123.0,124.1,110.7,135.7,-1.1
5,2015-07-06,123.0,125.6,113.4,138.2,-2.6
6,2015-07-07,123.0,125.3,112.7,137.9,-2.3
7,2015-07-08,124.0,126.3,112.7,139.1,-2.3
8,2015-07-09,117.0,126.4,113.2,139.7,-9.4
9,2015-07-10,123.0,128.6,116.4,141.4,-5.6


In [None]:
# Forecast for the February 2020 dates: single-column 'ds' frame in,
# full Prophet forecast frame out.
feb_2020_future = feb_2020_dates.to_frame(name='ds')
feb_2020_forecast = model.predict(feb_2020_future)

# Keep the point forecasts (yhat) as a NumPy array
feb_2020_predictions = feb_2020_forecast['yhat'].to_numpy()

In [None]:
# February 2020 forecast table: point forecast plus the lower/upper interval
# bounds, all rounded to one decimal place.
feb_2020_predictions_df = pd.DataFrame({
    'Date': feb_2020_df['Date'].values,
    'Predicted_Price': np.round(feb_2020_predictions, 1),
    'Lower_Bound': np.round(feb_2020_forecast['yhat_lower'].values, 1),
    'Upper_Bound': np.round(feb_2020_forecast['yhat_upper'].values, 1),
})

# Show the first ten rows
feb_2020_predictions_df.head(10)

Unnamed: 0,Date,Predicted_Price,Lower_Bound,Upper_Bound
0,2020-02-01,200.6,188.3,213.2
1,2020-02-02,199.3,186.2,212.3
2,2020-02-03,202.2,189.0,214.8
3,2020-02-04,202.8,190.7,215.7
4,2020-02-05,205.1,191.3,218.1
5,2020-02-06,206.4,193.5,218.6
6,2020-02-07,210.4,197.3,223.2
7,2020-02-08,208.9,195.4,220.8
8,2020-02-09,207.6,194.9,220.2
9,2020-02-10,210.5,197.5,223.4


In [None]:
# Save the February 2020 forecast table as CSV (without the index column).
filename = 'predictions/prophet_feb_2020.csv'

# to_csv does not create missing parent directories, so make sure the output
# folder exists first; exist_ok=True makes re-running the cell harmless.
os.makedirs(os.path.dirname(filename), exist_ok=True)

feb_2020_predictions_df.to_csv(filename, index=False)

In [None]:
# Persist the fitted model to disk.
# open(..., 'w') fails if the parent folder is missing, so create it first;
# exist_ok=True keeps the cell safe to re-run.
os.makedirs('models/saved_models', exist_ok=True)

# NOTE: json.dump applies one more layer of JSON encoding on top of whatever
# model_to_json returns. The loading cell in this notebook reverses that with
# json.load, so the on-disk format is deliberately left unchanged here.
with open('models/saved_models/prophet_model.json', 'w') as f:
    json.dump(model_to_json(model), f)

In [None]:
# Restore the model from disk: read the whole file, undo the outer JSON
# encoding applied when it was saved, and pass the decoded payload to
# model_from_json.
with open('models/saved_models/prophet_model.json') as f:
    m_loaded = model_from_json(json.loads(f.read()))

In [None]:
# Prediction input for the reloaded model: the February 2020 dates parsed to
# datetimes and placed in a single 'ds' column.
feb_dates = pd.to_datetime(feb_2020_df['Date'])
feb_df = feb_dates.to_frame(name='ds')

# Preview the first five rows
feb_df.head(5)

Unnamed: 0,ds
0,2020-02-01
1,2020-02-02
2,2020-02-03
3,2020-02-04
4,2020-02-05


In [None]:
# Run the reloaded model on the February 2020 dates and display its point
# forecasts (the 'yhat' column).
preds = m_loaded.predict(feb_df)
preds.yhat

0     200.605559
1     199.311444
2     202.177842
3     202.781503
4     205.082030
5     206.353046
6     210.443976
7     208.892184
8     207.608018
9     210.469384
10    211.051536
11    213.325431
12    214.568888
13    218.643436
14    217.084301
15    215.816005
16    218.727656
17    219.392881
18    221.791834
19    223.202350
20    227.491919
21    226.185670
22    225.212706
23    228.462363
24    229.494590
25    232.285938
26    234.101091
27    238.800368
28    237.886094
Name: yhat, dtype: float64