In [15]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from datetime import datetime
import os
import warnings
warnings.filterwarnings('ignore')

In [16]:
# Load the pre-split training, validation and February-2020 test CSVs
# (paths are relative to the notebook's working directory).
train_df, val_df, feb_2020_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/train/train_data.csv',
        'data/train/val_data.csv',
        'data/test/feb_2020_data.csv',
    )
)

# Row count and first/last 'Date' per split. 'Date' is still the raw CSV column
# at this point (it is parsed with pd.to_datetime in later cells).
print(f"  Training samples: {len(train_df)} (from {train_df['Date'].min()} to {train_df['Date'].max()})")
print(f"  Validation samples: {len(val_df)} (from {val_df['Date'].min()} to {val_df['Date'].max()})")
print(f"  Test samples (Feb 2020): {len(feb_2020_df)} (from {feb_2020_df['Date'].min()} to {feb_2020_df['Date'].max()})")

  Training samples: 1277 (from 2012-01-01 to 2015-06-30)
  Validation samples: 200 (from 2015-07-01 to 2016-01-16)
  Test samples (Feb 2020): 29 (from 2020-02-01 to 2020-02-29)


In [17]:
# Training frame in the two-column layout handed to Prophet:
#   ds - the parsed 'Date' column, y - the 'Price' column to be forecast.
prophet_train = pd.DataFrame(
    {
        'ds': pd.to_datetime(train_df['Date']),
        'y': train_df['Price'],
    }
)

In [18]:
# Parsed 'Date' columns for the two periods we forecast:
# the validation split and the February 2020 test split.
val_dates, feb_2020_dates = (
    pd.to_datetime(split_df['Date']) for split_df in (val_df, feb_2020_df)
)

In [19]:
# Configure the Prophet forecaster (fitting happens in the next cell).
model = Prophet(
    # Seasonal components: yearly and weekly enabled, intra-day disabled;
    # seasonality is applied multiplicatively.
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode='multiplicative',
    # Prior scales for the trend changepoints and the seasonal terms.
    changepoint_prior_scale=0.05,
    seasonality_prior_scale=10.0,
    # Width of the uncertainty interval reported as yhat_lower / yhat_upper.
    interval_width=0.95,
)

In [20]:
# Train: fit the configured Prophet model on prophet_train ('ds' dates, 'y' prices).
# The call is the cell's last expression, so the notebook also displays the
# value it returns (the Prophet object repr shown in the output).
model.fit(prophet_train)

03:47:01 - cmdstanpy - INFO - Chain [1] start processing
03:47:01 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x2101650a600>

In [21]:
# Forecast the validation period with the fitted model.
val_future = pd.DataFrame({'ds': val_dates})
val_forecast = model.predict(val_future)

# Everything downstream (metrics, comparison table) pairs forecast rows with
# val_df rows purely by position. Prophet's preprocessing orders the frame by
# 'ds' (NOTE(review): confirm for the installed Prophet version), so a
# validation file that is not sorted by Date would be scored against the wrong
# rows without any error. Fail loudly instead of reporting wrong metrics.
assert np.array_equal(val_forecast['ds'].to_numpy(), val_dates.to_numpy()), (
    "val_forecast rows are not in the same order as val_df; "
    "sort val_df by 'Date' before forecasting"
)

# Point forecasts as a NumPy array
val_predictions = val_forecast['yhat'].values

# Actual prices, in val_df row order
val_actual = val_df['Price'].values

In [22]:
# Score the validation forecast against the observed prices.
# rmse is the square root of the mean squared error, so (like mae) it is
# expressed in the same units as the prices themselves.
rmse = np.sqrt(mean_squared_error(val_actual, val_predictions))
mae = mean_absolute_error(val_actual, val_predictions)
r2 = r2_score(val_actual, val_predictions)

# The first value is the RMSE (np.sqrt of the MSE), so label it as such.
print(f"Root mean squared error: {rmse:.2f}")
print(f"Mean absolute error: {mae:.2f}")
print(f"R2 score: {r2:.2f}")

Mean squared error: 8.64
Mean absolute error: 6.65
R2 score: 0.14


In [23]:
# Validation comparison table: observed vs. forecast price, the forecast's
# lower/upper bounds, and the signed error (actual - predicted).
# Every numeric column is rounded to one decimal; the error is the difference
# of the two already-rounded columns.
actual_1dp = val_actual.round(1)
predicted_1dp = val_predictions.round(1)

val_predictions_df = pd.DataFrame(
    {
        'Date': val_df['Date'].values,
        'Actual_Price': actual_1dp,
        'Predicted_Price': predicted_1dp,
        **{
            column: val_forecast[source].values.round(1)
            for column, source in (('Lower_Bound', 'yhat_lower'), ('Upper_Bound', 'yhat_upper'))
        },
        'Prediction_Error': actual_1dp - predicted_1dp,
    }
)

val_predictions_df.head(10)

Unnamed: 0,Date,Actual_Price,Predicted_Price,Lower_Bound,Upper_Bound,Prediction_Error
0,2015-07-01,124.0,125.0,112.8,137.8,-1.0
1,2015-07-02,132.0,125.2,112.1,137.8,6.8
2,2015-07-03,128.0,127.5,114.6,140.2,0.5
3,2015-07-04,126.0,125.7,113.9,138.5,0.3
4,2015-07-05,123.0,124.1,112.2,136.5,-1.1
5,2015-07-06,123.0,125.6,113.9,139.4,-2.6
6,2015-07-07,123.0,125.3,113.0,137.8,-2.3
7,2015-07-08,124.0,126.3,114.3,138.2,-2.3
8,2015-07-09,117.0,126.4,114.4,138.6,-9.4
9,2015-07-10,123.0,128.6,116.2,141.8,-5.6


In [24]:
# February 2020: forecast every date in the test file with the same model.
feb_2020_future = pd.DataFrame(dict(ds=feb_2020_dates))
feb_2020_forecast = model.predict(feb_2020_future)

# Keep the point forecasts ('yhat') as a plain NumPy array.
feb_2020_predictions = feb_2020_forecast['yhat'].values

In [25]:
# February 2020 forecast table: date, point forecast and the forecast's
# lower/upper bounds, each numeric column rounded to one decimal.
feb_2020_predictions_df = pd.DataFrame(
    {
        'Date': feb_2020_df['Date'].values,
        'Predicted_Price': feb_2020_predictions.round(1),
        **{
            column: feb_2020_forecast[source].values.round(1)
            for column, source in (('Lower_Bound', 'yhat_lower'), ('Upper_Bound', 'yhat_upper'))
        },
    }
)

feb_2020_predictions_df.head(10)

Unnamed: 0,Date,Predicted_Price,Lower_Bound,Upper_Bound
0,2020-02-01,200.6,188.3,213.3
1,2020-02-02,199.3,187.7,211.2
2,2020-02-03,202.2,189.4,214.7
3,2020-02-04,202.8,190.6,215.3
4,2020-02-05,205.1,193.2,217.4
5,2020-02-06,206.4,194.2,219.8
6,2020-02-07,210.4,197.1,223.8
7,2020-02-08,208.9,196.4,221.8
8,2020-02-09,207.6,194.5,220.6
9,2020-02-10,210.5,197.9,223.0


In [26]:
# save feb 2020 predictions

filename='predictions/prophet_feb_2020.csv'

# to_csv does not create missing folders, so on a fresh checkout the write
# would fail; make sure the target directory exists first.
output_dir = os.path.dirname(filename)
if output_dir:  # empty when filename has no directory part
    os.makedirs(output_dir, exist_ok=True)

# Write the forecast table without the positional index column.
feb_2020_predictions_df.to_csv(filename, index=False)