In [2]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

#the final attempt at forecasting is going to be done with facebook's prophet to handle multiple seasonalities.
df = pd.read_csv("data.csv", parse_dates=["ts"])
df = df.rename(columns={"ts": "ds", "v1": "y", "v2": "v2_exog"})

In [3]:
model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
model.add_seasonality(name="daily", period=1, fourier_order=10)
model.add_regressor("v2_exog")  #v2 added as an external regressor
model.fit(df)

21:18:13 - cmdstanpy - INFO - Chain [1] start processing
21:18:26 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x248e93c4830>

In [4]:
#cross-validation
def prophet_cross_validation(df, folds=5, holdout_size=30*24): #5 fold cross validation
    errors = []
    n = len(df)
    train_size = int(n * 0.7)  

    for i in range(folds):
        train_end = train_size + (i * holdout_size) 
        if train_end + holdout_size > n:
            break 

        train, test = df.iloc[:train_end], df.iloc[train_end:train_end+holdout_size]

        
        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
        model.add_seasonality(name="daily", period=1, fourier_order=10)  #high fourier number since daily data is volatile
        model.add_regressor("v2_exog") 
        model.fit(train)

        future_test = model.make_future_dataframe(periods=len(test), freq="H")

        future_test = future_test.iloc[-len(test):].copy()  
        future_test["v2_exog"] = test["v2_exog"].values 

        forecast_test = model.predict(future_test)

        test_forecast = forecast_test.iloc[-len(test):]

        actuals = test["y"].values
        predictions = test_forecast["yhat"].values

        mae = mean_absolute_error(actuals, predictions)
        rmse = np.sqrt(mean_squared_error(actuals, predictions))  
        r2 = r2_score(actuals, predictions)

        errors.append({"Fold": i+1, "MAE": mae, "RMSE": rmse, "R2": r2})

    return pd.DataFrame(errors)

In [5]:
cv_results = prophet_cross_validation(df, folds=5, holdout_size=30*24)
print(cv_results)
print(cv_results.mean())

21:19:18 - cmdstanpy - INFO - Chain [1] start processing
21:19:25 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
21:19:26 - cmdstanpy - INFO - Chain [1] start processing
21:19:31 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
21:19:32 - cmdstanpy - INFO - Chain [1] start processing
21:19:38 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
21:19:40 - cmdstanpy - INFO - Chain [1] start processing
21:19:47 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
21:19:48 - cmdstanpy - INFO - Chain [1] start processing
21:19:55 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(


   Fold       MAE      RMSE        R2
0     1  0.226814  0.314107  0.884896
1     2  0.210677  0.294489  0.937346
2     3  0.266296  0.428533  0.903023
3     4  0.297561  0.429192  0.911501
4     5  0.320152  0.433744  0.897858
Fold    3.000000
MAE     0.264300
RMSE    0.380013
R2      0.906925
dtype: float64
