In [7]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# TS
from prophet import Prophet

# User Imports
import sys

sys.path.append("..")
from src import util

# modeling
import optuna
from sklearn.metrics import mean_squared_error as mse
import joblib

%load_ext autoreload
%autoreload 2

plt.rcParams["figure.figsize"] = (14, 7)

In [14]:
load = util.read_load("../data/load_hist_data.csv")
weather = util.read_weather("../data/weather_data.csv")
weather_features = util.featurize_weather(
    weather, lags=[24]
)  # 24 hours = 1 day lagged weather
mod_data = util.create_mod_data(load, weather_features)

In [15]:
from sktime.forecasting.model_selection import temporal_train_test_split

inference_data = mod_data[mod_data.ds >= "2008-01-01"]
mod_data = mod_data[mod_data.ds < '2008-01-01'] 

train_data, test_data = temporal_train_test_split(mod_data, test_size=1 / 3)
tune_data, test_data = temporal_train_test_split(test_data, test_size=1 / 3)

In [18]:
def objective(trial):
    params = {
        "changepoint_prior_scale": trial.suggest_float(
            "changepoint_prior_scale", 0.01, 10
        ),
        "seasonality_prior_scale": trial.suggest_float(
            "seasonality_prior_scale", 0.01, 10
        ),
        "seasonality_mode": trial.suggest_categorical(
            "seasonality_mode", ["additive", "multiplicative"]
        ),
        "dow_0_prior_scale": trial.suggest_float("dow_0_prior_scale", 0.01, 10),
        "dow_1_prior_scale": trial.suggest_float("dow_1_prior_scale", 0.01, 10),
        "dow_2_prior_scale": trial.suggest_float("dow_2_prior_scale", 0.01, 10),
        "dow_3_prior_scale": trial.suggest_float("dow_3_prior_scale", 0.01, 10),
        "dow_4_prior_scale": trial.suggest_float("dow_4_prior_scale", 0.01, 10),
        "dow_5_prior_scale": trial.suggest_float("dow_5_prior_scale", 0.01, 10),
        "dow_6_prior_scale": trial.suggest_float("dow_6_prior_scale", 0.01, 10),
    }
    display(params)

    m = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,  # added per dow
        # mcmc_samples=300,
        # seasonality_mode="multiplicative",  # "additive",  # "multiplicative",
        seasonality_mode=params["seasonality_mode"],
        changepoint_prior_scale=params["changepoint_prior_scale"],
        seasonality_prior_scale=params["seasonality_prior_scale"],
    )

    for i in range(7):
        dow_prior = params[f"dow_{i}_prior_scale"]
        m.add_seasonality(
            name=f"daily_dow{i}",
            period=1,
            fourier_order=4,
            condition_name=f"dow_{i}",
            prior_scale=dow_prior,
        )

    m.add_country_holidays(country_name="US")
    m.add_regressor("max_station_temp")
    m.add_regressor("min_station_temp")
    m.add_regressor("mean_station_temp")
    m.add_regressor("lag_24__min_station_temp")
    m.add_regressor("lag_24__max_station_temp")
    m.add_regressor("lag_24__mean_station_temp")

    m.fit(train_data)
    preds = m.predict(tune_data)
    rmse = mse(tune_data["y"], preds["yhat"], squared=False)
    return rmse


study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=3)

study.best_params
joblib.dump(study, "../models/prophet_study.pkl")

[I 2023-12-20 22:37:50,761] A new study created in memory with name: no-name-92d9e4b3-1278-4b25-8b2c-25ec5e928e28


{'changepoint_prior_scale': 5.320365068322312,
 'seasonality_prior_scale': 4.613403157012447,
 'seasonality_mode': 'additive',
 'dow_0_prior_scale': 0.6127325685402715,
 'dow_1_prior_scale': 2.6839226947447603,
 'dow_2_prior_scale': 0.9188387596022104,
 'dow_3_prior_scale': 6.956858353974224,
 'dow_4_prior_scale': 2.075375205339147,
 'dow_5_prior_scale': 8.78605000329544,
 'dow_6_prior_scale': 5.28937615752377}

22:37:52 - cmdstanpy - INFO - Chain [1] start processing
22:39:14 - cmdstanpy - INFO - Chain [1] done processing
[I 2023-12-20 22:39:15,051] Trial 0 finished with value: 1119.8946972811127 and parameters: {'changepoint_prior_scale': 5.320365068322312, 'seasonality_prior_scale': 4.613403157012447, 'seasonality_mode': 'additive', 'dow_0_prior_scale': 0.6127325685402715, 'dow_1_prior_scale': 2.6839226947447603, 'dow_2_prior_scale': 0.9188387596022104, 'dow_3_prior_scale': 6.956858353974224, 'dow_4_prior_scale': 2.075375205339147, 'dow_5_prior_scale': 8.78605000329544, 'dow_6_prior_scale': 5.28937615752377}. Best is trial 0 with value: 1119.8946972811127.


{'changepoint_prior_scale': 7.0087096751807705,
 'seasonality_prior_scale': 4.391911528615275,
 'seasonality_mode': 'additive',
 'dow_0_prior_scale': 0.7520179943471652,
 'dow_1_prior_scale': 0.12731654579269636,
 'dow_2_prior_scale': 9.19259439784826,
 'dow_3_prior_scale': 8.727295897707606,
 'dow_4_prior_scale': 3.567394958523806,
 'dow_5_prior_scale': 8.005355119977136,
 'dow_6_prior_scale': 0.9115659927110578}

22:39:16 - cmdstanpy - INFO - Chain [1] start processing
22:40:27 - cmdstanpy - INFO - Chain [1] done processing
[I 2023-12-20 22:40:28,289] Trial 1 finished with value: 926.0657337456925 and parameters: {'changepoint_prior_scale': 7.0087096751807705, 'seasonality_prior_scale': 4.391911528615275, 'seasonality_mode': 'additive', 'dow_0_prior_scale': 0.7520179943471652, 'dow_1_prior_scale': 0.12731654579269636, 'dow_2_prior_scale': 9.19259439784826, 'dow_3_prior_scale': 8.727295897707606, 'dow_4_prior_scale': 3.567394958523806, 'dow_5_prior_scale': 8.005355119977136, 'dow_6_prior_scale': 0.9115659927110578}. Best is trial 1 with value: 926.0657337456925.


{'changepoint_prior_scale': 5.15802503854531,
 'seasonality_prior_scale': 4.74064525937558,
 'seasonality_mode': 'additive',
 'dow_0_prior_scale': 0.24971569624965956,
 'dow_1_prior_scale': 0.7742986207572464,
 'dow_2_prior_scale': 4.434822164746875,
 'dow_3_prior_scale': 9.83790962625793,
 'dow_4_prior_scale': 2.6902178091658846,
 'dow_5_prior_scale': 2.2228449402677217,
 'dow_6_prior_scale': 4.773890069642158}

22:40:30 - cmdstanpy - INFO - Chain [1] start processing
22:41:45 - cmdstanpy - INFO - Chain [1] done processing
[I 2023-12-20 22:41:46,249] Trial 2 finished with value: 1309.8393607984708 and parameters: {'changepoint_prior_scale': 5.15802503854531, 'seasonality_prior_scale': 4.74064525937558, 'seasonality_mode': 'additive', 'dow_0_prior_scale': 0.24971569624965956, 'dow_1_prior_scale': 0.7742986207572464, 'dow_2_prior_scale': 4.434822164746875, 'dow_3_prior_scale': 9.83790962625793, 'dow_4_prior_scale': 2.6902178091658846, 'dow_5_prior_scale': 2.2228449402677217, 'dow_6_prior_scale': 4.773890069642158}. Best is trial 1 with value: 926.0657337456925.


{'changepoint_prior_scale': 7.0087096751807705,
 'seasonality_prior_scale': 4.391911528615275,
 'seasonality_mode': 'additive',
 'dow_0_prior_scale': 0.7520179943471652,
 'dow_1_prior_scale': 0.12731654579269636,
 'dow_2_prior_scale': 9.19259439784826,
 'dow_3_prior_scale': 8.727295897707606,
 'dow_4_prior_scale': 3.567394958523806,
 'dow_5_prior_scale': 8.005355119977136,
 'dow_6_prior_scale': 0.9115659927110578}

['../models/prophet_study.pkl']