In [11]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# TS
from prophet import Prophet

# User Imports
import sys

sys.path.append("..")
from src import util

# modeling
import optuna
from sklearn.metrics import mean_squared_error as mse
import joblib

%load_ext lab_black
%load_ext autoreload
%autoreload 2

plt.rcParams["figure.figsize"] = (14, 7)

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
load = util.read_load("../data/load_hist_data.csv")
weather = util.read_weather("../data/weather_data.csv")
weather_features = util.featurize_weather(
    weather, lags=[24]
)  # 24 hours = 1 day lagged weather
mod_data = util.create_mod_data(load, weather_features)

In [3]:
mod_data.tail()

Unnamed: 0,ds,y,school_break,dow_0,dow_1,dow_2,dow_3,dow_4,dow_5,dow_6,min_station_temp,max_station_temp,mean_station_temp,lag_24__min_station_temp,lag_24__max_station_temp,lag_24__mean_station_temp
26272,2007-12-31 19:00:00,1269,True,True,False,False,False,False,False,False,37.0,50.0,42.678571,45.0,65.0,52.178571
26273,2007-12-31 20:00:00,1255,True,True,False,False,False,False,False,False,30.0,48.0,41.785714,41.0,63.0,51.964286
26274,2007-12-31 21:00:00,1237,True,True,False,False,False,False,False,False,34.0,49.0,41.107143,42.0,63.0,52.035714
26275,2007-12-31 22:00:00,1212,True,True,False,False,False,False,False,False,32.0,49.0,40.571429,39.0,63.0,52.035714
26276,2007-12-31 23:00:00,1174,True,True,False,False,False,False,False,False,32.0,48.0,40.607143,38.0,63.0,51.535714


In [4]:
from sktime.forecasting.model_selection import temporal_train_test_split

train_data, test_data = temporal_train_test_split(mod_data, test_size=1 / 3)
tune_data, test_data = temporal_train_test_split(test_data, test_size=1 / 3)

In [8]:
def objective(trial):
    params = {
        "changepoint_prior_scale": trial.suggest_float(
            "changepoint_prior_scale", 0.01, 10
        ),
        "seasonality_prior_scale": trial.suggest_float(
            "seasonality_prior_scale", 0.01, 10
        ),
        "seasonality_mode": trial.suggest_categorical(
            "seasonality_mode", ["additive", "multiplicative"]
        ),
        "dow_0_prior_scale": trial.suggest_float("dow_0_prior_scale", 0.01, 10),
        "dow_1_prior_scale": trial.suggest_float("dow_1_prior_scale", 0.01, 10),
        "dow_2_prior_scale": trial.suggest_float("dow_2_prior_scale", 0.01, 10),
        "dow_3_prior_scale": trial.suggest_float("dow_3_prior_scale", 0.01, 10),
        "dow_4_prior_scale": trial.suggest_float("dow_4_prior_scale", 0.01, 10),
        "dow_5_prior_scale": trial.suggest_float("dow_5_prior_scale", 0.01, 10),
        "dow_6_prior_scale": trial.suggest_float("dow_6_prior_scale", 0.01, 10),
    }
    display(params)

    m = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=False,  # added per dow
        # mcmc_samples=300,
        # seasonality_mode="multiplicative",  # "additive",  # "multiplicative",
        seasonality_mode=params["seasonality_mode"],
        changepoint_prior_scale=params["changepoint_prior_scale"],
        seasonality_prior_scale=params["seasonality_prior_scale"],
    )

    for i in range(7):
        dow_prior = params[f"dow_{i}_prior_scale"]
        m.add_seasonality(
            name=f"daily_dow{i}",
            period=1,
            fourier_order=4,
            condition_name=f"dow_{i}",
            prior_scale=dow_prior,
        )

    m.add_country_holidays(country_name="US")
    m.add_regressor("max_station_temp")
    m.add_regressor("min_station_temp")
    m.add_regressor("mean_station_temp")
    m.add_regressor("lag_24__min_station_temp")
    m.add_regressor("lag_24__max_station_temp")
    m.add_regressor("lag_24__mean_station_temp")

    m.fit(train_data)
    preds = m.predict(tune_data)
    rmse = mse(tune_data["y"], preds["yhat"], squared=False)
    return rmse


study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=3)

study.best_params

[I 2023-12-20 11:15:16,127] A new study created in memory with name: no-name-6147ace3-a0c6-4ace-90fa-e142ad004180


{'changepoint_prior_scale': 8.054490381265047,
 'seasonality_prior_scale': 8.561082628415585,
 'seasonality_mode': 'additive',
 'dow_0_prior_scale': 9.610498522521103,
 'dow_1_prior_scale': 6.273564818158533,
 'dow_2_prior_scale': 1.2421513615385664,
 'dow_3_prior_scale': 7.276835170768597,
 'dow_4_prior_scale': 7.056032644870886,
 'dow_5_prior_scale': 5.326932873410469,
 'dow_6_prior_scale': 0.4102098984096688}

11:15:18 - cmdstanpy - INFO - Chain [1] start processing
11:16:25 - cmdstanpy - INFO - Chain [1] done processing
[I 2023-12-20 11:16:26,023] Trial 0 finished with value: 1104.4511503136177 and parameters: {'changepoint_prior_scale': 8.054490381265047, 'seasonality_prior_scale': 8.561082628415585, 'seasonality_mode': 'additive', 'dow_0_prior_scale': 9.610498522521103, 'dow_1_prior_scale': 6.273564818158533, 'dow_2_prior_scale': 1.2421513615385664, 'dow_3_prior_scale': 7.276835170768597, 'dow_4_prior_scale': 7.056032644870886, 'dow_5_prior_scale': 5.326932873410469, 'dow_6_prior_scale': 0.4102098984096688}. Best is trial 0 with value: 1104.4511503136177.


{'changepoint_prior_scale': 0.8915501793366585,
 'seasonality_prior_scale': 3.1326350459192716,
 'seasonality_mode': 'multiplicative',
 'dow_0_prior_scale': 1.063526928591769,
 'dow_1_prior_scale': 9.037118335838306,
 'dow_2_prior_scale': 0.09636312971745115,
 'dow_3_prior_scale': 6.667609111443309,
 'dow_4_prior_scale': 1.8929555780509728,
 'dow_5_prior_scale': 1.0665655452081648,
 'dow_6_prior_scale': 3.946968529509098}

11:16:28 - cmdstanpy - INFO - Chain [1] start processing
11:17:28 - cmdstanpy - INFO - Chain [1] done processing
[I 2023-12-20 11:17:29,063] Trial 1 finished with value: 2955.1451014011295 and parameters: {'changepoint_prior_scale': 0.8915501793366585, 'seasonality_prior_scale': 3.1326350459192716, 'seasonality_mode': 'multiplicative', 'dow_0_prior_scale': 1.063526928591769, 'dow_1_prior_scale': 9.037118335838306, 'dow_2_prior_scale': 0.09636312971745115, 'dow_3_prior_scale': 6.667609111443309, 'dow_4_prior_scale': 1.8929555780509728, 'dow_5_prior_scale': 1.0665655452081648, 'dow_6_prior_scale': 3.946968529509098}. Best is trial 0 with value: 1104.4511503136177.


{'changepoint_prior_scale': 1.006703961654579,
 'seasonality_prior_scale': 7.827310458668701,
 'seasonality_mode': 'multiplicative',
 'dow_0_prior_scale': 5.081860187964938,
 'dow_1_prior_scale': 1.2047136147930617,
 'dow_2_prior_scale': 5.807991068099874,
 'dow_3_prior_scale': 3.860317673115575,
 'dow_4_prior_scale': 4.931903016137631,
 'dow_5_prior_scale': 8.976282534214771,
 'dow_6_prior_scale': 3.1675613059324847}

11:17:31 - cmdstanpy - INFO - Chain [1] start processing
11:18:39 - cmdstanpy - INFO - Chain [1] done processing
[I 2023-12-20 11:18:40,222] Trial 2 finished with value: 2905.3715363859787 and parameters: {'changepoint_prior_scale': 1.006703961654579, 'seasonality_prior_scale': 7.827310458668701, 'seasonality_mode': 'multiplicative', 'dow_0_prior_scale': 5.081860187964938, 'dow_1_prior_scale': 1.2047136147930617, 'dow_2_prior_scale': 5.807991068099874, 'dow_3_prior_scale': 3.860317673115575, 'dow_4_prior_scale': 4.931903016137631, 'dow_5_prior_scale': 8.976282534214771, 'dow_6_prior_scale': 3.1675613059324847}. Best is trial 0 with value: 1104.4511503136177.


{'changepoint_prior_scale': 8.054490381265047,
 'seasonality_prior_scale': 8.561082628415585,
 'seasonality_mode': 'additive',
 'dow_0_prior_scale': 9.610498522521103,
 'dow_1_prior_scale': 6.273564818158533,
 'dow_2_prior_scale': 1.2421513615385664,
 'dow_3_prior_scale': 7.276835170768597,
 'dow_4_prior_scale': 7.056032644870886,
 'dow_5_prior_scale': 5.326932873410469,
 'dow_6_prior_scale': 0.4102098984096688}

In [15]:
joblib.dump(study, "../models/prophet_study.pkl")

['../models/prophet_study.pkl']