In [None]:
from pathlib import Path 
import sys 
# Put the parent of the current working directory on the import path so the
# `utils` package imported in the next cell can be resolved from there.
sys.path.append(str(Path.cwd().parent))

In [None]:
from utils import predictors, data_loading as dl
from prophet import Prophet
import pandas as pd
import numpy as np

import itertools
from prophet.diagnostics import performance_metrics, cross_validation

In [None]:
# Build one DataLoader per benchmark series; the tuning cells below pick one of
# these and read its `y_train_df` / `y_val_df` frames.
sunspots, electricity, mackey_glass, temperature = (
    dl.DataLoader(dl.DATASET.SUNSPOTS),
    dl.DataLoader(dl.DATASET.ELECTRICITY),
    dl.DataLoader(dl.DATASET.MACKEY_GLASS),
    dl.DataLoader(dl.DATASET.TEMPERATURE),
)

In [None]:
# Grid search over Prophet hyper-parameters for the Mackey-Glass series, scored
# by cross-validated RMSE on the validation window.
dataset = mackey_glass

train = dataset.y_train_df
val = dataset.y_val_df
# The model is fitted on train + validation; the cutoffs below decide how much
# of that history each cross-validation fold is allowed to see.
cv_data = pd.concat([train, val], ignore_index=True)

horizon = 200  # forecast horizon, in days

# Prophet's cross_validation raises if any cutoff is later than
# (last timestamp - horizon). Bound the cutoffs by time rather than by a row
# offset: `val.iloc[-horizon]` only corresponds to `horizon` days when there is
# exactly one row per day, and even then it lies one day past that limit.
last_cutoff = pd.to_datetime(cv_data["ds"]).max() - pd.Timedelta(days=horizon)
cutoffs = pd.date_range(start=val.iloc[0]["ds"], end=last_cutoff, freq='80D')
assert len(cutoffs) > 0, "validation window is shorter than the forecast horizon"

param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    'seasonality_mode': ['additive', 'multiplicative'],
    's': [50, 100, 200],  # candidate periods (days) for the custom seasonality
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = []  # Store the RMSEs for each params here

# Use cross validation to evaluate all parameters
for params in all_params:
    # `s` is not a Prophet constructor argument: it is the period of the extra
    # seasonality added below, so it is filtered out of the kwargs. `params`
    # itself stays intact because it is reused for the results table.
    s = params["s"]
    params_to_model = {key: value for key, value in params.items() if key != "s"}
    m = Prophet(**params_to_model)
    m.add_seasonality(name=f"{s} days", period=s, fourier_order=10)
    m.fit(cv_data)  # Fit model with given params
    df_cv = cross_validation(m, cutoffs=cutoffs, horizon=f'{horizon} days', parallel="processes")
    # rolling_window=1 yields a single aggregated row, hence `.values[0]`.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)

best_params = all_params[np.argmin(rmses)]
print(best_params)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/bxehce4_.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/csw_pk4z.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=29168', 'data', 'file=/tmp/tmpbwzdfy0n/bxehce4_.json', 'init=/tmp/tmpbwzdfy0n/csw_pk4z.json', 'output', 'file=/tmp/tmpbwzdfy0n/prophet_modelpzaxe3e9/prophet_model-20230507200008.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:00:08 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:00:08 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fc9b94ff880>
DEBUG:cmdstanpy:input tempfil

    changepoint_prior_scale  seasonality_prior_scale seasonality_mode    s  \
0                     0.001                     0.01         additive   50   
1                     0.001                     0.01         additive  100   
2                     0.001                     0.01         additive  200   
3                     0.001                     0.01   multiplicative   50   
4                     0.001                     0.01   multiplicative  100   
..                      ...                      ...              ...  ...   
91                    0.500                    10.00         additive  100   
92                    0.500                    10.00         additive  200   
93                    0.500                    10.00   multiplicative   50   
94                    0.500                    10.00   multiplicative  100   
95                    0.500                    10.00   multiplicative  200   

        rmse  
0   0.146668  
1   0.120540  
2   0.125485  
3  

In [None]:
# Grid search over Prophet hyper-parameters for the sunspots series, scored by
# cross-validated RMSE on the validation window.
dataset = sunspots

train = dataset.y_train_df
val = dataset.y_val_df
# The model is fitted on train + validation; the cutoffs below decide how much
# of that history each cross-validation fold is allowed to see.
cv_data = pd.concat([train, val], ignore_index=True)

horizon = 3650  # forecast horizon, in days

# Prophet's cross_validation raises if any cutoff is later than
# (last timestamp - horizon). Derive the bound from the horizon itself instead
# of a hard-coded row offset (`val.iloc[-120]`), which is tied to the row
# spacing of the data and is not guaranteed to respect that limit.
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME';
# left unchanged here to stay compatible with the pandas version in use.
last_cutoff = pd.to_datetime(cv_data["ds"]).max() - pd.Timedelta(days=horizon)
cutoffs = pd.date_range(start=val.iloc[0]["ds"], end=last_cutoff, freq='60M')
assert len(cutoffs) > 0, "validation window is shorter than the forecast horizon"

param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    'seasonality_mode': ['additive', 'multiplicative'],
    # candidate periods (days) for the custom seasonality: 10 to 11 years
    's': [int(10 * 365), int(10.5 * 365), int(10.8 * 365), int(11 * 365)],
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = []  # Store the RMSEs for each params here

# Use cross validation to evaluate all parameters
for params in all_params:
    # `s` is not a Prophet constructor argument: it is the period of the extra
    # seasonality added below, so it is filtered out of the kwargs. `params`
    # itself stays intact because it is reused for the results table.
    s = params["s"]
    params_to_model = {key: value for key, value in params.items() if key != "s"}
    # Weekly and daily seasonalities are switched off explicitly; only the
    # custom multi-year component (plus Prophet's remaining defaults) is used.
    m = Prophet(**params_to_model, weekly_seasonality=False, daily_seasonality=False)
    m.add_seasonality(name=f"{s} days", period=s, fourier_order=10)
    m.fit(cv_data)  # Fit model with given params
    df_cv = cross_validation(m, cutoffs=cutoffs, horizon=f'{horizon} days', parallel="processes")
    # rolling_window=1 yields a single aggregated row, hence `.values[0]`.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)

best_params = all_params[np.argmin(rmses)]
print(best_params)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
20:13:51 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:13:52 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fc9b92f3eb0>
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/oddffdsa.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/vayocrg_.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/absdsfvn.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/ytbd_rp_.json
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=44654', 'data', 'file=/tmp/tmpbwzdfy0n/vayocrg_.json', 'init=/tmp/tmpbwzdfy0n/absdsfvn.json', 'output

     changepoint_prior_scale  seasonality_prior_scale seasonality_mode     s  \
0                      0.001                     0.01         additive  3650   
1                      0.001                     0.01         additive  3832   
2                      0.001                     0.01         additive  3942   
3                      0.001                     0.01         additive  4015   
4                      0.001                     0.01   multiplicative  3650   
..                       ...                      ...              ...   ...   
123                    0.500                    10.00         additive  4015   
124                    0.500                    10.00   multiplicative  3650   
125                    0.500                    10.00   multiplicative  3832   
126                    0.500                    10.00   multiplicative  3942   
127                    0.500                    10.00   multiplicative  4015   

         rmse  
0    0.185491  
1    0.

In [None]:
# Grid search over Prophet hyper-parameters for the temperature series, scored
# by cross-validated RMSE on the validation window.
dataset = temperature

train = dataset.y_train_df
val = dataset.y_val_df
# The model is fitted on train + validation; the cutoffs below decide how much
# of that history each cross-validation fold is allowed to see.
cv_data = pd.concat([train, val], ignore_index=True)

horizon = 150  # forecast horizon, in days

# Prophet's cross_validation raises if any cutoff is later than
# (last timestamp - horizon). Bound the cutoffs by time rather than by a row
# offset: `val.iloc[-150]` repeats the horizon as a magic number, only matches
# it when there is exactly one row per day, and even then lies one day past
# that limit.
last_cutoff = pd.to_datetime(cv_data["ds"]).max() - pd.Timedelta(days=horizon)
cutoffs = pd.date_range(start=val.iloc[0]["ds"], end=last_cutoff, freq='70D')
assert len(cutoffs) > 0, "validation window is shorter than the forecast horizon"

param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    'seasonality_mode': ['additive', 'multiplicative'],
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = []  # Store the RMSEs for each params here

# Use cross validation to evaluate all parameters
for params in all_params:
    # Every grid key is a Prophet constructor argument here, so the combination
    # is passed straight through.
    m = Prophet(**params)
    m.fit(cv_data)  # Fit model with given params
    df_cv = cross_validation(m, cutoffs=cutoffs, horizon=f'{horizon} days', parallel="processes")
    # rolling_window=1 yields a single aggregated row, hence `.values[0]`.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)

best_params = all_params[np.argmin(rmses)]
print(best_params)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/dtpmw8kd.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/ba7auwqt.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=68680', 'data', 'file=/tmp/tmpbwzdfy0n/dtpmw8kd.json', 'init=/tmp/tmpbwzdfy0n/ba7auwqt.json', 'output', 'file=/tmp/tmpbwzdfy0n/prophet_modelzsoalad4/prophet_model-20230507203036.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:30:36 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:30:37 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Applying in parallel with <concurrent.futures.process.ProcessPoolExecutor object at 0x7fc9b94c38b0>
DEBUG:cmdstanpy:input tempfil

    changepoint_prior_scale  seasonality_prior_scale seasonality_mode  \
0                     0.001                     0.01         additive   
1                     0.001                     0.01   multiplicative   
2                     0.001                     0.10         additive   
3                     0.001                     0.10   multiplicative   
4                     0.001                     1.00         additive   
5                     0.001                     1.00   multiplicative   
6                     0.001                    10.00         additive   
7                     0.001                    10.00   multiplicative   
8                     0.010                     0.01         additive   
9                     0.010                     0.01   multiplicative   
10                    0.010                     0.10         additive   
11                    0.010                     0.10   multiplicative   
12                    0.010                     1.0

In [None]:
# Grid search over Prophet hyper-parameters for the electricity series, scored
# by cross-validated RMSE on the validation window.
dataset = electricity

train = dataset.y_train_df
val = dataset.y_val_df
# The model is fitted on train + validation; the cutoffs below decide how much
# of that history each cross-validation fold is allowed to see.
cv_data = pd.concat([train, val], ignore_index=True)

horizon = 14  # forecast horizon, in days

# Prophet's cross_validation raises if any cutoff is later than
# (last timestamp - horizon). Bound the cutoffs by time rather than by a row
# offset: `val.iloc[-14*24]` hard-codes an assumption of 24 rows per day and,
# when that holds, still lies one step past that limit.
last_cutoff = pd.to_datetime(cv_data["ds"]).max() - pd.Timedelta(days=horizon)
cutoffs = pd.date_range(start=val.iloc[0]["ds"], end=last_cutoff, freq='14D')
assert len(cutoffs) > 0, "validation window is shorter than the forecast horizon"

param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    'seasonality_mode': ['additive', 'multiplicative'],
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = []  # Store the RMSEs for each params here

# Use cross validation to evaluate all parameters
for params in all_params:
    # Every grid key is a Prophet constructor argument here, so the combination
    # is passed straight through.
    m = Prophet(**params)
    m.fit(cv_data)  # Fit model with given params
    df_cv = cross_validation(m, cutoffs=cutoffs, horizon=f'{horizon} days', parallel="processes")
    # rolling_window=1 yields a single aggregated row, hence `.values[0]`.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)

best_params = all_params[np.argmin(rmses)]
print(best_params)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
20:45:04 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:45:08 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/nqbbtldf.json
20:45:10 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/39fx00mo.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpbwzdfy0n/qkwnfnk8.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=33316', 'data', 'file=/tmp/tmpbwzdfy0n/nqbbtldf.json', 'init=/tmp/tmpbwzdfy0n/qkwnfnk8.json', 'output', 'file=/tmp/tmpbwzdfy0n/prophet_modelhymx6dki/prophet_model-20230507204515.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:45:15 - cmdsta

    changepoint_prior_scale  seasonality_prior_scale seasonality_mode  \
0                     0.001                     0.01         additive   
1                     0.001                     0.01   multiplicative   
2                     0.001                     0.10         additive   
3                     0.001                     0.10   multiplicative   
4                     0.001                     1.00         additive   
5                     0.001                     1.00   multiplicative   
6                     0.001                    10.00         additive   
7                     0.001                    10.00   multiplicative   
8                     0.010                     0.01         additive   
9                     0.010                     0.01   multiplicative   
10                    0.010                     0.10         additive   
11                    0.010                     0.10   multiplicative   
12                    0.010                     1.0