In [None]:
import pandas as pd
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
import statsmodels
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf
import numpy as np
from statsmodels.graphics.tsaplots import plot_predict
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Load the hourly readings; only the timestamp ('start') and the value ('mean') are read.
df = pd.read_csv('entities-2023-09-02_17 33 41.csv', usecols=['start', 'mean'])
df['start'] = pd.to_datetime(df['start'], format='%Y-%m-%d %H:%M:%S')
df = df.set_index('start')
# Put the series on a regular hourly grid; hours absent from the CSV become NaN rows.
df = df.asfreq(freq='H')
# Derive day-of-week from the regular index *after* asfreq. Computing it before
# (as a column) leaves NaN in every gap-filled row, and the column is later used
# as the exogenous regressor, where missing values are not accepted.
df['dow'] = df.index.dayofweek
# Keep only the most recent 7 weeks (7 * 7 * 24 hourly rows).
df = df[['mean', 'dow']].tail(7*7*24)

In [None]:

# Walk-forward evaluation:
#   1. fit SARIMAX on the first four weeks of hourly data,
#   2. for each of the following 21 days, forecast the next 24 hours,
#   3. record the forecast and its interval in `df`,
#   4. extend the fitted model with that day's observations (re-estimating parameters).

# Start from empty forecast columns so re-running the cell does not keep stale values.
for col in ("pred", "pred_l", "pred_u"):
    df[col] = np.nan

# Positional offset of the first forecast row: four weeks of hourly rows are used for training.
start = 4*7*24

train_endog = df['mean'].iloc[:start]
train_exog = df['dow'].iloc[:start]
model = sm.tsa.statespace.SARIMAX(
    train_endog,
    exog=train_exog,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 24),
)
results = model.fit()

for x in range(7*3):
    # Positional bounds [slice_start, slice_end) of the day being forecast.
    slice_start = start + 24 * x
    slice_end = slice_start + 24
    day_endog = df['mean'].iloc[slice_start:slice_end]
    day_exog = df['dow'].iloc[slice_start:slice_end]

    # `end` is inclusive for get_prediction, hence slice_end - 1.
    pred = results.get_prediction(start=slice_start, end=slice_end - 1, dynamic=True, exog=day_exog)

    # DataFrame.update aligns on the index, so only this day's rows are filled in.
    df.update(pred.predicted_mean.rename("pred"))
    # alpha is the significance level, so 0.7 produces a 30% interval.
    # NOTE(review): confirm this narrow band is intended rather than a 70% interval (alpha=0.3).
    conf_int = pred.conf_int(alpha=0.7)
    df.update(conf_int.rename(columns={"upper mean": "pred_u", "lower mean": "pred_l"}))

    # Feed the realised day back into the model and re-fit before forecasting the next day.
    results = results.append(day_endog, exog=day_exog, refit=True)


In [None]:
# Not referenced by anything visible in this cell; retained in case later cells use it.
sum_error = 0

# Everything from positional row `start` onwards is the evaluated (forecast) window.
eval_df = df.iloc[start:]

# Compare actual vs. forecast daily totals, then the hourly error.
daily_sums = eval_df.resample('D').sum()
print(daily_sums[['mean', 'pred']])
print(mean_squared_error(daily_sums['mean'], daily_sums['pred']))
print(mean_absolute_error(daily_sums['mean'], daily_sums['pred']))
print(mean_absolute_error(eval_df['mean'], eval_df['pred']))

# pyplot.subplots already creates the figure; a separate pyplot.figure() call
# beforehand would only leave a stray empty figure in the output.
fig, ax = pyplot.subplots(figsize=(15, 5))
eval_df['mean'].plot(ax=ax)
eval_df['pred'].plot(ax=ax)
# The difference series has no name, so give it an explicit legend label.
(eval_df['mean'] - eval_df['pred']).rolling(12).mean().plot(ax=ax, label='error (12h rolling mean)')

# Shade the forecast interval; only the evaluated window has interval values.
ax.fill_between(eval_df.index, eval_df['pred_l'], eval_df['pred_u'], color='k', alpha=0.1)
ax.legend()
pyplot.show()

In [None]:
from skforecast.model_selection_sarimax import grid_search_sarimax
from skforecast.ForecasterSarimax import ForecasterSarimax
# NOTE: this rebinds the name `ARIMA`, which the notebook's first cell imported
# from statsmodels; from here on `ARIMA` refers to pmdarima's class.
from pmdarima import ARIMA

# Base forecaster; the orders given here are the starting configuration that the
# grid search below varies.
forecaster = ForecasterSarimax(
    regressor=ARIMA(order=(1, 1, 1), seasonal_order=(1, 1, 1, 24), maxiter=200),
)

# Candidate (p, d, q) triples, and the same triples reused as seasonal (P, D, Q)
# with a 24-step season.
orders = [
    (p, d, q)
    for p in [0, 1, 2, 12]
    for d in [0, 1, 2]
    for q in [0, 1, 2]
]
seasonal_orders = [(p, d, q, 24) for (p, d, q) in orders]

# NOTE(review): the two lists are passed as independent grid axes, so the search
# presumably evaluates every order x seasonal_order combination rather than
# matched pairs -- confirm against the skforecast documentation.
param_grid = {
    'order': orders,
    'seasonal_order': seasonal_orders,
    # 'trend': [None]
}

# Backtested grid search: 24-step forecasts, initial training window of four
# weeks, refitting at each fold; the best configuration is returned.
results_grid = grid_search_sarimax(
    forecaster=forecaster,
    y=df['mean'],
    exog=df['dow'],
    param_grid=param_grid,
    steps=24,
    refit=True,
    metric='mean_absolute_error',
    initial_train_size=24*7*4,
    fixed_train_size=False,
    return_best=True,
    n_jobs=2,
    verbose=False,
    show_progress=True,
)

results_grid.head(5)