In [None]:
import numpy as np
import pandas as pd
from statsmodels.tsa.api import STLForecast
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.datasets import macrodata
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing
# Do not truncate rows when frames are printed.
pd.set_option('display.max_rows', None)

# Load the export, keeping only the timestamp and the hourly mean columns.
# An alternative export that was used previously:
#   'entities-2023-09-06_10 17 37.csv'
raw = pd.read_csv('entities-2023-09-02_17 33 41.csv', usecols=['start', 'mean'])
raw['start'] = pd.to_datetime(raw['start'], format='%Y-%m-%d %H:%M:%S')

# Index by timestamp and conform to a regular hourly grid; hours that are
# absent from the file become NaN rows.
hourly = raw.set_index('start').asfreq(freq='H')

# Keep the most recent nine weeks of hourly rows (9 weeks * 7 days * 24 hours).
df = hourly[['mean']].tail(9*7*24)

# Day of week (0 = Monday) for each row.
df['dow'] = df.index.dayofweek

# The model is fitted on the log of the series; forecasts are mapped back
# with exp() after prediction.
df['mean_log'] = np.log(df['mean'])
# df['pred'] = np.nan


# Rolling-origin evaluation.
#
# Each pass fits an STL decomposition + SARIMAX model on a fixed-length window
# of the log series, forecasts the 24 hours that follow the window, and merges
# the back-transformed forecast ('pred') and its interval bounds
# ('pred_l', 'pred_u') into `df`. The window then advances by one day.
#
# Alternatives that were tried here previously: SARIMAX with order (1, 1, 1),
# ARIMA with order (2, 1, 0), and ExponentialSmoothing with trend=True.
train_period = 24 * 7 * 6  # six weeks of hourly observations

sarimax_options = {"order": (2, 1, 1), 'enforce_invertibility': False, 'enforce_stationarity': False}

for window_start in range(0, len(df) - train_period, 24):
    window_end = window_start + train_period

    # The slice below excludes `window_end`, so this timestamp is the first
    # hour being forecast rather than the last hour of training data.
    forecast_origin = df.iloc[window_end].name
    print(f"Training from {window_start} - {window_end}: {df.iloc[window_start].name} - {forecast_origin}")

    training_series = df[window_start:window_end]['mean_log']
    fitted = STLForecast(training_series, SARIMAX, model_kwargs=sarimax_options).fit()

    # Forecast origin plus the following 23 hours: 24 hourly steps in total.
    prediction = fitted.get_prediction(
        start=forecast_origin,
        end=forecast_origin + pd.DateOffset(hours=23),
    )

    # Undo the log transform on the point forecast.
    point_forecast = np.exp(prediction.predicted_mean.rename('pred').to_frame())

    # alpha=0.5 requests a 50% interval; the bounds are back-transformed too.
    bounds = prediction.conf_int(alpha=0.5)
    bounds = np.exp(bounds.rename(columns={"upper": "pred_u", "lower": "pred_l"}))

    # Forecast values take precedence; everything else is filled from `df`.
    df = point_forecast.combine_first(bounds).combine_first(df)




In [None]:
# Plot actual values against the rolling forecasts, starting one day before
# the first forecast, with the interval bounds drawn as a shaded band.
#
# `pyplot.subplots` already creates the figure; a preceding `pyplot.figure()`
# call would only leave an extra, empty figure behind to be rendered.
fig, ax = pyplot.subplots(figsize=(15, 5))
df[train_period-24:]['mean'].plot(ax=ax)
df[train_period-24:]['pred'].plot(ax=ax)
# Rows without a forecast hold NaN bounds, so nothing is shaded there.
ax.fill_between(df.index, df['pred_l'], df['pred_u'], color='k', alpha=0.1)
ax.legend()
pyplot.show()

# Score the forecasts on the rows after the first training window.
holdout = df[train_period:]

# Per-calendar-day totals of every column (actuals, forecasts, bounds, ...).
daily_sums = holdout.resample('D').sum()
print(daily_sums)

# Error on the daily totals: MSE first, then MAE.
for metric in (mean_squared_error, mean_absolute_error):
    print(metric(daily_sums['mean'], daily_sums['pred']))

# MAE on the individual hourly values.
print(mean_absolute_error(holdout['mean'], holdout['pred']))