In [None]:
import numpy as np
import pandas as pd
import os
import sys

from statsmodels.tsa.api import STLForecast
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.datasets import macrodata
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing
import xgboost as xgb

import warnings
# Suppress every FutureWarning for the rest of the session.
warnings.simplefilter('ignore', category=FutureWarning)

# Print frames in full rather than truncating long outputs.
pd.options.display.max_rows = None

# Load the hourly readings, index them by timestamp, snap them onto a regular
# hourly grid (hours absent from the file become NaN) and total them per
# calendar day; `sum` skips NaN, so missing hours contribute zero.
# NOTE(review): the row cap below is applied AFTER the daily resample, so
# 9 * 7 * 24 counts days, not hours - confirm whether 9 * 7 (nine weeks) was
# the intended limit.
df = (
    pd.read_csv(
        'entities-2023-09-02_17 33 41.csv',
        usecols=['start', 'mean'],
    )
    .assign(
        start=lambda raw: pd.to_datetime(raw['start'], format='%Y-%m-%d %H:%M:%S')
    )
    .set_index('start')
    .asfreq(freq='H')
    .resample('D')
    .sum()
    .loc[:, ['mean']]
    .head(9 * 7 * 24)
)

# Number of consecutive daily rows used to fit each model (six weeks).
train_period = 7 * 6

# Rolling-origin evaluation: slide a fixed-size training window over the
# series one row at a time, fit STL + SARIMAX on the window and forecast the
# single row that immediately follows it.
#
# The one-step forecasts are collected here and merged into `df` once after
# the loop. Merging inside the loop rebuilt the whole frame on every
# iteration (quadratic work) and rebound `df` while it was still being sliced
# for training.
#
# Tip: replace the range below with `range(1)` for a quick single-window run.
one_step_forecasts = []
for x in range(len(df) - train_period):
    train_start = x
    train_end = x + train_period  # exclusive end of window == row to predict
    print(f"Training from {train_start} - {train_end - 1}: {df.iloc[train_start].name} - {df.iloc[train_end - 1].name}")

    model = STLForecast(
        df.iloc[train_start:train_end]["mean"],
        SARIMAX,
        model_kwargs={
            "order": (4, 3, 4),
            "enforce_invertibility": False,
            "enforce_stationarity": False,
        },
    )
    results = model.fit(fit_kwargs={"disp": False, "warn_convergence": False})
    print(results.period)
    # For residual diagnostics: results.model_result.plot_diagnostics()

    # Predict exactly one step: the first timestamp after the training window.
    target_timestamp = df.index[train_end]
    model_prediction = results.get_prediction(
        start=target_timestamp,
        end=target_timestamp,
    )
    one_step_forecasts.append(model_prediction.predicted_mean)

# Attach all forecasts as a single "predicted" column aligned on the index.
# If no window fits (len(df) <= train_period) the column is still created,
# filled with NaN, so downstream code can rely on it being present.
if one_step_forecasts:
    predicted = pd.concat(one_step_forecasts).rename("predicted")
else:
    predicted = pd.Series(dtype=float, name="predicted")
df = df.join(predicted)



In [None]:
# Rows from `train_period` onwards: the first `train_period` rows only ever
# serve as training data, so they are excluded from plotting and scoring.
evaluation = df[train_period:]

# Plot observed vs. predicted values on one axis. `pyplot.subplots` already
# creates the figure, so there is no separate `pyplot.figure()` call (that
# opened an additional, empty figure).
fig, ax = pyplot.subplots(figsize=(15, 5))
evaluation['mean'].plot(ax=ax)
evaluation['predicted'].plot(ax=ax)
ax.legend()
pyplot.show()

# Print the compared values, then the mean absolute error followed by the
# mean squared error of the predictions.
print(evaluation[['mean', 'predicted']])
print("Predicted")
print(mean_absolute_error(evaluation['mean'], evaluation['predicted']))
print(mean_squared_error(evaluation['mean'], evaluation['predicted']))