In [None]:
# Numerics / dataframes, plus the stdlib modules needed for the sys.path tweak below.
import numpy as np
import pandas as pd
import os
import sys
# Make the directory that holds `load_forecaster.py` importable. The path is
# relative, so it is resolved against the notebook's current working directory.
module_path = os.path.abspath(os.path.join('../custom_components/solar_battery_forecast/brains/'))
if module_path not in sys.path:
    sys.path.append(module_path)

# NOTE(review): STLForecast, ARIMA, SARIMAX, macrodata and ExponentialSmoothing
# are not referenced in the cells reviewed here; they may be leftovers from
# earlier experiments or be used further down the notebook.
from statsmodels.tsa.api import STLForecast
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.datasets import macrodata
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.statespace.exponential_smoothing import ExponentialSmoothing
import xgboost as xgb

# Silence FutureWarning messages for the whole kernel session.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Project-local forecaster; only importable because of the sys.path entry added above.
from load_forecaster import LoadForecaster
# Remove the row limit when printing DataFrames (entire frames are printed).
pd.set_option('display.max_rows', None)

# Load the hourly 'mean' series from the exported statistics CSV.
# Alternative export used previously: 'entities-2023-09-06_10 17 37.csv'
df = (
    pd.read_csv('entities-2023-09-02_17 33 41.csv', usecols=['start', 'mean'])
    .assign(start=lambda raw: pd.to_datetime(raw['start'], format='%Y-%m-%d %H:%M:%S'))
    .set_index('start')
    .asfreq(freq='H')      # regular hourly grid; hours missing from the file become NaN rows
    [['mean']]
    .head(9*7*24)          # keep the first 9 weeks of hourly rows
)

# Exogenous regressors on the same hourly timestamps as `df`, indexed by 'date'.
exog = (
    pd.DataFrame({'date': df.index})
    .set_index('date')
    .asfreq(freq='H')
)

# First and second Fourier harmonics of the day of week.
# NOTE(review): the columns are named "365" but the period used is 7 days
# (dayofweek / 7); names are kept as-is because consumers may rely on them.
weekday_angle = 2 * np.pi * exog.index.dayofweek / 7
exog['sin365'] = np.sin(weekday_angle)
exog['cos365'] = np.cos(weekday_angle)
exog['sin365_2'] = np.sin(2 * weekday_angle)
exog['cos365_2'] = np.cos(2 * weekday_angle)

train_period = 24 * 7 *6     # rows per training window: 6 weeks of hourly samples
FORECAST_HORIZON = 24        # rows forecast per window; also the stride between windows
N_LAGS = 48                  # number of lagged residuals used as XGBoost features


def add_residual_features(res, n_lags=N_LAGS):
    """Recompute the residual-model features on ``res`` in place and return it.

    The same function is used when building the training matrix and at every
    step of the recursive forecast, so both always see identical features.

    Parameters
    ----------
    res : pandas.DataFrame
        Must have a DatetimeIndex and a 'res' column holding the residuals.
    n_lags : int
        Number of lag columns ('lag_1' .. 'lag_<n_lags>') to create.

    Returns
    -------
    pandas.DataFrame
        The same ``res`` object, with 'hour', 'dayofweek' and the lag columns
        added or overwritten.
    """
    res['hour'] = res.index.hour
    res['dayofweek'] = res.index.dayofweek
    for lag in range(1, n_lags + 1):
        res[f"lag_{lag}"] = res['res'].shift(lag)
    return res


# Without at least one full window the loop below never runs and the final
# 'predicted' lookup would fail with an unhelpful KeyError.
if len(df) <= train_period:
    raise ValueError(
        f"Need more than {train_period} rows to run the backtest, got {len(df)}"
    )

# Feature columns fed to the residual model, in a fixed order.
exogs = ['hour', 'dayofweek'] + [f"lag_{lag}" for lag in range(1, N_LAGS + 1)]

# Rolling-origin evaluation: slide the training window forward one horizon at
# a time and forecast the rows immediately after it.
for train_start in range(0, len(df) - train_period, FORECAST_HORIZON):
    train_end = train_start + train_period
    print(f"Training from {train_start} - {train_end - 1}: {df.iloc[train_start].name} - {df.iloc[train_end - 1].name}")

    # NOTE(review): from the second window on, `df` also carries the forecast
    # columns merged below - confirm LoadForecaster.predict ignores them.
    f = LoadForecaster()
    prediction, res = f.predict(
        df.iloc[train_start:train_end].copy(),
        exog.iloc[train_start:train_end],
        exog.iloc[train_end:train_end + FORECAST_HORIZON],
    )
    # Merge the forecast columns into df; values already present in df win.
    df = df.combine_first(prediction)

    # Append an empty row for the first forecast timestamp so that its lag
    # features can be derived from the training residuals.
    res.loc[prediction.index[0], 'res'] = np.nan
    add_residual_features(res)

    # Train on every row except the just-appended (unknown) one.
    X_train = res[:-1][exogs]
    y_train = res[:-1]['res']

    model = xgb.XGBRegressor(verbosity=0)
    model.fit(X_train, y_train, verbose=False)

    # Recursive multi-step forecast: predict the residual for the last row,
    # write it back, append the next timestamp and refresh the features.
    for step in range(FORECAST_HORIZON):
        target_time = prediction.index[step]
        res_pred = model.predict(res[-1:][exogs])
        df.loc[target_time, 'xg_res'] = res_pred[0]
        if step < FORECAST_HORIZON - 1:
            # Fill in this residual (where previously there was nan)
            res.loc[target_time, 'res'] = res_pred[0]
            # Add a new row for the next timestamp to forecast
            res.loc[prediction.index[step + 1], 'res'] = np.nan
            add_residual_features(res)

# Base forecast corrected by the predicted residual.
df['predicted_xg'] = df['predicted'] + df['xg_res']
# print(df[['mean', 'predicted', 'xg_res', 'predicted_xg']])

In [None]:
def print_forecast_errors(label, hourly, daily, column):
    """Print error metrics of forecast ``column`` against the 'mean' column.

    Prints, in order: ``label``, the MAE on the hourly rows, the MSE on the
    daily totals and the MAE on the daily totals.
    """
    print(label)
    print(mean_absolute_error(hourly['mean'], hourly[column]))
    print(mean_squared_error(daily['mean'], daily[column]))
    print(mean_absolute_error(daily['mean'], daily[column]))


# Show the last 48 rows of the first training window plus everything after it.
plot_window = df.iloc[train_period - 48:]

# pyplot.subplots() already creates its own figure; calling pyplot.figure()
# beforehand only produced an extra, empty figure in the output.
fig, ax = pyplot.subplots(figsize=(15, 5))
plot_window['mean'].plot(ax=ax)
plot_window['predicted'].plot(ax=ax)
# Shade the band between the forecaster's lower and upper bounds.
ax.fill_between(plot_window.index, plot_window['predicted_lower'], plot_window['predicted_upper'], color='k', alpha=0.1)
ax.set_title('Actual vs. base forecast')
ax.legend()
pyplot.show()

fig, ax = pyplot.subplots(figsize=(15, 5))
plot_window['mean'].plot(ax=ax)
plot_window['predicted_xg'].plot(ax=ax)
ax.set_title('Actual vs. XGBoost-corrected forecast')
ax.legend()
pyplot.show()

# Score only the rows after the first training window.
hourly = df.iloc[train_period:]
daily_sums = hourly.resample('D').sum()
print(daily_sums[['mean', 'predicted', 'predicted_xg', 'xg_res']])
print_forecast_errors("Predicted", hourly, daily_sums, 'predicted')
print_forecast_errors("XG", hourly, daily_sums, 'predicted_xg')

In [None]:
# Hourly MAE of the XGBoost-corrected forecast over the evaluation rows.
print(mean_absolute_error(df[train_period:]['mean'], df[train_period:]['predicted_xg']))

# Residuals of the base forecast (actual - predicted) on the evaluation rows.
# Built as a fresh DataFrame rather than as a column-subset selection of
# another frame, so adding the lag columns below does not raise
# SettingWithCopyWarning.
evaluation = df.iloc[train_period:]
res = pd.DataFrame({'res': evaluation['mean'] - evaluation['predicted']})

pyplot.figure()
res['res'].plot()

# Lagged copies of the residual (1..24 rows back) for inspection.
for lag in range(1, 25):
    res[f'lag_{lag}'] = res['res'].shift(lag)

print(res)