# Update Refit and Evaluate Test

- load airlines
- define forecaster
- wrap in update*
- use evaluate
- visualise the forecasts as well to check

In [None]:
from sktime.datasets import load_airline
from sktime.utils.plotting import plot_series
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.stream import (
    UpdateEvery,
    UpdateRefitsEvery,
    DontUpdate
)
from sktime.forecasting.model_selection import (
    ExpandingWindowSplitter,
    SlidingWindowSplitter
)
from sktime.forecasting.model_evaluation import evaluate

import numpy as np
import pandas as pd


In [None]:
# Load the series returned by sktime's `load_airline`; it is stored in `y`
# and reused by the cells that follow.
y = load_airline()

# plotting for visualization
# (last expression of the cell, so its return value is shown as cell output)
plot_series(y)

In [None]:
import warnings
# Ignore every warning whose category is UserWarning from this point on
# (presumably to keep the outputs of the evaluation cells readable).
warnings.filterwarnings("ignore", category=UserWarning)

need to replace y's period index with a plain integer index for this to run

In [None]:
# Discard the existing index and replace it with the default integer index;
# drop=True prevents the old index from being kept as a column.
y = y.reset_index(drop=True)
# Bare expression: displays the re-indexed series as the cell output.
y

doesn't seem to work anyway

the naive forecaster runs with UpdateEvery

- updates every time whilst in evaluate with strategy='update'
- it should be running with update_params=False every three intervals

In [None]:
# Naive forecaster wrapped in UpdateEvery with an integer update interval of 3.
# Timedelta-based alternative that was also tried:
# up_mod = UpdateEvery(mod, update_interval=pd.Timedelta(31, "D"))
mod = NaiveForecaster()
up_mod = UpdateEvery(mod, update_interval=3)

# Horizons 1..6 on an expanding window: 12 initial observations, growing by
# one observation per split.
# Sliding-window alternative:
# cv = SlidingWindowSplitter(fh=fh, window_length=12, step_length=1)
fh = np.arange(1, 7)
cv = ExpandingWindowSplitter(fh=fh, initial_window=12, step_length=1)

# Backtest with the "update" strategy. return_data=True is needed because the
# plot below reads the per-split forecasts from the "y_pred" column.
df = evaluate(
    forecaster=up_mod,
    y=y,
    cv=cv,
    strategy="update",
    return_data=True,
    error_score="raise",
)

# Overlay the forecasts of the first six backtests on the start of the series;
# only the true series gets a marker.
n_backtests = 6
backtest_preds = [df["y_pred"].iloc[k] for k in range(n_backtests)]
fig, ax = plot_series(
    y.iloc[1:20],
    *backtest_preds,
    markers=["o"] + [""] * n_backtests,
    labels=["y_true"] + [f"y_pred (Backtest {k})" for k in range(n_backtests)],
)
ax.legend()


Trying to use naive forecaster with UpdateRefitsEvery

- raises a bug here

In [None]:
# Naive forecaster wrapped in UpdateRefitsEvery with refit_interval=3.
# UpdateEvery variant with a Timedelta interval, kept for reference:
# up_mod = UpdateEvery(mod, update_interval=pd.Timedelta(31, "D"))
mod = NaiveForecaster()
up_mod = UpdateRefitsEvery(mod, refit_interval=3)

# Horizons 1..6 on an expanding window: 12 initial observations, growing by
# one observation per split.
fh = np.arange(1, 7)
cv = ExpandingWindowSplitter(fh=fh, initial_window=12, step_length=1)

# Backtest with the "update" strategy. return_data=True is needed because the
# plot below reads the per-split forecasts from the "y_pred" column.
df = evaluate(
    forecaster=up_mod,
    y=y,
    cv=cv,
    strategy="update",
    return_data=True,
    error_score="raise",
)

# Overlay the forecasts of the first six backtests on the start of the series;
# only the true series gets a marker.
n_backtests = 6
backtest_preds = [df["y_pred"].iloc[k] for k in range(n_backtests)]
fig, ax = plot_series(
    y.iloc[1:20],
    *backtest_preds,
    markers=["o"] + [""] * n_backtests,
    labels=["y_true"] + [f"y_pred (Backtest {k})" for k in range(n_backtests)],
)
ax.legend()

let's try with an ARIMA model

In [None]:
from sktime.forecasting.arima import ARIMA

- this seems to run as expected
- the model is only updated with the most recent data every three intervals

In [None]:
# ARIMA with order (1, 0, 0), wrapped in UpdateEvery with an integer update
# interval of 3.
mod = ARIMA(order=(1, 0, 0))
up_mod = UpdateEvery(mod, update_interval=3)

# Horizons 1..6 on an expanding window: 12 initial observations, growing by
# one observation per split.
fh = np.arange(1, 7)
cv = ExpandingWindowSplitter(fh=fh, initial_window=12, step_length=1)

# Backtest with the "update" strategy. return_data=True is needed because the
# plot below reads the per-split forecasts from the "y_pred" column.
df = evaluate(
    forecaster=up_mod,
    y=y,
    cv=cv,
    strategy="update",
    return_data=True,
    error_score="raise",
)

# Overlay the forecasts of the first six backtests on the start of the series;
# only the true series gets a marker.
n_backtests = 6
backtest_preds = [df["y_pred"].iloc[k] for k in range(n_backtests)]
fig, ax = plot_series(
    y.iloc[1:20],
    *backtest_preds,
    markers=["o"] + [""] * n_backtests,
    labels=["y_true"] + [f"y_pred (Backtest {k})" for k in range(n_backtests)],
)
ax.legend()


Now with the UpdateRefitsEvery

- not working as expected, even with bugfixes in the source code of UpdateRefitsEvery

In [None]:
# ARIMA with order (1, 0, 0), wrapped in UpdateRefitsEvery with
# refit_interval=3.
mod = ARIMA(order=(1, 0, 0))
up_mod = UpdateRefitsEvery(mod, refit_interval=3)

# Horizons 1..6 on an expanding window: 12 initial observations, growing by
# one observation per split.
fh = np.arange(1, 7)
cv = ExpandingWindowSplitter(fh=fh, initial_window=12, step_length=1)

# Backtest with the "update" strategy. return_data=True is needed because the
# plot below reads the per-split forecasts from the "y_pred" column.
df = evaluate(
    forecaster=up_mod,
    y=y,
    cv=cv,
    strategy="update",
    return_data=True,
    error_score="raise",
)

# Overlay the forecasts of the first six backtests on the start of the series;
# only the true series gets a marker.
n_backtests = 6
backtest_preds = [df["y_pred"].iloc[k] for k in range(n_backtests)]
fig, ax = plot_series(
    y.iloc[1:20],
    *backtest_preds,
    markers=["o"] + [""] * n_backtests,
    labels=["y_true"] + [f"y_pred (Backtest {k})" for k in range(n_backtests)],
)
ax.legend()


- coefficients are being updated correctly with the bugfix
- but the data/predictions aren't moving forward in time

In [None]:
# Manual re-creation of the backtest loop, to inspect the wrapper step by step.
# ARIMA with order (1, 0, 0), wrapped in UpdateRefitsEvery with
# refit_interval=3. Unwrapped alternative for comparison:
# up_mod = mod
mod = ARIMA(order=(1, 0, 0))
up_mod = UpdateRefitsEvery(mod, refit_interval=3)
fh = np.arange(1, 7)
cv = ExpandingWindowSplitter(fh=fh, initial_window=12, step_length=1)

# Materialise the splits so the previous split can be looked up by position.
# Note: `cv` is rebound here from the splitter to the list of splits.
cv = list(cv.split(y))
for i, split in enumerate(cv):
    train_idx = split[0]
    if up_mod.is_fitted:
        # Pass only the observations that differ from the previous training
        # window (symmetric difference of the two index arrays).
        # todo: update params every X iters
        new_idx = np.setxor1d(train_idx, cv[i - 1][0])
        up_mod.update(y.iloc[new_idx].copy(), update_params=False)
    else:
        # Not fitted yet: fit on the whole training window of this split.
        up_mod.fit(y.iloc[train_idx].copy())

    # Inspect the cutoff, the forecast for the first horizon and the fitted
    # parameters of the wrapped forecaster after every step.
    print(up_mod.cutoff)
    print(up_mod.predict(fh=fh[0]))
    print(up_mod.forecaster_.get_fitted_params())
    # print(up_mod.get_fitted_params())

- update doesn't even work as expected

In [None]:
# Same manual loop as a baseline, but on the bare ARIMA model (no wrapper).
mod = ARIMA(order=(1, 0, 0))
fh = np.arange(1, 7)
cv = ExpandingWindowSplitter(fh=fh, initial_window=12, step_length=1)

# Materialise the splits so the previous split can be looked up by position.
# Note: `cv` is rebound here from the splitter to the list of splits.
cv = list(cv.split(y))
for i, split in enumerate(cv):
    train_idx = split[0]
    if mod.is_fitted:
        # Pass only the observations that differ from the previous training
        # window (symmetric difference of the two index arrays).
        # todo: update params every X iters
        new_idx = np.setxor1d(train_idx, cv[i - 1][0])
        # NOTE(review): `maxiter` does not look like part of the stock
        # `update` signature - presumably this relies on a locally patched
        # sktime; confirm before running against a released version.
        mod.update(
            y.iloc[new_idx].copy(),
            update_params=True,
            maxiter=0,
        )
    else:
        # Not fitted yet: fit on the whole training window of this split.
        mod.fit(y.iloc[train_idx].copy())

    # Inspect the cutoff and the forecasts for the first two horizons.
    print(mod.cutoff)
    print(mod.predict(fh=fh[0:2]))