In [0]:
%pip install -r requirements.txt
%restart_python

## Model Evaluation for Total Household Deposits

This notebook evaluates the final forecast models for each horizon using the hold-out test set (the last 12 months of data). Evaluation is performed using a **backtesting** approach: for each point in the test set, the model is trained on all available data up to time _t - h_ and used to predict the value at time _t_, where _h_ is the forecast horizon.

Performance metrics used for assessing robustness and accuracy on unseen data:
- **Root Mean Squared Error (RMSE)**
- **Mean Absolute Percentage Error (MAPE)**, and
- **Maximum Absolute Error** 


In [0]:
from sktime.split import temporal_train_test_split
from sktime.forecasting.base import ForecastingHorizon
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error, mean_squared_error
from sktime.forecasting.residual_booster import ResidualBoostingForecaster
from sklearn.ensemble import GradientBoostingRegressor
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.ets import AutoETS
from sktime.forecasting.model_evaluation import evaluate
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.forecasting.compose import TransformedTargetForecaster
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
import random

# Seed the global random number generators of Python's `random` module and NumPy
# so that anything drawing from them is repeatable across runs.
random.seed(42)
np.random.seed(42)

In [0]:
def create_features(df, target_col, predictor_cols, lags=[1,3,4, 6,12], rolling_windows=[3,4, 6,12]):
    """
    Build lagged, differenced, rate-of-change and rolling-window features.

    The input frame is copied, its 'date' column is parsed to datetime and the
    rows are sorted chronologically before any feature is computed. Features
    are generated for the target column and for every predictor column.

    Parameters:
    - df (pd.DataFrame): Frame holding a 'date' column plus the target and predictor columns
    - target_col (str): Column name of the target
    - predictor_cols (list): Column names of the predictors
    - lags (list): Offsets used for the lag, difference and rate-of-change features
    - rolling_windows (list): Window sizes used for the rolling mean, std, min and max

    Returns:
    - pd.DataFrame: Original columns plus the engineered features and calendar
      columns (year, month, quarter). 'date' is returned as a regular column and
      every row that contains a missing value has been removed.
    """
    # Work on a chronologically ordered copy indexed by date; the caller's frame is untouched
    frame = df.copy()
    frame['date'] = pd.to_datetime(frame['date'])
    frame = frame.sort_values('date').set_index('date')

    engineered = {}

    # Target first, then predictors; insertion order defines the output column order
    for name in [target_col] + predictor_cols:
        series = frame[name]

        # Plain lags
        engineered.update({f'{name}_lag{step}': series.shift(step) for step in lags})

        # Differences, plus rate of change only when the column never equals zero
        # (a zero denominator would produce infinite values)
        zero_free = (series != 0).all()
        for step in lags:
            engineered[f'{name}_diff{step}'] = series.diff(step)
            if zero_free:
                engineered[f'{name}_roc{step}'] = series.pct_change(step)

        # Rolling-window summaries
        for width in rolling_windows:
            window = series.rolling(width)
            engineered[f'{name}_ma{width}'] = window.mean()
            engineered[f'{name}_std{width}'] = window.std()
            engineered[f'{name}_min{width}'] = window.min()
            engineered[f'{name}_max{width}'] = window.max()

    # Original columns followed by the engineered ones
    engineered_frame = pd.DataFrame(engineered, index=frame.index)
    combined = pd.concat([frame, engineered_frame], axis=1)

    # Calendar features derived from the date index
    calendar = combined.index
    combined['year'] = calendar.year
    combined['month'] = calendar.month
    combined['quarter'] = calendar.quarter

    # Shifting/rolling leaves incomplete leading rows; drop them and restore 'date' as a column
    return combined.dropna().reset_index()

def prepare_features(df, target, lags, rolling_windows, use_cols):
    """
    Produce the model-ready frame for one forecast configuration.

    Steps:
    - Engineer features with create_features, treating every column other than
      'date' and the target as a predictor
    - Keep only 'date', the target and the requested feature columns
    - Index the result by month

    Parameters:
    - df (pd.DataFrame): Frame holding 'date', the target and the predictor columns
    - target (str): Name of the target column
    - lags (list): Lag offsets forwarded to create_features
    - rolling_windows (list): Rolling window sizes forwarded to create_features
    - use_cols (list): Feature columns to keep alongside the target

    Returns:
    - pd.DataFrame: Frame with a monthly PeriodIndex containing the target and
      the columns listed in use_cols
    """
    # Everything that is neither the date nor the target is a predictor
    reserved = ('date', target)
    predictor_cols = [name for name in df.columns if name not in reserved]

    engineered = create_features(
        df,
        target,
        predictor_cols,
        lags=lags,
        rolling_windows=rolling_windows,
    )

    # Restrict to the selected columns before re-indexing
    keep = ['date', target] + use_cols
    selected = engineered[keep].copy()

    # Conform to month-start frequency, then switch to a monthly PeriodIndex for sktime
    monthly = selected.set_index('date').asfreq('MS')
    monthly.index = monthly.index.to_period("M")
    return monthly

def build_forecaster(sp, n_estimators, learning_rate, max_depth, window_length, min_samples_leaf, subsample):
    """
    Assemble the composite model: an AutoETS base forecaster whose residuals are
    modelled by a gradient-boosted regressor.

    Parameters:
    - sp (int): Seasonal period passed to AutoETS (e.g. 12 for monthly data with yearly seasonality)
    - n_estimators (int): Number of boosting stages in the gradient boosting regressor
    - learning_rate (float): Learning rate of the gradient boosting regressor
    - max_depth (int): Maximum depth of each tree
    - window_length (int): Window length given to the reduction that wraps the regressor
    - min_samples_leaf (int): Minimum number of samples per leaf
    - subsample (float): Fraction of samples used to fit each boosting stage

    Returns:
    - ResidualBoostingForecaster: AutoETS combined with a min-max-scaled,
      direct-strategy reduction of the gradient boosting regressor
    """
    # Residual model: gradient boosting with a fixed random_state for repeatability
    gbt = GradientBoostingRegressor(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        subsample=subsample,
        random_state=42,
    )

    # Turn the tabular regressor into a forecaster using the "direct" reduction strategy
    reduced_gbt = make_reduction(gbt, window_length=window_length, strategy="direct")

    # Scale the series seen by the residual model before it reaches the regressor
    scaled_residual_model = TransformedTargetForecaster(
        [("scaler", MinMaxScaler()), ("regressor", reduced_gbt)]
    )

    # Base model: automatically selected ETS with the given seasonal period
    ets = AutoETS(auto=True, sp=sp, n_jobs=-1)

    return ResidualBoostingForecaster(ets, scaled_residual_model)

def plot_forecast_results(results_df, title="Forecast vs Actual"):
    """
    Plots actual vs. predicted values and reports RMSE, MAPE, and Max Error.

    Each point is annotated with its signed error (actual - predicted).

    Parameters:
    - results_df (pd.DataFrame): DataFrame with columns ["y_true", "y_pred"] and a datetime index
    - title (str): Plot title

    Returns:
    - tuple: (RMSE, MAPE, Max Error) where MAPE is expressed in percent and
      Max Error is the largest absolute error
    """

    y_true = results_df["y_true"]
    y_pred = results_df["y_pred"]

    # Calculate error metrics
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    max_error = np.max(np.abs(y_true - y_pred))

    # Create plot
    plt.figure(figsize=(15, 5))
    plt.plot(results_df.index, y_true, label="Actual", marker='o', linestyle='--', color='black')
    plt.plot(results_df.index, y_pred, label=f"Predicted RMSE={rmse:.1f}, MAPE={mape:.2f}%, Max Err={max_error:.1f}",
             marker='o', color='tab:green')

    # Annotate forecast errors above or below the points.
    # Iterate over the values positionally: integer keys such as y_true[i] on a
    # Series with a datetime index rely on a positional fallback that pandas has
    # deprecated, so label-based lookups are avoided here.
    for date, actual, predicted in zip(results_df.index, y_true.to_numpy(), y_pred.to_numpy()):
        diff = actual - predicted
        # The label sits a fixed 1000 y-axis units above the higher of the two points
        plt.text(date, max(actual, predicted) + 1000, f"{diff:+.1f}", color='red', fontsize=12,
                 ha='center', va='bottom' if diff > 0 else 'top')

    # Axis and plot settings
    plt.xlabel("Date")
    plt.ylabel("Household Deposits (NZDm)")
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    return rmse, mape, max_error

def run_residual_boosting_pipeline(raw_df, target='household_deposits', use_cols=[], lags=[], rolling_windows=[], test_size=12, window_length=160, h=2, sp=12, n_estimators=200, learning_rate=0.01, max_depth=4, min_samples_leaf = 1, subsample = 1):

    """
    Backtest the AutoETS + gradient-boosting residual model on the hold-out
    period and plot the h-step-ahead predictions against the actual values.

    Parameters:
    - raw_df (pd.DataFrame): Raw input DataFrame containing 'date', target, and predictors
    - target (str): Target variable name
    - use_cols (list): Engineered features to keep (chosen by the SHAP feature selection)
    - lags (list): Lags used when engineering features (chosen by the SHAP feature selection)
    - rolling_windows (list): Window sizes for the rolling features
    - test_size (int): Number of months to reserve for the test set (must be >= h)
    - window_length (int): Number of past points used by the GBT residual correction
    - h (int): Forecast horizon (e.g. 2 = 2 months ahead, from the data perspective)
    - sp (int): Seasonal period (12 for the deposits data, where strong yearly seasonality is observed)
    - n_estimators, learning_rate, max_depth, min_samples_leaf, subsample: parameters for GBT

    Returns:
    - tuple: (RMSE, MAPE, Max Error) from the forecast evaluation; the
      actual-versus-predicted plot is displayed but not returned
    """

    # Engineered, month-indexed modelling frame
    features = prepare_features(raw_df, target, lags, rolling_windows, use_cols)

    # Hold out the last `test_size` rows
    train_part, test_part = temporal_train_test_split(features, test_size=test_size)

    # Expanding-window backtest with a single-step horizon of h.
    # The first training window is h - 1 rows shorter than the training split,
    # intended so that the first h-step-ahead forecast falls on the first test row.
    cv = ExpandingWindowSplitter(
        fh=[h],
        initial_window=len(train_part) - h + 1,
        step_length=1,
    )

    model = build_forecaster(sp, n_estimators, learning_rate, max_depth, window_length, min_samples_leaf, subsample)

    # NOTE(review): X holds the engineered features for the forecast period itself;
    # if any selected lag is shorter than h this may expose information that is not
    # available at forecast time. Confirm against the configured lags.
    backtest = evaluate(
        forecaster=model,
        y=features[target],
        X=features.drop(columns=[target]),
        cv=cv,
        strategy="refit",
        return_data=True,
    )

    # Each fold holds a single test point; collect them and align with the test dates
    actuals = [fold.values[0] for fold in backtest["y_test"]]
    predictions = [fold.values[0] for fold in backtest["y_pred"]]
    results = pd.DataFrame(
        {"y_true": actuals, "y_pred": predictions},
        index=test_part.index.to_timestamp(),
    )

    # Plot and return the metrics
    return plot_forecast_results(results, title="AutoETS + Residual Boosting Forecast")



In [0]:
## Load inputs
# Processed dataset, ordered by its 'date' column
raw_df = pd.read_csv('../data/processed/combined_total_household_data_interpolate.csv').sort_values('date')

# Model settings; the evaluation cells look entries up by 'model_forecast_h<h>'
with open("model_configs.json", "r") as config_file:
    param_dict = json.load(config_file)



## Evaluation for 2-Month Horizon Prediction

In [0]:
# Settings for the 2-month horizon
h = 2
test_size = 12
target = 'household_deposits'
horizon_params = param_dict[f'model_forecast_h{h}']
predictors = horizon_params['predictors']

# Pipeline arguments taken from this horizon's config entry
pipeline_kwargs = dict(
    use_cols=horizon_params['best_features'],
    lags=horizon_params['best_lags'],
    rolling_windows=horizon_params['best_rolling_windows'],
    window_length=horizon_params['window_length'],
    sp=horizon_params['sp'],
    n_estimators=horizon_params['n_estimators'],
    learning_rate=horizon_params['learning_rate'],
    max_depth=horizon_params['max_depth'],
    min_samples_leaf=horizon_params['min_samples_leaf'],
    subsample=horizon_params['subsample'],
)

# Backtest the 2-month-ahead model on the hold-out period
rmse, mape, max_error = run_residual_boosting_pipeline(
    raw_df=raw_df[['date', target] + predictors],
    test_size=test_size,
    h=h,
    **pipeline_kwargs,
)

print(f"Evaluation Results for Horizon {h}")
for label, value, suffix in (("RMSE", rmse, ""), ("MAPE", mape, "%"), ("Max Error", max_error, "")):
    print(f"{label}: {round(value, 2)}{suffix}")

## Evaluation for 5-Month Horizon Prediction

In [0]:
# Settings for the 5-month horizon
h = 5
test_size = 12
target = 'household_deposits'
horizon_params = param_dict[f'model_forecast_h{h}']
predictors = horizon_params['predictors']

# Pipeline arguments taken from this horizon's config entry
pipeline_kwargs = dict(
    use_cols=horizon_params['best_features'],
    lags=horizon_params['best_lags'],
    rolling_windows=horizon_params['best_rolling_windows'],
    window_length=horizon_params['window_length'],
    # Seasonal period is fixed at 12 here rather than read from horizon_params['sp']
    sp=12,
    n_estimators=horizon_params['n_estimators'],
    learning_rate=horizon_params['learning_rate'],
    max_depth=horizon_params['max_depth'],
    min_samples_leaf=horizon_params['min_samples_leaf'],
    subsample=horizon_params['subsample'],
)

# Backtest the 5-month-ahead model on the hold-out period
rmse, mape, max_error = run_residual_boosting_pipeline(
    raw_df=raw_df[['date', target] + predictors],
    test_size=test_size,
    h=h,
    **pipeline_kwargs,
)

print(f"Evaluation Results for Horizon {h}")
for label, value, suffix in (("RMSE", rmse, ""), ("MAPE", mape, "%"), ("Max Error", max_error, "")):
    print(f"{label}: {round(value, 2)}{suffix}")

## Evaluation for 8-Month Horizon Prediction

In [0]:
# Settings for the 8-month horizon
h = 8
test_size = 12
target = 'household_deposits'
horizon_params = param_dict[f'model_forecast_h{h}']
predictors = horizon_params['predictors']

# Pipeline arguments taken from this horizon's config entry
pipeline_kwargs = dict(
    use_cols=horizon_params['best_features'],
    lags=horizon_params['best_lags'],
    rolling_windows=horizon_params['best_rolling_windows'],
    window_length=horizon_params['window_length'],
    # Seasonal period is fixed at 12 here rather than read from horizon_params['sp']
    sp=12,
    n_estimators=horizon_params['n_estimators'],
    learning_rate=horizon_params['learning_rate'],
    max_depth=horizon_params['max_depth'],
    min_samples_leaf=horizon_params['min_samples_leaf'],
    subsample=horizon_params['subsample'],
)

# Backtest the 8-month-ahead model on the hold-out period
rmse, mape, max_error = run_residual_boosting_pipeline(
    raw_df=raw_df[['date', target] + predictors],
    test_size=test_size,
    h=h,
    **pipeline_kwargs,
)

print(f"Evaluation Results for Horizon {h}")
for label, value, suffix in (("RMSE", rmse, ""), ("MAPE", mape, "%"), ("Max Error", max_error, "")):
    print(f"{label}: {round(value, 2)}{suffix}")


## Evaluation for 14-Month Horizon Prediction

In [0]:
# Settings for the 14-month horizon
h = 14
test_size = 12
target = 'household_deposits'
horizon_params = param_dict[f'model_forecast_h{h}']
predictors = horizon_params['predictors']

# Pipeline arguments taken from this horizon's config entry
pipeline_kwargs = dict(
    use_cols=horizon_params['best_features'],
    lags=horizon_params['best_lags'],
    rolling_windows=horizon_params['best_rolling_windows'],
    window_length=horizon_params['window_length'],
    # Seasonal period is set to 12 directly in this cell
    sp=12,
    n_estimators=horizon_params['n_estimators'],
    learning_rate=horizon_params['learning_rate'],
    max_depth=horizon_params['max_depth'],
    min_samples_leaf=horizon_params['min_samples_leaf'],
    subsample=horizon_params['subsample'],
)

# Backtest the 14-month-ahead model on the hold-out period
rmse, mape, max_error = run_residual_boosting_pipeline(
    raw_df=raw_df[['date', target] + predictors],
    test_size=test_size,
    h=h,
    **pipeline_kwargs,
)

print(f"Evaluation Results for Horizon {h}")
for label, value, suffix in (("RMSE", rmse, ""), ("MAPE", mape, "%"), ("Max Error", max_error, "")):
    print(f"{label}: {round(value, 2)}{suffix}")