In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
import random 
from itertools import cycle
import os
import json
import joblib

#select the compute device: CUDA when it is usable, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

#windowing configuration (all three are row counts, read by make_windows)
input_width = 1   #rows of X that form one model input
output_width = 1  #rows of Y that form one model target
offset = 1        #rows between the last input row and the first target row

#training configuration
batch_size = 128
epochs = 1000

#early-stopping configuration
patience = 180    #epochs without improvement after which training is stopped
min_delta = 1e-5  #smallest drop of the monitored loss that still counts as an improvement

#load datasets: Unit2 files 1..7 first, then Unit4 files 1..8
csv_paths = [
    f"{unit}_dataset_rev1_{i}.csv"
    for unit, n_files in (("Unit2", 7), ("Unit4", 8))
    for i in range(1, n_files + 1)
]

x_columns = [f"X{i}" for i in range(1, 13)]  #12 inputs
y_columns = ["Y1", "Y2"]  #Y1 = heat load; Y2 = efficiency

datasets_raw = []  #list of (X_raw, Y_raw) tuples, one per csv file

for csv_path in csv_paths:
    frame = pd.read_csv(csv_path)
    datasets_raw.append((frame[x_columns].values, frame[y_columns].values))

#split each dataset by row order (no shuffling): first 70% train, next 20% val, last 10% test
datasets_split = []  #list of (X_train, X_val, X_test, Y_train, Y_val, Y_test)

for inputs, targets in datasets_raw:
    n_rows = len(inputs)
    train_stop, val_stop = int(n_rows * 0.7), int(n_rows * 0.9)

    #the same three row ranges are applied to the inputs and to the targets
    row_ranges = (
        slice(None, train_stop),
        slice(train_stop, val_stop),
        slice(val_stop, None),
    )

    datasets_split.append(
        tuple(array[rows] for array in (inputs, targets) for rows in row_ranges)
    )

#global scalers: fitted once on the training rows of all datasets stacked together
#(entry 0 of a split is X_train, entry 3 is Y_train)
X_train_all = np.concatenate([split[0] for split in datasets_split], axis=0)
Y_train_all = np.concatenate([split[3] for split in datasets_split], axis=0)

#fit() returns the scaler itself, so construction and fitting can be chained
X_scaler_global = MinMaxScaler().fit(X_train_all)
Y_scaler_global = RobustScaler().fit(Y_train_all)


#scaling: the global scalers are only applied here (transform), never re-fitted per dataset
datasets_scaled = []  #list of (X_train_scaled, X_val_scaled, X_test_scaled, Y_train_scaled, Y_val_scaled, Y_test_scaled)

for x_tr, x_va, x_te, y_tr, y_va, y_te in datasets_split:
    scaled_inputs = [X_scaler_global.transform(part) for part in (x_tr, x_va, x_te)]
    scaled_targets = [Y_scaler_global.transform(part) for part in (y_tr, y_va, y_te)]

    datasets_scaled.append((*scaled_inputs, *scaled_targets))

    
#sliding window function
def make_windows(x_scaled, y_scaled, in_width=None, shift=None, out_width=None):
    """Cut aligned, overlapping (input, target) windows out of one series.

    Window k takes rows k .. k+in_width-1 of x_scaled as input and the
    out_width rows of y_scaled starting at row k+in_width-1+shift as target.
    Each window is flattened row by row, so a target window holding several
    rows is laid out as (row 0 columns, row 1 columns, ...).

    Args:
        x_scaled: 2-D array, one row per sample and one column per input.
        y_scaled: array with one row per sample (1-D or 2-D).
        in_width: rows per input window; defaults to the global input_width.
        shift: rows between the last input row and the first target row;
            defaults to the global offset.
        out_width: rows per target window; defaults to the global output_width.

    Returns:
        Tuple (X_windows, Y_windows) of float32 arrays with shapes
        (n_windows, in_width * n_inputs) and (n_windows, out_width * n_targets).
        n_windows is 0 (with the column counts preserved) when the series is
        too short to hold a single full window.
    """
    #fall back to the notebook-level configuration only for values not passed in
    in_width = input_width if in_width is None else in_width
    shift = offset if shift is None else shift
    out_width = output_width if out_width is None else out_width

    x_scaled = np.asarray(x_scaled)
    y_scaled = np.asarray(y_scaled)

    #rows covered by one window, from its first input row to its last target row
    span = in_width - 1 + shift + out_width

    #number of start positions whose window fits completely; the last valid
    #start index is n_rows - span, hence the +1 (never negative)
    n_rows = min(len(x_scaled), len(y_scaled))
    n_windows = max(n_rows - span + 1, 0)

    x_windows = []
    y_windows = []
    for start_idx in range(n_windows):
        y_start = start_idx + in_width - 1 + shift
        x_windows.append(x_scaled[start_idx : start_idx + in_width, :].flatten())
        y_windows.append(y_scaled[y_start : y_start + out_width].flatten())

    #explicit reshape keeps the column count even when there are no windows
    x_dim = in_width * x_scaled.shape[1]
    y_dim = out_width * int(np.prod(y_scaled.shape[1:]))
    X_windows = np.array(x_windows, dtype=np.float32).reshape(n_windows, x_dim)
    Y_windows = np.array(y_windows, dtype=np.float32).reshape(n_windows, y_dim)
    return X_windows, Y_windows


#window the train, val and test part of every scaled dataset
datasets_windowed = []  #list of (X_train_win, X_val_win, X_test_win, Y_train_win, Y_val_win, Y_test_win)

for x_tr, x_va, x_te, y_tr, y_va, y_te in datasets_scaled:
    #one (X_win, Y_win) pair per part, in train / val / test order
    windowed_parts = [
        make_windows(x_part, y_part)
        for x_part, y_part in ((x_tr, y_tr), (x_va, y_va), (x_te, y_te))
    ]

    #regroup into all input windows followed by all target windows
    x_wins, y_wins = zip(*windowed_parts)
    datasets_windowed.append((*x_wins, *y_wins))

#torch tensors and dataloaders; tensor = multidimensional array optimised for GPU; dataloader = makes batches
#one loader per dataset and per part; only the training loaders shuffle
train_loaders, val_loaders, test_loaders = [], [], []

for x_tr, x_va, x_te, y_tr, y_va, y_te in datasets_windowed:
    for loaders, x_part, y_part, do_shuffle in (
        (train_loaders, x_tr, y_tr, True),
        (val_loaders, x_va, y_va, False),
        (test_loaders, x_te, y_te, False),
    ):
        part_dataset = TensorDataset(torch.tensor(x_part), torch.tensor(y_part))
        loaders.append(DataLoader(part_dataset, batch_size=batch_size, shuffle=do_shuffle))

#MLP class definition
#dropout for regularisation
class MLP(nn.Module):
    """Fully connected network: three equally wide hidden layers and a linear output.

    Every hidden layer is followed by leaky-ReLU and then dropout; the output
    layer has neither. The output layer is stored as fc7 (there is no fc4..fc6),
    so the state_dict keys are fc1.*, fc2.*, fc3.* and fc7.*.

    Args:
        din: number of input features.
        dout: number of output values.
        num_neurons: width of each hidden layer.
        dropout_rate: drop probability of the shared dropout module.
    """

    def __init__(self, din, dout, num_neurons, dropout_rate=0.2):
        super().__init__()
        #hidden stack
        self.fc1 = nn.Linear(din, num_neurons)
        self.fc2 = nn.Linear(num_neurons, num_neurons)
        self.fc3 = nn.Linear(num_neurons, num_neurons)
        #output layer
        self.fc7 = nn.Linear(num_neurons, dout)
        #one dropout module reused after every hidden layer
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return the network output for a batch x whose last dimension is din."""
        activation = nn.functional.leaky_relu
        h = x
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            h = self.dropout(activation(hidden_layer(h)))
        return self.fc7(h)
    
#model initialisation
num_input_features = 12  #X1 to X12
hidden_neurons = 64
input_dim = num_input_features * input_width  #length of one flattened input window
output_dim = 2 * output_width  #2 outputs per predicted row

model = MLP(din=input_dim, dout=output_dim, num_neurons=hidden_neurons, dropout_rate=0.2)
model = model.to(device)

#Adam uses momentum - better than SGD
#weight decay helps prevent overfitting by penalising large weights
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=5e-5)

#halves the LR (down to min_lr) when the monitored loss has not improved for 20 epochs
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=20,
    min_lr=1e-6,
)

#appropriate for outliers - precision of MSE for small errors and robustness of MAE for outliers
#SmoothL1(x) =
#     0.5 * x²     if |x| < 1
#    |x| - 0.5     if |x| ≥ 1
criterion = torch.nn.SmoothL1Loss()

#early-stopping bookkeeping
best_val_loss = float('inf')
epochs_no_improve = 0
best_model_state = None


from itertools import zip_longest

#interleaved A1 B1 C1 -> A2 B2 C2
def round_robin_batches(loaders):
    """Yield batches by taking one from each loader in turn.

    Every loader is fully iterated into a list first. Loaders that run out of
    batches are simply skipped in later rounds, so all batches of all loaders
    are yielded exactly once.
    """
    batches_per_loader = [list(loader) for loader in loaders]

    #zip_longest pads exhausted loaders with None; the padding is filtered out
    for one_round in zip_longest(*batches_per_loader):
        yield from (batch for batch in one_round if batch is not None)

#per-epoch history (one entry per epoch that ran)
train_losses = []
val_losses = []
best_epoch = None
lr_history = []


#train loop
for epoch in range(epochs):
    #training pass over all datasets, batches interleaved round-robin
    model.train()
    total_train_loss = 0.0
    total_train_samples = 0

    for batch_X, batch_Y in round_robin_batches(train_loaders):
        batch_X = batch_X.to(device)
        batch_Y = batch_Y.to(device)

        #clear gradients first so nothing left over from an interrupted run is applied
        optimizer.zero_grad()

        predictions = model(batch_X)
        loss = criterion(predictions, batch_Y)

        loss.backward()
        optimizer.step()

        #the criterion returns a batch mean; weight it by batch size for a per-sample average
        total_train_loss += loss.item() * batch_X.size(0)
        total_train_samples += batch_X.size(0)

    avg_train_loss = total_train_loss / total_train_samples


    #validation
    model.eval()
    total_val_loss = 0.0
    total_val_samples = 0

    with torch.no_grad():
        for val_loader in val_loaders:
            for batch_X, batch_Y in val_loader:
                batch_X = batch_X.to(device)
                batch_Y = batch_Y.to(device)

                val_preds = model(batch_X)
                val_loss = criterion(val_preds, batch_Y)

                total_val_loss += val_loss.item() * batch_X.size(0)
                total_val_samples += batch_X.size(0)

    avg_val_loss = total_val_loss / total_val_samples
    scheduler.step(avg_val_loss)

    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)
    current_lr = optimizer.param_groups[0]['lr']
    lr_history.append(current_lr)


    #early stopping: an epoch only counts as better if it beats the best loss by more than min_delta
    if avg_val_loss + min_delta < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0
        #state_dict() hands back the live parameter tensors, which later optimizer
        #steps update in place; clone them so the snapshot really stays at this epoch
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
        best_epoch = epoch
    else:
        epochs_no_improve += 1

    if epoch % 50 == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {avg_train_loss:.6f} - Val Loss: {avg_val_loss:.6f}")

    if epochs_no_improve >= patience:
        print(f"Early stopping triggered at epoch {epoch+1}")
        break

#restore best model
#load best_model_state back into the model; skipped when no state was ever recorded
if best_model_state is not None:
    model.load_state_dict(best_model_state)


#NOTE: everything between the triple quotes below is a bare string literal, so none
#of it executes. It is the artifact-saving step kept switched off: it would write the
#model state_dict, a json config for inference and the two global scalers into the
#artifacts/ directory. Remove the surrounding triple quotes to enable it.
"""
# Directory to save artifacts
os.makedirs("artifacts", exist_ok=True)

# Save model state_dict
MODEL_PATH = "artifacts/mlp_boiler_state_dict.pth"
torch.save(model.state_dict(), MODEL_PATH)

# Save config needed for inference
config = {
    "input_width": int(input_width),
    "output_width": int(output_width),
    "num_input_features": int(num_input_features),
    "hidden_neurons": int(hidden_neurons),
    "output_dim": int(output_dim),
    "offset": int(offset)
}
CONFIG_PATH = "artifacts/model_config.json"
with open(CONFIG_PATH, "w") as f:
    json.dump(config, f, indent=2)

print(f"Saved model to {MODEL_PATH}")
print(f"Saved config to {CONFIG_PATH}")

# >>> NEW <<<  # Save global scalers for inference
joblib.dump(X_scaler_global, "artifacts/X_scaler_global.joblib")
joblib.dump(Y_scaler_global, "artifacts/Y_scaler_global.joblib")
print("Saved scalers to artifacts/X_scaler_global.joblib and artifacts/Y_scaler_global.joblib")
"""


#x axis: 1-based numbers of the epochs that actually ran
epochs_ran = len(train_losses)
epoch_axis = list(range(1, epochs_ran + 1))

#plot training and validation losses
plt.figure(figsize=(10, 6))
for loss_series, series_label in (
    (train_losses, 'train loss'),
    (val_losses, 'validation loss'),
):
    plt.plot(epoch_axis, loss_series, label=series_label)

#mark the best epoch with a star if available
best_epoch_known = best_epoch is not None
if best_epoch_known and 0 <= best_epoch < epochs_ran:
    star_x = best_epoch + 1
    star_y = val_losses[best_epoch]
    plt.scatter(star_x, star_y, marker='*', s=200, label='best (early-stop restore)')

plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('training and validation loss')
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()

#print best epoch info
if best_epoch_known:
    print(f"best epoch (model restored): {best_epoch + 1}  |  val loss: {val_losses[best_epoch]:.6f}")

#plot learning rate over epochs
plt.figure(figsize=(10, 4))
plt.plot(epoch_axis, lr_history, label='learning rate')
plt.xlabel('epoch')
plt.ylabel('learning rate')
plt.title('learning rate over epochs')
plt.grid(True)
plt.tight_layout()
plt.show()

#print final learning rate value
final_lr = lr_history[-1]
print(f"final learning rate: {final_lr:.6e}")




In [None]:
import joblib

#load the single global scaler saved after training if wanted
#Y_scaler_global = joblib.load("artifacts/Y_scaler_global.joblib")

#which entry of datasets_windowed to plot
dataset_idx = 6

#entries 2 and 5 of a windowed dataset are its test inputs and its test targets
x_test_win_scaled, y_test_win_scaled = (
    datasets_windowed[dataset_idx][entry] for entry in (2, 5)
)

#model inference (evaluation mode, no gradient tracking)
model.eval()
with torch.no_grad():
    x_test_tensor = torch.tensor(x_test_win_scaled, dtype=torch.float32).to(device)
    preds_scaled = model(x_test_tensor).cpu().numpy()

#invert scaling to original units
preds_unscaled, targets_unscaled = (
    Y_scaler_global.inverse_transform(scaled)
    for scaled in (preds_scaled, y_test_win_scaled)
)

#split interleaved targets: even columns → y1 (heat load), odd columns → y2 (efficiency)
pred_y1, pred_y2 = (preds_unscaled[:, first::2].flatten() for first in (0, 1))
true_y1, true_y2 = (targets_unscaled[:, first::2].flatten() for first in (0, 1))

#heat load (y1), divided by 1e8 for the axis
true_y1_dim = true_y1 / 1e8
pred_y1_dim = pred_y1 / 1e8

#time axis in days (96 samples = 1 day)
x_days_y1 = np.arange(len(true_y1_dim)) / 96.0

plt.figure(figsize=(11, 5))
for y1_series, y1_label, y1_width in (
    (true_y1_dim, 'actual heat load', 1.5),
    (pred_y1_dim, 'predicted heat load', 1.2),
):
    plt.plot(x_days_y1, y1_series, label=y1_label, linewidth=y1_width)
plt.xlabel('time (days)')
plt.ylabel('heat load (×1e8)')
plt.title('heat load — actual vs predicted')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

#efficiency (y2), both series cut to the shorter of the two lengths
n2 = min(len(true_y2), len(pred_y2))
x_days_y2 = np.arange(n2) / 96.0

plt.figure(figsize=(11, 5))
for y2_series, y2_label, y2_width in (
    (true_y2, 'actual efficiency', 1.5),
    (pred_y2, 'predicted efficiency', 1.2),
):
    plt.plot(x_days_y2, y2_series[:n2], label=y2_label, linewidth=y2_width)
plt.xlabel('time (days)')
plt.ylabel('efficiency')
plt.title('efficiency — actual vs predicted')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_percentage_error
import joblib

#load the single global y scaler if not already in memory
#Y_scaler_global = joblib.load("artifacts/Y_scaler_global.joblib")

#evaluate unscaled MAPE for each dataset (y1 and y2), supporting output_width > 1
model.eval()
mape_per_dataset = []      #one (y1 mape, y2 mape) tuple per dataset, in percent
total_y1_mape_sum = 0.0    #sum of per-dataset y1 mape weighted by value count
total_y2_mape_sum = 0.0    #same for y2
total_samples = 0          #number of y1 values (equals the number of y2 values)
total_mape_sum = 0.0       #sum of per-dataset combined mape weighted by value count

for dataset_idx, ds in enumerate(datasets_windowed):
    #entry 2 = test input windows, entry 5 = scaled test target windows
    X_test, Y_test_scaled = ds[2], ds[5]

    if len(X_test) == 0:
        #nothing to evaluate; keep the list aligned with the dataset numbering
        mape_per_dataset.append((float('nan'), float('nan')))
        print(f"dataset {dataset_idx + 1}: no test windows, skipped")
        continue

    X_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    with torch.no_grad():
        Y_pred_scaled = model(X_tensor).cpu().numpy()

    #the y scaler was fitted on (y1, y2) pairs, so un-scale one pair per row and
    #restore the window layout afterwards (a no-op reshape when output_width == 1)
    Y_pred_unscaled = Y_scaler_global.inverse_transform(
        Y_pred_scaled.reshape(-1, 2)
    ).reshape(Y_pred_scaled.shape)
    Y_true_unscaled = Y_scaler_global.inverse_transform(
        Y_test_scaled.reshape(-1, 2)
    ).reshape(Y_test_scaled.shape)

    #combined mape over both outputs
    Y_all_true_flat = Y_true_unscaled.flatten()
    Y_all_pred_flat = Y_pred_unscaled.flatten()
    total_mape_sum += mean_absolute_percentage_error(Y_all_true_flat, Y_all_pred_flat) * 100 * len(Y_all_true_flat)

    #even columns hold y1, odd columns hold y2
    Y1_true_flat = Y_true_unscaled[:, ::2].flatten()
    Y1_pred_flat = Y_pred_unscaled[:, ::2].flatten()
    Y2_true_flat = Y_true_unscaled[:, 1::2].flatten()
    Y2_pred_flat = Y_pred_unscaled[:, 1::2].flatten()

    #compute MAPE for y1 and y2 (sklearn returns a fraction, hence * 100)
    mape_y1 = mean_absolute_percentage_error(Y1_true_flat, Y1_pred_flat) * 100
    mape_y2 = mean_absolute_percentage_error(Y2_true_flat, Y2_pred_flat) * 100

    mape_per_dataset.append((mape_y1, mape_y2))
    print(f"dataset {dataset_idx + 1}: y1 mape = {mape_y1:.4f}%, y2 mape = {mape_y2:.4f}%")

    total_y1_mape_sum += mape_y1 * len(Y1_true_flat)
    total_y2_mape_sum += mape_y2 * len(Y2_true_flat)
    total_samples += len(Y1_true_flat)

#global average mape (weighted by sample count)
avg_y1_mape = total_y1_mape_sum / total_samples
avg_y2_mape = total_y2_mape_sum / total_samples

print("\nWeighted average mape across all datasets:")
print(f"y1: {avg_y1_mape:.4f}%")
print(f"y2: {avg_y2_mape:.4f}%")

#overall combined mape across both outputs and all time steps
overall_mape = total_mape_sum / (total_samples * 2)
print(f"\nOverall combined mape across all outputs and time steps:\n{overall_mape:.4f}%")



In [None]:
#MAPE CODE IF OUTPUT WIDTH is > 1
from sklearn.metrics import mean_absolute_percentage_error
import joblib
import numpy as np
import torch

#load the single global y scaler if not already present
# Y_scaler_global = joblib.load("artifacts/Y_scaler_global.joblib")

#evaluate unscaled MAPE for each dataset (y1 and y2), supporting output_width > 1
model.eval()
mape_per_dataset = []      #one (y1 mape, y2 mape) tuple per dataset, in percent
total_y1_mape_sum = 0.0    #sum of per-dataset y1 mape weighted by value count
total_y2_mape_sum = 0.0    #same for y2
total_samples = 0          #number of y1 values (equals the number of y2 values)
total_mape_sum = 0.0       #sum of per-dataset combined mape weighted by value count

for dataset_idx, ds in enumerate(datasets_windowed, start=1):
    #entry 2 = test input windows, entry 5 = scaled test target windows
    X_test, Y_test_scaled = ds[2], ds[5]

    if len(X_test) == 0:
        #nothing to evaluate; keep the list aligned with the dataset numbering
        mape_per_dataset.append((float('nan'), float('nan')))
        print(f"dataset {dataset_idx}: no test windows, skipped")
        continue

    X_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    with torch.no_grad():
        Y_pred_scaled = model(X_tensor).cpu().numpy()

    #the y scaler was fitted on (y1, y2) pairs: un-scale one time step per row, then
    #restore the (windows, steps * 2) layout. The step count is deliberately not
    #stored in the global output_width, which make_windows reads as configuration.
    Y_pred_unscaled = Y_scaler_global.inverse_transform(
        Y_pred_scaled.reshape(-1, 2)
    ).reshape(Y_pred_scaled.shape)
    Y_true_unscaled = Y_scaler_global.inverse_transform(
        Y_test_scaled.reshape(-1, 2)
    ).reshape(Y_test_scaled.shape)

    #combined mape over both outputs and all steps
    Y_all_true_flat = Y_true_unscaled.flatten()
    Y_all_pred_flat = Y_pred_unscaled.flatten()
    total_mape_sum += mean_absolute_percentage_error(Y_all_true_flat, Y_all_pred_flat) * 100 * len(Y_all_true_flat)

    #even columns hold y1, odd columns hold y2
    Y1_true_flat = Y_true_unscaled[:, ::2].flatten()
    Y1_pred_flat = Y_pred_unscaled[:, ::2].flatten()
    Y2_true_flat = Y_true_unscaled[:, 1::2].flatten()
    Y2_pred_flat = Y_pred_unscaled[:, 1::2].flatten()

    #sklearn returns a fraction, hence * 100
    mape_y1 = mean_absolute_percentage_error(Y1_true_flat, Y1_pred_flat) * 100
    mape_y2 = mean_absolute_percentage_error(Y2_true_flat, Y2_pred_flat) * 100

    mape_per_dataset.append((mape_y1, mape_y2))
    print(f"dataset {dataset_idx}: y1 mape = {mape_y1:.4f}%, y2 mape = {mape_y2:.4f}%")

    #weighted accumulation for per-target averages
    total_y1_mape_sum += mape_y1 * len(Y1_true_flat)
    total_y2_mape_sum += mape_y2 * len(Y2_true_flat)
    total_samples += len(Y1_true_flat)

#final aggregated results
avg_y1_mape = total_y1_mape_sum / total_samples
avg_y2_mape = total_y2_mape_sum / total_samples
overall_mape = total_mape_sum / (total_samples * 2)

print("\nWeighted average mape across all datasets:")
print(f"y1: {avg_y1_mape:.4f}%")
print(f"y2: {avg_y2_mape:.4f}%")
print(f"\nOverall combined mape across all outputs and time steps:\n{overall_mape:.4f}%")


In [None]:
#MAPE per time-step (horizon) for output_width = 8
from sklearn.metrics import mean_absolute_percentage_error
import joblib, numpy as np, torch


#Y_scaler_global = joblib.load("artifacts/Y_scaler_global.joblib")

model.eval()

#per-horizon accumulators; allocated when the first non-empty test set is seen,
#because the number of horizons is read from the model output
overall_y1_h_sum = None    #sum of y1 mape * sample count, per horizon
overall_y2_h_sum = None    #sum of y2 mape * sample count, per horizon
overall_both_h_sum = None  #sum of combined mape * value count, per horizon
overall_h_counts = None    #sample count per horizon

#print flag for per-dataset details
PRINT_PER_DATASET = False

#iterate through test sets in datasets_windowed
for dataset_idx, ds in enumerate(datasets_windowed):
    #entry 2 = test input windows, entry 5 = scaled test target windows
    X_test, Y_test_scaled = ds[2], ds[5]
    if X_test is None or len(X_test) == 0:
        continue

    X_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    with torch.no_grad():
        Y_pred_scaled = model(X_tensor).cpu().numpy()

    n_samples = Y_pred_scaled.shape[0]
    #two outputs per horizon; kept in its own name so the global output_width
    #configuration (read by make_windows) is not overwritten
    n_horizons = Y_pred_scaled.shape[1] // 2

    #the y scaler was fitted on (y1, y2) pairs: un-scale one time step per row,
    #then view the result as (window, horizon, output)
    Y_pred = Y_scaler_global.inverse_transform(
        Y_pred_scaled.reshape(-1, 2)
    ).reshape(n_samples, n_horizons, 2)
    Y_true = Y_scaler_global.inverse_transform(
        Y_test_scaled.reshape(-1, 2)
    ).reshape(n_samples, n_horizons, 2)

    Y1_pred, Y2_pred = Y_pred[:, :, 0], Y_pred[:, :, 1]
    Y1_true, Y2_true = Y_true[:, :, 0], Y_true[:, :, 1]

    if overall_y1_h_sum is None:
        overall_y1_h_sum = np.zeros(n_horizons, dtype=float)
        overall_y2_h_sum = np.zeros(n_horizons, dtype=float)
        overall_both_h_sum = np.zeros(n_horizons, dtype=float)
        overall_h_counts = np.zeros(n_horizons, dtype=int)

    if PRINT_PER_DATASET:
        print(f"\n=== Dataset {dataset_idx + 1} MAPE by Horizon ===")

    for h in range(n_horizons):  #h is the 0-based horizon, printed as H1, H2, ...
        y1_t, y1_p = Y1_true[:, h].ravel(), Y1_pred[:, h].ravel()
        y2_t, y2_p = Y2_true[:, h].ravel(), Y2_pred[:, h].ravel()
        n_h = len(y1_t)

        #sklearn returns a fraction, hence * 100
        mape_y1_h = mean_absolute_percentage_error(y1_t, y1_p) * 100.0
        mape_y2_h = mean_absolute_percentage_error(y2_t, y2_p) * 100.0
        mape_both_h = mean_absolute_percentage_error(
            np.concatenate([y1_t, y2_t]),
            np.concatenate([y1_p, y2_p]),
        ) * 100.0

        #weight by value count so datasets of different size average correctly
        overall_y1_h_sum[h] += mape_y1_h * n_h
        overall_y2_h_sum[h] += mape_y2_h * n_h
        overall_both_h_sum[h] += mape_both_h * (2 * n_h)
        overall_h_counts[h] += n_h

        if PRINT_PER_DATASET:
            print(f"H{h + 1}: Y1 {mape_y1_h:.2f}% | Y2 {mape_y2_h:.2f}% | Both {mape_both_h:.2f}% (n={n_h})")

if overall_h_counts is None:
    #every test set was empty, so no accumulator was ever allocated
    print("\nNo non-empty test set found; no MAPE to report.")
else:
    n_horizons = len(overall_h_counts)

    #overall (weighted) MAPE per horizon across all datasets
    overall_y1_h = overall_y1_h_sum / np.maximum(overall_h_counts, 1)
    overall_y2_h = overall_y2_h_sum / np.maximum(overall_h_counts, 1)
    overall_both_h = overall_both_h_sum / np.maximum(2 * overall_h_counts, 1)

    #the header reports the horizon count actually evaluated
    print(f"\n=== Overall MAPE by Horizon (t+1..t+{n_horizons}) Across All Datasets ===")
    for h in range(n_horizons):
        print(f"H{h + 1}:  Y1 {overall_y1_h[h]:.2f}%   Y2 {overall_y2_h[h]:.2f}%   Both {overall_both_h[h]:.2f}%")

    #report averages across horizons
    print("\n=== Averages Across Horizons (Weighted) ===")
    print(f"Y1 Avg:  {overall_y1_h.mean():.2f}%")
    print(f"Y2 Avg:  {overall_y2_h.mean():.2f}%")
    print(f"Both Avg: {overall_both_h.mean():.2f}%")