In [None]:
import pandas as pd
import numpy as np
import torch
from argparse import Namespace
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Add the parent directory of 'ml' to sys.path
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import warnings
warnings.filterwarnings('ignore')

from ml.utils.data_utils import prepare_dataset
from ml.models.lstm import LSTM
from ml.models.multi_step_lstm import MultiStepLSTM
from ml.models.seq2seq_lstm import Seq2SeqLSTM
from ml.models.transformer import TimeSeriesTransformer

In [None]:
# -----------------------------
# 0) CONFIG
# -----------------------------
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Target columns to forecast. Their order matters: the evaluation cells index
# prediction columns and the y-scaler by position in this list.
TARGETS = ['rnti_count', 'rb_down', 'rb_up', 'down', 'up']
H = 6  # forecast_steps: horizon passed to prepare_dataset and to the multistep models

# checkpoints (paths are relative to the notebook's working directory)
CKPT_BASE_T1     = "base_lstm_t1.pt"
CKPT_MULTI       = "multi_step_lstm.pt"
CKPT_S2S_MULTI   = "seq2seq_lstm_multistep.pth"
CKPT_S2S_CLU     = "seq2seq_cluster_huber.pt"              # Seq2Seq Clusters
CKPT_TRANS       = "transformer_multistep.pt"
CKPT_TRANS_CLU   = "transformer_multistep_cluster.pt"      # Transformer Clusters

In [None]:
# -----------------------------
# 1) DATA (full dataset) used by the four full-dataset models
#    (base LSTM, basic multistep LSTM, Seq2Seq, Transformer)
# -----------------------------
# Options handed to the project helper `prepare_dataset`. Their exact meaning
# is defined in ml.utils.data_utils, which is not visible from this notebook.
args_full = Namespace(
    data_path='../dataset/full_dataset.csv',
    targets=TARGETS,
    num_lags=10,
    forecast_steps=H,
    test_size=0.2,
    ignore_cols=None,
    identifier='District',
    nan_constant=0,
    x_scaler='minmax',
    y_scaler='minmax',
    outlier_detection=True,
    batch_size=128,
    cuda=torch.cuda.is_available(),
    seed=42
)
# Only the first six return values are used; any further ones are discarded via `*_`.
X_train, y_train, X_test, y_test, x_scaler_full, y_scaler_full, *_ = prepare_dataset(args_full)
# t+1 ground-truth on SCALED space (for Strategy A): first forecast step of every sample
y_test_t1_scaled_full = y_test[:, 0, :]  # [N, 5]
# X_test is 3-D; the later cells treat it as [samples, lags, features].
N, L, D = X_test.shape
# Size of y_test's last axis, used as the number of outputs of every model below.
T = y_test.shape[2]

In [None]:
# -----------------------------
# 2) DATA (cluster dataset) for clustered models
# -----------------------------
# Same options as the full-dataset run, except for the CSV path and the extra
# `use_time_features=False` flag (interpreted by prepare_dataset).
args_cluster = Namespace(
    data_path='../dataset/combined_with_cluster_feature.csv',
    targets=TARGETS,
    num_lags=10,
    forecast_steps=H,
    test_size=0.2,
    ignore_cols=None,
    identifier='District',
    nan_constant=0,
    x_scaler='minmax',
    y_scaler='minmax',
    outlier_detection=True,
    batch_size=128,
    cuda=torch.cuda.is_available(),
    seed=42,
    use_time_features=False
)
# Only the first six return values are used; any further ones are discarded via `*_`.
X_tr_c, y_tr_c, X_te_c, y_te_c, x_scaler_c, y_scaler_c, *_ = prepare_dataset(args_cluster)
# t+1 ground-truth on SCALED space (for Strategy A, cluster set)
y_te_c_t1_scaled = y_te_c[:, 0, :]
# Cluster-set dimensions; Dc is the input width of the clustered models.
# NOTE(review): the clustered models reuse T from the full dataset as their
# output size - presumably both datasets have the same targets; confirm.
Nc, Lc, Dc = X_te_c.shape

In [None]:
# -----------------------------
# 3) HELPERS 
# -----------------------------

# Core metrics on SCALED space (expects 2D arrays)
def metrics_scaled_space(y_true_scaled_2d, y_pred_scaled_2d):
    """Compute core regression metrics directly on scaled values.

    Parameters
    ----------
    y_true_scaled_2d, y_pred_scaled_2d : array-like, 2D ([n_samples, n_outputs])
        Ground truth and predictions in the same (scaled) space.

    Returns
    -------
    dict
        Keys "MSE", "RMSE", "MAE", "R2" and "NRMSE". NRMSE is RMSE divided by
        the value range (max - min) observed in ``y_true_scaled_2d``.
    """
    mse = mean_squared_error(y_true_scaled_2d, y_pred_scaled_2d)
    # `mean_squared_error(..., squared=False)` was deprecated in scikit-learn 1.4
    # and removed in 1.6. Reproduce what it returned in a version-independent
    # way: root of each output's MSE, averaged uniformly over the outputs.
    per_output_mse = mean_squared_error(
        y_true_scaled_2d, y_pred_scaled_2d, multioutput="raw_values"
    )
    rmse = np.mean(np.sqrt(per_output_mse))
    mae = mean_absolute_error(y_true_scaled_2d, y_pred_scaled_2d)
    r2 = r2_score(y_true_scaled_2d, y_pred_scaled_2d)
    # NRMSE on the scaled range present in y_true; the small constant avoids a
    # division by zero when y_true is constant.
    nrmse = rmse / (np.max(y_true_scaled_2d) - np.min(y_true_scaled_2d) + 1e-8)
    return {"MSE": mse, "RMSE": rmse, "MAE": mae, "R2": r2, "NRMSE": nrmse}

# Percent metrics on ORIGINAL space (single target column)
def mape(y_true, y_pred, eps=1e-8):
    """Mean absolute percentage error, expressed in percent.

    |y_true| is floored at ``eps`` before dividing, so zero targets do not
    raise but can yield extremely large values.
    """
    abs_truth = np.abs(y_true)
    safe_denominator = np.clip(abs_truth, eps, None)
    relative_error = np.abs((y_true - y_pred) / safe_denominator)
    return float(np.mean(relative_error) * 100.0)

def smape(y_true, y_pred, eps=1e-8):
    """Symmetric MAPE in percent: |error| over the mean of |y_true| and |y_pred|.

    The denominator is floored at ``eps`` to avoid division by zero.
    """
    mean_magnitude = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    safe_denominator = np.clip(mean_magnitude, eps, None)
    ratio = np.abs(y_true - y_pred) / safe_denominator
    return float(np.mean(ratio) * 100.0)

def masked_mape(y_true, y_pred, mask_thresh=1e-6, eps=1e-8):
    """MAPE in percent over the samples whose |y_true| is at least ``mask_thresh``.

    Returns NaN when no sample passes the threshold.
    """
    keep = np.abs(y_true) >= mask_thresh
    if not keep.any():
        return float('nan')
    truth_kept = y_true[keep]
    pred_kept = y_pred[keep]
    safe_denominator = np.clip(np.abs(truth_kept), eps, None)
    relative_error = np.abs((truth_kept - pred_kept) / safe_denominator)
    return float(np.mean(relative_error) * 100.0)

def clamp_nonneg(x):
    """Return ``x`` with every value below zero replaced by 0.0 (element-wise)."""
    lower_bound = 0.0
    return np.maximum(x, lower_bound)

def inverse_single_col(y_scaled_1d, scaler, j):
    """
    Undo min-max scaling for a single target column.

    y_scaled_1d: scaled values of column j, shape [N,]
    scaler:      fitted MinMaxScaler exposing per-column `min_` and `scale_`
    j:           index of the column inside the scaler

    Returns (y_scaled_1d - min_[j]) / scale_[j].
    """
    offsets = np.asarray(scaler.min_)
    factors = np.asarray(scaler.scale_)
    shifted = y_scaled_1d - offsets[j]
    return shifted / factors[j]

def percent_metrics_original_space_single(y_true_scaled_1d, y_pred_scaled_1d, scaler, j, clamp=True):
    """
    Percent-error metrics for one target, computed on the ORIGINAL scale.

    Both inputs are scaled values of target column j; they are inverse-transformed
    with `scaler` first. When `clamp` is true, negative predictions are raised to
    zero (the ground truth is never clamped).

    Returns a dict with "MAPE%", "sMAPE%" and "MAPE_masked%".
    """
    truth_orig = inverse_single_col(np.asarray(y_true_scaled_1d), scaler, j)
    pred_orig = inverse_single_col(np.asarray(y_pred_scaled_1d), scaler, j)
    pred_orig = clamp_nonneg(pred_orig) if clamp else pred_orig

    report = {}
    report["MAPE%"] = mape(truth_orig, pred_orig)
    report["sMAPE%"] = smape(truth_orig, pred_orig)
    report["MAPE_masked%"] = masked_mape(truth_orig, pred_orig, mask_thresh=1e-6)
    return report

In [None]:
# -----------------------------
# 4) LOAD MODELS
# -----------------------------
# Every model follows the same recipe: rebuild the architecture, move it to
# DEVICE, load the checkpoint with strict=True (any key mismatch between the
# checkpoint and the architecture raises), then switch to evaluation mode.
# The architecture hyperparameters therefore have to match those used in training.
# NOTE(review): torch.load unpickles the checkpoint file; if checkpoints can come
# from an untrusted source, consider passing weights_only=True - confirm the
# installed torch version supports it.

# Base paper LSTM (t+1)
base_model = LSTM(
    input_dim=D,
    lstm_hidden_size=128,          # MUST match training
    num_lstm_layers=2,             # 2 layers in checkpoint
    lstm_dropout=0.0,
    layer_units=[128, 64],         # MLP head like checkpoint
    num_outputs=T,
    matrix_rep=True,
    exogenous_dim=0
).to(DEVICE)
base_model.load_state_dict(torch.load(CKPT_BASE_T1, map_location=DEVICE), strict=True)
base_model.eval()

# Basic Multistep LSTM
multi_model = MultiStepLSTM(
    input_size=D, hidden_size=128, num_layers=1,
    output_size=T, forecast_steps=H
).to(DEVICE)
multi_model.load_state_dict(torch.load(CKPT_MULTI, map_location=DEVICE), strict=True)
multi_model.eval()

# Seq2Seq Multistep LSTM (full dataset)
s2s_model = Seq2SeqLSTM(
    input_size=D, hidden_size=64, output_size=T, forecast_steps=H, num_layers=1
).to(DEVICE)
s2s_model.load_state_dict(torch.load(CKPT_S2S_MULTI, map_location=DEVICE), strict=True)
s2s_model.eval()

# Seq2Seq Multistep LSTM (cluster dataset)
# Input width is Dc (cluster feature set); the output size T still comes from
# the full dataset - assumes both datasets share the same targets (TODO confirm).
s2s_cluster_model = Seq2SeqLSTM(
    input_size=Dc, hidden_size=64, output_size=T, forecast_steps=H, num_layers=1
).to(DEVICE)
s2s_cluster_model.load_state_dict(torch.load(CKPT_S2S_CLU, map_location=DEVICE), strict=True)
s2s_cluster_model.eval()

# Transformer (full dataset)
transformer_model = TimeSeriesTransformer(
    input_size=D, output_size=T, forecast_steps=H,
    d_model=128, nhead=4, num_encoder_layers=2, num_decoder_layers=2,
    dim_feedforward=256, dropout=0.1
).to(DEVICE)
transformer_model.load_state_dict(torch.load(CKPT_TRANS, map_location=DEVICE), strict=True)
transformer_model.eval()

# Transformer (cluster dataset)
transformer_cluster_model = TimeSeriesTransformer(
    input_size=Dc, output_size=T, forecast_steps=H,
    d_model=128, nhead=4, num_encoder_layers=2, num_decoder_layers=2,
    dim_feedforward=256, dropout=0.1
).to(DEVICE)
transformer_cluster_model.load_state_dict(torch.load(CKPT_TRANS_CLU, map_location=DEVICE), strict=True)
# Last expression of the cell: eval() returns the module, so the notebook
# displays the clustered transformer's repr as the cell output.
transformer_cluster_model.eval()

TimeSeriesTransformer(
  (input_proj): Linear(in_features=7, out_features=128, bias=True)
  (enc_pos): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (dec_pos): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (decoder): TransformerDecoder(
    (lay

In [None]:
# -----------------------------
# 5) PREDICT t+1 (SCALED space)
# -----------------------------
# Each model is run once on its whole test set with gradients disabled. For
# the multistep models only the first forecast step (index 0 on axis 1) is
# kept, so all six models can be compared on the same t+1 target.
with torch.no_grad():
    xb_full = torch.tensor(X_test, dtype=torch.float32, device=DEVICE)     # [N, L, D]
    xb_clu  = torch.tensor(X_te_c, dtype=torch.float32, device=DEVICE)     # [Nc, Lc, Dc]

    # Base (t+1): this model's forward takes the device as a keyword argument
    base_t1_scaled = base_model(xb_full, device=DEVICE)                    # [N, 5]

    # Basic multistep -> pick t+1
    multi_all = multi_model(xb_full)                                       # [N, 6, 5]
    multi_t1_scaled = multi_all[:, 0, :]

    # Seq2Seq multistep (full); teacher forcing is switched off for inference
    s2s_all = s2s_model(xb_full, teacher_forcing_ratio=0.0)
    s2s_t1_scaled = s2s_all[:, 0, :]

    # Seq2Seq multistep (cluster dataset)
    s2s_clu_all = s2s_cluster_model(xb_clu, teacher_forcing_ratio=0.0)
    s2s_clu_t1_scaled = s2s_clu_all[:, 0, :]

    # Transformer (full)
    trans_all = transformer_model(xb_full)
    trans_t1_scaled = trans_all[:, 0, :]

    # Transformer (cluster dataset)
    trans_clu_all = transformer_cluster_model(xb_clu)
    trans_clu_t1_scaled = trans_clu_all[:, 0, :]

# to numpy (moved to host memory first, for the sklearn/numpy metric helpers)
base_t1_scaled      = base_t1_scaled.cpu().numpy()
multi_t1_scaled     = multi_t1_scaled.cpu().numpy()
s2s_t1_scaled       = s2s_t1_scaled.cpu().numpy()
s2s_clu_t1_scaled   = s2s_clu_t1_scaled.cpu().numpy()
trans_t1_scaled     = trans_t1_scaled.cpu().numpy()
trans_clu_t1_scaled = trans_clu_t1_scaled.cpu().numpy()

In [None]:
# -----------------------------
# 6) STRATEGY A: BUILD COMPARISON TABLE (t+1)
#    Core metrics on SCALED; percent metrics on ORIGINAL
# -----------------------------

rows = []

def add_rows_for_model(name, y_pred_scaled, y_true_scaled, scaler, targets):
    """Append one Strategy-A (t+1) metrics record per target to the module-level `rows`.

    Core metrics are computed on the scaled columns; percent metrics are computed
    after inverse-transforming the same column with `scaler`. Column i of both
    arrays is taken to belong to targets[i].
    """
    core_keys = ("MSE", "RMSE", "MAE", "R2", "NRMSE")
    pct_keys = ("MAPE%", "sMAPE%", "MAPE_masked%")
    for col, target_name in enumerate(targets):
        truth_col = y_true_scaled[:, col]
        pred_col = y_pred_scaled[:, col]
        # core on scaled
        core = metrics_scaled_space(truth_col.reshape(-1, 1), pred_col.reshape(-1, 1))
        # percent on original (inverse-transform this column only)
        pct = percent_metrics_original_space_single(truth_col, pred_col, scaler, j=col, clamp=True)
        record = {"Strategy": "A_t+1", "Model": name, "Target": target_name}
        record.update({key: core[key] for key in core_keys})
        record.update({key: pct[key] for key in pct_keys})
        rows.append(record)

# Full-dataset models: scored against the t+1 slice of y_test with y_scaler_full.
FULL_MODELS_A = [
    (label, preds, y_test_t1_scaled_full, y_scaler_full)
    for label, preds in (
        ("Base paper LSTM (t+1)", base_t1_scaled),
        ("Basic Multistep LSTM (t+1)", multi_t1_scaled),
        ("Seq2Seq Multistep LSTM (t+1)", s2s_t1_scaled),
        ("Transformer model (t+1)", trans_t1_scaled),
    )
]

# Cluster models: scored against the t+1 slice of y_te_c with y_scaler_c.
CLU_MODELS_A = [
    (label, preds, y_te_c_t1_scaled, y_scaler_c)
    for label, preds in (
        ("Seq2Seq Multistep LSTM with Clusters (t+1)", s2s_clu_t1_scaled),
        ("Transformer model with Clusters (t+1)", trans_clu_t1_scaled),
    )
]

# Full-dataset rows first, then cluster rows.
for mdl_name, pred_s, ytrue_s, scaler in FULL_MODELS_A + CLU_MODELS_A:
    add_rows_for_model(mdl_name, pred_s, ytrue_s, scaler, TARGETS)

df_t1 = pd.DataFrame(
    rows,
    columns=[
        "Strategy", "Model", "Target",
        "MSE", "RMSE", "MAE", "R2", "NRMSE",
        "MAPE%", "sMAPE%", "MAPE_masked%",
    ],
)
print("\n=== Strategy A (t+1) ===")
display(df_t1.head(15))


=== Strategy A (t+1) ===


Unnamed: 0,Strategy,Model,Target,MSE,RMSE,MAE,R2,NRMSE,MAPE%,sMAPE%,MAPE_masked%
0,A_t+1,Base paper LSTM (t+1),rnti_count,0.008045,0.089696,0.066063,0.485215,0.125351,25.48788,24.742963,25.48788
1,A_t+1,Base paper LSTM (t+1),rb_down,0.008326,0.091249,0.043579,0.559291,0.091397,47.25889,36.282234,47.258889
2,A_t+1,Base paper LSTM (t+1),rb_up,0.012164,0.110291,0.05219,0.610206,0.110291,2718.907,123.95536,2097.916283
3,A_t+1,Base paper LSTM (t+1),down,0.00734,0.085676,0.050102,0.480701,0.085995,37.16739,31.237519,37.167388
4,A_t+1,Base paper LSTM (t+1),up,0.010708,0.10348,0.045972,0.556588,0.10348,22148420000000.0,95.755641,1250.913328
5,A_t+1,Basic Multistep LSTM (t+1),rnti_count,0.006707,0.081893,0.06221,0.57088,0.114447,27.13494,23.737117,27.134945
6,A_t+1,Basic Multistep LSTM (t+1),rb_down,0.008512,0.092262,0.047042,0.549452,0.092412,51.33693,36.843247,51.336928
7,A_t+1,Basic Multistep LSTM (t+1),rb_up,0.013288,0.115274,0.056397,0.574183,0.115274,1149.287,127.6113,1108.54622
8,A_t+1,Basic Multistep LSTM (t+1),down,0.007688,0.087681,0.053393,0.456111,0.088007,42.20614,32.552472,42.206141
9,A_t+1,Basic Multistep LSTM (t+1),up,0.012111,0.110049,0.052907,0.498501,0.110049,533.6696,104.555285,534.263324


In [None]:
# -----------------------------
# 7) STRATEGY B: multi-step (t+1..t+6)
# -----------------------------

def infer_target_positions_from_data(X_test, y_t1_scaled):
    """
    Guess which input feature column carries each target.

    For every column of `y_t1_scaled`, the feature column of X's last time step
    with the largest absolute correlation that has not been claimed by an
    earlier target is selected. Returns the chosen feature indices, one per
    target, in target order.
    """
    assert X_test.ndim == 3 and y_t1_scaled.ndim == 2
    _, _, n_features = X_test.shape
    n_targets = y_t1_scaled.shape[1]
    last_step = X_test[:, -1, :]

    def _standardize(column):
        # Centre first, then divide by the std of the centred column (+ tiny constant).
        centered = column - column.mean()
        return centered / (centered.std() + 1e-12)

    # The standardised feature columns do not depend on the target: compute them once.
    z_features = [_standardize(last_step[:, j]) for j in range(n_features)]

    chosen, taken = [], set()
    for i in range(n_targets):
        z_target = _standardize(y_t1_scaled[:, i])
        strengths = [abs(float(np.mean(z_feat * z_target))) for z_feat in z_features]
        ranked = np.argsort(strengths)[::-1]
        best_free = next((int(j) for j in ranked if j not in taken), None)
        if best_free is not None:
            chosen.append(best_free)
            taken.add(best_free)
    return chosen

def validate_positions(pos_list, D, T):
    """Check a target->feature position mapping and fall back to 0..T-1 if it is invalid.

    A mapping is valid when it has exactly T entries, all inside [0, D), with no
    duplicates. Returns (positions, was_valid). Raises ValueError when the
    mapping is invalid and no fallback exists because T > D.
    """
    def _is_valid():
        if len(pos_list) != T:
            return False
        if not all(0 <= p < D for p in pos_list):
            return False
        return len(set(pos_list)) == T

    if _is_valid():
        return pos_list, True
    if T > D:
        raise ValueError(f"Cannot fallback: T={T} > D={D}.")
    fallback = list(range(T))
    print(f"[WARN] Invalid TARGET_POS_IN_X={pos_list} for D={D}. "
          f"Falling back to {fallback}. Verify this mapping!")
    return fallback, False

# map targets to feature positions in X for FULL dataset (for base roll)
# The mapping is inferred by correlation, then validated; an invalid guess is
# replaced by [0..T-1] with a printed warning. The validity flag is discarded.
inferred = infer_target_positions_from_data(X_test, y_test_t1_scaled_full)
TARGET_POS_IN_X, _ = validate_positions(inferred, D=X_test.shape[2], T=y_test.shape[2])
print("Using TARGET_POS_IN_X =", TARGET_POS_IN_X)

def roll_base_lstm_to_horizon(base_model, X_init, steps, target_pos_in_x, device="cpu"):
    """Roll a one-step model forward autoregressively for `steps` steps.

    At every step the model predicts from the current window. A copy of the
    window's last row then has its target features (columns `target_pos_in_x`)
    overwritten with the prediction, and is appended while the oldest row is
    dropped. Features that are not targets are carried forward unchanged.

    Returns a numpy array of shape [N, steps, T] holding the scaled predictions.
    """
    base_model.eval()
    window = torch.tensor(X_init, dtype=torch.float32, device=device)  # [N, L, D]
    step_preds = []
    with torch.no_grad():
        for _step in range(steps):
            pred = base_model(window, device=device)  # [N, T] scaled
            step_preds.append(pred.unsqueeze(1))
            new_row = window[:, -1, :].clone()
            for tgt_idx, feat_idx in enumerate(target_pos_in_x):
                new_row[:, feat_idx] = pred[:, tgt_idx]
            window = torch.cat((window[:, 1:, :], new_row.unsqueeze(1)), dim=1)
    stacked = torch.cat(step_preds, dim=1)  # [N, steps, T]
    return stacked.detach().cpu().numpy()

# ----- FULL DATASET: base rolled + multistep models
# The one-step base LSTM is rolled forward H steps by feeding its own
# predictions back into the input window; the multistep models emit all H
# steps in a single forward pass.
base_rolled_scaled = roll_base_lstm_to_horizon(
    base_model, X_test, steps=H, target_pos_in_x=TARGET_POS_IN_X, device=DEVICE
)
with torch.no_grad():
    xb_full = torch.tensor(X_test, dtype=torch.float32, device=DEVICE)
    basic_multi_scaled = multi_model(xb_full).detach().cpu().numpy()
    s2s_scaled         = s2s_model(xb_full, teacher_forcing_ratio=0.0).detach().cpu().numpy()
    trans_scaled       = transformer_model(xb_full).detach().cpu().numpy()
y_true_full_scaled = y_test

# Each entry: (display name, scaled predictions, scaled ground truth, y-scaler
# used to map back to the original scale).
FULL_MODELS_STEPS = [
    ("Base LSTM (rolled t+1..t+6)", base_rolled_scaled, y_true_full_scaled, y_scaler_full),
    ("Basic Multistep LSTM",         basic_multi_scaled, y_true_full_scaled, y_scaler_full),
    ("Seq2Seq Multistep LSTM",       s2s_scaled,         y_true_full_scaled, y_scaler_full),
    ("Transformer model",            trans_scaled,       y_true_full_scaled, y_scaler_full),
]

# ----- CLUSTER DATASET: clustered models
with torch.no_grad():
    xb_clu = torch.tensor(X_te_c, dtype=torch.float32, device=DEVICE)
    s2s_clu_scaled   = s2s_cluster_model(xb_clu, teacher_forcing_ratio=0.0).detach().cpu().numpy()
    trans_clu_scaled = transformer_cluster_model(xb_clu).detach().cpu().numpy()  # <-- NEW
y_true_clu_scaled = y_te_c

# Same tuple layout as FULL_MODELS_STEPS, using the cluster test set and its scaler.
CLU_MODELS_STEPS = [
    ("Seq2Seq Multistep LSTM with Clusters", s2s_clu_scaled,   y_true_clu_scaled, y_scaler_c),
    ("Transformer model with Clusters",      trans_clu_scaled, y_true_clu_scaled, y_scaler_c),  # <-- NEW
]

def _metric_row(prefix, y_true_1d, y_pred_1d, scaler, j):
    """Return `prefix` extended with core (scaled) and percent (original-scale) metrics for target j."""
    m_core = metrics_scaled_space(y_true_1d.reshape(-1, 1), y_pred_1d.reshape(-1, 1))
    m_pct = percent_metrics_original_space_single(y_true_1d, y_pred_1d, scaler, j=j, clamp=True)
    row = dict(prefix)
    for key in ("MSE", "RMSE", "MAE", "R2", "NRMSE"):
        row[key] = m_core[key]
    for key in ("MAPE%", "sMAPE%", "MAPE_masked%"):
        row[key] = m_pct[key]
    return row


def evaluate_multistep_models(models_steps, targets, strategy_tag="B"):
    """Score multi-step forecasts per horizon step and over all steps together.

    Parameters
    ----------
    models_steps : iterable of (name, y_pred_scaled, y_true_scaled, scaler)
        Predictions and ground truth are indexed as [sample, step, target] and
        may be numpy arrays or torch tensors. `scaler` is the fitted y-scaler
        used to map target j back to the original scale.
    targets : sequence of str
        Target names; targets[j] labels column j of the last axis.
    strategy_tag : str
        Prefix written to the "Strategy" column.

    Returns
    -------
    (df_steps, df_over) : tuple of pandas.DataFrame
        One row per (model, step, target), and one row per (model, target)
        computed over all steps flattened together.

    Raises
    ------
    ValueError
        If predictions and ground truth disagree on sample count or horizon.
    """
    metric_cols = ["MSE", "RMSE", "MAE", "R2", "NRMSE", "MAPE%", "sMAPE%", "MAPE_masked%"]
    rows_steps, rows_over = [], []
    for name, y_pred_s, y_true_s, scaler in models_steps:
        # detach() so tensors that still track gradients can be converted as well.
        if isinstance(y_pred_s, torch.Tensor):
            y_pred_s = y_pred_s.detach().cpu().numpy()
        if isinstance(y_true_s, torch.Tensor):
            y_true_s = y_true_s.detach().cpu().numpy()
        if y_pred_s.shape[:2] != y_true_s.shape[:2]:
            raise ValueError(
                f"{name}: prediction shape {y_pred_s.shape} does not match "
                f"ground-truth shape {y_true_s.shape} on samples/steps."
            )
        # Use the horizon present in the data rather than a notebook-level global,
        # so the per-step table always covers the same steps as the overall table.
        n_steps = y_true_s.shape[1]

        # per-step
        for step in range(n_steps):
            for j, var in enumerate(targets):
                prefix = {
                    "Strategy": f"{strategy_tag}_t+{step+1}",
                    "Step": step + 1,
                    "Model": name,
                    "Target": var,
                }
                rows_steps.append(
                    _metric_row(prefix, y_true_s[:, step, j], y_pred_s[:, step, j], scaler, j)
                )

        # overall flattened (all steps of target j pooled together)
        for j, var in enumerate(targets):
            prefix = {
                "Strategy": f"{strategy_tag}_overall",
                "Model": name,
                "Target": var,
            }
            rows_over.append(
                _metric_row(
                    prefix,
                    y_true_s[:, :, j].reshape(-1),
                    y_pred_s[:, :, j].reshape(-1),
                    scaler,
                    j,
                )
            )

    df_steps = pd.DataFrame(rows_steps, columns=["Strategy", "Step", "Model", "Target"] + metric_cols)
    df_over = pd.DataFrame(rows_over, columns=["Strategy", "Model", "Target"] + metric_cols)
    return df_steps, df_over

# Evaluate FULL & CLUSTER
# The two groups are scored separately because they use different test sets and
# y-scalers; the strategy tag records which group each row belongs to.
df_B_steps_full, df_B_overall_full = evaluate_multistep_models(
    FULL_MODELS_STEPS, targets=TARGETS, strategy_tag="B(FULL)"
)
df_B_steps_cluster, df_B_overall_cluster = evaluate_multistep_models(
    CLU_MODELS_STEPS, targets=TARGETS, strategy_tag="B(CLUSTER)"
)


Using TARGET_POS_IN_X = [0, 1, 2, 3, 4]


In [None]:
# Per-step Strategy B metrics for the full-dataset models.
df_B_steps_full

Unnamed: 0,Strategy,Step,Model,Target,MSE,RMSE,MAE,R2,NRMSE,MAPE%,sMAPE%,MAPE_masked%
0,B(FULL)_t+1,1,Base LSTM (rolled t+1..t+6),rnti_count,0.008045,0.089696,0.066063,0.485215,0.125351,2.548788e+01,24.742963,25.487880
1,B(FULL)_t+1,1,Base LSTM (rolled t+1..t+6),rb_down,0.008326,0.091249,0.043579,0.559291,0.091397,4.725889e+01,36.282234,47.258889
2,B(FULL)_t+1,1,Base LSTM (rolled t+1..t+6),rb_up,0.012164,0.110291,0.052190,0.610206,0.110291,2.718907e+03,123.955360,2097.916283
3,B(FULL)_t+1,1,Base LSTM (rolled t+1..t+6),down,0.007340,0.085676,0.050102,0.480701,0.085995,3.716739e+01,31.237519,37.167388
4,B(FULL)_t+1,1,Base LSTM (rolled t+1..t+6),up,0.010708,0.103480,0.045972,0.556588,0.103480,2.214842e+13,95.755641,1250.913328
...,...,...,...,...,...,...,...,...,...,...,...,...
115,B(FULL)_t+6,6,Transformer model,rnti_count,0.008373,0.091502,0.068258,0.464889,0.127875,2.820985e+01,25.350329,28.209849
116,B(FULL)_t+6,6,Transformer model,rb_down,0.013979,0.118234,0.062566,0.259387,0.118425,6.814866e+01,46.381172,68.148656
117,B(FULL)_t+6,6,Transformer model,rb_up,0.024121,0.155309,0.083074,0.224826,0.155309,9.040568e+02,165.974300,905.249964
118,B(FULL)_t+6,6,Transformer model,down,0.010237,0.101178,0.061748,0.275143,0.101555,4.607308e+01,36.714351,46.073084


In [None]:
# Per-step Strategy B metrics for the clustered models.
df_B_steps_cluster

Unnamed: 0,Strategy,Step,Model,Target,MSE,RMSE,MAE,R2,NRMSE,MAPE%,sMAPE%,MAPE_masked%
0,B(CLUSTER)_t+1,1,Seq2Seq Multistep LSTM with Clusters,rnti_count,0.006578,0.081103,0.060159,0.579123,0.113342,25.46613,22.700099,25.466134
1,B(CLUSTER)_t+1,1,Seq2Seq Multistep LSTM with Clusters,rb_down,0.00832,0.091212,0.042843,0.559653,0.091359,42.45123,33.904991,42.451233
2,B(CLUSTER)_t+1,1,Seq2Seq Multistep LSTM with Clusters,rb_up,0.01207,0.109865,0.048668,0.61321,0.109865,1116.393,111.354024,922.361071
3,B(CLUSTER)_t+1,1,Seq2Seq Multistep LSTM with Clusters,down,0.007053,0.083981,0.048396,0.501046,0.084293,35.77251,30.067656,35.772513
4,B(CLUSTER)_t+1,1,Seq2Seq Multistep LSTM with Clusters,up,0.010509,0.102514,0.046178,0.564829,0.102514,7649607000000.0,94.609674,705.398858
5,B(CLUSTER)_t+2,2,Seq2Seq Multistep LSTM with Clusters,rnti_count,0.007232,0.085041,0.063856,0.537377,0.118846,27.66098,24.161699,27.660983
6,B(CLUSTER)_t+2,2,Seq2Seq Multistep LSTM with Clusters,rb_down,0.009311,0.096494,0.046777,0.50703,0.09665,49.08285,37.194283,49.082848
7,B(CLUSTER)_t+2,2,Seq2Seq Multistep LSTM with Clusters,rb_up,0.014151,0.118958,0.054107,0.546151,0.118958,1744.133,119.306079,1413.07904
8,B(CLUSTER)_t+2,2,Seq2Seq Multistep LSTM with Clusters,down,0.007697,0.087731,0.050923,0.455398,0.088058,38.38189,31.742458,38.381892
9,B(CLUSTER)_t+2,2,Seq2Seq Multistep LSTM with Clusters,up,0.012285,0.110839,0.050256,0.490553,0.110839,11540890000000.0,100.822851,896.175286


In [None]:
# Strategy B metrics pooled over all forecast steps, full-dataset models.
df_B_overall_full

Unnamed: 0,Strategy,Model,Target,MSE,RMSE,MAE,R2,NRMSE,MAPE%,sMAPE%,MAPE_masked%
0,B(FULL)_overall,Base LSTM (rolled t+1..t+6),rnti_count,0.035469,0.188333,0.146515,-1.268102,0.263197,49.8092,71.783886,49.809201
1,B(FULL)_overall,Base LSTM (rolled t+1..t+6),rb_down,0.019102,0.138209,0.069796,-0.01167,0.138433,60.31678,64.432056,60.316784
2,B(FULL)_overall,Base LSTM (rolled t+1..t+6),rb_up,0.0262,0.161864,0.089242,0.159088,0.161864,5902.77,150.109993,4405.889792
3,B(FULL)_overall,Base LSTM (rolled t+1..t+6),down,0.017395,0.131889,0.08401,-0.231383,0.13238,49.20035,65.18955,49.200349
4,B(FULL)_overall,Base LSTM (rolled t+1..t+6),up,0.020193,0.142103,0.073382,0.161388,0.142103,42026380000000.0,126.791798,2397.080643
5,B(FULL)_overall,Basic Multistep LSTM,rnti_count,0.007651,0.087469,0.067382,0.510763,0.122239,30.56769,25.787734,30.567688
6,B(FULL)_overall,Basic Multistep LSTM,rb_down,0.011023,0.10499,0.05639,0.416203,0.10516,65.93857,44.627017,65.938571
7,B(FULL)_overall,Basic Multistep LSTM,rb_up,0.017975,0.134071,0.067151,0.423073,0.134071,1452.835,132.419971,1423.208809
8,B(FULL)_overall,Basic Multistep LSTM,down,0.009058,0.095175,0.059343,0.35875,0.09553,48.69219,36.543371,48.69219
9,B(FULL)_overall,Basic Multistep LSTM,up,0.015295,0.123672,0.062848,0.364817,0.123672,1291440000000.0,115.102816,845.745857


In [None]:
# Strategy B metrics pooled over all forecast steps, clustered models.
df_B_overall_cluster

Unnamed: 0,Strategy,Model,Target,MSE,RMSE,MAE,R2,NRMSE,MAPE%,sMAPE%,MAPE_masked%
0,B(CLUSTER)_overall,Seq2Seq Multistep LSTM with Clusters,rnti_count,0.007655,0.087495,0.065804,0.510479,0.122275,28.78679,24.891901,28.786791
1,B(CLUSTER)_overall,Seq2Seq Multistep LSTM with Clusters,rb_down,0.010641,0.103157,0.050989,0.436412,0.103324,54.6489,40.256063,54.648896
2,B(CLUSTER)_overall,Seq2Seq Multistep LSTM with Clusters,rb_up,0.016619,0.128914,0.060093,0.466599,0.128914,1725.051,123.114499,1451.936528
3,B(CLUSTER)_overall,Seq2Seq Multistep LSTM with Clusters,down,0.008558,0.092508,0.054063,0.394185,0.092853,40.91578,33.413003,40.915782
4,B(CLUSTER)_overall,Seq2Seq Multistep LSTM with Clusters,up,0.014199,0.119159,0.055064,0.410333,0.119159,10987930000000.0,103.75239,941.065723
5,B(CLUSTER)_overall,Transformer model with Clusters,rnti_count,0.007224,0.084993,0.062871,0.53807,0.118779,25.08314,24.148789,25.083142
6,B(CLUSTER)_overall,Transformer model with Clusters,rb_down,0.010461,0.10228,0.049761,0.445952,0.102446,50.43308,40.621209,50.433084
7,B(CLUSTER)_overall,Transformer model with Clusters,rb_up,0.016938,0.130146,0.05688,0.456355,0.130146,550.7506,156.313438,551.419542
8,B(CLUSTER)_overall,Transformer model with Clusters,down,0.008175,0.090416,0.053509,0.421286,0.090752,40.6493,33.114082,40.649297
9,B(CLUSTER)_overall,Transformer model with Clusters,up,0.01395,0.118108,0.049012,0.420682,0.118108,297.3373,113.064348,297.668063


In [None]:
# Export the per-step tables (full rows first, then cluster rows) to one sheet.
# index=False drops the row index, which repeats after concatenation.
pd.concat([df_B_steps_full, df_B_steps_cluster]).to_excel("compare_rolled_over_steps.xlsx", index=False)

In [None]:
# Export the pooled-over-steps tables (full rows first, then cluster rows) to one sheet.
pd.concat([df_B_overall_full, df_B_overall_cluster]).to_excel("compare_rolled_over_overall.xlsx", index=False)

In [None]:
# Export the Strategy A (t+1) comparison table, without the row index.
df_t1.to_excel("strategyA_compare_t1.xlsx", index=False)