In [1]:
import torch
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.metrics.point import MAE, MAPE, RMSE
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning_fabric/__init__.py:41: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.


In [2]:
ori_data = pd.read_csv("All_Data_Reducted.csv", sep=";")
ori_data['Timestamp'] = pd.to_datetime(ori_data['Timestamp'])
ori_data = ori_data[:-120]

In [3]:
def make_synth_df(
    X_num, X_cat, X_time, X_lag, Y_target,
    num_feature_names,                      # names (len == X_num.shape[2]) in SAME order as X_num
    target_name="Energy",
    time_cols=("hour_sin","hour_cos","dow_sin","dow_cos"),
    cal_cols=("is_low_usage","is_low_usage_next"),
):
    """
    Build synthetic mini-series:
      - encoder rows [0..L-1]: copy known time + calendar, sensors; Energy from X_lag (lag == Energy)
      - decoder row  [L]:      copy *all* encoder features (ffill one step) and set Energy from Y_target
    Shapes:
      X_num  : (N, L, D_num)
      X_cat  : (N, L, 2) -> [is_low_usage, is_low_usage_next]
      X_time : (N, L, 4) -> [hour_sin,hour_cos,dow_sin,dow_cos]
      X_lag  : (N, L, 1) -> lag == Energy at encoder rows
      Y_target: (N, 1) or (N,)
    """
    N, L, _ = X_time.shape
    assert X_time.shape[2] == 4, "X_time must have 4 columns"
    assert X_cat.shape[2]  == 2, "X_cat must have 2 columns: [is_low_usage, is_low_usage_next]"
    assert X_lag.shape     == (N, L, 1), "X_lag must be (N, L, 1)"
    if X_num.size:
        assert len(num_feature_names) == X_num.shape[2], "num_feature_names length must match X_num last dim"
    else:
        num_feature_names = []

    Y_target = np.ravel(Y_target).astype(float)

    frames = []
    for i in range(N):
        gid = f"synth_{i:05d}"

        # Encoder rows
        df_i = pd.DataFrame({
            "group_id": gid,
            "time_idx": np.arange(L, dtype=int),
            time_cols[0]: X_time[i, :, 0],
            time_cols[1]: X_time[i, :, 1],
            time_cols[2]: X_cat[i, :, 0].astype(int),   # if your X_time order is [hour_sin,hour_cos,dow_sin,dow_cos], keep next line as-is
            time_cols[3]: X_cat[i, :, 1].astype(int),   # <- remove these two lines if X_time already holds dow sin/cos; see note below
        })
        # NOTE: If your X_time contains the 4 sin/cos features already, comment out the two lines above
        # that wrote cal flags into time_cols[2:4]. Then add calendar flags separately:

        # Proper calendar flags:
        df_i[cal_cols[0]] = X_cat[i, :, 0].astype(int)
        df_i[cal_cols[1]] = X_cat[i, :, 1].astype(int)

        # If X_time already contains the 4 time features, overwrite them correctly:
        df_i[time_cols[0]] = X_time[i, :, 0]
        df_i[time_cols[1]] = X_time[i, :, 1]
        df_i[time_cols[2]] = X_time[i, :, 2]
        df_i[time_cols[3]] = X_time[i, :, 3]

        # Sensors (unknown reals)
        for j, col in enumerate(num_feature_names):
            df_i[col] = X_num[i, :, j] if X_num.size else np.nan

        # Target on encoder rows from lag (lag == Energy)
        df_i[target_name] = X_lag[i, :, 0].astype(float)

        # Decoder row (time_idx = L): copy features to avoid NaNs; Energy from Y_target
        dec = {
            "group_id": gid,
            "time_idx": L,
            target_name: float(Y_target[i]),
            # known time features for decoder (reuse last encoder step)
            time_cols[0]: float(df_i.iloc[-1][time_cols[0]]),
            time_cols[1]: float(df_i.iloc[-1][time_cols[1]]),
            time_cols[2]: float(df_i.iloc[-1][time_cols[2]]),
            time_cols[3]: float(df_i.iloc[-1][time_cols[3]]),
            # calendar flags at decoder: shift next -> now (simple, consistent)
            cal_cols[0]: int(df_i.iloc[-1][cal_cols[1]]),
            cal_cols[1]: int(df_i.iloc[-1][cal_cols[1]]),
        }
        for col in num_feature_names:
            dec[col] = float(df_i.iloc[-1][col])

        df_i = pd.concat([df_i, pd.DataFrame([dec])], ignore_index=True)

        # Optional dummy timestamp (ignored by TFT)
        base = pd.Timestamp("2000-01-01") + pd.to_timedelta(i, unit="D")
        df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")

        # df_i["is_synth"] = 1
        frames.append(df_i)

    synth_df = pd.concat(frames, ignore_index=True)
    front = ["Timestamp","group_id","time_idx",target_name]
    synth_df = synth_df[front + [c for c in synth_df.columns if c not in front]]
    return synth_df

In [4]:
import numpy as np
import pandas as pd
import random
import os


def TFT_training(seed_val = 42, testing_stage = True, pinball_usage = False, batch_size = 16, data_augmentation = False, fake_data_length = 0):

    from lightning.pytorch import seed_everything

    seed_everything(seed_val, workers=True)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    random.seed(seed_val)

    # --- 0) Config you already have ---
    n_past   = 48        # lookback
    n_future = 1         # horizon
    
    # --- 1) Start from your original df ---
    data = ori_data.copy()
    # Ensure timestamp is datetime and sorted
    data['Timestamp'] = pd.to_datetime(data['Timestamp'])
    data = data.sort_values('Timestamp').reset_index(drop=True)
    # --- 2) Recreate your time + categorical features (same logic as now) ---
    time_related = pd.DataFrame({'Timestamp': data['Timestamp']})
    time_related['hour_sin'] = np.sin(2 * np.pi * time_related['Timestamp'].dt.hour / 24)
    time_related['hour_cos'] = np.cos(2 * np.pi * time_related['Timestamp'].dt.hour / 24)
    time_related['dow_sin']  = np.sin(2 * np.pi * time_related['Timestamp'].dt.dayofweek / 7)
    time_related['dow_cos']  = np.cos(2 * np.pi * time_related['Timestamp'].dt.dayofweek / 7)

    tmp = data.set_index('Timestamp')
    tmp['dow']  = tmp.index.day_name().str[:3]
    tmp['hour'] = tmp.index.hour

    sun_low = tmp['dow'] == 'Sun'
    mon_low = (tmp['dow'] == 'Mon') & tmp['hour'].isin([0,1,2,3,4,5,6,7,8,9])
    wed_low = (tmp['dow'] == 'Wed') & tmp['hour'].isin([6,7,8,9,10,11,12,13,14])
    sat_low = (tmp['dow'] == 'Sat') & tmp['hour'].isin([19,20,21,22,23])

    tmp['is_low_usage'] = (sun_low | mon_low | wed_low | sat_low).astype(int)
    tmp = tmp.drop(columns=['dow','hour']).reset_index()

    # next-step flag (calendar-derived ⇒ can be treated as known future)
    tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
        
    # --- 3) Assemble the long frame for TFT ---
    # Keep ALL numeric covariates you originally had (besides Energy & Timestamp)
    # If you had extra engineered numeric features, they can stay—TFT will normalize them.
    df = tmp.merge(time_related, on='Timestamp', how='left')
    # TFT reqs: group_id (single series) + integer time_idx
    df['group_id'] = 'series_0'
    df['time_idx'] = np.arange(len(df))  # hourly regular steps
    timestamps = df['Timestamp']
    # Target
    assert 'Energy' in df.columns, "Expected target column 'Energy' in ori_data"
    target_col = 'Energy'

    # Known future vs observed:
    # - Hour/dow sin/cos + calendar flags can be computed for future ⇒ known
    known_reals = ["time_idx", "hour_sin", "hour_cos", "dow_sin", "dow_cos",
                "is_low_usage", "is_low_usage_next"]  # keep as numeric 0/1
    known_cats  = []  # empty

    # Everything else numeric (except target) we treat as observed reals by default
    exclude = set(['Timestamp','group_id','time_idx', target_col] + known_reals + known_cats)
    observed_reals = [c for c in df.columns
                    if c not in exclude and np.issubdtype(df[c].dtype, np.number)]
    unknown_reals = observed_reals  # whatever you computed before, but DO NOT include 'Energy'
    unknown_cats  = []              # if you had any observed categoricals, put them here

        
    # --- 4) Time-based splits: 80 / 5 / 5 / 10 ---
    N = len(df)
    i_train_end = int(0.80 * N) - 1
    i_val1_end  = i_train_end + int(0.05 * N)
    i_val2_end  = i_val1_end + int(0.05 * N)
    # test is the remainder

    train_df = df.iloc[:i_train_end+1].copy()
    val1_df  = df.iloc[i_train_end+1 : i_val1_end+1].copy()
    val2_df  = df.iloc[i_val1_end+1 : i_val2_end+1].copy()
    test_df  = df.iloc[i_val2_end+1 :].copy()
    timestamps_train = timestamps.iloc[:i_train_end+1].copy()
    timestamps_val1  = timestamps.iloc[i_train_end+1 : i_val1_end+1].copy()
    timestamps_val2  = timestamps.iloc[i_val1_end+1 : i_val2_end+1].copy()
    timestamps_test  = timestamps.iloc[i_val2_end+1 :].copy()
    # optional: your "testing_stage" logic
    if testing_stage:
        # fold val1 into train, use val2 for validation (matches your comment)
        train_df = pd.concat([train_df, val1_df], axis=0)
        val_df = val2_df.copy()
        timestamps = timestamps_test 
    else:
        val_df = val1_df.copy()
        timestamps = timestamps_val1



        
    # --- 5) Build TimeSeriesDataSet / DataLoaders ---
    from pytorch_forecasting import TimeSeriesDataSet
    from pytorch_forecasting.data import NaNLabelEncoder
    
    from torch.utils.data import DataLoader
    from pytorch_forecasting.metrics import QuantileLoss
    from pytorch_forecasting.models import TemporalFusionTransformer
    import lightning.pytorch as pl

    import inspect, lightning.pytorch as pl
    from pytorch_forecasting.models import TemporalFusionTransformer
    from pytorch_forecasting.data import GroupNormalizer, TorchNormalizer


    # normalize the train target, and then apply it to the rest
    from sklearn.preprocessing import StandardScaler
    target_scaler = StandardScaler()
    target_scaler.fit(train_df[["Energy"]])

    for d in (train_df, val_df, test_df):
        d["Energy"] = target_scaler.transform(d[["Energy"]])
    if data_augmentation:
        fake_data = np.load("ddpm_fake_energy_raw.npy")[:fake_data_length]
        _, seq_len, F = fake_data.shape
        X_fake = fake_data[:, :seq_len-1, :]                  # encoder
        Y_fake = fake_data[:, seq_len-1, F-1].reshape(-1, 1)  # target at decoder step

        X_num  = X_fake[:, :, :-7]     # unknown reals (sensors)
        X_lag  = X_fake[:, :, -1:]   # unknown real (lag)
        X_cat  = X_fake[:, :, -7:-5]   # (treat as unknown reals unless you set encoders)
        X_time = X_fake[:, :, -5:-1]     # known reals (hour/dow sin/cos)
        

        
        synth_df = make_synth_df(X_num, X_cat, X_time, X_lag, Y_fake, unknown_reals)
        synth_df.to_csv("synth_data.csv", index=False)
        # scale the fake target
        synth_df["Energy"] = target_scaler.transform(synth_df[["Energy"]])
        for col in train_df.columns:
            if col not in synth_df.columns:
                synth_df[col] = 0
        synth_df = synth_df[train_df.columns]
            # finally concatenate
        train_df = pd.concat([train_df, synth_df], ignore_index=True)
        train_df.to_csv("train_df.csv", index=False)


    training = TimeSeriesDataSet(
        train_df,
        time_idx="time_idx",
        target="Energy",
        group_ids=["group_id"],
        min_encoder_length=n_past,
        max_encoder_length=n_past,
        min_prediction_length=n_future,
        max_prediction_length=n_future,
        time_varying_known_categoricals=known_cats,       # []
        time_varying_known_reals=known_reals,             # includes the 0/1 flags now
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=unknown_reals,
        categorical_encoders=None,
        # target_normalizer=GroupNormalizer(groups=["group_id"]),
        target_normalizer = None,
        add_relative_time_idx=False,
        add_target_scales=False,
        add_encoder_length=False,
    )
    validation = TimeSeriesDataSet.from_dataset(
        training,
        val_df,
        min_prediction_idx=int(val_df["time_idx"].min()),   # ✅ shift by n_past
        stop_randomization=True,
    )

    testing = TimeSeriesDataSet.from_dataset(
        training,
        test_df,
        min_prediction_idx=int(test_df["time_idx"].min()),  # ✅ shift by n_past
        stop_randomization=True,
    )

    # --- 6) TFT model with QuantileLoss ---
    if pinball_usage:
        tft = TemporalFusionTransformer.from_dataset(
            training,
            hidden_size=64,
            attention_head_size=4,
            hidden_continuous_size=32,
            dropout=0.2,
            loss=QuantileLoss(quantiles=[0.05, 0.5, 0.95]),
            learning_rate=3e-4,
        )
    else:
        tft = TemporalFusionTransformer.from_dataset(
            training,
            hidden_size=64,
            attention_head_size=4,
            hidden_continuous_size=32,
            dropout=0.2,
            loss=RMSE(),
            learning_rate=3e-4,
        )

    # build dataloaders
    train_loader = training.to_dataloader(train=True,  batch_size=batch_size, shuffle=True,  num_workers=4)
    val_loader   = validation.to_dataloader(train=False, batch_size=batch_size, shuffle=False, num_workers=4)
    test_loader  = testing.to_dataloader(train=False,  batch_size=batch_size, shuffle=False,  num_workers=4)

    from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint

    early_stop = EarlyStopping(
        monitor="val_loss",
        min_delta=0.0,
        patience=10,
        mode="min",
    )

    ckpt = ModelCheckpoint(
        monitor="val_loss",
        mode="min",
        save_top_k=1,
        filename="tft-{epoch:02d}-{val_loss:.4f}",
    )

    trainer = pl.Trainer(
        max_epochs=50,
        gradient_clip_val=0.1,
        accelerator="auto",
        devices="auto",
        log_every_n_steps=50,
        callbacks=[early_stop, ckpt],
    )

    trainer.fit(tft, train_dataloaders=train_loader, val_dataloaders=val_loader)

    # (optional) load best weights
    best_path = ckpt.best_model_path
    if best_path:
        tft = TemporalFusionTransformer.load_from_checkpoint(best_path)


    if testing_stage:
        test_loader = test_loader
    else:
        test_loader = val_loader

    if pinball_usage:
        pred = tft.predict(test_loader, mode="quantiles")
        pred_np = pred.detach().cpu().numpy()   # (N, horizon, n_q)

        # If horizon=1 → shape (N, 1, 3)
        p05 = pred_np[:, :, 0].squeeze(1)   # → (N,)
        p50 = pred_np[:, :, 1].squeeze(1)   # → (N,)
        p95 = pred_np[:, :, 2].squeeze(1)   # → (N,)
        p05 = p05.ravel()
        p50 = p50.ravel()
        p95 = p95.ravel()
    else:
        pred = tft.predict(test_loader)
        p50 = pred.detach().cpu().numpy().squeeze(-1)
    # pred = tft.predict(test_loader, return_y=True)   # no return_y; returns a Prediction object
    # tensors -> numpy
    ys = []
    for _, y in iter(test_loader):
        ys.append(y[0])              # take the target (ignore weights)
    y_true = torch.cat(ys, dim=0)    # shape: (N, max_prediction_length)
    y_true = y_true.detach().cpu().numpy().squeeze(-1)    # first item in the y tuple

    # metrics (aggregate all horizons; for per-horizon, compute along axis=0)
    y = y_true.ravel()
    yhat = p50
    if testing_stage:
        y = y[1:-1]
        yhat = yhat[1:-1]
        timestamps = timestamps[n_past +1:-1]
        if pinball_usage:
            p05 = p05[1:-1]
            p95 = p95[1:-1]
    else:
        y = y[:-1]
        yhat = yhat[:-1]
        timestamps = timestamps[n_past:-1]
        if pinball_usage:
            p05 = p05[:-1]
            p95 = p95[:-1]

    # Inverse scale the predictions
    if pinball_usage:
        p05 = target_scaler.inverse_transform(p05.reshape(-1, 1)).ravel()
        yhat = target_scaler.inverse_transform(yhat.reshape(-1, 1)).ravel()
        p95 = target_scaler.inverse_transform(p95.reshape(-1, 1)).ravel()
    else:
        yhat = target_scaler.inverse_transform(yhat.reshape(-1, 1)).ravel()

    y = target_scaler.inverse_transform(y.reshape(-1, 1)).ravel()
    #------------QUANTILE METRICS-----------------#
    if pinball_usage:
    # Pinball loss for quantiles
        def pinball_loss(y_true, y_pred, q):
            """
            Pinball loss for quantile q.
            y_true, y_pred must be arrays of same shape.
            """
            e = y_true - y_pred
            return np.mean(np.maximum(q*e, (q-1)*e))

        loss_q05 = pinball_loss(y, p05, 0.05)
        loss_q50 = pinball_loss(y, yhat, 0.5)
        loss_q95 = pinball_loss(y, p95, 0.95)

        # Coverage (Calibration of prediction intervals)
        def interval_coverage(y_true, y_lower, y_upper, nominal=0.90):
            """
            Computes empirical coverage of [y_lower, y_upper].
            """
            inside = (y_true >= y_lower) & (y_true <= y_upper)
            empirical = np.mean(inside)
            return empirical, empirical - nominal

        coverage_90, error_90 = interval_coverage(y, p05, p95, nominal=0.90)


        # Interval Width (Sharpness)
        def interval_width(y_lower, y_upper):
            return np.mean(y_upper - y_lower)
        

        sharpness_90 = interval_width(p05, p95)
    #------------QUANTILE METRICS END-----------------#

    # save predictions

    np.savez(f"TFT_Results/Predictions/TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_predictions.npz", predictions=yhat, ground_truth=y)
    #------------Point forecast metrics-----------------#
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    mae  = mean_absolute_error(y, yhat)
    mse  = mean_squared_error(y, yhat)
    rmse = np.sqrt(mean_squared_error(y, yhat))
    r2   = r2_score(y, yhat)

    eps = 1e-8
    smape = 100.0 * np.mean(2.0 * np.abs(y - yhat) / (np.abs(y) + np.abs(yhat) + eps))

    print(f"Val MAE:   {mae:.4f}")
    print(f"Val MSE:   {mse:.4f}")
    print(f"Val RMSE:  {rmse:.4f}")
    print(f"Val R^2:   {r2:.4f}")
    print(f"Val sMAPE: {smape:.2f}%")

    if not pinball_usage:
        loss_q05 = ""
        loss_q50 = ""
        loss_q95 = ""
        coverage_90 = ""
        error_90 = ""
        sharpness_90 = ""

    metrics = {
    "Name": f"TFT-standard-bs{batch_size}-aug{data_augmentation}--{seed_val}--{fake_data_length}",
    "state": "finished",
    "Notes": "-",
    "User": "",
    "Tags": "",
    "Created": "",
    "Runtime": "",
    "Sweep": "",
    "data_augmentation": data_augmentation,
    "fake_data_length": fake_data_length,
    "model_name": "TFT",
    "scaler_name": "standard",

    "seed": seed_val,
    "val_MAE": mae,
    "val_MASE": "",
    "val_MSE": mse,
    "val_R2": r2,
    "val_RMSE": rmse,
    "val_SMAPE": smape,

    "Pinball_0.05": loss_q05,
    "Pinball_0.50": loss_q50,
    "Pinball_0.95": loss_q95,

    "Coverage_0.90": coverage_90,
    "Coverage_Error_0.90": error_90,
    "Sharpness_0.90": sharpness_90,
}
    # metrics = dict(Name = f"TFT-standard-bs{batch_size}-aug{data_augmentation}--{seed_val}--{fake_data_length}", state = "finished", Notes = "-", User = "", Tags = "", Created = "", Runtime = "", Sweep = "", data_augmentation=data_augmentation, fake_data_length=fake_data_length, model_name="TFT", scaler_name="standard",
    #                seed=seed_val, val_MAE=mae, val_MASE = "", val_MSE=mse, val_R2=r2, val_RMSE=rmse, val_SMAPE=smape, Pinball_0.05=loss_q05, Pinball_0.50=loss_q50, Pinball_0.95=loss_q95, Coverage_0.90=coverage_90, Coverage_Error_0.90=error_90, Sharpness_0.90=sharpness_90)

    # --- Save metrics to CSV ---
    import csv
    if testing_stage:
        if data_augmentation:
            csv_path = os.path.join("TFT_Results/Augmentation/", f"TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_metrics.csv")
        else:
            csv_path = os.path.join("TFT_Results/Testing/", f"TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_metrics.csv")
    else:
        csv_path = os.path.join("TFT_Results/Validation/", f"TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_metrics.csv")
    
    df_metrics = pd.DataFrame([metrics])
    if not os.path.exists(csv_path):
        df_metrics.to_csv(csv_path, index=False, quoting=csv.QUOTE_ALL)
    else:
        df_metrics.to_csv(csv_path, mode="a", header=False, index=False)
    import matplotlib.pyplot as plt


    if pinball_usage:
        plt.figure(figsize=(14, 6))
        plt.plot(timestamps, y, label="Ground Truth", color='black', linewidth=2)
        plt.plot(timestamps, yhat, label="Median Prediction (0.5)", color='#0072B2', linewidth=2)
        plt.fill_between(
            timestamps, p05, p95,
            color='#0072B2', alpha=0.2, label="90% Confidence Interval (0.05–0.95)"
        )

        # Custom x-axis formatter → weekday + month-day + hour:00
        ax = plt.gca()
        ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%a %m-%d %H:%M'))

        plt.grid(alpha=0.3)
        plt.title(f"TFT Quantile Regression - Confidence Interval")
        plt.xlabel("Time")
        plt.ylabel("Energy")
        plt.legend(fontsize=12)
        plt.tight_layout()
        if testing_stage:
            if data_augmentation:
                plt.savefig(f"TFT_Results/Augmentation/TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_plot.png", dpi=300)
            else:
                plt.savefig(f"TFT_Results/Testing/TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_plot.png", dpi=300)
        else:
            plt.savefig(f"TFT_Results/Validation/TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_plot.png", dpi=300)
        plt.close()

    else:
        plt.figure(figsize=(14,6))
        plt.plot(timestamps, y, label = "Ground truth", color = 'black', linewidth=2)
        plt.plot(timestamps, yhat, label = "Predictions", color='#0072B2', linewidth=2)
        ax = plt.gca()
        ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%a %m-%d %H:%M'))
        plt.grid(alpha=0.3)
        plt.title("TFT Predictions vs Ground Truth (Validation set)")
        plt.xlabel("Time")
        plt.ylabel("Energy")
        plt.legend(fontsize=12)
        plt.tight_layout()
        if testing_stage:
            if data_augmentation:
                plt.savefig(f"TFT_Results/Augmentation/TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_plot.png", dpi=300)
            else:
                plt.savefig(f"TFT_Results/Testing/TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_plot.png", dpi=300)
        else:
            plt.savefig(f"TFT_Results/Validation/TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}_plot.png", dpi=300)
        plt.close()


In [5]:
from itertools import product
from tqdm import tqdm

seeds = [42, 4242, 1234, 2021, 777]
fake_lengths = [200, 400, 800, 1200, 1600]
PINBALL = False  # set to True/False as needed (kept fixed to hit 5+5+25=35 total)

runs = []

# # 1) 5 runs: testing_stage=False
# for seed in seeds:
#     runs.append((seed, PINBALL, False, False, 0))   # (seed, pinball, testing_stage, data_aug, fake_len)

# # 2) 5 runs: testing_stage=True, data_augmentation=False
# for seed in seeds:
#     runs.append((seed, PINBALL, True, False, 0))

# 3) 25 runs: testing_stage=True, data_augmentation=True over 5 fake lengths
for seed, fake_len in product(seeds, fake_lengths):
    runs.append((seed, PINBALL, True, True, fake_len))

# (Optional) sanity check
assert len(runs) == 25, f"Expected 25 runs, got {len(runs)}"

# Execute with tqdm progress bar
for seed, pinball, test, aug, fake_len in tqdm(runs, desc="TFT runs", unit="run"):
    TFT_training(
        seed_val=seed,
        testing_stage=test,
        pinball_usage=pinball,
        data_augmentation=aug,
        fake_data_length=fake_len,
    )


TFT runs:   0%|          | 0/25 [00:00<?, ?run/s]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_time

Epoch 40: 100%|██████████| 124/124 [00:06<00:00, 17.86it/s, v_num=50, train_loss_step=0.137, val_loss=0.229, train_loss_epoch=0.123] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.9280
Val MSE:   209.7933
Val RMSE:  14.4842
Val R^2:   0.9387
Val sMAPE: 18.04%


TFT runs:   4%|▍         | 1/25 [05:00<2:00:07, 300.33s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base +

Epoch 38: 100%|██████████| 137/137 [00:07<00:00, 17.63it/s, v_num=52, train_loss_step=0.0951, val_loss=0.214, train_loss_epoch=0.120]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.1145
Val MSE:   162.7696
Val RMSE:  12.7581
Val R^2:   0.9524
Val sMAPE: 18.97%


TFT runs:   8%|▊         | 2/25 [10:15<1:58:31, 309.20s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base +

Epoch 39: 100%|██████████| 162/162 [00:09<00:00, 17.68it/s, v_num=54, train_loss_step=0.109, val_loss=0.212, train_loss_epoch=0.119] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.7039
Val MSE:   185.6409
Val RMSE:  13.6250
Val R^2:   0.9458
Val sMAPE: 19.18%


TFT runs:  12%|█▏        | 3/25 [16:39<2:05:53, 343.32s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base +

Epoch 49: 100%|██████████| 187/187 [00:10<00:00, 17.91it/s, v_num=56, train_loss_step=0.0913, val_loss=0.159, train_loss_epoch=0.0991]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 187/187 [00:10<00:00, 17.38it/s, v_num=56, train_loss_step=0.0913, val_loss=0.159, train_loss_epoch=0.0991]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.2051
Val MSE:   213.5208
Val RMSE:  14.6124
Val R^2:   0.9376
Val sMAPE: 17.38%


TFT runs:  16%|█▌        | 4/25 [25:49<2:28:40, 424.80s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base +

Epoch 49: 100%|██████████| 212/212 [00:11<00:00, 17.89it/s, v_num=58, train_loss_step=0.114, val_loss=0.127, train_loss_epoch=0.0928] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 212/212 [00:12<00:00, 17.19it/s, v_num=58, train_loss_step=0.114, val_loss=0.127, train_loss_epoch=0.0928]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.5050
Val MSE:   224.6333
Val RMSE:  14.9878
Val R^2:   0.9344
Val sMAPE: 17.76%


TFT runs:  20%|██        | 5/25 [36:16<2:45:54, 497.74s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 46: 100%|██████████| 124/124 [00:07<00:00, 17.07it/s, v_num=60, train_loss_step=0.0911, val_loss=0.227, train_loss_epoch=0.118]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.0487
Val MSE:   216.1705
Val RMSE:  14.7027
Val R^2:   0.9368
Val sMAPE: 19.38%


TFT runs:  24%|██▍       | 6/25 [42:03<2:21:26, 446.64s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 40: 100%|██████████| 137/137 [00:07<00:00, 17.24it/s, v_num=62, train_loss_step=0.128, val_loss=0.214, train_loss_epoch=0.117] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.6905
Val MSE:   187.9099
Val RMSE:  13.7080
Val R^2:   0.9451
Val sMAPE: 20.78%


TFT runs:  28%|██▊       | 7/25 [47:40<2:03:11, 410.66s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 41: 100%|██████████| 162/162 [00:09<00:00, 17.46it/s, v_num=64, train_loss_step=0.0867, val_loss=0.198, train_loss_epoch=0.109]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   12.9498
Val MSE:   265.6751
Val RMSE:  16.2995
Val R^2:   0.9224
Val sMAPE: 23.11%


TFT runs:  32%|███▏      | 8/25 [54:24<1:55:47, 408.68s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 44: 100%|██████████| 187/187 [00:10<00:00, 17.62it/s, v_num=66, train_loss_step=0.129, val_loss=0.222, train_loss_epoch=0.105] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.7160
Val MSE:   186.8327
Val RMSE:  13.6687
Val R^2:   0.9454
Val sMAPE: 21.68%


TFT runs:  36%|███▌      | 9/25 [1:02:46<1:56:42, 437.64s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = ba

Epoch 49: 100%|██████████| 212/212 [00:11<00:00, 17.70it/s, v_num=68, train_loss_step=0.090, val_loss=0.151, train_loss_epoch=0.0928] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 212/212 [00:12<00:00, 17.17it/s, v_num=68, train_loss_step=0.090, val_loss=0.151, train_loss_epoch=0.0928]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.2816
Val MSE:   179.8474
Val RMSE:  13.4107
Val R^2:   0.9475
Val sMAPE: 18.29%


TFT runs:  40%|████      | 10/25 [1:13:15<2:04:11, 496.73s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 35: 100%|██████████| 124/124 [00:07<00:00, 16.41it/s, v_num=70, train_loss_step=0.127, val_loss=0.264, train_loss_epoch=0.121] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.0830
Val MSE:   203.7730
Val RMSE:  14.2749
Val R^2:   0.9405
Val sMAPE: 23.21%


TFT runs:  44%|████▍     | 11/25 [1:17:44<1:39:39, 427.10s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 24: 100%|██████████| 137/137 [00:08<00:00, 17.05it/s, v_num=72, train_loss_step=0.181, val_loss=0.199, train_loss_epoch=0.137] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.5423
Val MSE:   210.9349
Val RMSE:  14.5236
Val R^2:   0.9384
Val sMAPE: 20.97%


TFT runs:  48%|████▊     | 12/25 [1:21:13<1:18:09, 360.71s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 25: 100%|██████████| 162/162 [00:09<00:00, 16.66it/s, v_num=74, train_loss_step=0.0935, val_loss=0.199, train_loss_epoch=0.131]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.5960
Val MSE:   176.8327
Val RMSE:  13.2978
Val R^2:   0.9483
Val sMAPE: 18.95%


TFT runs:  52%|█████▏    | 13/25 [1:25:30<1:05:52, 329.33s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 34: 100%|██████████| 187/187 [00:10<00:00, 17.62it/s, v_num=76, train_loss_step=0.0897, val_loss=0.177, train_loss_epoch=0.106]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.1392
Val MSE:   209.4902
Val RMSE:  14.4738
Val R^2:   0.9388
Val sMAPE: 19.74%


TFT runs:  56%|█████▌    | 14/25 [1:32:03<1:03:55, 348.69s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 49: 100%|██████████| 212/212 [00:11<00:00, 17.76it/s, v_num=78, train_loss_step=0.0917, val_loss=0.130, train_loss_epoch=0.0911]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 212/212 [00:12<00:00, 17.24it/s, v_num=78, train_loss_step=0.0917, val_loss=0.130, train_loss_epoch=0.0911]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.7823
Val MSE:   242.4851
Val RMSE:  15.5719
Val R^2:   0.9292
Val sMAPE: 19.03%


TFT runs:  60%|██████    | 15/25 [1:42:34<1:12:17, 433.73s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 14: 100%|██████████| 124/124 [00:07<00:00, 17.04it/s, v_num=80, train_loss_step=0.163, val_loss=0.238, train_loss_epoch=0.171]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.3047
Val MSE:   203.4818
Val RMSE:  14.2647
Val R^2:   0.9405
Val sMAPE: 23.52%


TFT runs:  64%|██████▍   | 16/25 [1:44:30<50:42, 338.08s/run]  Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 32: 100%|██████████| 137/137 [00:08<00:00, 16.49it/s, v_num=82, train_loss_step=0.123, val_loss=0.230, train_loss_epoch=0.128] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.7797
Val MSE:   212.2236
Val RMSE:  14.5679
Val R^2:   0.9380
Val sMAPE: 32.10%


TFT runs:  68%|██████▊   | 17/25 [1:49:04<42:31, 318.89s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = bas

Epoch 42: 100%|██████████| 162/162 [00:09<00:00, 17.27it/s, v_num=84, train_loss_step=0.111, val_loss=0.170, train_loss_epoch=0.110] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.8047
Val MSE:   239.2084
Val RMSE:  15.4664
Val R^2:   0.9301
Val sMAPE: 23.91%


TFT runs:  72%|███████▏  | 18/25 [1:56:04<40:44, 349.17s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = bas

Epoch 49: 100%|██████████| 187/187 [00:10<00:00, 17.93it/s, v_num=86, train_loss_step=0.117, val_loss=0.181, train_loss_epoch=0.0954] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 187/187 [00:10<00:00, 17.39it/s, v_num=86, train_loss_step=0.117, val_loss=0.181, train_loss_epoch=0.0954]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   12.2361
Val MSE:   242.8077
Val RMSE:  15.5823
Val R^2:   0.9291
Val sMAPE: 23.16%


TFT runs:  76%|███████▌  | 19/25 [2:05:25<41:16, 412.81s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = bas

Epoch 49: 100%|██████████| 212/212 [00:11<00:00, 18.13it/s, v_num=88, train_loss_step=0.0761, val_loss=0.156, train_loss_epoch=0.0919]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 212/212 [00:12<00:00, 17.39it/s, v_num=88, train_loss_step=0.0761, val_loss=0.156, train_loss_epoch=0.0919]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.6121
Val MSE:   238.0131
Val RMSE:  15.4277
Val R^2:   0.9305
Val sMAPE: 22.24%


TFT runs:  80%|████████  | 20/25 [2:15:58<39:55, 479.02s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 34: 100%|██████████| 124/124 [00:07<00:00, 16.18it/s, v_num=90, train_loss_step=0.133, val_loss=0.207, train_loss_epoch=0.132] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.2889
Val MSE:   181.3744
Val RMSE:  13.4675
Val R^2:   0.9470
Val sMAPE: 17.62%


TFT runs:  84%|████████▍ | 21/25 [2:20:24<27:40, 415.02s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 49: 100%|██████████| 137/137 [00:08<00:00, 16.87it/s, v_num=92, train_loss_step=0.0769, val_loss=0.208, train_loss_epoch=0.112]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 137/137 [00:08<00:00, 16.39it/s, v_num=92, train_loss_step=0.0769, val_loss=0.208, train_loss_epoch=0.112]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   15.8843
Val MSE:   360.2892
Val RMSE:  18.9813
Val R^2:   0.8947
Val sMAPE: 29.95%


TFT runs:  88%|████████▊ | 22/25 [2:27:18<20:43, 414.54s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 42: 100%|██████████| 162/162 [00:09<00:00, 16.60it/s, v_num=94, train_loss_step=0.137, val_loss=0.202, train_loss_epoch=0.109] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.4777
Val MSE:   177.8429
Val RMSE:  13.3358
Val R^2:   0.9480
Val sMAPE: 23.51%


TFT runs:  92%|█████████▏| 23/25 [2:34:19<13:53, 416.55s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 44: 100%|██████████| 187/187 [00:10<00:00, 17.59it/s, v_num=96, train_loss_step=0.0939, val_loss=0.165, train_loss_epoch=0.102]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.1131
Val MSE:   208.0648
Val RMSE:  14.4245
Val R^2:   0.9392
Val sMAPE: 18.44%


TFT runs:  96%|█████████▌| 24/25 [2:42:45<07:23, 443.37s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 27: 100%|██████████| 212/212 [00:12<00:00, 17.55it/s, v_num=98, train_loss_step=0.0959, val_loss=0.176, train_loss_epoch=0.112]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.7241
Val MSE:   213.8455
Val RMSE:  14.6235
Val R^2:   0.9375
Val sMAPE: 20.95%


TFT runs: 100%|██████████| 25/25 [2:48:49<00:00, 405.17s/run]
