In [1]:
import torch
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.metrics.point import MAE, MAPE, RMSE
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning_fabric/__init__.py:41: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.


In [2]:
# Load the raw semicolon-separated dataset, parse the Timestamp column into
# datetimes and drop the trailing 120 rows.
# NOTE(review): the reason for discarding the last 120 rows is not visible here — confirm.
ori_data = (
    pd.read_csv("All_Data_Reducted.csv", sep=";")
    .assign(Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"]))
    .iloc[:-120]
)

In [3]:
def make_synth_df(
    X_num, X_cat, X_time, X_lag, Y_target,
    num_feature_names,                      # names (len == X_num.shape[2]) in SAME order as X_num
    target_name="Energy",
    time_cols=("hour_sin","hour_cos","dow_sin","dow_cos"),
    cal_cols=("is_low_usage","is_low_usage_next"),
):
    """
    Turn N synthetic windows into a long-format frame of N mini-series.

    Every window becomes its own group ``synth_00000``, ``synth_00001``, ...
    with ``L + 1`` rows:
      - encoder rows, time_idx 0..L-1: time features from X_time, calendar
        flags from X_cat, sensors from X_num, target from X_lag
      - decoder row, time_idx L: target from Y_target; time features and
        sensors repeat the last encoder row; BOTH calendar flags are set to
        the last encoder row's "next" flag (next -> now)
    NOTE(review): the decoder row's sin/cos time features are copied from the
    last encoder step rather than advanced by one step — confirm this is
    acceptable for the downstream model.

    Parameters
    ----------
    X_num : array (N, L, D_num), or an empty array when there are no sensors
    X_cat : array (N, L, 2) -> one column per name in ``cal_cols``
    X_time : array (N, L, 4) -> one column per name in ``time_cols``
    X_lag : array (N, L, 1) -> target values on the encoder rows
    Y_target : array (N, 1) or (N,) -> target value on the decoder row
    num_feature_names : sequence of str, same order as X_num's last axis
    target_name : name of the target column
    time_cols : names of the four time-feature columns
    cal_cols : names of the two calendar-flag columns

    Returns
    -------
    pandas.DataFrame with columns
    ``["Timestamp", "group_id", "time_idx", target_name, *time_cols,
    *cal_cols, *num_feature_names]`` and ``N * (L + 1)`` rows. ``Timestamp``
    is a dummy hourly stamp (group i starts at 2000-01-01 + i days). For
    ``N == 0`` an empty frame with those columns is returned.

    Raises
    ------
    AssertionError if the array shapes are inconsistent with each other.
    """
    X_num, X_cat, X_time, X_lag = (np.asarray(a) for a in (X_num, X_cat, X_time, X_lag))

    N, L, _ = X_time.shape
    assert X_time.shape[2] == 4, "X_time must have 4 columns"
    assert L >= 1, "need at least one encoder step"
    assert X_cat.shape == (N, L, 2), "X_cat must be (N, L, 2): [is_low_usage, is_low_usage_next]"
    assert X_lag.shape == (N, L, 1), "X_lag must be (N, L, 1)"
    if X_num.ndim == 3 and X_num.shape[2] > 0:
        assert X_num.shape[:2] == (N, L), "X_num must be (N, L, D_num)"
        assert len(num_feature_names) == X_num.shape[2], "num_feature_names length must match X_num last dim"
        sensor_names = list(num_feature_names)
    else:
        assert X_num.size == 0, "X_num must be (N, L, D_num) or empty"
        sensor_names = []

    Y_target = np.ravel(Y_target).astype(float)
    assert Y_target.shape[0] == N, "Y_target must hold exactly one value per window"

    columns = (
        ["Timestamp", "group_id", "time_idx", target_name]
        + list(time_cols[:4]) + list(cal_cols[:2]) + sensor_names
    )
    if N == 0:
        # pd.concat([]) raises; an empty frame with the right columns lets
        # callers concatenate "no synthetic data" without special-casing.
        return pd.DataFrame(columns=columns)

    def hold_last(values):
        # encoder values followed by a copy of the last one (decoder row)
        return np.append(values, values[-1]).astype(float)

    steps = np.arange(L + 1, dtype=int)
    hour_offsets = pd.to_timedelta(steps, unit="h")   # lowercase "h": "H" is deprecated in pandas >= 2.2
    epoch = pd.Timestamp("2000-01-01")

    frames = []
    for i in range(N):
        flags = X_cat[i].astype(int)
        next_flag = flags[-1, 1]

        cols = {
            # dummy timestamp, one day per group so groups never overlap
            "Timestamp": epoch + pd.to_timedelta(i, unit="D") + hour_offsets,
            "group_id": f"synth_{i:05d}",
            "time_idx": steps,
            # target: lag values on the encoder rows, Y_target on the decoder row
            target_name: np.append(X_lag[i, :, 0], Y_target[i]).astype(float),
        }
        for k in range(4):
            cols[time_cols[k]] = hold_last(X_time[i, :, k])
        # calendar flags at the decoder row: shift next -> now
        cols[cal_cols[0]] = np.append(flags[:, 0], next_flag)
        cols[cal_cols[1]] = np.append(flags[:, 1], next_flag)
        for j, name in enumerate(sensor_names):
            cols[name] = hold_last(X_num[i, :, j])

        frames.append(pd.DataFrame(cols))

    return pd.concat(frames, ignore_index=True)[columns]

In [4]:
import numpy as np
import pandas as pd
import random
import os


def _pinball_loss(y_true, y_pred, q):
    """Mean pinball (quantile) loss of `y_pred` against `y_true` for quantile `q`."""
    e = y_true - y_pred
    return np.mean(np.maximum(q * e, (q - 1) * e))


def _interval_coverage(y_true, y_lower, y_upper, nominal=0.90):
    """Return (empirical coverage of [y_lower, y_upper], empirical - nominal)."""
    inside = (y_true >= y_lower) & (y_true <= y_upper)
    empirical = np.mean(inside)
    return empirical, empirical - nominal


def _interval_width(y_lower, y_upper):
    """Mean width of the prediction interval (sharpness)."""
    return np.mean(y_upper - y_lower)


def TFT_training(seed_val = 42, testing_stage = True, pinball_usage = False, batch_size = 16, data_augmentation = False, fake_data_length = 0):
    """
    Train one Temporal Fusion Transformer on the module-level `ori_data` and
    write predictions, metrics and a plot under ``TFT_Results/``.

    The series is split chronologically 80 / 5 / 5 / remainder into
    train / val1 / val2 / test. The target ``Energy`` is standardised with a
    scaler fitted on the training rows only; all reported metrics are computed
    after inverse-scaling.

    Parameters
    ----------
    seed_val : int
        Seed passed to Lightning, numpy, torch and `random`.
    testing_stage : bool
        True  -> train on train+val1, early-stop on val2, evaluate on test.
        False -> train on train, early-stop AND evaluate on val1.
    pinball_usage : bool
        True  -> QuantileLoss over quantiles [0.05, 0.5, 0.95]; quantile
        metrics (pinball loss, coverage, sharpness) are also computed.
        False -> RMSE loss, point forecast only.
    batch_size : int
        Batch size of all dataloaders.
    data_augmentation : bool
        If True, the first `fake_data_length` windows of
        ``ddpm_fake_energy_raw.npy`` are converted with `make_synth_df` and
        appended to the training rows as extra groups.
    fake_data_length : int
        Number of synthetic windows to use (only read when
        `data_augmentation` is True; always recorded in file names).

    Returns
    -------
    None. Side effects: writes ``synth_data.csv`` / ``train_df.csv`` (when
    augmenting), an ``.npz`` with predictions, a one-row metrics CSV (appended
    if it exists) and a PNG plot; prints point metrics.
    """
    import csv

    import lightning.pytorch as pl
    import matplotlib.dates as mdates
    import matplotlib.pyplot as plt
    from lightning.pytorch import seed_everything
    from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
    from pytorch_forecasting import TimeSeriesDataSet
    from pytorch_forecasting.metrics import QuantileLoss, RMSE
    from pytorch_forecasting.models import TemporalFusionTransformer
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    from sklearn.preprocessing import StandardScaler

    seed_everything(seed_val, workers=True)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    random.seed(seed_val)

    # --- 0) Config ---
    n_past   = 48        # lookback (encoder length)
    n_future = 1         # horizon (prediction length)
    split_name = "Test" if testing_stage else "Validation"
    metric_prefix = "Test" if testing_stage else "Val"

    # Output locations (file names are unchanged from earlier runs).
    run_tag = f"TFT_{seed_val}_testing{testing_stage}_pinball{pinball_usage}_aug{data_augmentation}_fake_data_length{fake_data_length}"
    if testing_stage:
        results_dir = "TFT_Results/Augmentation" if data_augmentation else "TFT_Results/Testing"
    else:
        results_dir = "TFT_Results/Validation"
    predictions_dir = "TFT_Results/Predictions"
    # Create the folders up front so a missing directory cannot waste a finished training run.
    os.makedirs(results_dir, exist_ok=True)
    os.makedirs(predictions_dir, exist_ok=True)

    # --- 1) Start from the original frame, sorted by time ---
    data = ori_data.copy()
    data['Timestamp'] = pd.to_datetime(data['Timestamp'])
    data = data.sort_values('Timestamp').reset_index(drop=True)

    # --- 2) Calendar flags ---
    tmp = data.set_index('Timestamp')
    tmp['dow']  = tmp.index.day_name().str[:3]
    tmp['hour'] = tmp.index.hour

    sun_low = tmp['dow'] == 'Sun'
    mon_low = (tmp['dow'] == 'Mon') & tmp['hour'].isin([0,1,2,3,4,5,6,7,8,9])
    wed_low = (tmp['dow'] == 'Wed') & tmp['hour'].isin([6,7,8,9,10,11,12,13,14])
    sat_low = (tmp['dow'] == 'Sat') & tmp['hour'].isin([19,20,21,22,23])

    tmp['is_low_usage'] = (sun_low | mon_low | wed_low | sat_low).astype(int)
    tmp = tmp.drop(columns=['dow','hour']).reset_index()

    # next-step flag (calendar-derived => can be treated as known future);
    # the last row has no successor and repeats the previous value.
    tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).ffill().astype(int)

    # --- 3) Assemble the long frame for TFT ---
    # Cyclical time features are attached column-wise (rows are already
    # aligned), which keeps the row count fixed even if timestamps repeat.
    stamp = tmp['Timestamp']
    df = tmp.assign(
        hour_sin=np.sin(2 * np.pi * stamp.dt.hour / 24),
        hour_cos=np.cos(2 * np.pi * stamp.dt.hour / 24),
        dow_sin=np.sin(2 * np.pi * stamp.dt.dayofweek / 7),
        dow_cos=np.cos(2 * np.pi * stamp.dt.dayofweek / 7),
    )
    # TFT reqs: group_id (single series) + integer time_idx
    df['group_id'] = 'series_0'
    df['time_idx'] = np.arange(len(df))  # one step per row
    assert 'Energy' in df.columns, "Expected target column 'Energy' in ori_data"
    target_col = 'Energy'

    # Known future: time index, hour/dow sin/cos and the calendar flags (kept numeric 0/1).
    known_reals = ["time_idx", "hour_sin", "hour_cos", "dow_sin", "dow_cos",
                   "is_low_usage", "is_low_usage_next"]
    known_cats  = []

    # Every other numeric column (except the target) is an observed/unknown real.
    # Column order matters: it must match the sensor order of the synthetic data.
    exclude = set(['Timestamp','group_id','time_idx', target_col] + known_reals + known_cats)
    unknown_reals = [c for c in df.columns
                     if c not in exclude and np.issubdtype(df[c].dtype, np.number)]

    # --- 4) Time-based splits: 80 / 5 / 5 / remainder ---
    N = len(df)
    i_train_end = int(0.80 * N) - 1
    i_val1_end  = i_train_end + int(0.05 * N)
    i_val2_end  = i_val1_end + int(0.05 * N)

    train_df = df.iloc[:i_train_end+1].copy()
    val1_df  = df.iloc[i_train_end+1 : i_val1_end+1].copy()
    val2_df  = df.iloc[i_val1_end+1 : i_val2_end+1].copy()
    test_df  = df.iloc[i_val2_end+1 :].copy()

    if testing_stage:
        # fold val1 into train, early-stop on val2, report on test
        train_df = pd.concat([train_df, val1_df], axis=0)
        val_df = val2_df.copy()
        timestamps = test_df['Timestamp'].copy()
    else:
        val_df = val1_df.copy()
        timestamps = val1_df['Timestamp'].copy()

    # --- 5) Target scaling: fit on the training rows, apply everywhere ---
    target_scaler = StandardScaler()
    target_scaler.fit(train_df[["Energy"]])

    for d in (train_df, val_df, test_df):
        d["Energy"] = target_scaler.transform(d[["Energy"]])

    if data_augmentation:
        fake_data = np.load("ddpm_fake_energy_raw.npy")[:fake_data_length]
        _, seq_len, F = fake_data.shape
        X_fake = fake_data[:, :seq_len-1, :]                  # encoder steps
        Y_fake = fake_data[:, seq_len-1, F-1].reshape(-1, 1)  # target at the decoder step

        # Feature layout along the last axis: [sensors..., 2 calendar flags, 4 time features, lag]
        X_num  = X_fake[:, :, :-7]     # unknown reals (sensors)
        X_cat  = X_fake[:, :, -7:-5]   # calendar flags
        X_time = X_fake[:, :, -5:-1]   # known reals (hour/dow sin/cos)
        X_lag  = X_fake[:, :, -1:]     # lag == target on the encoder steps

        synth_df = make_synth_df(X_num, X_cat, X_time, X_lag, Y_fake, unknown_reals)
        synth_df.to_csv("synth_data.csv", index=False)
        # scale the synthetic target with the scaler fitted on the real training rows
        synth_df["Energy"] = target_scaler.transform(synth_df[["Energy"]])
        for col in train_df.columns:
            if col not in synth_df.columns:
                synth_df[col] = 0
        synth_df = synth_df[train_df.columns]
        train_df = pd.concat([train_df, synth_df], ignore_index=True)
        train_df.to_csv("train_df.csv", index=False)

    # --- 6) TimeSeriesDataSet / DataLoaders ---
    training = TimeSeriesDataSet(
        train_df,
        time_idx="time_idx",
        target="Energy",
        group_ids=["group_id"],
        min_encoder_length=n_past,
        max_encoder_length=n_past,
        min_prediction_length=n_future,
        max_prediction_length=n_future,
        time_varying_known_categoricals=known_cats,       # []
        time_varying_known_reals=known_reals,             # includes the 0/1 flags
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=unknown_reals,
        categorical_encoders=None,
        target_normalizer = None,                         # target is already standardised above
        add_relative_time_idx=False,
        add_target_scales=False,
        add_encoder_length=False,
    )
    # val_df / test_df hold only their own rows, so min_prediction_idx is just
    # the first row of the split. The downstream slicing assumes the first
    # n_past rows serve as encoder context only and the first forecast lands
    # n_past steps later — NOTE(review): confirm against from_dataset semantics.
    validation = TimeSeriesDataSet.from_dataset(
        training,
        val_df,
        min_prediction_idx=int(val_df["time_idx"].min()),
        stop_randomization=True,
    )
    testing = TimeSeriesDataSet.from_dataset(
        training,
        test_df,
        min_prediction_idx=int(test_df["time_idx"].min()),
        stop_randomization=True,
    )

    # --- 7) TFT model ---
    loss = QuantileLoss(quantiles=[0.05, 0.5, 0.95]) if pinball_usage else RMSE()
    tft = TemporalFusionTransformer.from_dataset(
        training,
        hidden_size=64,
        attention_head_size=4,
        hidden_continuous_size=32,
        dropout=0.2,
        loss=loss,
        learning_rate=3e-4,
    )

    train_loader = training.to_dataloader(train=True,  batch_size=batch_size, shuffle=True,  num_workers=4)
    val_loader   = validation.to_dataloader(train=False, batch_size=batch_size, shuffle=False, num_workers=4)
    test_loader  = testing.to_dataloader(train=False,  batch_size=batch_size, shuffle=False,  num_workers=4)

    early_stop = EarlyStopping(
        monitor="val_loss",
        min_delta=0.0,
        patience=10,
        mode="min",
    )
    ckpt = ModelCheckpoint(
        monitor="val_loss",
        mode="min",
        save_top_k=1,
        filename="tft-{epoch:02d}-{val_loss:.4f}",
    )
    trainer = pl.Trainer(
        max_epochs=50,
        gradient_clip_val=0.1,
        accelerator="auto",
        devices="auto",
        log_every_n_steps=50,
        callbacks=[early_stop, ckpt],
    )
    trainer.fit(tft, train_dataloaders=train_loader, val_dataloaders=val_loader)

    # reload the best checkpoint (by val_loss) if one was written
    best_path = ckpt.best_model_path
    if best_path:
        tft = TemporalFusionTransformer.load_from_checkpoint(best_path)

    # --- 8) Predict on the evaluation split ---
    eval_loader = test_loader if testing_stage else val_loader

    if pinball_usage:
        pred = tft.predict(eval_loader, mode="quantiles")
        pred_np = pred.detach().cpu().numpy()          # (N, horizon, n_q); horizon == 1
        p05 = pred_np[:, :, 0].squeeze(1).ravel()
        p50 = pred_np[:, :, 1].squeeze(1).ravel()
        p95 = pred_np[:, :, 2].squeeze(1).ravel()
    else:
        pred = tft.predict(eval_loader)
        p50 = pred.detach().cpu().numpy().squeeze(-1)

    # ground truth in the same (unshuffled) order as the predictions
    ys = []
    for _, batch_y in iter(eval_loader):
        ys.append(batch_y[0])                          # (target, weight) -> target
    y_true = torch.cat(ys, dim=0).detach().cpu().numpy().squeeze(-1)

    # Trim edge samples and align timestamps with the forecasts.
    # NOTE(review): the test split drops the first AND last forecast while the
    # validation split drops only the last one — kept as-is, reason not visible here.
    if testing_stage:
        keep = slice(1, -1)
        first_stamp = n_past + 1
    else:
        keep = slice(None, -1)
        first_stamp = n_past
    y = y_true.ravel()[keep]
    yhat = p50[keep]
    timestamps = timestamps.iloc[first_stamp:-1]

    def unscale(values):
        # back to original Energy units
        return target_scaler.inverse_transform(values.reshape(-1, 1)).ravel()

    y = unscale(y)
    yhat = unscale(yhat)

    # --- 9) Quantile metrics (blank in the CSV when no quantiles are predicted) ---
    loss_q05 = loss_q50 = loss_q95 = ""
    coverage_90 = error_90 = sharpness_90 = ""
    if pinball_usage:
        p05 = unscale(p05[keep])
        p95 = unscale(p95[keep])

        loss_q05 = _pinball_loss(y, p05, 0.05)
        loss_q50 = _pinball_loss(y, yhat, 0.5)
        loss_q95 = _pinball_loss(y, p95, 0.95)
        coverage_90, error_90 = _interval_coverage(y, p05, p95, nominal=0.90)
        sharpness_90 = _interval_width(p05, p95)

    # --- 10) Save predictions ---
    np.savez(f"{predictions_dir}/{run_tag}_predictions.npz", predictions=yhat, ground_truth=y)

    # --- 11) Point forecast metrics ---
    mae  = mean_absolute_error(y, yhat)
    mse  = mean_squared_error(y, yhat)
    rmse = np.sqrt(mse)
    r2   = r2_score(y, yhat)

    eps = 1e-8
    smape = 100.0 * np.mean(2.0 * np.abs(y - yhat) / (np.abs(y) + np.abs(yhat) + eps))

    print(f"{metric_prefix} MAE:   {mae:.4f}")
    print(f"{metric_prefix} MSE:   {mse:.4f}")
    print(f"{metric_prefix} RMSE:  {rmse:.4f}")
    print(f"{metric_prefix} R^2:   {r2:.4f}")
    print(f"{metric_prefix} sMAPE: {smape:.2f}%")

    # Column names are kept exactly as before (including the "val_" prefix) so
    # existing result files and downstream tooling stay compatible.
    metrics = {
        "Name": f"TFT-standard-bs{batch_size}-aug{data_augmentation}--{seed_val}--{fake_data_length}",
        "state": "finished",
        "Notes": "-",
        "User": "",
        "Tags": "",
        "Created": "",
        "Runtime": "",
        "Sweep": "",
        "data_augmentation": data_augmentation,
        "fake_data_length": fake_data_length,
        "model_name": "TFT",
        "scaler_name": "standard",

        "seed": seed_val,
        "val_MAE": mae,
        "val_MASE": "",
        "val_MSE": mse,
        "val_R2": r2,
        "val_RMSE": rmse,
        "val_SMAPE": smape,

        "Pinball_0.05": loss_q05,
        "Pinball_0.50": loss_q50,
        "Pinball_0.95": loss_q95,

        "Coverage_0.90": coverage_90,
        "Coverage_Error_0.90": error_90,
        "Sharpness_0.90": sharpness_90,
    }

    # --- 12) Save metrics to CSV (header only on first write, same quoting every time) ---
    csv_path = f"{results_dir}/{run_tag}_metrics.csv"
    write_header = not os.path.exists(csv_path)
    pd.DataFrame([metrics]).to_csv(
        csv_path, mode="a", header=write_header, index=False, quoting=csv.QUOTE_ALL
    )

    # --- 13) Plot forecast vs ground truth ---
    plt.figure(figsize=(14, 6))
    if pinball_usage:
        plt.plot(timestamps, y, label="Ground Truth", color='black', linewidth=2)
        plt.plot(timestamps, yhat, label="Median Prediction (0.5)", color='#0072B2', linewidth=2)
        plt.fill_between(
            timestamps, p05, p95,
            color='#0072B2', alpha=0.2, label="90% Confidence Interval (0.05–0.95)"
        )
        plot_title = "TFT Quantile Regression - Confidence Interval"
    else:
        plt.plot(timestamps, y, label = "Ground truth", color = 'black', linewidth=2)
        plt.plot(timestamps, yhat, label = "Predictions", color='#0072B2', linewidth=2)
        plot_title = f"TFT Predictions vs Ground Truth ({split_name} set)"

    # x-axis ticks as weekday + month-day + hour:minute
    ax = plt.gca()
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%a %m-%d %H:%M'))

    plt.grid(alpha=0.3)
    plt.title(plot_title)
    plt.xlabel("Time")
    plt.ylabel("Energy")
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.savefig(f"{results_dir}/{run_tag}_plot.png", dpi=300)
    plt.close()


In [5]:
from itertools import product
from tqdm import tqdm

# Experiment grid: 5 seeds, 5 synthetic-set sizes, one fixed loss setting.
seeds = [42, 4242, 1234, 2021, 777]
fake_lengths = [200, 400, 800, 1200, 1600]
PINBALL = True  # quantile loss on/off, held fixed so the grid is 5 + 5 + 25 = 35 runs

# Every entry is (seed, pinball, testing_stage, data_aug, fake_len).
# 1) validation stage, no augmentation: one run per seed
runs = [(seed, PINBALL, False, False, 0) for seed in seeds]
# 2) testing stage, no augmentation: one run per seed
runs += [(seed, PINBALL, True, False, 0) for seed in seeds]
# 3) testing stage with augmentation: every seed x synthetic-set size
runs += [
    (seed, PINBALL, True, True, fake_len)
    for seed, fake_len in product(seeds, fake_lengths)
]

# Guard against an accidentally edited grid.
assert len(runs) == 35, f"Expected 35 runs, got {len(runs)}"

# Run everything sequentially behind a progress bar.
for seed, pinball, test, aug, fake_len in tqdm(runs, desc="TFT runs", unit="run"):
    TFT_training(
        seed_val=seed,
        testing_stage=test,
        pinball_usage=pinball,
        data_augmentation=aug,
        fake_data_length=fake_len,
    )


TFT runs:   0%|          | 0/35 [00:00<?, ?run/s]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3080')

Epoch 22: 100%|██████████| 105/105 [00:06<00:00, 16.16it/s, v_num=939, train_loss_step=0.0819, val_loss=0.0876, train_loss_epoch=0.0653]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.6826
Val MSE:   172.4134
Val RMSE:  13.1306
Val R^2:   0.9624
Val sMAPE: 24.06%


TFT runs:   3%|▎         | 1/35 [02:29<1:24:37, 149.35s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 22: 100%|██████████| 105/105 [00:06<00:00, 16.16it/s, v_num=941, train_loss_step=0.0443, val_loss=0.120, train_loss_epoch=0.0631] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.2794
Val MSE:   164.4219
Val RMSE:  12.8227
Val R^2:   0.9641
Val sMAPE: 23.70%


TFT runs:   6%|▌         | 2/35 [04:56<1:21:16, 147.77s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 13: 100%|██████████| 105/105 [00:06<00:00, 16.78it/s, v_num=943, train_loss_step=0.0562, val_loss=0.106, train_loss_epoch=0.0705]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.1783
Val MSE:   181.9862
Val RMSE:  13.4902
Val R^2:   0.9603
Val sMAPE: 23.86%


TFT runs:   9%|▊         | 3/35 [06:26<1:04:49, 121.55s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 14: 100%|██████████| 105/105 [00:06<00:00, 16.05it/s, v_num=945, train_loss_step=0.0682, val_loss=0.155, train_loss_epoch=0.0726] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.4159
Val MSE:   211.4402
Val RMSE:  14.5410
Val R^2:   0.9539
Val sMAPE: 24.92%


TFT runs:  11%|█▏        | 4/35 [08:03<57:51, 111.98s/run]  Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Epoch 16: 100%|██████████| 105/105 [00:06<00:00, 16.58it/s, v_num=947, train_loss_step=0.0452, val_loss=0.146, train_loss_epoch=0.0682] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.4932
Val MSE:   218.2715
Val RMSE:  14.7740
Val R^2:   0.9524
Val sMAPE: 25.39%


TFT runs:  14%|█▍        | 5/35 [09:54<55:42, 111.41s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   

Epoch 39: 100%|██████████| 112/112 [00:06<00:00, 16.79it/s, v_num=949, train_loss_step=0.0605, val_loss=0.0842, train_loss_epoch=0.0539]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.3026
Val MSE:   208.2715
Val RMSE:  14.4316
Val R^2:   0.9391
Val sMAPE: 22.16%


TFT runs:  17%|█▋        | 6/35 [14:26<1:20:21, 166.27s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 14: 100%|██████████| 112/112 [00:06<00:00, 16.88it/s, v_num=951, train_loss_step=0.0643, val_loss=0.112, train_loss_epoch=0.0728]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.1179
Val MSE:   199.1878
Val RMSE:  14.1134
Val R^2:   0.9418
Val sMAPE: 23.60%


TFT runs:  20%|██        | 7/35 [16:10<1:08:05, 145.93s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 40: 100%|██████████| 112/112 [00:06<00:00, 16.90it/s, v_num=953, train_loss_step=0.0458, val_loss=0.0941, train_loss_epoch=0.0549]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   13.0517
Val MSE:   251.0366
Val RMSE:  15.8441
Val R^2:   0.9267
Val sMAPE: 34.32%


TFT runs:  23%|██▎       | 8/35 [20:49<1:24:43, 188.28s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 18: 100%|██████████| 112/112 [00:06<00:00, 16.65it/s, v_num=955, train_loss_step=0.0706, val_loss=0.106, train_loss_epoch=0.0687]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.2204
Val MSE:   182.5686
Val RMSE:  13.5118
Val R^2:   0.9467
Val sMAPE: 17.75%


TFT runs:  26%|██▌       | 9/35 [23:01<1:13:54, 170.56s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/home/stelios-pc/anaconda3/envs/pytorch/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Epoch 16: 100%|██████████| 112/112 [00:06<00:00, 16.70it/s, v_num=957, train_loss_step=0.0743, val_loss=0.100, train_loss_epoch=0.072]  


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.5136
Val MSE:   199.7730
Val RMSE:  14.1341
Val R^2:   0.9416
Val sMAPE: 17.81%


TFT runs:  29%|██▊       | 10/35 [25:00<1:04:24, 154.59s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base 

Epoch 32: 100%|██████████| 124/124 [00:07<00:00, 16.42it/s, v_num=959, train_loss_step=0.059, val_loss=0.0941, train_loss_epoch=0.0581] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.3465
Val MSE:   173.6027
Val RMSE:  13.1758
Val R^2:   0.9493
Val sMAPE: 18.19%


TFT runs:  31%|███▏      | 11/35 [29:11<1:13:39, 184.15s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base 

Epoch 44: 100%|██████████| 137/137 [00:08<00:00, 16.27it/s, v_num=961, train_loss_step=0.0387, val_loss=0.0905, train_loss_epoch=0.0492]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.8080
Val MSE:   188.0002
Val RMSE:  13.7113
Val R^2:   0.9451
Val sMAPE: 21.39%


TFT runs:  34%|███▍      | 12/35 [35:29<1:33:09, 243.02s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base 

Epoch 49: 100%|██████████| 162/162 [00:09<00:00, 17.44it/s, v_num=963, train_loss_step=0.0486, val_loss=0.0733, train_loss_epoch=0.0428]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 162/162 [00:09<00:00, 16.94it/s, v_num=963, train_loss_step=0.0486, val_loss=0.0733, train_loss_epoch=0.0428]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.8670
Val MSE:   197.9862
Val RMSE:  14.0708
Val R^2:   0.9422
Val sMAPE: 21.36%


TFT runs:  37%|███▋      | 13/35 [43:44<1:57:06, 319.38s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base 

Epoch 49: 100%|██████████| 187/187 [00:11<00:00, 16.65it/s, v_num=965, train_loss_step=0.0441, val_loss=0.0751, train_loss_epoch=0.0406]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 187/187 [00:11<00:00, 16.16it/s, v_num=965, train_loss_step=0.0441, val_loss=0.0751, train_loss_epoch=0.0406]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.0286
Val MSE:   172.2559
Val RMSE:  13.1246
Val R^2:   0.9497
Val sMAPE: 17.67%


TFT runs:  40%|████      | 14/35 [53:39<2:20:55, 402.64s/run]Seed set to 42
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base 

Epoch 36: 100%|██████████| 212/212 [00:12<00:00, 16.52it/s, v_num=967, train_loss_step=0.0401, val_loss=0.0791, train_loss_epoch=0.0436]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.0982
Val MSE:   207.9977
Val RMSE:  14.4221
Val R^2:   0.9392
Val sMAPE: 19.29%


TFT runs:  43%|████▎     | 15/35 [1:01:56<2:23:42, 431.12s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 27: 100%|██████████| 124/124 [00:07<00:00, 16.34it/s, v_num=969, train_loss_step=0.0809, val_loss=0.0963, train_loss_epoch=0.0611]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.1128
Val MSE:   192.9869
Val RMSE:  13.8920
Val R^2:   0.9436
Val sMAPE: 23.66%


TFT runs:  46%|████▌     | 16/35 [1:05:40<1:56:47, 368.80s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 36: 100%|██████████| 137/137 [00:08<00:00, 15.71it/s, v_num=971, train_loss_step=0.0536, val_loss=0.0917, train_loss_epoch=0.0503]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.2112
Val MSE:   199.2022
Val RMSE:  14.1139
Val R^2:   0.9418
Val sMAPE: 26.45%


TFT runs:  49%|████▊     | 17/35 [1:11:03<1:46:31, 355.07s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 24: 100%|██████████| 162/162 [00:10<00:00, 15.96it/s, v_num=973, train_loss_step=0.0764, val_loss=0.106, train_loss_epoch=0.0543] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.1574
Val MSE:   201.3696
Val RMSE:  14.1905
Val R^2:   0.9412
Val sMAPE: 20.35%


TFT runs:  51%|█████▏    | 18/35 [1:15:25<1:32:42, 327.19s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 38: 100%|██████████| 187/187 [00:11<00:00, 16.03it/s, v_num=975, train_loss_step=0.036, val_loss=0.0799, train_loss_epoch=0.0446] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.5059
Val MSE:   226.2428
Val RMSE:  15.0414
Val R^2:   0.9339
Val sMAPE: 21.30%


TFT runs:  54%|█████▍    | 19/35 [1:23:12<1:38:22, 368.93s/run]Seed set to 4242
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 49: 100%|██████████| 212/212 [00:12<00:00, 16.59it/s, v_num=977, train_loss_step=0.0345, val_loss=0.0717, train_loss_epoch=0.0381]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 212/212 [00:13<00:00, 16.10it/s, v_num=977, train_loss_step=0.0345, val_loss=0.0717, train_loss_epoch=0.0381]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   12.3668
Val MSE:   247.0423
Val RMSE:  15.7176
Val R^2:   0.9278
Val sMAPE: 24.39%


TFT runs:  57%|█████▋    | 20/35 [1:34:21<1:54:45, 459.03s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 26: 100%|██████████| 124/124 [00:07<00:00, 16.15it/s, v_num=979, train_loss_step=0.0554, val_loss=0.0984, train_loss_epoch=0.0581]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   13.0591
Val MSE:   281.5197
Val RMSE:  16.7786
Val R^2:   0.9177
Val sMAPE: 27.29%


TFT runs:  60%|██████    | 21/35 [1:37:59<1:30:14, 386.72s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 21: 100%|██████████| 137/137 [00:08<00:00, 15.94it/s, v_num=981, train_loss_step=0.0717, val_loss=0.0972, train_loss_epoch=0.0615]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.0557
Val MSE:   186.5197
Val RMSE:  13.6572
Val R^2:   0.9455
Val sMAPE: 20.63%


TFT runs:  63%|██████▎   | 22/35 [1:41:17<1:11:31, 330.08s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 39: 100%|██████████| 162/162 [00:10<00:00, 15.75it/s, v_num=983, train_loss_step=0.066, val_loss=0.0805, train_loss_epoch=0.0467] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   12.1252
Val MSE:   251.8441
Val RMSE:  15.8696
Val R^2:   0.9264
Val sMAPE: 25.40%


TFT runs:  66%|██████▌   | 23/35 [1:48:13<1:11:12, 356.03s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 36: 100%|██████████| 187/187 [00:11<00:00, 16.07it/s, v_num=985, train_loss_step=0.0356, val_loss=0.0698, train_loss_epoch=0.045] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.1389
Val MSE:   173.1833
Val RMSE:  13.1599
Val R^2:   0.9494
Val sMAPE: 17.08%


TFT runs:  69%|██████▊   | 24/35 [1:55:33<1:09:53, 381.23s/run]Seed set to 1234
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 37: 100%|██████████| 212/212 [00:13<00:00, 16.13it/s, v_num=987, train_loss_step=0.0486, val_loss=0.0748, train_loss_epoch=0.042] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.3315
Val MSE:   228.6588
Val RMSE:  15.1215
Val R^2:   0.9332
Val sMAPE: 20.68%


TFT runs:  71%|███████▏  | 25/35 [2:04:06<1:10:06, 420.64s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 23: 100%|██████████| 124/124 [00:08<00:00, 15.43it/s, v_num=989, train_loss_step=0.0671, val_loss=0.105, train_loss_epoch=0.0628] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.1810
Val MSE:   198.7445
Val RMSE:  14.0977
Val R^2:   0.9419
Val sMAPE: 26.20%


TFT runs:  74%|███████▍  | 26/35 [2:07:20<52:54, 352.69s/run]  Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = b

Epoch 17: 100%|██████████| 137/137 [00:08<00:00, 16.54it/s, v_num=991, train_loss_step=0.0683, val_loss=0.0943, train_loss_epoch=0.066] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.7124
Val MSE:   229.7738
Val RMSE:  15.1583
Val R^2:   0.9329
Val sMAPE: 21.27%


TFT runs:  77%|███████▋  | 27/35 [2:10:02<39:23, 295.47s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = bas

Epoch 44: 100%|██████████| 162/162 [00:10<00:00, 16.14it/s, v_num=993, train_loss_step=0.0486, val_loss=0.083, train_loss_epoch=0.0433] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.3254
Val MSE:   228.2246
Val RMSE:  15.1071
Val R^2:   0.9333
Val sMAPE: 20.76%


TFT runs:  80%|████████  | 28/35 [2:17:49<40:28, 346.93s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = bas

Epoch 44: 100%|██████████| 187/187 [00:11<00:00, 16.38it/s, v_num=995, train_loss_step=0.0548, val_loss=0.0692, train_loss_epoch=0.0416]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.6084
Val MSE:   230.3382
Val RMSE:  15.1769
Val R^2:   0.9327
Val sMAPE: 21.69%


TFT runs:  83%|████████▎ | 29/35 [2:26:43<40:18, 403.10s/run]Seed set to 2021
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = bas

Epoch 34: 100%|██████████| 212/212 [00:13<00:00, 16.10it/s, v_num=997, train_loss_step=0.0577, val_loss=0.0758, train_loss_epoch=0.0443]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.7530
Val MSE:   192.1686
Val RMSE:  13.8625
Val R^2:   0.9439
Val sMAPE: 20.35%


TFT runs:  86%|████████▌ | 30/35 [2:34:39<35:24, 424.89s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 41: 100%|██████████| 124/124 [00:07<00:00, 15.57it/s, v_num=999, train_loss_step=0.0389, val_loss=0.0955, train_loss_epoch=0.0502]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.1684
Val MSE:   167.0333
Val RMSE:  12.9241
Val R^2:   0.9512
Val sMAPE: 19.41%


TFT runs:  89%|████████▊ | 31/35 [2:40:16<26:34, 398.64s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 33: 100%|██████████| 137/137 [00:08<00:00, 15.90it/s, v_num=1001, train_loss_step=0.0428, val_loss=0.0928, train_loss_epoch=0.053] 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   10.6339
Val MSE:   177.9527
Val RMSE:  13.3399
Val R^2:   0.9480
Val sMAPE: 20.00%


TFT runs:  91%|█████████▏| 32/35 [2:45:27<18:36, 372.19s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 44: 100%|██████████| 162/162 [00:10<00:00, 15.34it/s, v_num=1003, train_loss_step=0.0486, val_loss=0.0791, train_loss_epoch=0.0439]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.9967
Val MSE:   240.4708
Val RMSE:  15.5071
Val R^2:   0.9297
Val sMAPE: 20.67%


TFT runs:  94%|█████████▍| 33/35 [2:53:32<13:32, 406.02s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 25: 100%|██████████| 187/187 [00:11<00:00, 16.25it/s, v_num=1005, train_loss_step=0.0357, val_loss=0.0851, train_loss_epoch=0.0514]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.3905
Val MSE:   208.2773
Val RMSE:  14.4318
Val R^2:   0.9391
Val sMAPE: 24.38%


TFT runs:  97%|█████████▋| 34/35 [2:59:01<06:22, 382.95s/run]Seed set to 777
  tmp['is_low_usage_next'] = tmp['is_low_usage'].shift(-1).fillna(method='ffill').astype(int)
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base + pd.to_timedelta(df_i["time_idx"], unit="H")
  df_i["Timestamp"] = base

Epoch 49: 100%|██████████| 212/212 [00:13<00:00, 15.28it/s, v_num=1007, train_loss_step=0.0295, val_loss=0.0704, train_loss_epoch=0.038] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 212/212 [00:14<00:00, 14.86it/s, v_num=1007, train_loss_step=0.0295, val_loss=0.0704, train_loss_epoch=0.038]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Val MAE:   11.1525
Val MSE:   200.5675
Val RMSE:  14.1622
Val R^2:   0.9414
Val sMAPE: 17.98%


TFT runs: 100%|██████████| 35/35 [3:11:00<00:00, 327.43s/run]
