Transformer Forecasting

1. Install Dependencies

In [1]:
!pip install neuralforecast pytorch-lightning
!pip install neuralforecast neuralforecast[timesnet] transformers





2. Load & Prepare Data

In [2]:
import pandas as pd

# Load the dataset
df = pd.read_csv("../dataset/combined_with_cluster_feature.csv")

# Convert 'time' column to datetime format
df['ds'] = pd.to_datetime(df['time'])

# Rename relevant columns for NeuralForecast
df = df.rename(columns={
    'down': 'y',
    'Cluster_Feature': 'slice_label'
})

# Add a unique_id (can be 'District' if you treat districts as stations)
df['unique_id'] = df['District'].astype(str)

# Retain required columns
df = df[['unique_id', 'ds', 'y', 'slice_label']]


  from pandas.core import (


3. Import Transformer Models

In [3]:
!pip install --upgrade typing_extensions



In [5]:
from neuralforecast import NeuralForecast
from neuralforecast.models import TimesNet, Informer
from neuralforecast.losses.pytorch import MAE


In [4]:
import torch
print(torch.cuda.is_available())
print(torch.__version__)
print(torch.version.cuda)


True
2.5.1+cu121
12.1


4. Define the Transformer Models

TimesNet

In [6]:
model_tn = TimesNet(
    h=6,                      # Predict next 6 steps
    input_size=12,           # Look-back window of 12 time steps
    hidden_size=64,
    conv_hidden_size=32,
    num_kernels=4,
    encoder_layers=2,
    loss=MAE(),
    max_steps=500,
    batch_size=32,
    # GPU SETTINGS
    accelerator='gpu',
    devices=1,
)


Seed set to 1


Informer

In [7]:
model_if = Informer(
    h=6,
    input_size=12,
    hidden_size=64,
    n_head=4,
    factor=3,
    encoder_layers=2,
    decoder_layers=1,
    dropout=0.05,
    distil=True,
    loss=MAE(),
    max_steps=500,
    batch_size=32,
    # GPU SETTINGS
    accelerator='gpu',
    devices=1,
)


Seed set to 1


5. Fit the Models

In [8]:
nf = NeuralForecast(
    models=[model_tn, model_if],
    freq='5T'  # Change as per actual granularity
)

# Fit models
nf.fit(
    df=df,
    id_col='unique_id',
    time_col='ds',
    target_col='y'
)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type          | Params | Mode 
---------------------------------------------------------
0 | loss           | MAE           | 0      | train
1 | padder_train   | ConstantPad1d | 0      | train
2 | scaler         | TemporalNorm  | 0      | train
3 | model          | ModuleList    | 688 K  | train
4 | enc_embedding  | DataEmbedding | 192    | train
5 | layer_norm     | LayerNorm     | 128    | train
6 | predict_linear | Linear        | 234    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=500` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | enc_embedding | DataEmbedding | 192    | train
4 | dec_embedding | DataEmbedding | 192    | train
5 | encoder       | TransEncoder  | 54.8 K | train
6 | decoder       | TransDecoder  | 38.0 K | train
--------------------------------------------------------
93.2 K    Trainable params
0         Non-trainable params
93.2 K    Total params
0.373     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=500` reached.


6. Generate Predictions

In [None]:
pred_df = nf.predict()


  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [None]:
print(pred_df.columns)


Index(['unique_id', 'ds', 'TimesNet', 'Informer'], dtype='object')


In [None]:
# Ensure datetime format consistency
df['ds'] = pd.to_datetime(df['ds'])
pred_df['ds'] = pd.to_datetime(pred_df['ds'])

# If rounding needed to match granularity
df['ds'] = df['ds'].dt.round('1min')
pred_df['ds'] = pred_df['ds'].dt.round('1min')


In [None]:
print("DF unique_ids:", df['unique_id'].unique())
print("Pred unique_ids:", pred_df['unique_id'].unique())


DF unique_ids: ['ElBorn' 'LesCorts' 'PobleSec']
Pred unique_ids: ['ElBorn' 'LesCorts' 'PobleSec']


In [None]:
# Merge and drop missing predictions
merged = df.merge(pred_df, on=['unique_id', 'ds'], how='inner')
merged = merged.dropna(subset=['TimesNet'])  # or 'Informer'

print(f"Merged rows: {len(merged)}")


Merged rows: 0


7. Evaluate Predictions

In [21]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Evaluate if data exists
if len(merged) > 0:
    mae_tn = mean_absolute_error(merged['y'], merged['TimesNet'])
    rmse_tn = np.sqrt(mean_squared_error(merged['y'], merged['TimesNet']))
    print(f"TimesNet MAE: {mae_tn:.2f}, RMSE: {rmse_tn:.2f}")
else:
    print("No overlapping timestamps between actual and predicted data.")


No overlapping timestamps between actual and predicted data.


8. Per-Slice Evaluation

In [24]:
from sklearn.metrics import mean_absolute_error

df['ds'] = pd.to_datetime(df['ds'])
pred_df['ds'] = pd.to_datetime(pred_df['ds'])

# Attach slice_label to prediction
pred_df = pred_df.merge(df[['unique_id', 'ds', 'slice_label']], on=['unique_id', 'ds'], how='left')

# Now merge original data and predictions (including slice_label)
merged = df.merge(pred_df, on=['unique_id', 'ds'], how='inner')

if merged.empty:
    print("No overlapping timestamps between actual and predicted data.")
    print("df sample:", df[['unique_id', 'ds']].head())
    print("pred_df sample:", pred_df[['unique_id', 'ds']].head())
else:
    for sl in merged['slice_label'].unique():
        subset = merged[merged['slice_label'] == sl]
        print(f"\nSlice {sl}:")
        print(f"TimesNet MAE: {mean_absolute_error(subset['y'], subset['TimesNet']):.2f}")
        print(f"Informer MAE: {mean_absolute_error(subset['y'], subset['Informer']):.2f}")

No overlapping timestamps between actual and predicted data.
df sample:   unique_id                  ds
0    ElBorn 2018-03-28 15:56:00
1    ElBorn 2018-03-28 15:58:00
2    ElBorn 2018-03-28 16:00:00
3    ElBorn 2018-03-28 16:02:00
4    ElBorn 2018-03-28 16:04:00
pred_df sample:   unique_id                  ds
0    ElBorn 2018-04-03 11:43:00
1    ElBorn 2018-04-03 11:48:00
2    ElBorn 2018-04-03 11:53:00
3    ElBorn 2018-04-03 11:58:00
4    ElBorn 2018-04-03 12:03:00


 Informer work with slice-specific forecasting

Approach 1: Per-Slice Informer Training (Loop over Cluster Labels)


    - Train a separate Informer model for each pseudo-slice (slice_label). This mimics slice-aware modeling.

In [41]:
import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.models import Informer
from neuralforecast.losses.pytorch import MAE

# Load and prepare the dataset
df = pd.read_csv("../dataset/combined_with_cluster_feature.csv")
df['ds'] = pd.to_datetime(df['time'])
df = df.rename(columns={'down': 'y', 'District': 'unique_id', 'Cluster_Feature': 'slice_label'})
df = df[['unique_id', 'ds', 'y', 'slice_label']]

results = []

# Loop over each slice_label (pseudo-slice cluster)
for slice_val in df['slice_label'].unique():
    print(f"\nTraining Informer model for slice {slice_val}...")
    
    df_slice = df[df['slice_label'] == slice_val]

    # Define Informer model
    model_if = Informer(
        h=6,
        input_size=12,
        hidden_size=64,
        n_head=4,
        factor=3,
        encoder_layers=2,
        decoder_layers=1,
        dropout=0.05,
        distil=True,
        loss=MAE(),
        max_steps=300,
        batch_size=32,
    )

    nf = NeuralForecast(
        models=[model_if],
        freq='5T'
    )

    # Fit model
    nf.fit(
        df=df_slice,
        id_col='unique_id',
        time_col='ds',
        target_col='y'
    )

    # Predict
    pred = nf.predict()
    pred['slice_label'] = slice_val
    results.append(pred)

# Combine results from all slices
final_preds = pd.concat(results)
final_preds.to_csv("../dataset/predictions_informer_slice_aware.csv", index=False)


Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Training Informer model for slice 0...



  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | enc_embedding | DataEmbedding | 192    | train
4 | dec_embedding | DataEmbedding | 192    | train
5 | encoder       | TransEncoder  | 54.8 K | train
6 | decoder       | TransDecoder  | 38.0 K | train
--------------------------------------------------------
93.2 K    Trainable params
0         Non-trainable params
93.2 K    Total params
0.373     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=300` reached.
  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | enc_embedding | DataEmbedding | 192    | train
4 | dec_embedding | DataEmbedding | 192    | train
5 | encoder       | TransEncoder  | 54.8 K | train
6 | decoder       | TransDecoder  | 38.0 K | train
--------------------------------------------------------
93.2 K    Trainable params
0         Non-trainable params
93.2 K    Total params
0.373     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode



Training Informer model for slice 1...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=300` reached.
  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type          | Params | Mode 
--------------------------------------------------------
0 | loss          | MAE           | 0      | train
1 | padder_train  | ConstantPad1d | 0      | train
2 | scaler        | TemporalNorm  | 0      | train
3 | enc_embedding | DataEmbedding | 192    | train
4 | dec_embedding | DataEmbedding | 192    | train
5 | encoder       | TransEncoder  | 54.8 K | train
6 | decoder       | TransDecoder  | 38.0 K | train
--------------------------------------------------------
93.2 K    Trainable params
0         Non-trainable params
93.2 K    Total params
0.373     Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode



Training Informer model for slice 2...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=300` reached.
  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Approach 2: Use NHITS

    - use the same model for all slices, but still provide slice_label as a feature

In [42]:
from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS
from neuralforecast.losses.pytorch import MAE

# Make sure 'slice_label' is present for every row in df
# df should already have columns: ['unique_id', 'ds', 'y', 'slice_label']

model_nhits = NHITS(
    h=6,                      # Forecast horizon
    input_size=12,            # Look-back window
    max_steps=500,
    batch_size=32,
    hist_exog_list=['slice_label'],  # Add slice_label as historical exogenous
    loss=MAE()
)

nf = NeuralForecast(
    models=[model_nhits],
    freq='5T'
)

nf.fit(
    df=df,
    id_col='unique_id',
    time_col='ds',
    target_col='y'
)

pred_nhits = nf.predict()
pred_nhits.to_csv("../dataset/predictions_nhits_with_slice.csv", index=False)

Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.654     Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=500` reached.
  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [43]:
from neuralforecast import NeuralForecast
from neuralforecast.models import TimesNet
from neuralforecast.losses.pytorch import MAE

results = []

for sl in df['slice_label'].unique():
    print(f"\nTraining TimesNet model for slice {sl}...")
    df_slice = df[df['slice_label'] == sl]

    model_tn = TimesNet(
        h=6,
        input_size=12,
        hidden_size=64,
        conv_hidden_size=32,
        num_kernels=4,
        encoder_layers=2,
        loss=MAE(),
        max_steps=500,
        batch_size=32
    )

    nf = NeuralForecast(
        models=[model_tn],
        freq='5T'
    )

    nf.fit(
        df=df_slice,
        id_col='unique_id',
        time_col='ds',
        target_col='y'
    )

    pred = nf.predict()
    pred['slice_label'] = sl
    results.append(pred)

# Combine predictions from all slices
final_preds = pd.concat(results)
final_preds.to_csv("../dataset/predictions_timesnet_slice_aware.csv", index=False)

Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Training TimesNet model for slice 0...



  | Name           | Type          | Params | Mode 
---------------------------------------------------------
0 | loss           | MAE           | 0      | train
1 | padder_train   | ConstantPad1d | 0      | train
2 | scaler         | TemporalNorm  | 0      | train
3 | model          | ModuleList    | 688 K  | train
4 | enc_embedding  | DataEmbedding | 192    | train
5 | layer_norm     | LayerNorm     | 128    | train
6 | predict_linear | Linear        | 234    | train
7 | projection     | Linear        | 65     | train
---------------------------------------------------------
689 K     Trainable params
0         Non-trainable params
689 K     Total params
2.758     Total estimated model params size (MB)
42        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=500` reached.
  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type          | Params | Mode 
---------------------------------------------------------
0 | loss           | MAE           | 0      | train
1 | padder_train   | ConstantPad1d | 0      | train
2 | scaler         | TemporalNorm  | 0      | train
3 | model          | ModuleList    | 688 K  | train
4 | enc_embedding  | DataEmbedding | 192    | train
5 | layer_norm     | LayerNorm     | 128    | train
6 | predict_linear | Linear        | 234    | train
7 | projection     | Linear        | 65     | train
---------------------------------------------------------
689 K     Trainable params
0         Non-trainable params
689 K     Total params
2.758     Total estimated model params size (MB)
42        Modules in train mode
0         Modules in eval mode



Training TimesNet model for slice 1...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=500` reached.
  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Seed set to 1
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type          | Params | Mode 
---------------------------------------------------------
0 | loss           | MAE           | 0      | train
1 | padder_train   | ConstantPad1d | 0      | train
2 | scaler         | TemporalNorm  | 0      | train
3 | model          | ModuleList    | 688 K  | train
4 | enc_embedding  | DataEmbedding | 192    | train
5 | layer_norm     | LayerNorm     | 128    | train
6 | predict_linear | Linear        | 234    | train
7 | projection     | Linear        | 65     | train
---------------------------------------------------------
689 K     Trainable params
0         Non-trainable params
689 K     Total params
2.758     Total estimated model params size (MB)
42        Modules in train mode
0         Modules in eval mode



Training TimesNet model for slice 2...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_steps=500` reached.
  freq = pd.tseries.frequencies.to_offset(freq)
  freq = pd.tseries.frequencies.to_offset(freq)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Evaluation

In [45]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Load ground truth
df = pd.read_csv("../dataset/combined_with_cluster_feature.csv")
df['ds'] = pd.to_datetime(df['time'])
df = df.rename(columns={'down': 'y', 'District': 'unique_id'})
df = df[['unique_id', 'ds', 'y']]

# Load predictions
pred_inf = pd.read_csv("../dataset/predictions_informer_slice_aware.csv")
pred_inf['ds'] = pd.to_datetime(pred_inf['ds'])

pred_tn = pd.read_csv("../dataset/predictions_timesnet_slice_aware.csv")
pred_tn['ds'] = pd.to_datetime(pred_tn['ds'])

# Merge predictions with ground truth
merged = df.merge(pred_inf[['unique_id', 'ds', 'Informer']], on=['unique_id', 'ds'], how='left')
merged = merged.merge(pred_tn[['unique_id', 'ds', 'TimesNet']], on=['unique_id', 'ds'], how='left')

print("=== Overall Evaluation ===")
for model in ['Informer', 'TimesNet']:
    valid = merged[['y', model]].dropna()
    if len(valid) > 0:
        mae = mean_absolute_error(valid['y'], valid[model])
        rmse = np.sqrt(mean_squared_error(valid['y'], valid[model]))
        print(f"{model}: MAE={mae:.2f}, RMSE={rmse:.2f}")
    else:
        print(f"{model}: No predictions available.")

=== Overall Evaluation ===
Informer: MAE=78035131.35, RMSE=99209541.34
TimesNet: MAE=191806176.00, RMSE=261388187.27
