In [None]:
#!git clone https://github.com/zhouhaoyi/Informer2020.git
#!git clone https://github.com/zhouhaoyi/ETDataset.git
# modify the code based on my dataset

In [None]:
# !{sys.executable} -m pip install matplotlib
# !{sys.executable} -m pip install pandas
# !{sys.executable} -m pip install scikit_learn
# !{sys.executable} -m pip install torch
# !{sys.executable} -m pip install seaborn

In [1]:
import sys
import os
import pandas as pd
import numpy as np
from argparse import Namespace

from data.data_loader import Dataset_Custom
from utils.metrics import metric

In [3]:
import torch

# One value per line: PyTorch version, torch.version.cuda, and whether CUDA is usable
print(torch.__version__, torch.version.cuda, torch.cuda.is_available(), sep="\n")

2.8.0+cu128
12.8
True


In [4]:
# Count the CUDA devices visible to PyTorch, then list each one as "GPU <index>: <name>"
num_gpus = torch.cuda.device_count()
print("Number of GPUs available:", num_gpus)

# map() is lazy, so each device name is still queried right before it is printed
for idx, name in enumerate(map(torch.cuda.get_device_name, range(num_gpus))):
    print(f"GPU {idx}: {name}")

Number of GPUs available: 1
GPU 0: NVIDIA A30


## Prepare the data

In [5]:
# Raw traffic table; recover_timestamp below reads its 'date' and 'time' columns
raw_csv_path = '../GD030A_S.csv'
my_data = pd.read_csv(raw_csv_path)

In [6]:
# Define the recover_timestamp function
def recover_timestamp(data):
    """
    Rebuild a gap-free hourly time axis for a table with 'date' and 'time' columns.

    A timestamp is assembled for every row from its 'date' and 'time' values,
    used as the index, and the table is reindexed onto the complete hourly range
    between the earliest and latest timestamp. Hours without an observation
    appear as rows filled with NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a string column 'date' and a column 'time'. The combined
        text ``"<date> <time>:00"`` has to match ``'%Y-%m-%d %H:%M'``, i.e.
        'time' is presumably the hour of the day -- confirm against the raw CSV.
        The frame passed in is left untouched (no 'datetime' column is added to it).

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by the full hourly range. All original columns,
        including 'date' and 'time', are kept; they are NaN on inserted rows.

    Raises
    ------
    ValueError
        If a combined date/time string does not match the expected format
        (raised by ``pd.to_datetime``).
    """
    # Build the timestamps as a separate Series instead of writing a new column
    # into `data`, so the caller's DataFrame is never mutated.
    timestamps = pd.to_datetime(
        data['date'] + ' ' + data['time'].astype(str) + ':00',
        format='%Y-%m-%d %H:%M',
    )

    # assign() returns a copy; set_index then moves 'datetime' out of the columns.
    indexed = data.assign(datetime=timestamps).set_index('datetime')

    # An offset object avoids the 'H' string alias, which newer pandas deprecates.
    full_time_range = pd.date_range(
        start=indexed.index.min(),
        end=indexed.index.max(),
        freq=pd.offsets.Hour(),
    )

    # Timestamps missing from the data become all-NaN rows.
    return indexed.reindex(full_time_range)

Custom data (xxx.csv) must include at least two columns: `date` (format: `YYYY-MM-DD hh:mm:ss`) and the target feature.

In [None]:
# Fill the hourly gaps, drop the raw 'date'/'time' columns, and expose the full
# timestamp as a string column named 'date'; the last line displays the frame
traffic_full = (
    recover_timestamp(my_data)
    .drop(columns=['date', 'time'])
    .reset_index(names='date')
    .astype({'date': str})
)
traffic_full

In [None]:
# Uncomment to write the gap-filled table to disk. The training command further
# down is invoked with `--data traffic_full --root_path ../` and echoes
# data_path='traffic_full.csv', so it presumably reads this file -- confirm the
# file exists at that location before launching training.
#traffic_full.to_csv('traffic_full.csv', index=False)

## Run training directly using bash

### <span style="color:red">Next step: record the training time and inference time in a log file</span>
#### <span style="color:red">Write a loop over the shell commands and `inverse_results`</span>

In [7]:
# Put this interpreter's bin directory first on PATH so that `python` in the
# shell commands below resolves to the current Python environment
env_bin_prefix = f"{sys.prefix}/bin:"
os.environ["PATH"] = env_bin_prefix + os.environ["PATH"]

In [8]:
cd Informer2020

/home/yl2672496l/Yue/code/transformer/Informer2020


In [36]:
# Launch training through the repository's main_informer.py on the custom dataset
# (the echoed Namespace in the output shows data_path='traffic_full.csv', target='flow').
# Everything after '#' on the command line is a shell comment, so --inverse is NOT
# passed; the echoed arguments confirm inverse=False. Results are therefore presumably
# stored in scaled units, which is why inverse_results is applied afterwards.
# NOTE(review): --itr 10 presumably repeats the experiment 10 times, with setting names
# suffixed _0, _1, ... -- only _0 is visible in the output; confirm.
!python -u main_informer.py --model informer --data traffic_full --root_path ../ --features S --seq_len 24 --label_len 24 --pred_len 6 --e_layers 2 --d_layers 1 --attn prob --des 'Exp' --itr 10 #--inverse

Args in experiment:
Namespace(model='informer', data='traffic_full', root_path='../', data_path='traffic_full.csv', features='S', target='flow', freq='h', checkpoints='./checkpoints/', seq_len=24, label_len=24, pred_len=6, enc_in=1, dec_in=1, c_out=1, d_model=512, n_heads=8, e_layers=2, d_layers=1, s_layers=[3, 2, 1], d_ff=2048, factor=5, padding=0, distil=True, dropout=0.05, attn='prob', embed='timeF', activation='gelu', output_attention=False, do_predict=False, mix=True, cols=None, num_workers=0, itr=10, train_epochs=6, batch_size=32, patience=3, learning_rate=0.0001, des='Exp', loss='mse', lradj='type1', use_amp=False, inverse=False, use_gpu=True, gpu=0, use_multi_gpu=False, devices='0,1,2,3', detail_freq='h')
Use GPU: cuda:0
>>>>>>>start training : informer_traffic_full_ftS_sl24_ll24_pl6_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_Exp_0>>>>>>>>>>>>>>>>>>>>>>>>>>
train 24515
[DEBUG] Original dataset length: 24515
[DEBUG] Cleaned dataset length (NaN removed): 23049
val 

## Get the prediction results and metrics

In [33]:
def inverse_results(setting, args):
    """
    Bring one experiment's saved predictions back to the original data scale.

    Reads ``pred.npy`` and ``true.npy`` from ``./results/<setting>/``, undoes the
    scaling with the ``scaler`` of a freshly built ``Dataset_Custom``, evaluates
    ``metric`` on the rescaled arrays, writes ``pred_inverse.npy``,
    ``true_inverse.npy`` and ``metrics_inverse.npy`` into the same folder, and
    prints the metrics.

    Parameters
    ----------
    setting : str
        Name of the experiment's sub-folder under ``./results/`` (relative to
        the current working directory).
    args : namespace-like
        Must provide ``root_path``, ``data_path``, ``seq_len``, ``label_len``,
        ``pred_len``, ``features``, ``target``, ``freq`` and ``cols``. They
        should mirror the values used for the training run.

    Returns
    -------
    tuple
        ``(preds_inv, trues_inv, (mae, mse, rmse, mape, mspe))``.
    """
    out_dir = f'./results/{setting}/'

    # Scaled arrays stored by the experiment run
    scaled_preds = np.load(out_dir + 'pred.npy')
    scaled_trues = np.load(out_dir + 'true.npy')

    # The dataset object is created only to get at its `scaler` attribute.
    # NOTE(review): assumes this scaler is fit the same way as during training
    # (on the train split) -- confirm in data/data_loader.py.
    window = [args.seq_len, args.label_len, args.pred_len]
    train_set = Dataset_Custom(
        root_path=args.root_path,
        data_path=args.data_path,
        flag='train',
        size=window,
        features=args.features,
        target=args.target,
        scale=True,
        inverse=False,
        timeenc=0,
        freq=args.freq,
        cols=args.cols
    )
    scaler = train_set.scaler

    def _to_original_scale(scaled):
        # Collapse all leading axes so the scaler receives a 2D array whose
        # columns are the last axis, then restore the input shape.
        flat = scaled.reshape(-1, scaled.shape[-1])
        return scaler.inverse_transform(flat).reshape(scaled.shape)

    preds_inv = _to_original_scale(scaled_preds)
    trues_inv = _to_original_scale(scaled_trues)

    # Metrics are computed on the rescaled (original-unit) values
    mae, mse, rmse, mape, mspe = metric(preds_inv, trues_inv)

    # Persist the rescaled arrays and their metrics next to the scaled ones
    artifacts = (
        ('pred_inverse.npy', preds_inv),
        ('true_inverse.npy', trues_inv),
        ('metrics_inverse.npy', np.array([mae, mse, rmse, mape, mspe])),
    )
    for file_name, payload in artifacts:
        np.save(out_dir + file_name, payload)

    report_lines = [
        "✓ Inverse transformation complete",
        "=== Inverse Scale Metrics ===",
        f"MSE:  {mse}",
        f"MAE:  {mae}",
        f"RMSE: {rmse}",
        f"MAPE: {mape}",
        f"MSPE: {mspe}",
    ]
    print("\n".join(report_lines))

    return preds_inv, trues_inv, (mae, mse, rmse, mape, mspe)

In [34]:
# Results folder to post-process: the run whose setting name ends in _0
setting = "informer_traffic_full_ftS_sl24_ll24_pl6_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_Exp_0"

# Only the fields inverse_results reads; the values mirror the
# "Args in experiment" echo of the training command
experiment_fields = {
    "root_path": "../",
    "data_path": "traffic_full.csv",
    "seq_len": 24,
    "label_len": 24,
    "pred_len": 6,
    "features": "S",
    "target": "flow",
    "freq": 'h',
    "cols": None,
}
args = Namespace(**experiment_fields)

preds_inv, trues_inv, metrics_inv = inverse_results(setting, args)

✓ Inverse transformation complete
=== Inverse Scale Metrics ===
MSE:  1669.9002685546875
MAE:  27.648347854614258
RMSE: 40.86441421508789
MAPE: 0.3588350713253021
MSPE: 0.6333627700805664
