# Investigation on notebook by Huggingface on effectiveness transformers

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import sys
sys.path.append("..")



## First part
The blog says that the Autoformer paper claims some results in the MASE metric, but these are not present in the paper. This is code to calculate the MASE results ourselves to verify these claims. Do note that the blog only mentions results from the univariate autoformer and say that multivariate performs worse. We only have multivariate autoformers.

In [2]:
from models import Autoformer
from utils.tools import dotdict
import torch

def obtain_autoformer(pred_len, dataset):
    assert dataset in ["ECL", "Exchange"]
    assert pred_len in [96, 192, 336, 720]

    args = dotdict()
    args.pred_len = pred_len

    if dataset == "ECL":
        args.enc_in = 321
        args.dec_in = 321
        args.c_out = 321
    elif dataset == "Exchange":
        args.enc_in = 8
        args.dec_in = 8
        args.c_out = 8

    args.target = 'OT'
    args.des = 'train'
    args.dropout = 0.05
    args.num_workers = 10
    args.gpu = 0
    args.lradj = 'type1'
    args.devices = '0'
    args.use_gpu = False
    args.use_multi_gpu = False
    args.freq = 'h'
    args.checkpoints = './checkpoints/'
    args.bucket_size = 4
    args.n_hashes = 4
    args.is_trainging = True
    args.data = 'custom'
    args.features = 'M'
    args.seq_len = 96
    args.label_len = 48
    args.e_layers = 2
    args.d_layers = 1
    args.n_heads = 8
    args.factor = 1
    args.d_model = 512
    args.des = 'Exp'
    args.itr = 1
    args.d_ff = 2048
    args.moving_avg = 25
    args.factor = 3
    args.distil = True
    args.output_attention = False
    args.embed = 'timeF'

    autoformer_path = f"/Users/angelavansprang/Documents/PhD/transformers for time series/Autoformer/checkpoints/{dataset}_96_{pred_len}_Autoformer_custom_ftM_sl96_ll48_pl{pred_len}_dm512_nh8_el2_dl1_df2048_fc3_ebtimeF_dtTrue_Exp_0/checkpoint.pth"

    autoformer = Autoformer.Model(args).float()
    autoformer.load_state_dict(torch.load(autoformer_path, map_location=torch.device('cpu')))

    autoformer.eval()

    return autoformer


In [3]:
from data_provider.data_loader import Dataset_Custom
from torch.utils.data import DataLoader

def obtain_test_loader_electricity(pred_len, seq_len = 96): 
    label_len = 48

    data_set_electricity = Dataset_Custom(
        root_path = "/Users/angelavansprang/Documents/PhD/transformers for time series/Autoformer/dataset/electricity/",
        data_path = "electricity.csv",
        flag="train",
        size=[seq_len, label_len, pred_len], # seq_len, label_len, pred_len
        features="M",
        target="OT", #default
        timeenc=1,
        freq="h"
    )

    data_loader_electricity = DataLoader(
        data_set_electricity,
        batch_size=1,
        shuffle=False,
        num_workers=0,
        drop_last=False
    )
    return data_loader_electricity

In [18]:
from data_provider.data_loader import Dataset_Custom
from torch.utils.data import DataLoader

def obtain_test_loader_exchange(pred_len, seq_len = 96): 
    label_len = 48

    data_set_exchange = Dataset_Custom(
        root_path = "/Users/angelavansprang/Documents/PhD/transformers for time series/Autoformer/dataset/exchange_rate/",
        data_path = "exchange_rate.csv",
        flag="test",
        size=[seq_len, label_len, pred_len], # seq_len, label_len, pred_len
        features="M",
        target="OT", #default
        timeenc=1,
        freq="h"
    )

    data_loader_exchange = DataLoader(
        data_set_exchange,
        batch_size=10,
        shuffle=False,
        num_workers=0,
        drop_last=False
    )
    return data_loader_exchange

In [19]:
seq_len = 96
label_len = 48
pred_len = 720

dataloader_elec = obtain_test_loader_electricity(pred_len=pred_len, seq_len=seq_len)
autoformer_elec = obtain_autoformer(pred_len=pred_len, dataset="ECL")

In [20]:
from utils.metrics import metric

def test(model, test_loader, label_len=48):

        preds = []
        trues = []
        # folder_path = './test_results/' + setting + '/'
        # if not os.path.exists(folder_path):
        #     os.makedirs(folder_path)

        print("max_i == ", len(test_loader))

        model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                # print("data shape: ", batch_x.shape, batch_y.shape, batch_x_mark.shape, batch_y_mark.shape)
                batch_x = batch_x.float()
                batch_y = batch_y.float()

                batch_x_mark = batch_x_mark.float()
                batch_y_mark = batch_y_mark.float()

                outputs = model(batch_x, batch_x_mark, batch_y, batch_y_mark) # assumption model is autoformer

                pred = outputs  # outputs.detach().cpu().numpy()  # .squeeze()
                true = batch_y[:, :-label_len, :]  # remove init for forecasting batch_y.detach().cpu().numpy()  # .squeeze()

                preds.append(pred)
                trues.append(true)

                if i % 10 == 0:
                    print(f"{i}/{len(test_loader)}")

                if i == 100: #== 100
                    break

        preds = np.concatenate(preds, axis=0)
        trues = np.concatenate(trues, axis=0)
        # print('test shape:', preds.shape, trues.shape)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
        print('test shape:', preds.shape, trues.shape)

        # result save
        # folder_path = './results/' + setting + '/'
        # if not os.path.exists(folder_path):
        #     os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe, mase = metric(preds, trues)

        print('mse:{}, mae:{}, mase:{}'.format(mse, mae, mase))
        # f = open("result.txt", 'a')
        # f.write(setting + "  \n")
        # f.write('mse:{}, mae:{}'.format(mse, mae))
        # f.write('\n')
        # f.write('\n')
        # f.close()

        # np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe, mase]))
        # np.save(folder_path + 'pred.npy', preds)
        # np.save(folder_path + 'true.npy', trues)

        return

In [21]:
test(autoformer_elec, dataloader_elec)

max_i ==  17597
0/17597
10/17597
20/17597
30/17597
40/17597
50/17597
60/17597
70/17597
80/17597
90/17597
100/17597
test shape: (101, 720, 321) (101, 720, 321)
mse:0.4629252552986145, mae:0.3926493525505066, mase:1.2603224515914917


In [9]:
# sanity check for MASE function

# import numpy as np

# def MAE(pred, true):
#     return np.mean(np.abs(pred - true))

# def MASE(pred, true):
#     y_naive = np.roll(true, 1, axis=1)  # Naive forecast (shifted by one time step)

#     return MAE(pred, true)/ MAE(y_naive[0,1:,:], true[0,1:,:])

# # Example usage:
# # Assuming you have actual values (y_true), predicted values (y_pred)
# y_true = np.array([[[1,8],[2,9],[3,10],[4,11],[5,12],[6,13],[7,14]]])
# y_pred = np.array([[[0,0],[5,5],[8,8],[5,5],[3,3],[8,8],[4,4]]])

# print(y_true.shape)
# print(y_pred.shape)

# mase_value = MASE(y_pred, y_true)
# print(f"MASE: {mase_value}")


(1, 7, 2)
(1, 7, 2)
MASE: 4.357142857142857


## Second part
In this part we compare the number of parameters for the linear model and the autoformer.

In [4]:
from models import Autoformer
from utils.tools import dotdict
import torch

def obtain_autoformer(pred_len, dataset):
    assert dataset in ["ECL", "Exchange"]
    assert pred_len in [96, 192, 336, 720]

    args = dotdict()
    args.pred_len = pred_len

    if dataset == "ECL":
        args.enc_in = 321
        args.dec_in = 321
        args.c_out = 321
    elif dataset == "Exchange":
        args.enc_in = 8
        args.dec_in = 8
        args.c_out = 8

    args.target = 'OT'
    args.des = 'train'
    args.dropout = 0.05
    args.num_workers = 10
    args.gpu = 0
    args.lradj = 'type1'
    args.devices = '0'
    args.use_gpu = False
    args.use_multi_gpu = False
    args.freq = 'h'
    args.checkpoints = './checkpoints/'
    args.bucket_size = 4
    args.n_hashes = 4
    args.is_trainging = True
    args.data = 'custom'
    args.features = 'M'
    args.seq_len = 96
    args.label_len = 48
    args.e_layers = 2
    args.d_layers = 1
    args.n_heads = 8
    args.factor = 1
    args.d_model = 512
    args.des = 'Exp'
    args.itr = 1
    args.d_ff = 2048
    args.moving_avg = 25
    args.factor = 3
    args.distil = True
    args.output_attention = False
    args.embed = 'timeF'

    autoformer_path = f"/Users/angelavansprang/Documents/PhD/transformers for time series/Autoformer/checkpoints/{dataset}_96_{pred_len}_Autoformer_custom_ftM_sl96_ll48_pl{pred_len}_dm512_nh8_el2_dl1_df2048_fc3_ebtimeF_dtTrue_Exp_0/checkpoint.pth"

    autoformer = Autoformer.Model(args).float()
    autoformer.load_state_dict(torch.load(autoformer_path, map_location=torch.device('cpu')))

    autoformer.eval()

    return autoformer


In [3]:
from models import Linear
from utils.tools import dotdict
import torch

def obtain_linear(pred_len, dataset):
    assert dataset in ["ECL", "Exchange"]
    assert pred_len in [96, 192, 336, 720]

    args = dotdict()
    args.pred_len = pred_len

    args.target = 'OT'
    args.des = 'train'
    args.dropout = 0.05
    args.num_workers = 10
    args.gpu = 0
    args.lradj = 'type1'
    args.devices = '0'
    args.use_gpu = False
    args.use_multi_gpu = False
    args.freq = 'h'
    args.checkpoints = './checkpoints/'
    args.bucket_size = 4
    args.n_hashes = 8
    args.is_trainging = True
    args.data = 'custom'
    args.features = 'M'
    args.seq_len = 336
    args.label_len = 48
    args.factor = 1


    args.d_model = 512
    args.des = 'Exp'
    args.itr = 1
    args.d_ff = 2048
    args.moving_avg = 25
    args.factor = 1
    args.distil = True
    args.output_attention = False
    args.embed = 'timeF'

    linear_path = f"/Users/angelavansprang/Documents/PhD/transformers for time series/Autoformer/checkpoints/{dataset}_{args.seq_len}_{pred_len}_Linear_custom_ftM_sl{args.seq_len}_ll48_pl{pred_len}_dm512_nh8_el2_dl1_df2048_fc1_ebtimeF_dtTrue_Exp_0/checkpoint.pth"

    linear = Linear.Model(args).float()
    linear.load_state_dict(torch.load(linear_path, map_location=torch.device('cpu')))

    linear.eval()

    return linear

In [6]:
def count_parameters(model): 
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [11]:
autoformer_96_electricity = obtain_autoformer(pred_len=720, dataset="Exchange")
linear_96_electricity = obtain_linear(pred_len=720, dataset="Exchange")

In [12]:
print(f"autoformer: {count_parameters(autoformer_96_electricity)}")
print(f"linear: {count_parameters(linear_96_electricity)}")

autoformer: 10541064
linear: 242640


# Third part

In [3]:
from transformers import AutoformerConfig, AutoformerForPrediction

config = AutoformerConfig.from_pretrained("kashif/autoformer-traffic-hourly")
model = AutoformerForPrediction.from_pretrained("kashif/autoformer-traffic-hourly")

Downloading config.json:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/116k [00:00<?, ?B/s]

In [4]:
model

AutoformerForPrediction(
  (model): AutoformerModel(
    (scaler): AutoformerMeanScaler()
    (encoder): AutoformerEncoder(
      (value_embedding): AutoformerValueEmbedding(
        (value_projection): Linear(in_features=47, out_features=16, bias=False)
      )
      (embed_positions): AutoformerSinusoidalPositionalEmbedding(72, 16)
      (layers): ModuleList(
        (0): AutoformerEncoderLayer(
          (self_attn): AutoformerAttention(
            (k_proj): Linear(in_features=16, out_features=16, bias=True)
            (v_proj): Linear(in_features=16, out_features=16, bias=True)
            (q_proj): Linear(in_features=16, out_features=16, bias=True)
            (out_proj): Linear(in_features=16, out_features=16, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=16, out_features=32, bias=True)
          (fc2): Linear(in_features=32, out_featur