In [1]:
################################################################


################################################################
# https://github.com/LiuAoyu1998/STIDGCN

# The .log file contains the model structure
################################################################
# conda create -n STGNN_STIDGCN
# conda activate STGNN_STIDGCN

# conda install python=3.8

# https://pytorch.org/get-started/previous-versions/
# conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.7 -c pytorch -c nvidia


################################################################
# pip install pandas
# pip install scipy


################################################################
# conda install ipykernel
# conda install platformdirs
# pip3 install ipywidgets
# pip3 install --upgrade jupyter_core jupyter_client

# python -m ipykernel install --user --name STGNN_STIDGCN


################################################################
# train.py    cuda:0


################################################################

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import argparse
import math
import numpy as np
import os
import pandas as pd  # 数据分析
import random
import scipy.sparse as sp
import time

import util
from util import *
from ranger21 import Ranger

In [3]:
# Run configuration. The parser is fed an empty argument list below, so inside the
# notebook every option takes the default declared here.
parser = argparse.ArgumentParser()

# Plain options as (flag, type, default, help) rows, registered in this order.
_SIMPLE_OPTIONS = (
    ("--device", str, "cuda:0", ""),
    ("--dataset", str, "PEMS08", "data path"),
    ("--input_dim", int, 3, "number of input_dim"),
    ("--batch_size", int, 64, "batch size"),
    ("--learning_rate", float, 0.001, "learning rate"),
    ("--dropout", float, 0.1, "dropout rate"),
    ("--weight_decay", float, 0.0001, "weight decay rate"),
    ("--epochs", int, 500, ""),
    ("--print_every", int, 50, ""),
)
for _flag, _type, _default, _help in _SIMPLE_OPTIONS:
    parser.add_argument(_flag, type=_type, default=_default, help=_help)

# Output prefix "./logs/<timestamp>-"; the timestamp is taken once, when this cell runs.
_run_stamp = time.strftime("%Y-%m-%d-%H:%M:%S")
parser.add_argument("--save", type=str, default="./logs/" + _run_stamp + "-", help="save path")
parser.add_argument("--expid", type=int, default=1, help="experiment id")
parser.add_argument(
    "--es_patience",
    type=int,
    default=100,
    help="quit if no improvement after this many iterations",
)

# args=[] keeps argparse from reading the Jupyter kernel's own command line.
args = parser.parse_args(args=[])

In [4]:
class GLU(nn.Module):
    """Gated linear unit built from three 1x1 convolutions.

    Computes ``conv3(dropout(conv1(x) * sigmoid(conv2(x))))``; every convolution maps
    ``features`` channels to ``features`` channels, so the output keeps the input shape.

    Args:
        features: number of input and output channels.
        dropout: dropout probability applied to the gated product.
    """

    def __init__(self, features, dropout=0.1):
        super().__init__()
        pointwise = (1, 1)
        self.conv1 = nn.Conv2d(features, features, pointwise)  # value branch
        self.conv2 = nn.Conv2d(features, features, pointwise)  # gate branch
        self.conv3 = nn.Conv2d(features, features, pointwise)  # output projection
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the gated unit to ``x`` and return a tensor of the same shape."""
        value = self.conv1(x)
        gate = torch.sigmoid(self.conv2(x))
        gated = self.dropout(value * gate)
        return self.conv3(gated)

In [5]:
class TemporalEmbedding(nn.Module):
    """Learnable time-of-day and day-of-week embeddings.

    Two lookup tables are held: ``time_day`` with ``time`` rows and ``time_week`` with
    7 rows, each row having ``features`` entries.

    Args:
        time: number of rows in the time-of-day table (slots per day).
        features: embedding width.
    """

    def __init__(self, time, features):
        super().__init__()

        self.time = time  # number of time-of-day slots
        self.time_day = nn.Parameter(torch.empty(time, features))
        nn.init.xavier_uniform_(self.time_day)

        self.time_week = nn.Parameter(torch.empty(7, features))
        nn.init.xavier_uniform_(self.time_week)

    def forward(self, x):
        """Look up and sum both embeddings for every position of ``x``.

        ``x`` is 4-D; entry 1 of its last axis is multiplied by ``self.time`` and
        truncated to a row index of ``time_day``, entry 2 is truncated to a row index
        of ``time_week``.
        NOTE(review): this presumes entry 1 is a day fraction in [0, 1) and entry 2 a
        weekday id in 0..6 -- confirm against the dataset builder.

        Returns:
            Tensor of shape ``(d0, features, d2, d1)`` for an input of shape
            ``(d0, d1, d2, *)``.
        """
        # .long() casts on the tensor's own device. The previous
        # .type(torch.LongTensor) always produced a CPU tensor, forcing a device
        # round trip (and a sync) whenever the model runs on CUDA.
        day_idx = (x[..., 1] * self.time).long()
        time_day = self.time_day[day_idx].transpose(1, 2).contiguous()

        week_idx = x[..., 2].long()
        time_week = self.time_week[week_idx].transpose(1, 2).contiguous()

        tem_emb = time_day + time_week

        # Move the embedding axis right after the leading (batch) axis.
        return tem_emb.permute(0, 3, 1, 2)

In [6]:
class Diffusion_GCN(nn.Module):
    """Graph convolution that propagates features over an adjacency ``diffusion_step`` times.

    The result of every propagation step is kept; all steps are concatenated along the
    channel axis and mixed back to ``channels`` by a 1x1 convolution, then dropout.

    Args:
        channels: number of input/output channels.
        diffusion_step: number of successive propagation steps (k).
        dropout: dropout probability applied to the output.
    """

    def __init__(self, channels=128, diffusion_step=1, dropout=0.1):
        super().__init__()
        self.diffusion_step = diffusion_step  # k
        self.conv = nn.Conv2d(diffusion_step * channels, channels, (1, 1))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, adj):
        """Diffuse ``x`` over ``adj``.

        Args:
            x: 4-D tensor indexed as (b, c, n, t) by the einsum below.
            adj: per-sample adjacency (3-D, indexed (b, n, m)) or a shared
                adjacency (2-D, indexed (n, m)).

        Raises:
            ValueError: if ``adj`` is neither 2-D nor 3-D. Previously this case fell
                through silently and failed later inside ``torch.cat`` on an empty list.
        """
        # The adjacency rank cannot change between steps, so pick the contraction once.
        if adj.dim() == 3:
            equation = "bcnt,bnm->bcmt"
        elif adj.dim() == 2:
            equation = "bcnt,nm->bcmt"
        else:
            raise ValueError(
                "adj must be a 2-D or 3-D tensor, got {} dimensions".format(adj.dim())
            )

        out = []
        for _ in range(self.diffusion_step):
            # Each step propagates the result of the previous one.
            x = torch.einsum(equation, x, adj).contiguous()
            out.append(x)

        x = torch.cat(out, dim=1)  # stack all diffusion orders along channels
        x = self.conv(x)
        return self.dropout(x)

In [7]:
class Graph_Generator(nn.Module):
    """Builds a dynamic, sparsified adjacency matrix from the current features.

    Two candidate adjacencies are computed (one against a learned memory matrix, one
    from feature self-similarity), fused by a ``Linear(2, 1)`` layer, re-normalised
    with softmax and finally sparsified by keeping the largest entries of each row.

    Args:
        channels: channel count of the features passed to ``forward``.
        num_nodes: number of graph nodes (columns of the memory matrix).
        diffusion_step: accepted for signature compatibility; not used here.
        dropout: accepted for signature compatibility; not used here.
    """

    def __init__(self, channels=128, num_nodes=170, diffusion_step=1, dropout=0.1):
        super().__init__()

        # torch.randn is kept (although immediately re-initialised) so the random
        # stream, and therefore seeded runs, stay identical.
        self.memory = nn.Parameter(torch.randn(channels, num_nodes))
        nn.init.xavier_uniform_(self.memory)
        self.fc = nn.Linear(2, 1)

    def forward(self, x):
        """Return the fused adjacency, shape ``(batch, nodes, nodes)``.

        ``x`` is indexed as (b, c, n, t) by the einsum below.
        """
        scale = math.sqrt(x.shape[1])

        # A1: similarity between node features and the learned memory.
        adj_dyn_1 = torch.softmax(
            F.relu(torch.einsum("bcnt, cm->bnm", x, self.memory).contiguous() / scale),
            -1,
        )
        # A2: node-to-node similarity of the features summed over the last axis.
        node_state = x.sum(-1)
        adj_dyn_2 = torch.softmax(
            F.relu(torch.einsum("bcn, bcm->bnm", node_state, node_state).contiguous() / scale),
            -1,
        )

        # Fuse both candidates with a learned linear combination.
        adj_f = torch.stack([adj_dyn_1, adj_dyn_2], dim=-1)
        # squeeze(-1) removes only the fused channel. A bare squeeze() also removed
        # the batch axis when batch size was 1 (and any node axis of size 1).
        adj_f = torch.softmax(self.fc(adj_f).squeeze(-1), -1)

        # Keep the largest 80% of entries in every row and zero out the rest.
        topk_values, topk_indices = torch.topk(adj_f, k=int(adj_f.shape[-1] * 0.8), dim=-1)
        mask = torch.zeros_like(adj_f)
        mask.scatter_(-1, topk_indices, 1)
        return adj_f * mask

In [8]:
class DGCN(nn.Module):
    """Dynamic graph convolution block with a residual connection.

    Projects the input with a 1x1 convolution, generates an adjacency from the
    projected features, diffuses them over that adjacency, scales the result
    element-wise by ``emb`` and adds the original input back.

    Args:
        channels: channel count of the features.
        num_nodes: number of graph nodes.
        diffusion_step: number of diffusion steps for the graph convolution.
        dropout: dropout probability passed to the sub-modules.
        emb: tensor multiplied element-wise with the graph-convolution output.
            ``forward`` multiplies by it unconditionally, so the default ``None``
            is not usable at call time.
    """

    def __init__(self, channels=128, num_nodes=170, diffusion_step=1, dropout=0.1, emb=None):
        super().__init__()
        self.conv = nn.Conv2d(channels, channels, kernel_size=(1, 1))
        self.generator = Graph_Generator(channels, num_nodes, diffusion_step, dropout)
        self.gcn = Diffusion_GCN(channels, diffusion_step, dropout)
        self.emb = emb

    def forward(self, x):
        """Return ``gcn(conv(x), adjacency) * emb + x``."""
        residual = x
        projected = self.conv(x)
        adjacency = self.generator(projected)
        diffused = self.gcn(projected, adjacency)
        return diffused * self.emb + residual

In [9]:
class Splitting(nn.Module):
    """Splits a 4-D tensor along its last axis into even- and odd-indexed slices."""

    def __init__(self):
        super().__init__()

    def even(self, x):
        """Elements at positions 0, 2, 4, ... of the last axis."""
        return x[:, :, :, 0::2]

    def odd(self, x):
        """Elements at positions 1, 3, 5, ... of the last axis."""
        return x[:, :, :, 1::2]

    def forward(self, x):
        """Return the pair ``(even part, odd part)``."""
        even_part = self.even(x)
        odd_part = self.odd(x)
        return (even_part, odd_part)

In [10]:
class IDGCN(nn.Module):
    """Interactive dynamic graph convolution block.

    The input is split into even/odd halves along the last axis. Each half is
    modulated by a (temporal conv -> DGCN -> tanh) transform of the other half, and
    then updated by adding a second transform of the other modulated half.
    One ``DGCN`` instance is shared by all four branches.

    Args:
        device: accepted for signature compatibility; not used by this module.
        channels: channel count of the features.
        diffusion_step: number of diffusion steps for the graph convolution.
        splitting: if True ``forward`` splits its input itself; otherwise the input
            must already be an ``(even, odd)`` pair.
        num_nodes: number of graph nodes.
        dropout: dropout probability.
        emb: embedding tensor forwarded to ``DGCN``.
    """

    def __init__(
        self,
        device,
        channels=64,
        diffusion_step=1,
        splitting=True,
        num_nodes=170,
        dropout=0.2,
        emb=None,
    ):
        super().__init__()

        self.dropout = dropout
        self.num_nodes = num_nodes
        self.splitting = splitting
        self.split = Splitting()

        # The stacks are instantiated in the order 1, 2, 4, 3 -- the order used
        # before this refactor -- so seeded weight initialisation is unchanged.
        conv1 = self._temporal_conv(channels, self.dropout)
        conv2 = self._temporal_conv(channels, self.dropout)
        conv4 = self._temporal_conv(channels, self.dropout)
        conv3 = self._temporal_conv(channels, self.dropout)

        self.conv1 = conv1
        self.conv2 = conv2
        self.conv3 = conv3
        self.conv4 = conv4

        self.dgcn = DGCN(channels, num_nodes, diffusion_step, dropout, emb)

    @staticmethod
    def _temporal_conv(channels, dropout, pad_l=3, pad_r=3, k1=5, k2=3):
        """Build one length-preserving temporal conv stack.

        Replication padding adds ``pad_l + pad_r`` = 6 steps to the last axis; the two
        unpadded convolutions (kernel widths 5 and 3) remove 4 + 2 = 6 again.
        """
        return nn.Sequential(
            nn.ReplicationPad2d((pad_l, pad_r, 0, 0)),
            nn.Conv2d(channels, channels, kernel_size=(1, k1)),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.Dropout(dropout),
            nn.Conv2d(channels, channels, kernel_size=(1, k2)),
            nn.Tanh(),
        )

    def forward(self, x):
        """Return the updated ``(even, odd)`` halves."""
        if self.splitting:
            (x_even, x_odd) = self.split(x)
        else:
            (x_even, x_odd) = x

        # Interaction step: each half is gated by a transform of the other half.
        x1 = self.conv1(x_even)
        x1 = self.dgcn(x1)
        d = x_odd.mul(torch.tanh(x1))

        x2 = self.conv2(x_odd)
        x2 = self.dgcn(x2)
        c = x_even.mul(torch.tanh(x2))

        # Update step: each gated half receives an additive term from the other.
        x3 = self.conv3(c)
        x3 = self.dgcn(x3)
        x_odd_update = d + x3

        x4 = self.conv4(d)
        x4 = self.dgcn(x4)
        x_even_update = c + x4

        return (x_even_update, x_odd_update)

In [11]:
class IDGCN_Tree(nn.Module):
    """Two-level binary tree of ``IDGCN`` blocks.

    The root block splits the input into even/odd halves; each half is split again by
    its own child block. The four leaves are interleaved back into the original order
    and the input is added as a residual.

    Args:
        device: forwarded to every ``IDGCN``.
        channels: channel count of the features.
        diffusion_step: number of diffusion steps for the graph convolutions.
        num_nodes: number of graph nodes.
        dropout: dropout probability.
    """

    def __init__(
        self, device, channels=64, diffusion_step=1, num_nodes=170, dropout=0.1
    ):
        super().__init__()

        # One embedding per IDGCN block. The trailing sizes (6, 3, 3) presumably match
        # the length of each half after splitting a 12-step input -- TODO confirm.
        self.memory1 = nn.Parameter(torch.randn(channels, num_nodes, 6))
        self.memory2 = nn.Parameter(torch.randn(channels, num_nodes, 3))
        self.memory3 = nn.Parameter(torch.randn(channels, num_nodes, 3))

        shared = dict(
            device=device,
            splitting=True,
            channels=channels,
            diffusion_step=diffusion_step,
            num_nodes=num_nodes,
            dropout=dropout,
        )
        self.IDGCN1 = IDGCN(emb=self.memory1, **shared)
        self.IDGCN2 = IDGCN(emb=self.memory2, **shared)
        # Fix: this block used to receive memory2 as well, which left memory3 as a
        # parameter that was allocated but never used.
        self.IDGCN3 = IDGCN(emb=self.memory3, **shared)

    def concat(self, even, odd):
        """Interleave ``even`` and ``odd`` along the last axis.

        Both inputs must have the same shape ``(..., T)``; the result has shape
        ``(..., 2*T)`` with ``even`` at positions 0, 2, 4, ... and ``odd`` at
        positions 1, 3, 5, ...
        """
        # Stacking on a new trailing axis and flattening it into the last axis yields
        # even[0], odd[0], even[1], odd[1], ...
        return torch.stack((even, odd), dim=-1).flatten(start_dim=-2)

    def forward(self, x):
        """Run the tree on ``x`` and return a tensor of the same shape."""
        x_even_update1, x_odd_update1 = self.IDGCN1(x)
        x_even_update2, x_odd_update2 = self.IDGCN2(x_even_update1)
        x_even_update3, x_odd_update3 = self.IDGCN3(x_odd_update1)

        # Undo the two split levels, innermost first.
        concat1 = self.concat(x_even_update2, x_odd_update2)
        concat2 = self.concat(x_even_update3, x_odd_update3)
        concat0 = self.concat(concat1, concat2)

        return concat0 + x

In [12]:
class STIDGCN(nn.Module):
    """Full forecasting model: data embedding -> IDGCN tree -> GLU -> regression head.

    Args:
        device: stored on the module and forwarded to the tree.
        input_dim: number of input feature channels.
        num_nodes: number of graph nodes.
        channels: width of the value embedding and of the temporal embedding; the
            two are concatenated, so the tree runs on ``2 * channels``.
        granularity: number of time-of-day slots for the temporal embedding.
        dropout: dropout probability.
    """

    def __init__(
        self, device, input_dim, num_nodes, channels, granularity, dropout=0.1
    ):
        super().__init__()

        self.device = device
        self.num_nodes = num_nodes
        self.output_len = 12  # forecast horizon, also the width of the regression kernel
        hidden = channels * 2

        self.Temb = TemporalEmbedding(granularity, channels)

        self.start_conv = nn.Conv2d(
            in_channels=input_dim, out_channels=channels, kernel_size=(1, 1)
        )

        self.tree = IDGCN_Tree(
            device=device,
            channels=hidden,
            diffusion_step=1,
            num_nodes=self.num_nodes,
            dropout=dropout,
        )

        self.glu = GLU(hidden, dropout)

        self.regression_layer = nn.Conv2d(
            hidden, self.output_len, kernel_size=(1, self.output_len)
        )

    def param_num(self):
        """Total number of elements over all parameters."""
        return sum(p.nelement() for p in self.parameters())

    def forward(self, input):
        """Return the prediction tensor for ``input``.

        ``input`` is 4-D with ``input_dim`` channels on axis 1 (required by
        ``start_conv``).
        """
        # Encoder: temporal embedding concatenated with the value embedding.
        time_emb = self.Temb(input.permute(0, 3, 2, 1))
        value_emb = self.start_conv(input)
        hidden = torch.cat((value_emb, time_emb), dim=1)

        # IDGCN tree
        hidden = self.tree(hidden)

        # Decoder: residual GLU followed by the regression convolution.
        decoded = self.glu(hidden) + hidden
        return self.regression_layer(F.relu(decoded))

In [13]:
# 
def _select_valid(pred, true, mask_value):
    """Keep only positions where ``true > mask_value``; no-op if ``mask_value`` is None.

    With a mask, both returned tensors are flattened to 1-D by ``masked_select``.
    """
    if mask_value is None:
        return pred, true
    mask = torch.gt(true, mask_value)
    return torch.masked_select(pred, mask), torch.masked_select(true, mask)


def MAE_torch(pred, true, mask_value=None):
    """Mean absolute error, optionally restricted to ``true > mask_value``."""
    pred, true = _select_valid(pred, true, mask_value)
    return torch.mean(torch.abs(true - pred))


def MAPE_torch(pred, true, mask_value=None):
    """Mean absolute percentage error (as a fraction, not multiplied by 100).

    Divides by ``true``, so without a mask any zero target yields inf/nan.
    """
    pred, true = _select_valid(pred, true, mask_value)
    return torch.mean(torch.abs(torch.div((true - pred), true)))


def RMSE_torch(pred, true, mask_value=None):
    """Root mean squared error, optionally restricted to ``true > mask_value``."""
    pred, true = _select_valid(pred, true, mask_value)
    return torch.sqrt(torch.mean((pred - true) ** 2))


def WMAPE_torch(pred, true, mask_value=None):
    """Weighted MAPE: ``sum(|pred - true|) / sum(|true|)``."""
    pred, true = _select_valid(pred, true, mask_value)
    loss = torch.sum(torch.abs(pred - true)) / torch.sum(torch.abs(true))

    return loss


def metric(pred, real):
    """Return ``(mae, mape, rmse, wmape)`` as Python floats.

    All four metrics ignore positions where ``real <= 0.0``.
    """
    mae   = MAE_torch  (pred, real, 0.0).item()
    mape  = MAPE_torch (pred, real, 0.0).item()
    rmse  = RMSE_torch (pred, real, 0.0).item()
    wmape = WMAPE_torch(pred, real, 0.0).item()

    return mae, mape, rmse, wmape

In [14]:
class trainer :
    """Bundles the STIDGCN model, its optimizer and the train/eval step logic.

    Args:
        scaler: object with ``inverse_transform``; applied to the model output before
            the loss and metrics are computed.
        input_dim, num_nodes, channels, dropout, granularity: forwarded to ``STIDGCN``.
        lrate: learning rate for the optimizer.
        wdecay: weight decay for the optimizer.
        device: device the model is moved to.
    """

    def __init__(
        self, 
        scaler, 
        input_dim, 
        num_nodes, 
        channels, 
        dropout, 
        lrate, 
        wdecay, 
        device, 
        granularity, 
    ) :
        self.model = STIDGCN(device, input_dim, num_nodes, channels, granularity, dropout)
        self.model.to(device)
        self.optimizer = Ranger(self.model.parameters(), lr=lrate, weight_decay=wdecay)
        # self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay)
        self.loss = util.MAE_torch
        self.scaler = scaler
        self.clip = 5  # max gradient norm; None disables clipping
        print("The number of parameters: {}".format(self.model.param_num()))
        print(self.model)


    def _metrics(self, predict, real) :
        """Return (mape, rmse, wmape) as floats, ignoring targets <= 0.0."""
        mape  = util.MAPE_torch (predict, real, 0.0).item()
        rmse  = util.RMSE_torch (predict, real, 0.0).item()
        wmape = util.WMAPE_torch(predict, real, 0.0).item()
        return mape, rmse, wmape


    def train(self, input, real_val) :
        """Run one optimisation step on a batch.

        Returns:
            ``(loss, mape, rmse, wmape)`` as Python floats; the loss is the masked MAE
            on inverse-transformed predictions.
        """
        self.model.train()
        self.optimizer.zero_grad()
        output = self.model(input)
        output = output.transpose(1, 3)
        real = torch.unsqueeze(real_val, dim=1)
        predict = self.scaler.inverse_transform(output)
        loss = self.loss(predict, real, 0.0)
        loss.backward()

        if self.clip is not None :
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        self.optimizer.step()

        # Reporting only: no gradient bookkeeping is needed for the extra metrics.
        with torch.no_grad() :
            mape, rmse, wmape = self._metrics(predict, real)

        return loss.item(), mape, rmse, wmape


    def eval(self, input, real_val) :
        """Evaluate a batch without updating the model.

        Returns:
            ``(loss, mape, rmse, wmape)`` as Python floats.
        """
        self.model.eval()
        # Evaluation never back-propagates; without no_grad every validation batch
        # built an autograd graph and held its activations for nothing.
        with torch.no_grad() :
            output = self.model(input)
            output = output.transpose(1, 3)
            real = torch.unsqueeze(real_val, dim=1)
            predict = self.scaler.inverse_transform(output)
            loss = self.loss(predict, real, 0.0)
            mape, rmse, wmape = self._metrics(predict, real)

        return loss.item(), mape, rmse, wmape



In [15]:
class DataLoader(object):
    """Minimal batch iterator over paired arrays ``xs`` / ``ys``.

    Args:
        xs, ys: arrays indexed by sample along axis 0.
        batch_size: number of samples per batch.
        pad_with_last_sample: if True, the last sample is repeated until the sample
            count is a multiple of ``batch_size``. If False, only full batches are
            served (``num_batch`` is a floor division), so trailing samples that do
            not fill a batch are never yielded.
    """

    def __init__(self, xs, ys, batch_size, pad_with_last_sample=True):
        self.batch_size = batch_size
        self.current_ind = 0

        if pad_with_last_sample:
            # Number of copies of the final sample needed to complete the last batch
            # (0 when the data already divides evenly).
            leftover = len(xs) % batch_size
            num_padding = (batch_size - leftover) % batch_size
            x_tail = np.repeat(xs[-1:], num_padding, axis=0)
            y_tail = np.repeat(ys[-1:], num_padding, axis=0)  # targets padded identically
            xs = np.concatenate([xs, x_tail], axis=0)
            ys = np.concatenate([ys, y_tail], axis=0)

        self.xs = xs
        self.ys = ys
        self.size = len(xs)
        self.num_batch = int(self.size // self.batch_size)

    def shuffle(self):
        """Reorder ``xs`` and ``ys`` with one shared random permutation.

        The stored arrays themselves are replaced, not just the iteration order.
        """
        order = np.random.permutation(self.size)
        self.xs, self.ys = self.xs[order], self.ys[order]

    def get_iterator(self):
        """Return a generator yielding ``(x_batch, y_batch)`` from the first batch on.

        The position is kept in ``self.current_ind``, which is reset here.
        """
        self.current_ind = 0  # batch index

        def _batches():
            while self.current_ind < self.num_batch:
                lo = self.batch_size * self.current_ind
                hi = min(self.size, self.batch_size * (self.current_ind + 1))
                yield (self.xs[lo:hi, ...], self.ys[lo:hi, ...])
                self.current_ind += 1

        return _batches()

In [16]:
class StandardScaler:
    """Z-score normalisation with a fixed mean and standard deviation."""

    def __init__(self, mean, std):
        self.mean = mean  # value subtracted by transform
        self.std = std    # value transform divides by

    def transform(self, data):
        """Return ``(data - mean) / std``."""
        centered = data - self.mean
        return centered / self.std

    def inverse_transform(self, data):
        """Return ``data * std + mean`` (the inverse of ``transform``)."""
        rescaled = data * self.std
        return rescaled + self.mean

In [17]:
def seed_it(seed) :
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA) and requests
    deterministic cuDNN kernels.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    # Fix: the variable Python reads is PYTHONHASHSEED ("PYTHONSEED" is ignored).
    # It is read at interpreter start-up, so this only affects child processes.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning may pick different kernels from run to run; keep it off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = True
    torch.manual_seed(seed)

In [18]:
seed_it(6666)

# [N, D, T] = 170*96*288
dataset  = "PEMS08"
data_dir = "data//" + dataset
num_nodes = 170
channels = 96
granularity = 288

device = torch.device(args.device)

data = {}

for category in ["train", "val", "test"] :
    # A .npz archive bundles several NumPy arrays. np.load returns a file-backed
    # object for it, so the context manager is needed to release the file handle.
    with np.load(os.path.join(data_dir, category+".npz")) as cat_data :
        data["x_" + category] = cat_data["x"]
        # print(data["x_" + category].shape)  # (10699/3567/3567, 12, 170, 3)
        data["y_" + category] = cat_data["y"]
        # print(data["y_" + category].shape)  # (10699/3567/3567, 12, 170, 1)

# Normalisation statistics come from channel 0 of the training inputs only.
scaler = StandardScaler(
        # https://blog.csdn.net/g944468183/article/details/124473886
        mean = data["x_train"][..., 0].mean(),
        std  = data["x_train"][..., 0].std()
    )
# print(data["x_train"][..., 0].shape)  # (10699, 12, 170)
# print(data["x_train"][..., 0].mean())  # 229.85893440655073
# print(data["x_train"][..., 0].std())   # 145.62268077938813

# Scale channel 0 of the inputs of all three splits in place; targets stay unscaled.
for category in ["train", "val", "test"] :
    data["x_" + category][..., 0] = scaler.transform(data["x_" + category][..., 0])

# Shuffle the training samples once along the first axis (x and y with the same order).
random_train = torch.randperm(data["x_train"].shape[0])
# print(int(data["x_train"].shape[0]))  # 10699
# print(random_train)
data["x_train"] = data["x_train"][random_train, ...]
data["y_train"] = data["y_train"][random_train, ...]

# Same for the validation samples.
random_val = torch.randperm(data["x_val"].shape[0])
data["x_val"] = data["x_val"][random_val, ...]
data["y_val"] = data["y_val"][random_val, ...]

# The test split is deliberately left in its original order.
# random_test = torch.arange(int(data['x_test'].shape[0]))
# random_test = torch.randperm(random_test.size(0))
# data['x_test'] = data['x_test'][random_test, ...]
# data['y_test'] = data['y_test'][random_test, ...]

In [19]:
# `data` is the plain dict assembled by this notebook; the loaders and the scaler are
# stored in it next to the raw arrays.
# print(data)

train_batch_size = valid_batch_size = test_batch_size = args.batch_size

# One DataLoader per split, stored as data["<split>_loader"].
for category, loader_batch_size in (
    ("train", train_batch_size),
    ("val", valid_batch_size),
    ("test", test_batch_size),
):
    data[category + "_loader"] = DataLoader(
        data["x_" + category], data["y_" + category], loader_batch_size
    )
data["scaler"] = scaler

# print(data)  # printing the whole dict dumps every array

In [20]:
# Best-so-far trackers, initialised to large sentinels so the first result always wins.
loss     = 9999999
test_log = 999999
epochs_since_best_mae = 0
# Output directory for this run: "<save prefix><dataset>/".
path = args.save + args.dataset + "/"

# Per-epoch histories.
his_loss    = []
val_time    = []
train_time  = []
result      = []
test_result = []

# print(args)

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(path, exist_ok=True)

# Builds the model and optimizer, and prints the parameter count and model structure.
engine = trainer(
    scaler, 
    args.input_dim, 
    num_nodes, 
    channels, 
    args.dropout, 
    args.learning_rate, 
    args.weight_decay, 
    device, 
    granularity
)

print("start training...", flush=True)

RuntimeError: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero.

In [None]:




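# NOTE: the training / evaluation loop below is disabled (commented out).
# Before re-enabling it:
#   * it reads `dataloader[...]`, but this notebook stores the splits and loaders in
#     `data[...]` -- rename one of them, otherwise the loop fails with a NameError;
#   * the branches `if i < 100` / `elif i > 100` skip epoch 100 entirely: an
#     improvement at that epoch is neither saved nor counted.
# for i in range(1, args.epochs + 1) :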
#     train_loss  = []
#     train_mape  = []
#     train_rmse  = []
#     train_wmape = []

#     t1 = time.time()

#     # dataloader['train_loader'].shuffle()
#     for iter, (x, y) in enumerate(dataloader["train_loader"].get_iterator()) :  # 返回批数据

#         trainx = torch.Tensor(x).to(device)
#         trainx = trainx.transpose(1, 3)
#         trainy = torch.Tensor(y).to(device)
#         trainy = trainy.transpose(1, 3)

#         metrics = engine.train(trainx, trainy[:, 0, :, :])
#         train_loss.append(metrics[0])
#         train_mape.append(metrics[1])
#         train_rmse.append(metrics[2])
#         train_wmape.append(metrics[3])

#         if iter % args.print_every == 0 :
#             log = "Iter: {:03d}, \
#                    Train Loss: {:.4f}, \
#                    Train RMSE: {:.4f}, \
#                    Train MAPE: {:.4f}, \
#                    Train WMAPE: {:.4f}"
#             print(
#                 log.format(
#                     iter, 
#                     train_loss[-1], 
#                     train_rmse[-1], 
#                     train_mape[-1], 
#                     train_wmape[-1], 
#                 ), 
#                 flush=True, 
#             )

#     t2 = time.time()
#     log = "Epoch: {:03d}, Training Time: {:.4f} secs"
#     print(log.format(i, (t2 - t1)))
#     train_time.append(t2 - t1)

#     valid_loss  = []
#     valid_mape  = []
#     valid_wmape = []
#     valid_rmse  = []

#     # 
#     s1 = time.time()

#     for iter, (x, y) in enumerate(dataloader["val_loader"].get_iterator()) :

#         testx = torch.Tensor(x).to(device)
#         testx = testx.transpose(1, 3)
#         testy = torch.Tensor(y).to(device)
#         testy = testy.transpose(1, 3)

#         metrics = engine.eval(testx, testy[:, 0, :, :])
#         valid_loss .append(metrics[0])
#         valid_mape .append(metrics[1])
#         valid_rmse .append(metrics[2])
#         valid_wmape.append(metrics[3])

#     s2 = time.time()

#     log = "Epoch: {:03d}, Inference Time: {:.4f} secs"
#     print(log.format(i, (s2 - s1)))
#     val_time.append(s2 - s1)

#     mtrain_loss  = np.mean(train_loss)
#     mtrain_mape  = np.mean(train_mape)
#     mtrain_wmape = np.mean(train_wmape)
#     mtrain_rmse  = np.mean(train_rmse)

#     mvalid_loss  = np.mean(valid_loss)
#     mvalid_mape  = np.mean(valid_mape)
#     mvalid_wmape = np.mean(valid_wmape)
#     mvalid_rmse  = np.mean(valid_rmse)

#     his_loss.append(mvalid_loss)
#     train_m = dict(
#         train_loss =np.mean(train_loss), 
#         train_rmse =np.mean(train_rmse), 
#         train_mape =np.mean(train_mape), 
#         train_wmape=np.mean(train_wmape), 

#         valid_loss =np.mean(valid_loss), 
#         valid_rmse =np.mean(valid_rmse), 
#         valid_mape =np.mean(valid_mape), 
#         valid_wmape=np.mean(valid_wmape), 
#     )
#     train_m = pd.Series(train_m)
#     result.append(train_m)

#     log = "Epoch: {:03d}, \
#            Train Loss: {:.4f}, \
#            Train RMSE: {:.4f}, \
#            Train MAPE: {:.4f}, \
#            Train WMAPE: {:.4f}, "
#     print(
#         log.format(i, mtrain_loss, mtrain_rmse, mtrain_mape, mtrain_wmape), 
#         flush=True, 
#     )
#     log = "Epoch: {:03d}, \
#            Valid Loss: {:.4f}, \
#            Valid RMSE: {:.4f}, \
#            Valid MAPE: {:.4f}, \
#            Valid WMAPE: {:.4f}"
#     print(
#         log.format(i, mvalid_loss, mvalid_rmse, mvalid_mape, mvalid_wmape), 
#         flush=True, 
#     )

#     if mvalid_loss < loss :
#         print("### Update tasks appear ###")

#         if i < 100 :
#             # It is not necessary to print the results of the test set 
#             # when epoch is less than 100, because the model has not yet converged.
#             loss = mvalid_loss
#             torch.save(engine.model.state_dict(), path + "best_model.pth")
#             bestid = i
#             epochs_since_best_mae = 0
#             print("Updating! Valid Loss:", mvalid_loss, end=", ")
#             print("epoch: ", i)

#         elif i > 100 :
#             outputs = []
#             realy = torch.Tensor(dataloader["y_test"]).to(device)
#             realy = realy.transpose(1, 3)[:, 0, :, :]

#             for iter, (x, y) in enumerate(dataloader["test_loader"].get_iterator()) :
#                 testx = torch.Tensor(x).to(device)
#                 testx = testx.transpose(1, 3)
#                 with torch.no_grad() :
#                     preds = engine.model(testx).transpose(1, 3)
#                 outputs.append(preds.squeeze())

#             yhat = torch.cat(outputs, dim=0)
#             yhat = yhat[: realy.size(0), ...]

#             amae   = []
#             amape  = []
#             awmape = []
#             armse  = []
#             test_m = []

#             for j in range(12) :

#                 pred = scaler.inverse_transform(yhat[:, :, j])
#                 real = realy[:, :, j]
#                 metrics = util.metric(pred, real)
#                 log = "Evaluate best model on test data for horizon {:d}, \
#                     Test MAE  : {:.4f}, \
#                     Test RMSE : {:.4f}, \
#                     Test MAPE : {:.4f}, \
#                     Test WMAPE: {:.4f}"
#                 print(
#                     log.format(
#                         j + 1, metrics[0], metrics[2], metrics[1], metrics[3]
#                     )
#                 )

#                 test_m = dict(
#                     test_loss =np.mean(metrics[0]), 
#                     test_rmse =np.mean(metrics[2]), 
#                     test_mape =np.mean(metrics[1]), 
#                     test_wmape=np.mean(metrics[3]), 
#                 )
#                 test_m = pd.Series(test_m)

#                 amae  .append(metrics[0])
#                 amape .append(metrics[1])
#                 armse .append(metrics[2])
#                 awmape.append(metrics[3])

#             log = "On average over 12 horizons, \
#                    Test MAE  : {:.4f}, \
#                    Test RMSE : {:.4f}, \
#                    Test MAPE : {:.4f}, \
#                    Test WMAPE: {:.4f}"
#             print(
#                 log.format(
#                     np.mean(amae), np.mean(armse), np.mean(amape), np.mean(awmape)
#                 )
#             )

#             if np.mean(amae) < test_log :
#                 test_log = np.mean(amae)
#                 loss = mvalid_loss
#                 torch.save(engine.model.state_dict(), path + "best_model.pth")
#                 epochs_since_best_mae = 0
#                 print("Test low! Updating! Test Loss :", np.mean(amae), end=", ")
#                 print("Test low! Updating! Valid Loss:", mvalid_loss  , end=", ")
#                 bestid = i
#                 print("epoch: ", i)
#             else :
#                 epochs_since_best_mae += 1
#                 print("No update")

#     else :
#         epochs_since_best_mae += 1
#         print("No update")

#     train_csv = pd.DataFrame(result)
#     train_csv.round(8).to_csv(f"{path}/train.csv")
#     if epochs_since_best_mae >= args.es_patience and i >= 300 :
#         break

# print("Average Training Time : {:.4f} secs/epoch".format(np.mean(train_time)))
# print("Average Inference Time: {:.4f} secs".format(np.mean(val_time)))

# print("Training ends")
# print("The epoch of the best result：", bestid)
# print("The valid loss of the best model", str(round(his_loss[bestid - 1], 4)))

# engine.model.load_state_dict(torch.load(path + "best_model.pth"))
# outputs = []
# realy = torch.Tensor(dataloader["y_test"]).to(device)
# realy = realy.transpose(1, 3)[:, 0, :, :]

# for iter, (x, y) in enumerate(dataloader["test_loader"].get_iterator()) :
#     testx = torch.Tensor(x).to(device)
#     testx = testx.transpose(1, 3)
#     with torch.no_grad() :
#         preds = engine.model(testx).transpose(1, 3)
#     outputs.append(preds.squeeze())

# yhat = torch.cat(outputs, dim=0)
# yhat = yhat[: realy.size(0), ...]

# amae   = []
# amape  = []
# armse  = []
# awmape = []

# test_m = []

# for i in range(12) :
#     pred = scaler.inverse_transform(yhat[:, :, i])
#     real = realy[:, :, i]
#     metrics = util.metric(pred, real)
#     log = "Evaluate best model on test data for horizon {:d}, \
#            Test MAE: {:.4f}, \
#            Test RMSE: {:.4f}, \
#            Test MAPE: {:.4f}, \
#            Test WMAPE: {:.4f}"
#     print(log.format(i + 1, metrics[0], metrics[2], metrics[1], metrics[3]))

#     test_m = dict(
#         test_loss =np.mean(metrics[0]), 
#         test_rmse =np.mean(metrics[2]), 
#         test_mape =np.mean(metrics[1]), 
#         test_wmape=np.mean(metrics[3]), 
#     )
#     test_m = pd.Series(test_m)
#     test_result.append(test_m)

#     amae  .append(metrics[0])
#     amape .append(metrics[1])
#     armse .append(metrics[2])
#     awmape.append(metrics[3])

# log = "On average over 12 horizons, \
#        Test MAE: {:.4f}, \
#        Test RMSE: {:.4f}, \
#        Test MAPE: {:.4f}, \
#        Test WMAPE: {:.4f}"
# print(log.format(np.mean(amae), np.mean(armse), np.mean(amape), np.mean(awmape)))

# test_m = dict(
#     test_loss =np.mean(amae), 
#     test_rmse =np.mean(armse), 
#     test_mape =np.mean(amape), 
#     test_wmape=np.mean(awmape), 
# )
# test_m = pd.Series(test_m)
# test_result.append(test_m)

# test_csv = pd.DataFrame(test_result)
# test_csv.round(8).to_csv(f"{path}/test.csv")