## 模型架構 

### Lib

In [None]:

# Util #
import pickle
import numpy as np
import os
import scipy.sparse as sp
import torch
from scipy.sparse import linalg
from torch.autograd import Variable

from sklearn.preprocessing import MinMaxScaler

# Layer #

import torch
import torch.nn as nn
from torch.nn import init
import numbers
import torch.nn.functional as F

# Model #
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import sys

# Trainer #
import torch.optim as optim
import math

# Main #
import torch
import numpy as np
import argparse
import time
import matplotlib.pyplot as plt


from torch.utils.tensorboard import SummaryWriter

### Utils

In [None]:

class DataLoaderM(object):
    """Mini-batch loader over six arrays that are kept aligned along axis 0.

    The arrays are the main input ``xs``, the targets ``ys`` and four
    auxiliary inputs ``xs_1`` .. ``xs_4`` (labelled MA6, MA36, GA6 and GA36 in
    the original comments). Padding, shuffling and batching always apply the
    same indices to all six arrays.
    """

    # Attribute names of the six aligned arrays, in the order they are yielded.
    _FIELDS = ("xs", "ys", "xs_1", "xs_2", "xs_3", "xs_4")

    def __init__(self, xs, ys,xs_1,xs_2,xs_3,xs_4, batch_size, pad_with_last_sample=True):
        """
        :param xs: main inputs, one sample per index along axis 0.
        :param ys: targets aligned with ``xs``.
        :param xs_1: first auxiliary input array.
        :param xs_2: second auxiliary input array.
        :param xs_3: third auxiliary input array.
        :param xs_4: fourth auxiliary input array.
        :param batch_size: number of samples per batch.
        :param pad_with_last_sample: when true, repeat the last sample of every
            array until ``len(xs)`` is divisible by ``batch_size``. When false,
            a trailing partial batch is never yielded.
        """
        self.batch_size = batch_size
        self.current_ind = 0

        arrays = [xs, ys, xs_1, xs_2, xs_3, xs_4]
        if pad_with_last_sample:
            # The padding count is derived from ``xs`` only and reused for
            # every array.
            num_padding = (batch_size - (len(xs) % batch_size)) % batch_size
            arrays = [
                np.concatenate(
                    [arr, np.repeat(arr[-1:], num_padding, axis=0)], axis=0
                )
                for arr in arrays
            ]

        self.size = len(arrays[0])
        self.num_batch = int(self.size // self.batch_size)
        for field, arr in zip(self._FIELDS, arrays):
            setattr(self, field, arr)

    def shuffle(self):
        """Reorder all six arrays in place with one shared random permutation."""
        order = np.random.permutation(self.size)
        # Index everything first so a failure leaves the loader untouched.
        reordered = [getattr(self, field)[order] for field in self._FIELDS]
        for field, arr in zip(self._FIELDS, reordered):
            setattr(self, field, arr)

    def get_iterator(self):
        """Return a generator yielding ``(x, y, x_1, x_2, x_3, x_4)`` batches.

        Progress is tracked in ``self.current_ind``, which is reset to 0 here
        and advanced after each batch has been consumed.
        """
        self.current_ind = 0

        def _batches():
            while self.current_ind < self.num_batch:
                first = self.batch_size * self.current_ind
                last = min(self.size, first + self.batch_size)
                window = slice(first, last)
                # Batches are produced lazily, one slice per request.
                yield tuple(
                    getattr(self, field)[window, ...] for field in self._FIELDS
                )
                self.current_ind += 1

        return _batches()

class StandardScaler():
    """
    Z-score scaler built from a precomputed mean and standard deviation.
    """

    def __init__(self, mean, std):
        # Statistics are supplied by the caller; nothing is fitted here.
        self.mean, self.std = mean, std

    def transform(self, data):
        """Return ``data`` shifted by the mean and divided by the std."""
        centered = data - self.mean
        return centered / self.std

    def inverse_transform(self, data):
        """Undo ``transform``: multiply by the std, then add the mean back."""
        rescaled = data * self.std
        return rescaled + self.mean
# NOTE: the triple-quoted string below is disabled code, not documentation.
# It holds a min-max variant of the scaler (same class name, constructed from
# max/min instead of mean/std). As a bare string expression it defines nothing
# at runtime.
'''

class StandardScaler():
    """
    Standard the input
    """
    def __init__(self, max, min):
        self.max = max
        self.min = min
    def transform(self, data):
        return (data - self.min) / (self.max - self.min)
    def inverse_transform(self, data):
        return (data * (self.max - self.min) ) + self.min
'''

def asym_adj(adj):
    """Asymmetrically (row-) normalize an adjacency matrix: ``D^-1 A``.

    Each row is divided by its row sum. Rows whose sum is zero are left as
    all-zero rows instead of producing ``inf``/``nan``.

    :param adj: adjacency matrix; anything ``scipy.sparse.coo_matrix`` accepts.
    :return: dense ``numpy.matrix`` of dtype ``float32``.
    """
    adj = sp.coo_matrix(adj)
    rowsum = np.asarray(adj.sum(1)).flatten()
    # NumPy refuses negative integer powers of integer arrays, so integer
    # adjacency input must be promoted before taking the reciprocal.
    if not np.issubdtype(rowsum.dtype, np.floating):
        rowsum = rowsum.astype(np.float64)
    # Zero-degree rows legitimately divide by zero here; the resulting inf is
    # replaced just below, so the warning would only be noise.
    with np.errstate(divide="ignore"):
        d_inv = np.power(rowsum, -1).flatten()
    d_inv[np.isinf(d_inv)] = 0.
    d_mat = sp.diags(d_inv)
    return d_mat.dot(adj).astype(np.float32).todense()


def load_pickle(pickle_file):
    """Load and return the object stored in ``pickle_file``.

    A ``UnicodeDecodeError`` on the first attempt triggers a second attempt
    with ``encoding='latin1'``. Any other failure is printed and re-raised.

    NOTE: unpickling executes arbitrary code; only use on trusted files.
    """
    def _read(**load_kwargs):
        with open(pickle_file, 'rb') as handle:
            return pickle.load(handle, **load_kwargs)

    try:
        return _read()
    except UnicodeDecodeError:
        return _read(encoding='latin1')
    except Exception as e:
        print('Unable to load data ', pickle_file, ':', e)
        raise

def load_adj(pkl_filename, adjtype):
    """Load a pickled adjacency bundle and build the requested support matrices.

    The pickle is expected to unpack into
    ``(sensor_ids, sensor_id_to_ind, adj_mx)``.

    :param pkl_filename: path handed to ``load_pickle``.
    :param adjtype: one of ``"scalap"``, ``"normlap"``, ``"symnadj"``,
        ``"transition"``, ``"doubletransition"`` or ``"identity"``.
    :return: ``(sensor_ids, sensor_id_to_ind, adj)`` where ``adj`` is a list
        of one matrix, or two for ``"doubletransition"``.
    :raises AssertionError: if ``adjtype`` is not one of the values above.
    """
    sensor_ids, sensor_id_to_ind, adj_mx = load_pickle(pkl_filename)

    print('# 全部L.A.的sensor ID(sensor_ids):\n',sensor_ids)
    print('# 將sensor ID對應index(sensor_id_to_ind):\n',sensor_id_to_ind)

    if adjtype == "scalap":
        adj = [calculate_scaled_laplacian(adj_mx)]
    elif adjtype == "normlap":
        adj = [calculate_normalized_laplacian(adj_mx).astype(np.float32).todense()]
    elif adjtype == "symnadj":
        adj = [sym_adj(adj_mx)]
    elif adjtype == "transition":
        adj = [asym_adj(adj_mx)]
    elif adjtype == "doubletransition":
        # Forward transition matrix, then the backward one (from the transpose).
        adj = [asym_adj(adj_mx), asym_adj(np.transpose(adj_mx))]
    elif adjtype == "identity":
        adj = [np.diag(np.ones(adj_mx.shape[0])).astype(np.float32)]
    else:
        # Raised explicitly rather than via `assert`, so the check survives
        # `python -O`. The exception type and message are unchanged.
        raise AssertionError("adj type not defined")

    print('# Double transition Transition matrix of Eq 4:\n',adj)
    return sensor_ids, sensor_id_to_ind, adj

def load_dataset(dataset_dir, batch_size, valid_batch_size= None, test_batch_size=None):
    """Load the train/val/test splits, normalize feature 0 and wrap them in loaders.

    Reads ``train.npz``, ``val.npz`` and ``test.npz`` from ``dataset_dir``;
    each file must provide arrays under the keys ``'x'`` and ``'y'``.

    :param dataset_dir: directory containing the three ``.npz`` files.
    :param batch_size: batch size for the training loader.
    :param valid_batch_size: batch size for the validation loader.
    :param test_batch_size: batch size for the test loader.
    :return: dict with ``x_*``/``y_*`` arrays, ``*_loader`` entries and ``scaler``.

    NOTE(review): ``args`` is not a parameter of this function; it has to
    exist in an enclosing/global scope that is not visible here.
    NOTE(review): ``DataLoaderM.__init__`` as defined in this file requires
    ``xs, ys, xs_1, xs_2, xs_3, xs_4, batch_size``; the three-argument calls
    below do not match that signature -- confirm which definition is intended.
    NOTE(review): the ``None`` defaults for the validation/test batch sizes are
    forwarded unchanged -- confirm callers always pass explicit values.
    """
    data = {}
    for category in ['train', 'val', 'test']:
        cat_data = np.load(os.path.join(dataset_dir, category + '.npz'))
        data['x_' + category] = cat_data['x']
        data['y_' + category] = cat_data['y']

        # Optional debug dump of each split's shape and first sample.
        if args.log_print:
            print("# category:", category)
            print('x:',data['x_' + category].shape, data['x_' + category][0] )
            print('y:',data['y_' + category].shape, data['y_' + category][0] )
    
    # Mean/std come from the training split only and are reused for val/test.
    scaler = StandardScaler(mean=data['x_train'][..., 0].mean(), std=data['x_train'][..., 0].std())
    # Normalize feature index 0 of every split in place; other features are left as-is.
    for category in ['train', 'val', 'test']:
        data['x_' + category][..., 0] = scaler.transform(data['x_' + category][..., 0])

    

    data['train_loader'] = DataLoaderM(data['x_train'], data['y_train'], batch_size)
    data['val_loader'] = DataLoaderM(data['x_val'], data['y_val'], valid_batch_size)
    data['test_loader'] = DataLoaderM(data['x_test'], data['y_test'], test_batch_size)
    data['scaler'] = scaler
    return data



def masked_mse(preds, labels, null_val=np.nan):
    """Mean squared error that ignores entries whose label equals ``null_val``.

    Valid entries are re-weighted by ``1 / mean(mask)`` so the mean over all
    elements equals the mean over valid elements. NaNs arising in the weights
    or in the loss are replaced by zero.

    :param preds: predicted tensor.
    :param labels: ground-truth tensor.
    :param null_val: label value marking missing data (NaN by default).
    :return: scalar tensor.
    """
    valid = ~torch.isnan(labels) if np.isnan(null_val) else (labels != null_val)
    weights = valid.float()
    weights = weights / weights.mean()
    # If every entry is masked the division above is 0/0; treat it as zero.
    weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)
    weighted = (preds - labels) ** 2 * weights
    weighted = torch.where(torch.isnan(weighted), torch.zeros_like(weighted), weighted)
    return weighted.mean()

def masked_rmse(preds, labels, null_val=np.nan):
    """Return the square root of ``masked_mse`` for the same arguments."""
    mse = masked_mse(preds=preds, labels=labels, null_val=null_val)
    return torch.sqrt(mse)


def masked_mae(preds, labels, null_val=np.nan):
    """Mean absolute error that ignores entries whose label equals ``null_val``.

    Valid entries are re-weighted by ``1 / mean(mask)``. NaNs arising in the
    weights or in the loss are replaced by zero.

    :param preds: predicted tensor.
    :param labels: ground-truth tensor.
    :param null_val: label value marking missing data (NaN by default).
    :return: scalar tensor.
    """
    valid = ~torch.isnan(labels) if np.isnan(null_val) else (labels != null_val)
    weights = valid.float()
    weights = weights / weights.mean()
    # If every entry is masked the division above is 0/0; treat it as zero.
    weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)
    weighted = torch.abs(preds - labels) * weights
    weighted = torch.where(torch.isnan(weighted), torch.zeros_like(weighted), weighted)
    return weighted.mean()
def masked_mape(preds, labels, null_val=np.nan):
    """Masked percentage error in its symmetric form.

    The per-element term is ``2 * |preds - labels| / (|preds| + |labels|)``.
    Entries whose label equals ``null_val`` are ignored, valid entries are
    re-weighted by ``1 / mean(mask)``, and NaNs arising in the weights or in
    the loss are replaced by zero.

    :param preds: predicted tensor.
    :param labels: ground-truth tensor.
    :param null_val: label value marking missing data (NaN by default).
    :return: scalar tensor.
    """
    valid = ~torch.isnan(labels) if np.isnan(null_val) else (labels != null_val)
    weights = valid.float()
    weights = weights / weights.mean()
    # If every entry is masked the division above is 0/0; treat it as zero.
    weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)
    abs_err = torch.abs(preds - labels)
    scale = torch.abs(preds) + torch.abs(labels)
    weighted = 2.0 * (abs_err / scale) * weights
    weighted = torch.where(torch.isnan(weighted), torch.zeros_like(weighted), weighted)
    return weighted.mean()

def metric(pred, real):
    """Return ``(mae, mape, rmse)`` as Python floats, masking labels equal to 0.0."""
    mae, mape, rmse = (
        loss_fn(pred, real, 0.0).item()
        for loss_fn in (masked_mae, masked_mape, masked_rmse)
    )
    return mae, mape, rmse

# Ref: https://github.com/nnzhan/MTGNN
class LayerNorm(nn.Module):
    """Layer normalization whose affine parameters are sliced by an index.

    ``forward`` normalizes over all non-batch dimensions of the input. When
    ``elementwise_affine`` is set, weight and bias are indexed along their
    second dimension with ``idx`` before being applied, so
    ``normalized_shape`` must have three dimensions in that mode.
    """
    __constants__ = ['normalized_shape', 'weight', 'bias', 'eps', 'elementwise_affine']

    def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
        super().__init__()
        # A bare integer is treated as a one-dimensional shape.
        shape = normalized_shape
        if isinstance(shape, numbers.Integral):
            shape = (shape,)
        self.normalized_shape = tuple(shape)
        self.eps = eps
        self.elementwise_affine = elementwise_affine
        if not self.elementwise_affine:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        else:
            self.weight = nn.Parameter(torch.Tensor(*shape))
            self.bias = nn.Parameter(torch.Tensor(*shape))
        self.reset_parameters()

    def reset_parameters(self):
        """Set weight to ones and bias to zeros (no-op without affine parameters)."""
        if not self.elementwise_affine:
            return
        init.ones_(self.weight)
        init.zeros_(self.bias)

    def forward(self, input, idx):
        """Normalize ``input`` over ``input.shape[1:]``.

        :param input: tensor to normalize.
        :param idx: index applied to dimension 1 of weight and bias; ignored
            when the layer has no affine parameters.
        """
        scale, shift = self.weight, self.bias
        if self.elementwise_affine:
            scale, shift = scale[:, idx, :], shift[:, idx, :]
        return F.layer_norm(input, tuple(input.shape[1:]), scale, shift, self.eps)

    def extra_repr(self):
        template = '{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}'
        return template.format(
            normalized_shape=self.normalized_shape,
            eps=self.eps,
            elementwise_affine=self.elementwise_affine,
        )

### F-GMAT

In [None]:

class F_GMAT_base(nn.Module):
    """Multi-head additive attention layer used inside the F-GMAT block.

    For an input ``h`` of size ``(bs, ch, n, dim)`` the layer scores every
    pair of the ``n`` rows with two learned vectors (``a_src``, ``a_dst``),
    applies LeakyReLU and a softmax over the last axis, and mixes the rows of
    ``h`` with the resulting weights. ``ch`` must broadcast against
    ``n_heads`` and ``dim`` must equal ``num_nodes``, because both parameter
    vectors are shaped ``(n_heads, num_nodes, 1)``.

    :param n_heads: number of attention heads.
    :param in_channel: only echoed in the constructor's debug print.
    :param num_nodes: size of the last input axis.
    :param dropout: dropout probability applied to the attention weights.
    :param bias: when true, a learned bias of size ``num_nodes`` is added.
    """
    def __init__(self, n_heads, in_channel, num_nodes, dropout, bias=True):
        super(F_GMAT_base, self).__init__()

        print('F_GMAT_base', n_heads, in_channel, num_nodes, dropout)
        self.n_head = n_heads
        self.f_in = num_nodes
        self.a_src = nn.Parameter(torch.Tensor(self.n_head, num_nodes, 1))
        self.a_dst = nn.Parameter(torch.Tensor(self.n_head, num_nodes, 1))

        self.leaky_relu = nn.LeakyReLU(negative_slope=0.2)
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)
        if bias:
            self.bias = nn.Parameter(torch.Tensor(num_nodes))
            nn.init.constant_(self.bias, 0)
        else:
            self.register_parameter("bias", None)

        nn.init.xavier_uniform_(self.a_src, gain=1.414)
        nn.init.xavier_uniform_(self.a_dst, gain=1.414)

    def forward(self, h):
        """Return ``(output, attn)``.

        ``output`` has the size of ``h``; ``attn`` has size ``(bs, ch, n, n)``.
        """
        bs, ch, n, dim = h.size()
        attn_src = torch.matmul(h, self.a_src)
        attn_dst = torch.matmul(h, self.a_dst)
        # score[i, j] = src[i] + dst[j], built by broadcasting both columns.
        attn = attn_src.expand(-1, -1, -1, n) + attn_dst.expand(-1, -1, -1, n).permute(
            0, 1, 3, 2
        )
        attn = self.leaky_relu(attn)
        attn = self.softmax(attn)
        attn = self.dropout(attn)
        output = torch.matmul(attn, h)
        # The bias is None when the layer was built with bias=False.
        if self.bias is not None:
            output = output + self.bias
        return output, attn
        
class F_GMAT(nn.Module):
    """Thin wrapper around one ``F_GMAT_base`` layer that discards the attention map.

    ``alpha`` is accepted for signature compatibility and is not used.
    """

    def __init__(self, n_heads, in_channel, num_nodes, dropout, alpha):
        super().__init__()
        self.dropout = dropout
        self.layer = F_GMAT_base(n_heads, in_channel, num_nodes, dropout)

    def forward(self, x):
        """Run the wrapped layer on a 4-D input and return only its output."""
        # The unpack only enforces that the input is four-dimensional.
        _bs, _ch, _n, _dim = x.size()
        attended, _attn = self.layer(x)
        return attended


class F_GMAT_module(nn.Module):
    """Fuse the main input with four auxiliary streams, one time step at a time.

    For each time index except the first, the five streams are stacked along
    a new axis, passed through 1x1 convolutions, an ``F_GMAT`` attention layer
    with a residual connection, LayerNorm and a second set of 1x1
    convolutions. Slice 0 of the stacked axis of the result (the main
    stream's position) becomes the new value for that time step.

    :param n_heads: attention heads; also the input width of ``mlp_convs2``.
    :param in_channel: forwarded to ``F_GMAT``; the first convolution's input
        width is fixed at 32.
    :param num_nodes: size of the node axis.
    :param mlp: output widths of the convolutions applied before attention.
    :param mlp2: output widths of the convolutions applied after attention.
    :param dropout: dropout probability forwarded to ``F_GMAT``.
    :param alpha: forwarded to ``F_GMAT``.
    """

    def __init__(self, n_heads, in_channel, num_nodes, mlp, mlp2, dropout, alpha):
        super().__init__()
        print('F_GMAT_module', n_heads, in_channel, num_nodes, dropout, alpha)
        self.net = F_GMAT(n_heads, in_channel, num_nodes, dropout, alpha)

        # 1x1 convolutions applied before attention (first input width is 32).
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        widths = [32] + list(mlp)
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            self.mlp_convs.append(nn.Conv2d(c_in, c_out, 1))

        # 1x1 convolutions applied after attention.
        self.mlp_convs2 = nn.ModuleList()
        self.mlp_bns2 = nn.ModuleList()
        widths2 = [n_heads] + list(mlp2)
        for c_in, c_out in zip(widths2[:-1], widths2[1:]):
            self.mlp_convs2.append(nn.Conv2d(c_in, c_out, 1))

        # The middle dimension (5) matches the five stacked streams.
        self.lay_norm2 = nn.LayerNorm([n_heads,5, num_nodes])

        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.6)

    def forward(self,x, x_1, x_2, x_3, x_4):
        """Return a tensor shaped like ``x`` with time steps ``1..dim-1`` replaced.

        Time step 0 of ``x`` is passed through unchanged.
        """
        bs, ch, n, dim = x.size()
        streams = (x, x_1, x_2, x_3, x_4)

        fused_steps = []
        for t_idx in range(1, dim):
            # (bs, ch, 5, n): the five streams at this time step, stacked on axis 2.
            stacked = torch.cat(
                [stream[:, :, :, t_idx].unsqueeze(2) for stream in streams], dim=2
            )

            hidden = stacked
            for conv in self.mlp_convs:
                hidden = F.relu(conv(hidden))

            # Attention with a residual connection, then normalization.
            hidden = hidden + self.dropout1(self.net(hidden))
            hidden = self.lay_norm2(hidden)

            for conv in self.mlp_convs2:
                hidden = F.relu(conv(hidden))

            # Outer residual back to the stacked, pre-convolution input.
            hidden = stacked + self.dropout2(hidden)

            # Keep only slice 0 of the stacked axis.
            fused_steps.append(hidden[:, :, 0].unsqueeze(3))

        fused = torch.cat(fused_steps, dim=3)
        return torch.cat([x[:, :, :, :1], fused], dim=3)

### T-GMAT

In [None]:

class T_GMAT_base(nn.Module):
    """Multi-head additive attention layer used inside the T-GMAT block.

    For an input ``h`` of size ``(bs, ch, n, dim)`` the layer scores every
    pair of the ``n`` rows with two learned vectors (``a_src``, ``a_dst``),
    applies LeakyReLU and a softmax over the last axis, and mixes the rows of
    ``h`` with the resulting weights. ``ch`` must broadcast against
    ``n_heads`` and ``dim`` must equal ``num_nodes``, because both parameter
    vectors are shaped ``(n_heads, num_nodes, 1)``.

    :param n_heads: number of attention heads.
    :param in_channel: only echoed in the constructor's debug print.
    :param num_nodes: size of the last input axis.
    :param dropout: dropout probability applied to the attention weights.
    :param bias: when true, a learned bias of size ``num_nodes`` is added.
    """
    def __init__(self, n_heads, in_channel, num_nodes, dropout, bias=True):
        super(T_GMAT_base, self).__init__()

        print('T_GMAT_base', n_heads, in_channel, num_nodes, dropout)
        self.n_head = n_heads
        self.f_in = num_nodes
        self.a_src = nn.Parameter(torch.Tensor(self.n_head, num_nodes, 1))
        self.a_dst = nn.Parameter(torch.Tensor(self.n_head, num_nodes, 1))

        self.leaky_relu = nn.LeakyReLU(negative_slope=0.2)
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)
        if bias:
            self.bias = nn.Parameter(torch.Tensor(num_nodes))
            nn.init.constant_(self.bias, 0)
        else:
            self.register_parameter("bias", None)

        nn.init.xavier_uniform_(self.a_src, gain=1.414)
        nn.init.xavier_uniform_(self.a_dst, gain=1.414)

    def forward(self, h):
        """Return ``(output, attn)``.

        ``output`` has the size of ``h``; ``attn`` has size ``(bs, ch, n, n)``.
        """
        bs, ch, n, dim = h.size()
        attn_src = torch.matmul(h, self.a_src)
        attn_dst = torch.matmul(h, self.a_dst)
        # score[i, j] = src[i] + dst[j], built by broadcasting both columns.
        attn = attn_src.expand(-1, -1, -1, n) + attn_dst.expand(-1, -1, -1, n).permute(
            0, 1, 3, 2
        )
        attn = self.leaky_relu(attn)
        attn = self.softmax(attn)
        attn = self.dropout(attn)
        output = torch.matmul(attn, h)
        # The bias is None when the layer was built with bias=False.
        if self.bias is not None:
            output = output + self.bias
        return output, attn
        
class T_GMAT(nn.Module):
    """Thin wrapper around one ``T_GMAT_base`` layer that discards the attention map.

    ``alpha`` is accepted for signature compatibility and is not used.
    """

    def __init__(self, n_heads, in_channel, num_nodes, dropout, alpha):
        super().__init__()
        self.dropout = dropout
        self.layer = T_GMAT_base(n_heads, in_channel, num_nodes, dropout)

    def forward(self, x):
        """Run the wrapped layer on a 4-D input and return only its output."""
        # The unpack only enforces that the input is four-dimensional.
        _bs, _ch, _n, _dim = x.size()
        attended, _attn = self.layer(x)
        return attended


class T_GMAT_module(nn.Module):
    """Temporal attention block followed by a convolution that shortens the time axis.

    The input is permuted so its last two axes are swapped, passed through 1x1
    convolutions, a ``T_GMAT`` attention layer with a residual connection and
    a second set of 1x1 convolutions, then permuted back. A final
    ``(1, kern)`` convolution reduces the last axis by
    ``dilation_factor * (kern - 1)`` before LayerNorm.

    :param kern: kernel width of the final convolution along the last axis.
    :param dilation_factor: dilation of the final convolution.
    :param temporal_len: length of the last input axis.
    :param n_heads: attention heads; also the input width of ``mlp_convs2``.
    :param in_channel: forwarded to ``T_GMAT``; convolution widths are fixed at 32.
    :param num_nodes: size of the node axis.
    :param mlp: output widths of the convolutions applied before attention.
    :param mlp2: output widths of the convolutions applied after attention.
    :param dropout: dropout probability forwarded to ``T_GMAT``.
    :param alpha: forwarded to ``T_GMAT``.
    """
    def __init__(self, kern, dilation_factor, temporal_len, n_heads, in_channel, num_nodes, mlp, mlp2, dropout, alpha):
        super(T_GMAT_module, self).__init__()

        print('T_GMAT_module', n_heads, in_channel, num_nodes, dropout, alpha)
        self.net = T_GMAT(n_heads, in_channel, num_nodes, dropout, alpha)

        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = 32
        for out_channel in mlp:
            self.mlp_convs.append(nn.Conv2d(last_channel, out_channel, 1))
            last_channel = out_channel

        self.mlp_convs2 = nn.ModuleList()
        self.mlp_bns2 = nn.ModuleList()
        last_channel = n_heads
        for out_channel in mlp2:
            self.mlp_convs2.append(nn.Conv2d(last_channel, out_channel, 1))
            last_channel = out_channel

        # An unpadded, stride-1 convolution shortens the axis by
        # dilation * (kernel - 1). Deriving this from the arguments keeps the
        # norm shape valid for any kern/dilation (it is 4 for kern=5, dilation=1).
        time_shrink = dilation_factor * (kern - 1)
        self.norm2 = nn.LayerNorm([32, num_nodes, temporal_len - time_shrink])

        self.dropout1 = nn.Dropout(0.5)
        self.dropout2 = nn.Dropout(0.5)

        self.mlp = (nn.Conv2d(32,32,(1,kern),dilation=(1,dilation_factor)))

    def forward(self,x):
        """Return the block output; the last axis is shorter than the input's."""
        bs, ch, n, dim = x.size()

        # Swap the last two axes so attention runs across the other axis.
        x_input = x.permute(0,1,3,2)
        x_input_cpy = x_input

        # 1x1 convolutions + ReLU before attention.
        for conv in self.mlp_convs:
            x_input = F.relu((conv(x_input)))

        # Attention with a residual connection.
        x_input_cpy2 = x_input
        x_input = self.net(x_input)
        x_input = x_input_cpy2+ self.dropout1(x_input)

        # 1x1 convolutions + ReLU after attention.
        for conv in self.mlp_convs2:
            x_input = F.relu((conv(x_input)))

        # Outer residual, then restore the original axis order.
        x_input = (x_input_cpy + self.dropout2(x_input)).permute(0,1,3,2)

        # Shrink the last dimension with the (1, kern) convolution.
        x_input = F.relu(self.mlp(x_input))
        x_input = self.norm2(x_input)

        return x_input

### S-GMAT

In [None]:

class S_GMAT_base(nn.Module):
    """Adjacency-masked multi-head attention with two propagation hops.

    Attention scores are computed as in the other ``*_GMAT_base`` layers,
    masked to the positions where ``adj > 0``, combined with the adjacency
    itself through two learned per-head scalars, and soft-maxed. The input is
    then propagated twice through the attention map, each hop mixing 5% of the
    original input back in. The input and both hop results are concatenated
    along the channel axis.

    ``W_si_1`` and ``W_ei_1`` are registered as ordinary parameters, so they
    follow ``module.to(device)``, are returned by ``parameters()`` and appear
    in ``state_dict()``. The previous ``nn.Parameter(...).cuda()`` form
    produced plain tensors that were never optimized and required CUDA at
    construction time.
    NOTE(review): checkpoints saved before this change lack these two keys;
    load them with ``strict=False``.

    :param n_heads: number of attention heads.
    :param in_channel: only echoed in the constructor's debug print.
    :param num_nodes: size of the last input axis.
    :param dropout: dropout probability applied to the attention weights.
    :param bias: when true, a bias parameter is registered; ``forward`` does
        not use it.
    """
    def __init__(self, n_heads, in_channel, num_nodes, dropout, bias=True):
        super(S_GMAT_base, self).__init__()

        print('S_GMAT_base', n_heads, in_channel, num_nodes, dropout)
        self.n_head = n_heads
        self.f_in = num_nodes
        self.a_src = nn.Parameter(torch.Tensor(self.n_head, num_nodes, 1))
        self.a_dst = nn.Parameter(torch.Tensor(self.n_head, num_nodes, 1))

        self.leaky_relu = nn.LeakyReLU(negative_slope=0.2)
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)
        if bias:
            self.bias = nn.Parameter(torch.Tensor(num_nodes))
            nn.init.constant_(self.bias, 0)
        else:
            self.register_parameter("bias", None)
        nn.init.xavier_uniform_(self.a_src, gain=1.414)
        nn.init.xavier_uniform_(self.a_dst, gain=1.414)

        # Per-head scalar weighting the learned attention scores.
        self.W_si_1 = nn.Parameter(torch.zeros(size=(n_heads, 1, 1)))
        nn.init.xavier_uniform_(self.W_si_1, gain=1.414)

        # Per-head scalar weighting the predefined adjacency.
        self.W_ei_1 = nn.Parameter(torch.zeros(size=(n_heads, 1, 1)))
        nn.init.xavier_uniform_(self.W_ei_1, gain=1.414)

    def forward(self, h, adj):
        """Return ``h`` and its two propagated versions concatenated on dim 1.

        :param h: tensor of size ``(bs, ch, n, dim)``.
        :param adj: adjacency broadcastable to ``(bs, ch, n, n)``; it must be
            on the same device as the module.
        :return: tensor of size ``(bs, 3 * ch, n, dim)``.
        """
        bs, ch, n, dim = h.size()

        attn_src = torch.matmul(h, self.a_src)
        attn_dst = torch.matmul(h, self.a_dst)
        attn = attn_src.expand(-1, -1, -1, n) + attn_dst.expand(-1, -1, -1, n).permute(
            0, 1, 3, 2
        )
        attn = self.leaky_relu(attn)
        # Non-edges get a very large negative score so softmax suppresses them.
        zero_vec = -9e15*torch.ones_like(attn)
        attn = torch.where(adj > 0, attn, zero_vec)

        # Blend learned scores with the adjacency using non-negative weights.
        attn = torch.abs(self.W_si_1)*attn + torch.abs(self.W_ei_1)*adj

        attn = self.softmax(attn) # bs x n_head x n x n
        attn = self.dropout(attn)

        alpha = 0.05  # share of the original input kept at every hop
        hops = [h]
        h_prime = h
        for _ in range(2):
            h_prime = alpha*h+ (1-alpha)* torch.matmul(attn, h_prime)
            hops.append(h_prime)

        return torch.cat(hops, dim=1)
class S_GMAT(nn.Module):
    """Thin wrapper around one ``S_GMAT_base`` layer.

    ``alpha`` is accepted for signature compatibility and is not used.
    """

    def __init__(self, n_heads, in_channel, num_nodes, dropout, alpha):
        super().__init__()
        self.dropout = dropout
        self.layer = S_GMAT_base(n_heads, in_channel, num_nodes, dropout)

    def forward(self, x, adj):
        """Apply the wrapped layer to a 4-D input ``x`` with adjacency ``adj``."""
        # The unpack only enforces that the input is four-dimensional.
        _bs, _ch, _n, _dim = x.size()
        return self.layer(x, adj)


class S_GMAT_module(nn.Module):
    """Two parallel spatial attention branches, one per adjacency, summed at the end.

    Each branch projects the input to ``n_heads`` channels, runs an
    ``S_GMAT`` layer, projects the concatenated hops back to 32 channels,
    adds the input as a residual, and finishes with a 1x1 convolution and
    LayerNorm.

    :param depth: number of propagation hops assumed for the channel count
        after ``S_GMAT``, which is ``n_heads * (1 + depth)``.
    :param temporal_len: length of the last input axis.
    :param n_heads: number of attention heads.
    :param in_channel: channel count of the input.
    :param num_nodes: size of the node axis.
    :param mlp: unused here.
    :param mlp2: unused here.
    :param dropout: dropout probability forwarded to ``S_GMAT``.
    :param alpha: forwarded to ``S_GMAT``.
    """

    def __init__(self, depth, temporal_len, n_heads, in_channel, num_nodes, mlp, mlp2, dropout, alpha):
        super().__init__()

        print('S_GMAT_module', n_heads, in_channel, num_nodes, dropout, alpha)

        hop_channels = n_heads * (1 + depth)

        self.gat_net1 = S_GMAT(n_heads, in_channel, temporal_len, dropout, alpha)
        self.gat_net2 = S_GMAT(n_heads, in_channel, temporal_len, dropout, alpha)

        self.mlp_convs_start_1 = nn.Conv2d(in_channel, n_heads, 1)
        self.mlp_convs_start_2 = nn.Conv2d(in_channel, n_heads, 1)

        self.mlp_convs_end_1 = nn.Conv2d(hop_channels, 32, 1)
        self.mlp_convs_end_2 = nn.Conv2d(hop_channels, 32, 1)

        self.dropout1 = nn.Dropout(0.1)
        self.dropout2 = nn.Dropout(0.1)

        # Constructed but not used in forward.
        self.m1 = nn.GroupNorm(1 + depth, hop_channels)
        self.m2 = nn.GroupNorm(1 + depth, hop_channels)

        self.mlp1 = nn.Conv2d(32, 32, (1, 1))
        self.mlp2 = nn.Conv2d(32, 32, (1, 1))

        self.norm1 = nn.LayerNorm([32, num_nodes, temporal_len])
        self.norm2 = nn.LayerNorm([32, num_nodes, temporal_len])

    @staticmethod
    def _run_branch(x, adj, start_conv, gat, end_conv, drop, final_conv, norm):
        """Run one attention branch: project, attend, project back, residual, MLP, norm."""
        hidden = start_conv(x.clone())
        hidden = F.elu(gat(hidden, adj))
        hidden = end_conv(hidden)
        hidden = x + drop(hidden)
        hidden = F.elu(final_conv(hidden))
        return norm(hidden)

    def forward(self,x,adj1,adj2):
        """Return the sum of the branch driven by ``adj1`` and the one driven by ``adj2``."""
        bs, ch, n, dim = x.size()

        branch1 = self._run_branch(
            x, adj1,
            self.mlp_convs_start_1, self.gat_net1, self.mlp_convs_end_1,
            self.dropout1, self.mlp1, self.norm1,
        )
        branch2 = self._run_branch(
            x, adj2,
            self.mlp_convs_start_2, self.gat_net2, self.mlp_convs_end_2,
            self.dropout2, self.mlp2, self.norm2,
        )
        return branch1 + branch2

### Model

In [None]:
class GMAT_Net(nn.Module):
    """Forecasting network stacking F-GMAT, T-GMAT and S-GMAT blocks.

    One ``F_GMAT_module`` first fuses the main input with four auxiliary
    inputs. Then ``layers`` stages follow; each stage applies a gated pair of
    ``T_GMAT_module`` blocks (tanh filter * sigmoid gate), adds a skip
    connection, runs an ``S_GMAT_module`` over two predefined adjacencies,
    adds a residual and normalizes. Two 1x1 convolutions map the accumulated
    skip signal to ``out_dim`` channels.

    NOTE(review): several widths inside the stages are hard-coded
    (``in_channel = 32``, ``n_heads = 8``, ``kern = 5``) and
    ``receptive_field`` is forced to 13 after being computed -- changing
    ``layers``, ``residual_channels`` or ``conv_channels`` from their defaults
    may therefore not work; confirm before doing so.
    """
    def __init__(self, 
                 model_type, 
                 num_nodes, 
                 device, 
                 predefined_A=None, 
                 dropout=0.3, 
                 conv_channels=32, 
                 residual_channels=32, 
                 skip_channels=64, 
                 end_channels=128, 
                 seq_length=12, 
                 in_dim=2, 
                 out_dim=12, 
                 layers=3, 
                 layer_norm_affline=True):
        """
        :param model_type: label stored on the model and printed at construction.
        :param num_nodes: size of the node axis.
        :param device: device on which the node index tensor ``self.idx`` is created.
        :param predefined_A: indexable holding two adjacencies; ``forward``
            reads ``predefined_A[0]`` and ``predefined_A[1]``, so the ``None``
            default cannot be used for a forward pass.
        :param dropout: dropout probability used in ``forward``.
        :param conv_channels: input channels of the per-stage skip convolutions.
        :param residual_channels: output channels of ``start_conv``.
        :param skip_channels: channels of the skip path.
        :param end_channels: hidden channels of the output head.
        :param seq_length: expected length of the last input axis.
        :param in_dim: input feature channels.
        :param out_dim: output channels of the final convolution.
        :param layers: number of T-GMAT/S-GMAT stages.
        :param layer_norm_affline: whether the per-stage LayerNorm is affine.
        """
        super(GMAT_Net, self).__init__()

        self.model_type = model_type

        self.num_nodes = num_nodes
        self.dropout = dropout
        self.predefined_A = predefined_A
        self.layers = layers
        self.seq_length = seq_length

        # residual_convs is created but never populated in this class.
        self.residual_convs = nn.ModuleList()
        self.skip_convs = nn.ModuleList()
        self.norm = nn.ModuleList()
        self.start_conv = nn.Conv2d(in_channels=in_dim,
                                    out_channels=residual_channels,
                                    kernel_size=(1, 1))
        
        self.f_gmat_list = nn.ModuleList()
        in_channel = 32
        n_heads = 8
        # From here on the local `dropout` is 0; self.dropout keeps the argument value.
        dropout = 0
        alpha = 0.2
        self.f_gmat_list.append(
            F_GMAT_module(
              n_heads=n_heads, in_channel= in_channel, num_nodes=num_nodes, mlp=[n_heads],mlp2=[32], dropout=dropout, alpha=alpha
            )
        )

        # Two T-GMAT lists: one feeds the tanh filter, the other the sigmoid gate.
        self.t_gmat_list_1 = nn.ModuleList()
        self.t_gmat_list_2 = nn.ModuleList()

        self.s_gmat_list = nn.ModuleList() # dual GMAT Blocks
        
        # Modified from: https://github.com/nnzhan/MTGNN
        kernel_size = 7
        dilation_exponential = 1
        if dilation_exponential>1:
            self.receptive_field = int(1+(kernel_size-1)*(dilation_exponential**layers-1)/(dilation_exponential-1))
        else:
            self.receptive_field = layers*(kernel_size-1) + 1
        
        print("# Model Type", self.model_type)
        print("# receptive_field", self.receptive_field)
        # The value computed (and printed) above is overridden with a fixed 13.
        self.receptive_field = 13
        i=0
        # rf_size_i / rf_size_j / new_dilation are computed but not read by
        # any module built below.
        if dilation_exponential>1:
            rf_size_i = int(1 + i*(kernel_size-1)*(dilation_exponential**layers-1)/(dilation_exponential-1))
        else:
            rf_size_i = i*layers*(kernel_size-1)+1
        new_dilation = 1
        # Length of the time axis entering the current stage.
        target_len = self.receptive_field

        for j in range(1,layers+1):
           
            if dilation_exponential > 1:
                rf_size_j = int(rf_size_i + (kernel_size-1)*(dilation_exponential**j-1)/(dilation_exponential-1))
            else:
                rf_size_j = rf_size_i+j*(kernel_size-1)

            dilation_factor = 1
            kern = 5

            in_channel = 32
            n_heads = 8
            dropout = 0
            alpha = 0.2
            self.t_gmat_list_1.append(
                T_GMAT_module(
                  kern= kern, dilation_factor=dilation_factor, temporal_len = target_len, n_heads=n_heads, in_channel= in_channel, num_nodes=num_nodes, mlp=[n_heads],mlp2=[32], dropout=dropout, alpha=alpha
                )
            )
            
            self.t_gmat_list_2.append(
                T_GMAT_module(
                  kern= kern, dilation_factor=dilation_factor, temporal_len = target_len, n_heads=n_heads, in_channel= in_channel, num_nodes=num_nodes, mlp=[n_heads],mlp2=[32], dropout=dropout, alpha=alpha
                )
            )
            
            # Each T-GMAT stage shortens the time axis by 4 (kern=5, dilation=1).
            target_len -= 4

            in_channel = 32
            n_heads = 8
            dropout = 0
            alpha = 0.2
            
            depth = 2
            self.s_gmat_list.append(
                S_GMAT_module(
                  depth=depth, temporal_len = target_len, n_heads=n_heads, in_channel= in_channel, num_nodes=num_nodes, mlp=[n_heads],mlp2=[32], dropout=dropout, alpha=alpha
                )
            )
            # Skip-connection convolution; its kernel spans the whole remaining time axis.
            self.skip_convs.append(nn.Conv2d(in_channels=conv_channels,
                                                out_channels=skip_channels,
                                                kernel_size=(1, target_len)))
            
            self.norm.append(LayerNorm((residual_channels, num_nodes, target_len),elementwise_affine=layer_norm_affline))
            
            new_dilation *= dilation_exponential
    
        # Output head: skip_channels -> end_channels -> out_dim.
        self.end_conv_1 = nn.Conv2d(in_channels=skip_channels,
                                             out_channels=end_channels,
                                             kernel_size=(1,1),
                                             bias=True)
        self.end_conv_2 = nn.Conv2d(in_channels=end_channels,
                                             out_channels=out_dim,
                                             kernel_size=(1,1),
                                             bias=True)

        # skip0 acts on the raw (possibly padded) input, skipE on the last stage output.
        if self.seq_length > self.receptive_field:
            self.skip0 = nn.Conv2d(in_channels=in_dim, out_channels=skip_channels, kernel_size=(1, self.seq_length), bias=True)
            self.skipE = nn.Conv2d(in_channels=residual_channels, out_channels=skip_channels, kernel_size=(1, self.seq_length-self.receptive_field+1), bias=True)

        else:
            self.skip0 = nn.Conv2d(in_channels=in_dim, out_channels=skip_channels, kernel_size=(1, self.receptive_field), bias=True)
            self.skipE = nn.Conv2d(in_channels=residual_channels, out_channels=skip_channels, kernel_size=(1, 1), bias=True)

        # Plain tensor attribute (not a registered buffer) holding all node indices.
        self.idx = torch.arange(self.num_nodes).to(device)


    def forward(self, input, input_1,input_2,input_3,input_4, idx=None):
        """Run the network on the main input and its four auxiliary inputs.

        :param input: main input; its axis 3 must have length ``seq_length``.
        :param input_1: first auxiliary input, processed like ``input``.
        :param input_2: second auxiliary input.
        :param input_3: third auxiliary input.
        :param input_4: fourth auxiliary input.
        :param idx: accepted but not used; ``self.idx`` is used for the
            LayerNorm indexing instead.
        :return: output of the final 1x1 convolution (``out_dim`` channels).
        """
        seq_len = input.size(3)
        assert seq_len==self.seq_length, 'input sequence length not equal to preset sequence length'

        # Step 0: when the sequence is shorter than the receptive field,
        # left-pad the last axis with zeros.
        if self.seq_length<self.receptive_field:
            input = nn.functional.pad(input,(self.receptive_field-self.seq_length,0,0,0))
            input_1 = nn.functional.pad(input_1,(self.receptive_field-self.seq_length,0,0,0))
            input_2 = nn.functional.pad(input_2,(self.receptive_field-self.seq_length,0,0,0))
            input_4 = nn.functional.pad(input_4,(self.receptive_field-self.seq_length,0,0,0))
            
            input_3 = nn.functional.pad(input_3,(self.receptive_field-self.seq_length,0,0,0))


        # Step 1: project in_dim channels to residual_channels; the same
        # start_conv is shared by all five inputs
        # (original note: [64, 2, 207, 13] -> [64, 32, 207, 13]).
        x = self.start_conv(input) 
        x_1 = self.start_conv(input_1)  
        x_2 = self.start_conv(input_2)
        
        x_4 = self.start_conv(input_4) 
        x_3 = self.start_conv(input_3)  

        # Fuse the five projected streams into one tensor.
        x = self.f_gmat_list[0](x,x_1,x_2,x_3,x_4)

        # The skip path starts from the raw (padded) main input.
        skip = self.skip0(F.dropout(input, self.dropout, training=self.training))
        
        for i in range(self.layers):
            
            residual = x    
            
            # Gated temporal unit: tanh(filter) * sigmoid(gate).
            filter = self.t_gmat_list_1[i](x)
            filter = torch.tanh(filter)

            gate = self.t_gmat_list_2[i](x)
            gate = torch.sigmoid(gate)

            x = filter * gate
            x = F.dropout(x, self.dropout, training=self.training)

            s = x
            
            s = self.skip_convs[i](s)    

            skip = s + skip
            # Two GMAT Block of different directions implemented in S_GMAT_module 
            x = self.s_gmat_list[i](x, self.predefined_A[0], self.predefined_A[1])

            # The residual is cropped to the most recent steps to match x's shorter time axis.
            x = x + residual[:, :, :, -x.size(3):]
            x = self.norm[i](x,self.idx)
            
        skip = self.skipE(x) + skip
        x = F.relu(skip)
        x = F.relu(self.end_conv_1(x))
        x = self.end_conv_2(x)
        return x

### Trainer

In [None]:
class Trainer():
    """Couple a model with its Adam optimizer, loss function and curriculum state.

    The curriculum state is ``task_level``: when ``cl`` is true, the training
    loss only covers the first ``task_level`` positions of the last axis of
    the prediction, and ``task_level`` grows by one every ``step_size``
    training iterations until it reaches ``seq_out_len``.
    """

    def __init__(self, model, lrate, wdecay, clip, step_size, seq_out_len, scaler, device, cl=True):
        """Move ``model`` to ``device`` and create the optimizer.

        :param model: network called as ``model(input, input_1, ..., input_4, idx=...)``.
        :param lrate: Adam learning rate.
        :param wdecay: Adam weight decay.
        :param clip: max gradient norm, or ``None`` to disable clipping.
        :param step_size: number of iterations between curriculum level increases.
        :param seq_out_len: upper bound for ``task_level``.
        :param scaler: object exposing ``inverse_transform`` (applied to model output).
        :param device: device the model is moved to.
        :param cl: whether the loss is restricted to the current curriculum level.
        """
        self.scaler = scaler
        self.model = model
        self.model.to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay)
        self.loss = masked_mae
        self.clip = clip
        self.step = step_size
        self.iter = 1
        self.task_level = 1
        self.seq_out_len = seq_out_len
        self.cl = cl

    def train(self, input, input_1, input_2, input_3, input_4 ,real_val, idx=None):
        """Run one optimisation step and return ``metric(predict, real)``.

        :param input: base input batch; ``input_1`` .. ``input_4`` are the
            auxiliary input batches forwarded to the model unchanged.
        :param real_val: ground truth; a singleton axis is inserted at dim 1
            so that it lines up with the transposed model output.
        :param idx: forwarded to the model as ``idx``.
        :return: whatever ``metric`` returns (mae, mape, rmse per the callers).
        """
        self.model.train()
        self.optimizer.zero_grad()
        output = self.model(input, input_1, input_2, input_3, input_4, idx=idx)
        output = output.transpose(1, 3)
        real = torch.unsqueeze(real_val, dim=1)
        predict = self.scaler.inverse_transform(output)

        # Curriculum schedule: widen the supervised horizon every `step`
        # iterations. The strict `<` keeps task_level from overshooting
        # seq_out_len (slicing hid the overshoot, but the stored level was wrong).
        if self.iter % self.step == 0 and self.task_level < self.seq_out_len:
            self.task_level += 1
            print("### cl learning\n iter",self.iter,"\niter%step",self.iter%self.step,"\ntask_level",self.task_level)
            # Report the size of the sliced (last) axis, not the batch dimension.
            print("# predict len:", predict[:, :, :, :self.task_level].size(3))

        if self.cl:
            loss = self.loss(predict[:, :, :, :self.task_level], real[:, :, :, :self.task_level], 0.0)
        else:
            loss = self.loss(predict, real, 0.0)

        loss.backward()

        if self.clip is not None:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)

        self.optimizer.step()

        # Metrics are always computed on the full prediction, independent of task_level.
        metrics = metric(predict, real)  # mae, mape, rmse

        self.iter += 1
        return metrics

    def eval(self, input, input_1, input_2, input_3,input_4, real_val):
        """Evaluate one batch in eval mode and return ``metric(predict, real)``.

        The forward pass runs under ``torch.no_grad()`` so that no autograd
        graph is built for batches that are never back-propagated.
        """
        self.model.eval()
        with torch.no_grad():
            output = self.model(input, input_1, input_2, input_3, input_4)
            output = output.transpose(1, 3)
            real = torch.unsqueeze(real_val, dim=1)
            predict = self.scaler.inverse_transform(output)

            metrics = metric(predict, real)  # mae, mape, rmse
        return metrics


### Parameter

In [None]:


def str_to_bool(value):
    """Parse a boolean given either as a ``bool`` or as a case-insensitive word.

    Accepted false words: false, f, 0, no, n.
    Accepted true words: true, t, 1, yes, y.

    :raises ValueError: when the word is in neither set.
    """
    if isinstance(value, bool):
        return value

    token = value.lower()
    verdicts = dict.fromkeys(('false', 'f', '0', 'no', 'n'), False)
    verdicts.update(dict.fromkeys(('true', 't', '1', 'yes', 'y'), True))

    if token not in verdicts:
        raise ValueError(f'{value} is not a valid boolean value')
    return verdicts[token]


# Experiment configuration. Options are registered from a table (kept in the
# original order) and parsed from an empty argv, so every option takes its
# default value.
target = 'RMThsin'

# (flag, type, default, help)
_OPTION_TABLE = (
    ('--model_type', str, 'GMAT_Net', 'model type'),

    ('--device', str, 'cuda', ''),
    ('--adjtype', str, 'doubletransition', 'adj type'),
    ('--cl', str_to_bool, True, 'whether to do curriculum learning'),
    ('--conv_channels', int, 32, 'convolution channels'),
    ('--residual_channels', int, 32, 'residual channels'),
    ('--in_dim', int, 2, 'inputs dimension'),
    ('--seq_in_len', int, 12, 'input sequence length'),
    ('--seq_out_len', int, 12, 'output sequence length'),
    ('--batch_size', int, 64, 'batch size'),
    ('--clip', int, 5, 'clip'),

    ('--skip_channels', int, 64, 'skip channels'),
    ('--end_channels', int, 128, 'end channels'),
    ('--layers', int, 3, 'number of layers'),

    ('--print_every', int, 50, ''),
    ('--seed', int, 101, 'random seed'),
    ('--save', str, './save/', 'save path'),

    ('--log_print', str_to_bool, False, 'whether to load static feature'),

    ('--learning_rate', float, 0.0005, 'learning rate'),
    ('--weight_decay', float, 0.0001, 'weight decay rate'),
    ('--dropout', float, 0.5, 'dropout rate'),

    # Dataset-specific paths are derived from `target`.
    ('--data', str, '../Data/' + target, 'data path'),
    ('--adj_data', str, '../Data/' + target + '/adj_mat_' + target + '.pkl', 'adj data path'),
    ('--num_nodes', int, 11, 'number of nodes/variables'),
    ('--step_size1', int, 1500, 'step_size'),
    ('--step_size2', int, 100, 'step_size'),

    ('--expid', int, 202212010003, 'experiment id'),
    ('--runs', int, 3, 'number of runs'),
    ('--epochs', int, 200, ''),
)

parser = argparse.ArgumentParser()
for _flag, _kind, _default, _help in _OPTION_TABLE:
    parser.add_argument(_flag, type=_kind, default=_default, help=_help)

torch.set_num_threads(3)

# An explicit empty argument list keeps argparse away from the host process's argv.
args = parser.parse_args(args=[])
print('# args', args)

device = torch.device(args.device)

writer = SummaryWriter()

# args Namespace(adj_data='../Data/RMThsin/adj_mat_RMThsin.pkl', adjtype='doubletransition', batch_size=64, cl=True, clip=5, conv_channels=32, data='../Data/RMThsin', device='cuda', dropout=0.5, end_channels=128, epochs=200, expid=202212010003, in_dim=2, layers=3, learning_rate=0.0005, log_print=False, model_type='GMAT_Net', num_nodes=11, print_every=50, residual_channels=32, runs=3, save='./save/', seed=101, seq_in_len=12, seq_out_len=12, skip_channels=64, step_size1=1500, step_size2=100, weight_decay=0.0001)


### Loading Data

In [None]:
# --- Loading Data ---
# Fills `data` with the raw x/y arrays of every split and feature series, the
# scaler fitted on the base training inputs, and one DataLoaderM per split.

batch_size = args.batch_size
valid_batch_size = args.batch_size
test_batch_size = args.batch_size
data = {}

# feature_id 0 is the base series (keys 'x_train', ...); ids 1-4 are the
# series read from '<split>_<range_type>.npz' and stored as 'x_<split>_<id>'.
for feature_id, range_type in enumerate(['', 'ma3', 'ma6', 'ga12', 'ga24']):
    print("range_type", range_type)
    for category in ['train', 'val', 'test']:

        if range_type == "":
            key = category
            file_stem = category
        else:
            key = category + "_" + str(feature_id)
            file_stem = category + "_" + range_type

        # Read the npz archive; the context manager closes the underlying
        # file handle once both arrays have been copied out.
        with np.load(os.path.join(args.data, file_stem + '.npz')) as cat_data:
            print("loading:", file_stem, '->', args.data, file_stem + '.npz')

            data['x_' + key] = cat_data['x'][:]
            data['y_' + key] = cat_data['y'][:]

    if range_type == '':
        # The mean/std of channel 0 of the base training inputs normalise
        # every split and every feature series below.
        scaler = StandardScaler(mean=data['x_train'][..., 0].mean(), std=data['x_train'][..., 0].std())
        data['scaler'] = scaler

    print(data.keys())
    # Normalise channel 0 of the inputs in place.
    for category in ['train', 'val', 'test']:

        if range_type == "":
            key = category
        else:
            key = category + "_" + str(feature_id)

        data['x_' + key][..., 0] = data['scaler'].transform(data['x_' + key][..., 0])
        print("data['x_' + key]:", 'x_' + key)

# One loader per split, each fed the base series plus the four extra series.
for split, split_batch_size in (('train', batch_size),
                                ('val', valid_batch_size),
                                ('test', test_batch_size)):
    data[split + '_loader'] = DataLoaderM(
        data['x_' + split], data['y_' + split],
        data['x_' + split + '_1'],
        data['x_' + split + '_2'],
        data['x_' + split + '_3'],
        data['x_' + split + '_4'],
        split_batch_size)

sensor_ids, sensor_id_to_ind, adj_mx = load_adj(args.adj_data,args.adjtype)   # adjtype: default='doubletransition'

adj_mx = [torch.tensor(i).to(device) for i in adj_mx]

# Shallow copy: `dataloader` shares the arrays and loaders held by `data`.
dataloader = data.copy()


range_type 
loading: train -> ../Data/RMThsin train.npz
loading: val -> ../Data/RMThsin val.npz
loading: test -> ../Data/RMThsin test.npz
dict_keys(['x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test', 'scaler'])
data['x_' + key]: x_train
data['x_' + key]: x_val
data['x_' + key]: x_test
range_type ma3
loading: train_ma3 -> ../Data/RMThsin train_ma3.npz
loading: val_ma3 -> ../Data/RMThsin val_ma3.npz
loading: test_ma3 -> ../Data/RMThsin test_ma3.npz
dict_keys(['x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test', 'scaler', 'x_train_1', 'y_train_1', 'x_val_1', 'y_val_1', 'x_test_1', 'y_test_1'])
data['x_' + key]: x_train_1
data['x_' + key]: x_val_1
data['x_' + key]: x_test_1
range_type ma6
loading: train_ma6 -> ../Data/RMThsin train_ma6.npz
loading: val_ma6 -> ../Data/RMThsin val_ma6.npz
loading: test_ma6 -> ../Data/RMThsin test_ma6.npz
dict_keys(['x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test', 'scaler', 'x_train_1', 'y_train_1', 'x_val_1', 'y_val_1', 'x_test_1',

### Main

In [None]:
def _to_model_layout(array):
    """Copy a numpy batch to ``device`` as a float tensor with axes 1 and 3 swapped."""
    return torch.Tensor(array).to(device).transpose(1, 3)


def _evaluate_split(engine, loader):
    """Return the batch-averaged (mae, mape, rmse) that ``engine.eval`` reports on ``loader``."""
    maes, mapes, rmses = [], [], []
    for x, y, x_1, x_2, x_3, x_4 in loader.get_iterator():
        inputs = [_to_model_layout(batch) for batch in (x, x_1, x_2, x_3, x_4)]
        real = _to_model_layout(y)[:, 0, :, :]
        # No gradients are needed for evaluation.
        with torch.no_grad():
            metrics = engine.eval(*inputs, real)
        maes.append(metrics[0])
        mapes.append(metrics[1])
        rmses.append(metrics[2])
    return np.mean(maes), np.mean(mapes), np.mean(rmses)


def _predict_split(engine, loader, num_samples):
    """Return the model outputs for every batch of ``loader``, cut to ``num_samples`` rows."""
    engine.model.eval()
    outputs = []
    for x, _, x_1, x_2, x_3, x_4 in loader.get_iterator():
        inputs = [_to_model_layout(batch) for batch in (x, x_1, x_2, x_3, x_4)]
        with torch.no_grad():
            preds = engine.model(*inputs).transpose(1, 3)
        # Drop only axis 1 (assumed to be the singleton axis, matching the
        # unsqueeze(dim=1) the Trainer applies to targets -- TODO confirm).
        # A bare squeeze() would also collapse a batch or node axis of size 1.
        outputs.append(preds.squeeze(1))
    yhat = torch.cat(outputs, dim=0)
    # The loader may pad the last batch; keep only as many rows as there are targets.
    return yhat[:num_samples, ...]


def main(runid):
    """Train one model, reload its best checkpoint and evaluate it.

    :param runid: run index, used only in the checkpoint file name and logs.
    :return: ``(vmae, vmape, vrmse, mae, mape, rmse)`` -- validation metrics
        over the whole validation set, followed by three lists holding the
        test metrics for each of the ``args.seq_out_len`` horizons.
    :raises RuntimeError: if no epoch improved the validation MAE, so no
        checkpoint was written during this run.
    """
    model = GMAT_Net(args.model_type,
                   args.num_nodes,
                   device,
                   predefined_A=adj_mx,
                   dropout=args.dropout,
                   conv_channels=args.conv_channels,
                   residual_channels=args.residual_channels,
                   skip_channels=args.skip_channels,
                   end_channels= args.end_channels,
                   seq_length=args.seq_in_len,
                   in_dim=args.in_dim,
                   out_dim=args.seq_out_len,
                   layers=args.layers,
                   layer_norm_affline=True)

    print(model)
    nParams = sum(p.nelement() for p in model.parameters())  # total parameter count
    print("# model parameters:", nParams)

    engine = Trainer(model, args.learning_rate, args.weight_decay, args.clip, args.step_size1, args.seq_out_len, data['scaler'], device, args.cl)

    # Single checkpoint path for this run; make sure its directory exists
    # before the first torch.save.
    save_path = args.save + "exp" + str(args.expid) + "_" + str(runid) + ".pth"
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    print("start training...",flush=True)
    his_loss = []
    val_time = []
    train_time = []
    minl = float('inf')
    checkpoint_written = False
    start_epoch = 0
    train_loss_epoch = []  # per-epoch mean training MAE
    valid_loss_epoch = []  # per-epoch mean validation MAE

    # NOTE(review): this range covers args.epochs + 1 epochs; confirm that the
    # extra epoch is intended.
    for i in range(start_epoch, start_epoch + args.epochs + 1):
        train_mae = []
        train_mape = []
        train_rmse = []
        t1 = time.time()
        dataloader['train_loader'].shuffle()
        for batch_idx, (x, y, x_1, x_2, x_3, x_4) in enumerate(dataloader['train_loader'].get_iterator()):
            inputs = [_to_model_layout(batch) for batch in (x, x_1, x_2, x_3, x_4)]
            trainy = _to_model_layout(y)

            # mae, mape, rmse
            metrics = engine.train(*inputs, trainy[:, 0, :, :])

            train_mae.append(metrics[0])
            train_mape.append(metrics[1])
            train_rmse.append(metrics[2])

            if batch_idx % args.print_every == 0:
                log = 'Iter: {:03d}, Train MAE: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}'
                # flush belongs to print(), not to str.format()
                print(log.format(batch_idx, train_mae[-1], train_mape[-1], train_rmse[-1]), flush=True)
        t2 = time.time()
        train_time.append(t2 - t1)

        # validation
        s1 = time.time()
        mvalid_mae, mvalid_mape, mvalid_rmse = _evaluate_split(engine, dataloader['val_loader'])
        s2 = time.time()
        log = 'Epoch: {:03d}, Inference Time: {:.4f} secs'
        print(log.format(i, (s2 - s1)))
        val_time.append(s2 - s1)
        mtrain_mae = np.mean(train_mae)
        mtrain_mape = np.mean(train_mape)
        mtrain_rmse = np.mean(train_rmse)

        his_loss.append(mvalid_mae)

        log = 'Epoch: {:03d}, Train MAE: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f},  Valid MAE: {:.4f}, Valid MAPE: {:.4f}, Valid RMSE: {:.4f}, Training Time: {:.4f}/epoch'
        print(log.format(i, mtrain_mae, mtrain_mape, mtrain_rmse, mvalid_mae, mvalid_mape, mvalid_rmse, (t2 - t1)),flush=True)

        # Kept as plain Python floats so the checkpoint stores no NumPy scalar objects.
        train_loss_epoch.append(float(mtrain_mae))
        valid_loss_epoch.append(float(mvalid_mae))

        if mvalid_mae < minl:
            print("### Update Best Model:", save_path, '*LOSS:', mvalid_mae, " ###")
            torch.save({
              'epoch': i,
              'task_level': engine.task_level,
              'model_state_dict': engine.model.state_dict(),
              'optimizer_state_dict': engine.optimizer.state_dict(),
              'loss': float(mvalid_mae),
              'train_loss': train_loss_epoch,
              'valid_loss': valid_loss_epoch
            }, save_path)
            minl = mvalid_mae
            checkpoint_written = True

    print("Average Training Time: {:.4f} secs/epoch".format(np.mean(train_time)))
    print("Average Inference Time: {:.4f} secs".format(np.mean(val_time)))

    bestid = np.argmin(his_loss)

    print("Training finished")
    print("The valid loss on best model is", str(round(his_loss[bestid],4)))

    if not checkpoint_written:
        # Loading here would pick up a file left by an earlier run, or fail
        # with a less helpful error.
        raise RuntimeError("no checkpoint was written during this run: " + save_path)

    print("### loading model is:", save_path, '###')

    checkpoint = torch.load(save_path, map_location=device)
    engine.model.load_state_dict(checkpoint['model_state_dict'])
    engine.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    engine.task_level = checkpoint['task_level']
    #draw_plot_loss(checkpoint)

    ### sanity check of the reloaded model on the validation loader ###
    mvalid_mae, mvalid_mape, mvalid_rmse = _evaluate_split(engine, dataloader['val_loader'])

    print("### 2-The valid loss on loding model is", str(round(mvalid_mae,4)))
    minl = mvalid_mae  # mean validation MAE, comparable with checkpoint['loss']
    print("### minl:",minl, "checkpoint['loss']:",checkpoint['loss'])

    # valid data
    realy = torch.Tensor(dataloader['y_val']).to(device)
    realy = realy.transpose(1, 3)[:, 0, :, :]
    print('#realy', realy.shape)

    yhat = _predict_split(engine, dataloader['val_loader'], realy.size(0))
    print('# cat valid preds', yhat.shape)

    pred = dataloader['scaler'].inverse_transform(yhat)

    vmae, vmape, vrmse = metric(pred, realy)
    print("valid - vmae, vmape, vrmse ", vmae, vmape, vrmse )
    #----------------------------------#
    # test data
    realy = torch.Tensor(dataloader['y_test']).to(device)
    realy = realy.transpose(1, 3)[:, 0, :, :]

    yhat = _predict_split(engine, dataloader['test_loader'], realy.size(0))
    print('# cat test preds', yhat.shape)

    mae = []
    mape = []
    rmse = []

    # One metric triple per forecast horizon (index on the last axis).
    for i in range(args.seq_out_len):
        pred = dataloader['scaler'].inverse_transform(yhat[:, :, i])
        real = realy[:, :, i]

        metrics = metric(pred, real)

        log = 'Evaluate best model on test data for horizon {:d}, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}'
        print(log.format(i + 1, metrics[0], metrics[1], metrics[2]))
        mae.append(metrics[0])
        mape.append(metrics[1])
        rmse.append(metrics[2])

    log = '{:.2f}   {:.2f}    {:.4f}  '
    print("#### Final Results:")
    # Summary horizons 1, 3, 6 and 12; horizons beyond seq_out_len are skipped
    # instead of raising IndexError.
    summaries = [log.format(mae[h], rmse[h], mape[h]) for h in (0, 2, 5, 11) if h < len(mae)]
    print(str(args.expid) + "_" + str(runid) + '    ', *summaries)
    return vmae, vmape, vrmse, mae, mape, rmse

if __name__ == "__main__":
    # Run `args.runs` independent trainings and report the mean/std of their metrics.

    # Validation metrics: one scalar per run.
    vmae = []
    vmape = []
    vrmse = []

    # Test metrics: one per-horizon list per run.
    mae = []
    mape = []
    rmse = []

    for i in range(args.runs):
        vm1, vm2, vm3, m1, m2, m3 = main(i)
        vmae.append(vm1)
        vmape.append(vm2)
        vrmse.append(vm3)

        mae.append(m1)
        mape.append(m2)
        rmse.append(m3)

    # Stack to (runs, horizons) and reduce over the run axis.
    mae = np.array(mae)
    mape = np.array(mape)
    rmse = np.array(rmse)

    amae = np.mean(mae,0)
    amape = np.mean(mape,0)
    armse = np.mean(rmse,0)

    smae = np.std(mae,0)
    s_mape = np.std(mape,0)
    srmse = np.std(rmse,0)

    # Report the configured number of runs rather than a fixed "10".
    print('\n\nResults for {:d} runs\n\n'.format(args.runs))
    # valid data
    print('valid\tMAE\tRMSE\tMAPE')
    log = 'mean:\t{:.4f}\t{:.4f}\t{:.4f}'
    print(log.format(np.mean(vmae),np.mean(vrmse),np.mean(vmape)))
    log = 'std:\t{:.4f}\t{:.4f}\t{:.4f}'
    print(log.format(np.std(vmae),np.std(vrmse),np.std(vmape)))
    print('\n\n')
    # test data
    print('test|horizon\tMAE-mean\tRMSE-mean\tMAPE-mean\tMAE-std\tRMSE-std\tMAPE-std')
    for i in [2,5,11]:
        # Skip horizons that were not forecast (seq_out_len shorter than 12).
        if i >= args.seq_out_len:
            continue
        log = '{:d}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'
        print(log.format(i+1, amae[i], armse[i], amape[i], smae[i], srmse[i], s_mape[i]))
