In [1]:
import os
import numpy as np
import torch
import torch.utils.data
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from scipy.sparse.linalg import eigs

###### Function for Computing Scaled Laplacian

In [2]:
def scaled_Laplacian(W):
    '''
    compute \tilde{L}

    Parameters
    ----------
    W: np.ndarray, shape is (N, N), N is the num of vertices

    Returns
    ----------
    scaled_Laplacian: np.ndarray, shape (N, N)

    '''
    ###Checking if the number of rows and columns of an adjacenecy matrix
    ####are same or not
    assert W.shape[0] == W.shape[1]

    ### First sum each row of the adjacency matrix, and we obtain the degress of each vertex in that row
    ### Secondly, a diagonal matrix has been created with degree of each vertex in the diagonal
    #### Finally, 'D' is the sparse matrix containing only degrees of each vertex
    D = np.diag(np.sum(W, axis=1))

    #### 'l' is the unormalized Laplacian Matrix obtained by subtraction
    ### of Adjacenecy Matrix from the Diagonal Matrix
    L = D - W

    #### First of all from the Laplacian Matrix, with the help of 'eigs' function largest value for eigen value and eigne vector
    ##### has been evaluated
    #### Secondly, eigen value has been only kept
    #### Thirdly, eigen value beign a complex number, only real part is kept and saved as lamda_max
    lambda_max = eigs(L, k=1, which='LR')[0].real

    #### Finally Scaled Laplacian Matrix value is Obtained
    return (2 * L) / lambda_max - np.identity(W.shape[0])


##### Function for Computing Chebyshev Polynomials

In [3]:
def cheb_polynomial(L_tilde, K):
    '''
    compute a list of chebyshev polynomials from T_0 to T_{K-1}

    Parameters
    ----------
    L_tilde: scaled Laplacian, np.ndarray, shape (N, N)

    K: the maximum order of chebyshev polynomials

    Returns
    ----------
    cheb_polynomials: list(np.ndarray), length: K, from T_0 to T_{K-1}

    '''
    ### The value of N is set to number of rows of a Laplacian Matric
    N = L_tilde.shape[0]

    ### cheb_polynimials conatins the zeroth order Chebyshev Polynomial T(0) = np.identity(N)
    ### and the first order Chebyshev Polynomial T(1) = L_tilde.copy()
    cheb_polynomials = [np.identity(N), L_tilde.copy()]

    ### The loop computes the next higher order of Chebyshev Polynomials form order 2 to order k
    ### The next order recurrence Chebysehev Polynomial is given by Tk(x) =2xTk-1(x)-Tk-2(x)
    ## Since the loop starts from order 2, I am experimenting with values K = 3, 4, 5
    for i in range(2, K):
        cheb_polynomials.append(2 * L_tilde * cheb_polynomials[i - 1] - cheb_polynomials[i - 2])

    return cheb_polynomials

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

###### Chebyshev's Convolution Layer

In [5]:
class cheb_conv(nn.Module):
    '''
    K-order chebyshev graph convolution
    '''

    def __init__(self, K, cheb_polynomials, in_channels, out_channels):
        '''
        :param K: int
        :param in_channles: int, num of channels in the input sequence
        :param out_channels: int, num of channels in the output sequence
        '''
        super(cheb_conv, self).__init__()
        self.K = K
        self.cheb_polynomials = cheb_polynomials
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.DEVICE = cheb_polynomials[0].device
        self.Theta = nn.ParameterList([nn.Parameter(torch.FloatTensor(in_channels, out_channels).to(self.DEVICE)) for _ in range(K)])

    def forward(self, x):
        '''
        Chebyshev graph convolution operation
        :param x: (batch_size, N, F_in, T)
        :return: (batch_size, N, F_out, T)
        '''

        batch_size, num_of_vertices, in_channels, num_of_timesteps = x.shape

        outputs = []

        for time_step in range(num_of_timesteps):

            graph_signal = x[:, :, :, time_step]  # (b, N, F_in)

            output = torch.zeros(batch_size, num_of_vertices, self.out_channels).to(self.DEVICE)  # (b, N, F_out)

            for k in range(self.K):

                T_k = self.cheb_polynomials[k]  # (N,N)

                theta_k = self.Theta[k]  # (in_channel, out_channel)

                rhs = graph_signal.permute(0, 2, 1).matmul(T_k).permute(0, 2, 1)

                output = output + rhs.matmul(theta_k)

            outputs.append(output.unsqueeze(-1))

        return F.relu(torch.cat(outputs, dim=-1))


###### Describing a Singular MSTGCN Block

In [6]:
class MSTGCN_block(nn.Module):

    def __init__(self, in_channels, K, nb_chev_filter, nb_time_filter, time_strides, cheb_polynomials):
        super(MSTGCN_block, self).__init__()
        self.cheb_conv = cheb_conv(K, cheb_polynomials, in_channels, nb_chev_filter)
        self.time_conv = nn.Conv2d(nb_chev_filter, nb_time_filter, kernel_size=(1, 3), stride=(1, time_strides), padding=(0, 1))
        self.residual_conv = nn.Conv2d(in_channels, nb_time_filter, kernel_size=(1, 1), stride=(1, time_strides))
        self.ln = nn.LayerNorm(nb_time_filter)

    def forward(self, x):
        '''
        :param x: (batch_size, N, F_in, T)
        :return: (batch_size, N, nb_time_filter, T)
        '''
        # cheb gcn
        spatial_gcn = self.cheb_conv(x)  # (b,N,F,T)

        # convolution along the time axis
        time_conv_output = self.time_conv(spatial_gcn.permute(0, 2, 1, 3))  # (b,F,N,T)

        # residual shortcut
        x_residual = self.residual_conv(x.permute(0, 2, 1, 3))  # (b,F,N,T)

        x_residual = self.ln(F.relu(x_residual + time_conv_output).permute(0, 3, 2, 1)).permute(0, 2, 3, 1)  # (b,N,F,T)

        return x_residual

###### Describing a series of MSTGCN Blocks inside the submodule Class

In [7]:
class MSTGCN_submodule(nn.Module):

    def __init__(self, DEVICE, nb_block, in_channels, K, nb_chev_filter, nb_time_filter, time_strides, cheb_polynomials, num_for_predict, len_input):
        '''
        :param nb_block:
        :param in_channels:
        :param K:
        :param nb_chev_filter:
        :param nb_time_filter:
        :param time_strides:
        :param cheb_polynomials:
        :param nb_predict_step:
        '''

        super(MSTGCN_submodule, self).__init__()

        self.BlockList = nn.ModuleList([MSTGCN_block(in_channels, K, nb_chev_filter, nb_time_filter, time_strides, cheb_polynomials)])

        self.BlockList.extend([MSTGCN_block(nb_time_filter, K, nb_chev_filter, nb_time_filter, 1, cheb_polynomials) for _ in range(nb_block-1)])

        self.final_conv = nn.Conv2d(int(len_input/time_strides), num_for_predict, kernel_size=(1, nb_time_filter))

        self.DEVICE = DEVICE

        self.to(DEVICE)

    def forward(self, x):
        '''
        :param x: (B, N_nodes, F_in, T_in)
        :return: (B, N_nodes, T_out)
        '''
        for block in self.BlockList:
            x = block(x)

        output = self.final_conv(x.permute(0, 3, 1, 2))[:, :, :, -1].permute(0, 2, 1)

        return output


###### Making Model Function

In [8]:
def make_model(DEVICE, nb_block, in_channels, K, nb_chev_filter, nb_time_filter, time_strides, adj_mx, num_for_predict, len_input):
    '''

    :param DEVICE:
    :param nb_block:
    :param in_channels:
    :param K:
    :param nb_chev_filter:
    :param nb_time_filter:
    :param time_strides:
    :param cheb_polynomials:
    :param nb_predict_step:
    :param len_input
    :return:
    '''
    L_tilde = scaled_Laplacian(adj_mx)
    cheb_polynomials = [torch.from_numpy(i).type(torch.FloatTensor).to(DEVICE) for i in cheb_polynomial(L_tilde, K)]
    model = MSTGCN_submodule(DEVICE, nb_block, in_channels, K, nb_chev_filter, nb_time_filter, time_strides, cheb_polynomials, num_for_predict, len_input)

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model

In [9]:
import torch

if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [10]:
!pip install torch-geometric

Collecting torch-geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl.metadata (64 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m56.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.5.3


In [11]:
!pip install tensorboardX

Collecting tensorboardX
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/101.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorboardX
Successfully installed tensorboardX-2.6.2.2


In [12]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
import os
from time import time
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx



import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from scipy.sparse.linalg import eigs


USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda:0')
print("CUDA:", USE_CUDA, DEVICE)

from tensorboardX import SummaryWriter
sw = SummaryWriter(logdir='.', flush_secs=5)

import math
from typing import Optional, List, Union

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F

from torch_geometric.data import Data
from torch_geometric.typing import OptTensor
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.transforms import LaplacianLambdaMax
from torch_geometric.utils import remove_self_loops, add_self_loops, get_laplacian
from torch_geometric.utils import to_dense_adj
#from torch_scatter import scatter_add

CUDA: True cuda:0


###### Loading the Dataset and dividing the Dataset into Training, Testing and Validation

In [34]:
def load_graphdata_channel1(batch_size,shuffle=True, DEVICE = torch.device('cuda:0')):
    '''
    :param DEVICE:
    :param batch_size: int
    :return:
    three DataLoaders, each dataloader contains:
    test_x_tensor: (B, N_nodes, in_feature, T_input)
    test_decoder_input_tensor: (B, N_nodes, T_output)
    test_target_tensor: (B, N_nodes, T_output)
    '''

    #file = os.path.basename(graph_signal_matrix_filename).split('.')[0]
    #filename = os.path.join('../input/processing-traffic-data-for-deep-learning-projects/', file + '_r' + str(num_of_hours) + '_d' + str(num_of_days) + '_w' + str(num_of_weeks)) +'_astcgn'
    #print('load file:', filename)

    file_data = np.load("/content/drive/MyDrive/COMP9491_ASTGCN_Model/Dataset_PEMS07/PEMS04_304r4_304d0_304w0_astcgn.npz")
    train_x = file_data['train_x']  # (10181, 307, 3, 12)
    train_x = train_x[:, :, 0:1, :]
    train_target = file_data['train_target']  # (10181, 307, 12)

    val_x = file_data['val_x']
    val_x = val_x[:, :, 0:1, :]
    val_target = file_data['val_target']

    test_x = file_data['test_x']
    test_x = test_x[:, :, 0:1, :]
    test_target = file_data['test_target']

    mean = file_data['mean'][:, :, 0:1, :]  # (1, 1, 3, 1)
    std = file_data['std'][:, :, 0:1, :]  # (1, 1, 3, 1)

    # ------- train_loader -------
    train_x_tensor = torch.from_numpy(train_x).type(torch.FloatTensor).to(DEVICE)  # (B, N, F, T)
    train_target_tensor = torch.from_numpy(train_target).type(torch.FloatTensor).to(DEVICE)  # (B, N, T)
    train_dataset = torch.utils.data.TensorDataset(train_x_tensor, train_target_tensor)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)

    # ------- val_loader -------
    val_x_tensor = torch.from_numpy(val_x).type(torch.FloatTensor).to(DEVICE)  # (B, N, F, T)
    val_target_tensor = torch.from_numpy(val_target).type(torch.FloatTensor).to(DEVICE)  # (B, N, T)
    val_dataset = torch.utils.data.TensorDataset(val_x_tensor, val_target_tensor)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # ------- test_loader -------
    test_x_tensor = torch.from_numpy(test_x).type(torch.FloatTensor).to(DEVICE)  # (B, N, F, T)
    test_target_tensor = torch.from_numpy(test_target).type(torch.FloatTensor).to(DEVICE)  # (B, N, T)
    test_dataset = torch.utils.data.TensorDataset(test_x_tensor, test_target_tensor)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # print
    print('train:', train_x_tensor.size(), train_target_tensor.size())
    print('val:', val_x_tensor.size(), val_target_tensor.size())
    print('test:', test_x_tensor.size(), test_target_tensor.size())

    return train_loader, train_target_tensor, val_loader, val_target_tensor, test_loader, test_target_tensor, mean, std

###### Loading the Data and receiving the dimensions of training, testing and validation tensors

In [35]:
batch_size = 64

train_loader, train_target_tensor, val_loader, val_target_tensor, test_loader, test_target_tensor, _mean, _std = load_graphdata_channel1(batch_size)

train: torch.Size([10186, 307, 1, 12]) torch.Size([10186, 307, 3])
val: torch.Size([3396, 307, 1, 12]) torch.Size([3396, 307, 3])
test: torch.Size([3396, 307, 1, 12]) torch.Size([3396, 307, 3])


###### Function To Obtain Adjacenecy Matrix from the Graph Data

In [36]:
def get_adjacency_matrix(distance_df_filename, num_of_vertices, id_filename=None):
    '''
    Parameters
    ----------
    distance_df_filename: str, path of the csv file contains edges information
    num_of_vertices: int, the number of vertices
    Returns
    ----------
    A: np.ndarray, adjacency matrix
    '''
    if 'npy' in distance_df_filename:  # false
        adj_mx = np.load(distance_df_filename)
        return adj_mx, None
    else:

        #--------------------------------------------- read from here
        import csv
        A = np.zeros((int(num_of_vertices), int(num_of_vertices)),dtype=np.float32)
        distaneA = np.zeros((int(num_of_vertices), int(num_of_vertices)), dtype=np.float32)

        #------------ Ignore
        if id_filename: # false
            with open(id_filename, 'r') as f:
                id_dict = {int(i): idx for idx, i in enumerate(f.read().strip().split('\n'))}

            with open(distance_df_filename, 'r') as f:
                f.readline()
                reader = csv.reader(f)
                for row in reader:
                    if len(row) != 3:
                        continue
                    i, j, distance = int(row[0]), int(row[1]), float(row[2])
                    A[id_dict[i], id_dict[j]] = 1
                    distaneA[id_dict[i], id_dict[j]] = distance
            return A, distaneA

        else:
         #-------------Continue reading
            with open(distance_df_filename, 'r') as f:
                f.readline()
                reader = csv.reader(f)
                for row in reader:
                    if len(row) != 3:
                        continue
                    i, j, distance = int(row[0]), int(row[1]), float(row[2])
                    A[i, j] = 1
                    distaneA[i, j] = distance
            return A, distaneA

In [37]:
id_filename = None
adj_filename = r'/content/drive/MyDrive/COMP9491_ASTGCN_Model/PEMS04_Recent_K_3_4_5/PEMS04.csv'
num_of_vertices = 307
adj_mx, distance_mx = get_adjacency_matrix(adj_filename, num_of_vertices, id_filename)

In [38]:
def masked_mape_np(y_true, y_pred, null_val=np.nan):
    with np.errstate(divide='ignore', invalid='ignore'):
        if np.isnan(null_val):
            mask = ~np.isnan(y_true)
        else:
            mask = np.not_equal(y_true, null_val)
        mask = mask.astype('float32')
        mask /= np.mean(mask)
        mape = np.abs(np.divide(np.subtract(y_pred, y_true).astype('float32'),
                      y_true))
        mape = np.nan_to_num(mask * mape)
        return np.mean(mape)

In [39]:
def masked_mse(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels != null_val)
    mask = mask.float()
    # print(mask.sum())
    # print(mask.shape[0]*mask.shape[1]*mask.shape[2])
    mask /= torch.mean((mask))
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = (preds - labels)**2
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)

In [40]:
def masked_rmse(preds, labels, null_val=np.nan):
    return torch.sqrt(masked_mse(preds=preds, labels=labels,
                                 null_val=null_val))

In [41]:
def masked_mae(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        ###creates a mask where values are present are set to True
        #### where missing values are present, are set to False
        mask = ~torch.isnan(labels)
    else:
        ### if there is no missing value, create a mask where values are False
        mask = (labels != null_val)
    mask = mask.float()

    ##normalizing the weight of the mask, by dividing with mean of mask values
    mask /= torch.mean((mask))

    ##Replaces any Missing value in Mask with Zero
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels)
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)

    ##Computing the meas value of mean absolute error
    return torch.mean(loss)

In [42]:
def masked_mae_test(y_true, y_pred, null_val=np.nan):
    with np.errstate(divide='ignore', invalid='ignore'):
        if np.isnan(null_val):
            mask = ~np.isnan(y_true)
        else:
            mask = np.not_equal(y_true, null_val)
        mask = mask.astype('float32')
        mask /= np.mean(mask)
        mae = np.abs(np.subtract(y_pred, y_true).astype('float32'),
                      )
        mae = np.nan_to_num(mask * mae)
        return np.mean(mae)

In [43]:
def masked_rmse_test(y_true, y_pred, null_val=np.nan):
    with np.errstate(divide='ignore', invalid='ignore'):
        if np.isnan(null_val):
            mask = ~np.isnan(y_true)
        else:
            # null_val=null_val
            mask = np.not_equal(y_true, null_val)
        mask = mask.astype('float32')
        mask /= np.mean(mask)
        mse = ((y_pred- y_true)**2)
        mse = np.nan_to_num(mask * mse)
        return np.sqrt(np.mean(mse))

In [44]:
masked_flag=0
criterion = nn.L1Loss().to(DEVICE)
criterion_masked = masked_mae
loss_function = 'mse'

metric_method = 'unmask'
missing_value=0.0


if loss_function=='masked_mse':
    criterion_masked = masked_mse         #nn.MSELoss().to(DEVICE)
    masked_flag=1
elif loss_function=='masked_mae':
    criterion_masked = masked_mae
    masked_flag = 1
elif loss_function == 'mae':
    criterion = nn.L1Loss().to(DEVICE)
    ###indicating that standard loss function will be used
    masked_flag = 0
elif loss_function == 'rmse':
    criterion = nn.MSELoss().to(DEVICE)
    masked_flag= 0

In [45]:
model_hour_k4 = make_model(DEVICE, 2, 1, 4,
                        64, 64, 1, adj_mx,
                        3, 12)

In [46]:
print(model_hour_k4)

MSTGCN_submodule(
  (BlockList): ModuleList(
    (0): MSTGCN_block(
      (cheb_conv): cheb_conv(
        (Theta): ParameterList(
            (0): Parameter containing: [torch.float32 of size 1x64 (cuda:0)]
            (1): Parameter containing: [torch.float32 of size 1x64 (cuda:0)]
            (2): Parameter containing: [torch.float32 of size 1x64 (cuda:0)]
            (3): Parameter containing: [torch.float32 of size 1x64 (cuda:0)]
        )
      )
      (time_conv): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
      (residual_conv): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1))
      (ln): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    )
    (1): MSTGCN_block(
      (cheb_conv): cheb_conv(
        (Theta): ParameterList(
            (0): Parameter containing: [torch.float32 of size 64x64 (cuda:0)]
            (1): Parameter containing: [torch.float32 of size 64x64 (cuda:0)]
            (2): Parameter containing: [torch.float32 of size 64x64 (cuda:0

###### Defining Optimization and Config for Model, K = 4

In [47]:
learning_rate = 0.001
optimizer = optim.Adam(model_hour_k4.parameters(), lr=learning_rate)

In [48]:
print('Net\'s state_dict:')
total_param = 0
for param_tensor in model_hour_k4.state_dict():
    print(param_tensor, '\t', model_hour_k4.state_dict()[param_tensor].size(), '\t', model_hour_k4.state_dict()[param_tensor].device)
    total_param += np.prod(model_hour_k4.state_dict()[param_tensor].size())
print('Net\'s total params:', total_param)
#--------------------------------------------------
print('Optimizer\'s state_dict:')
for var_name in optimizer.state_dict():
    print(var_name, '\t', optimizer.state_dict()[var_name])

Net's state_dict:
BlockList.0.cheb_conv.Theta.0 	 torch.Size([1, 64]) 	 cuda:0
BlockList.0.cheb_conv.Theta.1 	 torch.Size([1, 64]) 	 cuda:0
BlockList.0.cheb_conv.Theta.2 	 torch.Size([1, 64]) 	 cuda:0
BlockList.0.cheb_conv.Theta.3 	 torch.Size([1, 64]) 	 cuda:0
BlockList.0.time_conv.weight 	 torch.Size([64, 64, 1, 3]) 	 cuda:0
BlockList.0.time_conv.bias 	 torch.Size([64]) 	 cuda:0
BlockList.0.residual_conv.weight 	 torch.Size([64, 1, 1, 1]) 	 cuda:0
BlockList.0.residual_conv.bias 	 torch.Size([64]) 	 cuda:0
BlockList.0.ln.weight 	 torch.Size([64]) 	 cuda:0
BlockList.0.ln.bias 	 torch.Size([64]) 	 cuda:0
BlockList.1.cheb_conv.Theta.0 	 torch.Size([64, 64]) 	 cuda:0
BlockList.1.cheb_conv.Theta.1 	 torch.Size([64, 64]) 	 cuda:0
BlockList.1.cheb_conv.Theta.2 	 torch.Size([64, 64]) 	 cuda:0
BlockList.1.cheb_conv.Theta.3 	 torch.Size([64, 64]) 	 cuda:0
BlockList.1.time_conv.weight 	 torch.Size([64, 64, 1, 3]) 	 cuda:0
BlockList.1.time_conv.bias 	 torch.Size([64]) 	 cuda:0
BlockList.1.residua

In [49]:
from tensorboardX import SummaryWriter
sw = SummaryWriter(logdir='.', flush_secs=5)

In [50]:
def compute_val_loss_mstgcn(net, val_loader, criterion,  masked_flag,missing_value,sw, epoch, limit=None):
    '''
    for rnn, compute mean loss on validation set
    :param net: model
    :param val_loader: torch.utils.data.utils.DataLoader
    :param criterion: torch.nn.MSELoss
    :param sw: tensorboardX.SummaryWriter
    :param global_step: int, current global_step
    :param limit: int,
    :return: val_loss
    '''

    net.train(False)  # ensure dropout layers are in evaluation mode

    with torch.no_grad():

        val_loader_length = len(val_loader)  # nb of batch

        tmp = []  # batch loss

        for batch_index, batch_data in enumerate(val_loader):
            encoder_inputs, labels = batch_data
            outputs = net(encoder_inputs)
            if masked_flag:
                loss = criterion(outputs, labels, missing_value)
            else:
                loss = criterion(outputs, labels)

            tmp.append(loss.item())
            if batch_index % 100 == 0:
                print('validation batch %s / %s, loss: %.2f' % (batch_index + 1, val_loader_length, loss.item()))
            if (limit is not None) and batch_index >= limit:
                break

        validation_loss = sum(tmp) / len(tmp)
        sw.add_scalar('validation_loss', validation_loss, epoch)
    return validation_loss


##### Training for when K =4

In [51]:
global_step_k4 = 0
best_epoch_k4 = 0
best_val_loss_k4 = np.inf
start_time= time()

In [52]:
validation_loss_list_k4 = []
training_loss_list_k4 = []
final_train_loss_k4 = []

# train model
#masked_flag = 0
for epoch in range(20):

    params_filename = os.path.join('./', '3ptsk4_04_MSTGCNepoch_%s.params' % epoch)
    masked_flag = 1

    if masked_flag:
        val_loss = compute_val_loss_mstgcn(model_hour_k4, val_loader, criterion_masked, masked_flag,missing_value,sw, epoch)
    else:
        val_loss = compute_val_loss_mstgcn(model_hour_k4, val_loader, criterion, masked_flag, missing_value, sw, epoch)

    ###appending the validation Loss in the List
    validation_loss_list_k4.append(val_loss)
    if val_loss < best_val_loss_k4:
        best_val_loss_k4 = val_loss
        best_epoch_k4 = epoch
        torch.save(model_hour_k4.state_dict(), params_filename)
        print('save parameters to file: %s' % params_filename)

    model_hour_k4.train()  # ensure dropout layers are in train mode

    for batch_index, batch_data in enumerate(train_loader):

        encoder_inputs, labels = batch_data

        optimizer.zero_grad()

        outputs = model_hour_k4(encoder_inputs)

        if masked_flag:
            loss = criterion_masked(outputs, labels,missing_value)
        else :
            loss = criterion(outputs, labels)


        loss.backward()

        optimizer.step()

        training_loss_k4 = loss.item()

        global_step_k4 += 1

        training_loss_list_k4.append(training_loss_k4)

        sw.add_scalar('training_loss', training_loss_k4, global_step_k4)
        #globalstep_training_loss_list.append(training_loss)

        if global_step_k4 % 200 == 0:

            print('global step: %s, training loss: %.2f, time: %.2fs' % (global_step_k4, training_loss_k4, time() - start_time))

    ##Estimating the Total Training Batch Loss
    train_batch_loss_k4 = sum(training_loss_list_k4)/len(training_loss_list_k4)
    final_train_loss_k4.append(train_batch_loss_k4)

print('best epoch:', best_epoch_k4)

validation batch 1 / 54, loss: 289.49
save parameters to file: ./3ptsk4_04_MSTGCNepoch_0.params
validation batch 1 / 54, loss: 136.29
save parameters to file: ./3ptsk4_04_MSTGCNepoch_1.params
global step: 200, training loss: 71.57, time: 50.47s
validation batch 1 / 54, loss: 62.67
save parameters to file: ./3ptsk4_04_MSTGCNepoch_2.params
global step: 400, training loss: 29.79, time: 89.85s
validation batch 1 / 54, loss: 33.05
save parameters to file: ./3ptsk4_04_MSTGCNepoch_3.params
global step: 600, training loss: 23.54, time: 129.29s
validation batch 1 / 54, loss: 28.89
save parameters to file: ./3ptsk4_04_MSTGCNepoch_4.params
global step: 800, training loss: 13.01, time: 168.51s
validation batch 1 / 54, loss: 28.64
save parameters to file: ./3ptsk4_04_MSTGCNepoch_5.params
validation batch 1 / 54, loss: 28.12
save parameters to file: ./3ptsk4_04_MSTGCNepoch_6.params
global step: 1000, training loss: 21.85, time: 211.42s
validation batch 1 / 54, loss: 26.01
save parameters to file: ./

In [53]:
len(validation_loss_list_k4)

20

In [54]:
len(final_train_loss_k4)

20

In [55]:
validation_loss_list_k4

[219.00568072001138,
 102.004431159408,
 50.493027457484494,
 28.963898340861004,
 24.38037074053729,
 23.966492776517516,
 22.313100073072647,
 21.316636156152796,
 21.081693649291992,
 21.166286521487766,
 20.85486525076407,
 20.768409923270898,
 21.09506423385055,
 20.421952865741872,
 21.119120562518084,
 20.42894600055836,
 20.284611331091988,
 20.286488903893364,
 20.218266204551416,
 20.673595781679506]

In [56]:
final_train_loss_k4

[151.376309299469,
 106.79676636457444,
 81.84096157153448,
 67.46072686612607,
 58.40914694905281,
 52.2335536390543,
 47.75826510105814,
 44.36707843914628,
 41.707964926958084,
 39.57351470887661,
 37.81120682575486,
 36.33569546093543,
 35.081915702728125,
 34.000387117266655,
 33.069047855933505,
 32.24879502095282,
 31.523286502150928,
 30.873512607481743,
 30.293754205264545,
 29.7702817222476]

In [57]:
def re_normalization(x, mean, std):
    x = x * std + mean
    return x


def max_min_normalization(x, _max, _min):
    x = 1. * (x - _min)/(_max - _min)
    x = x * 2. - 1.
    return x


def re_max_min_normalization(x, _max, _min):
    x = (x + 1.) / 2.
    x = 1. * x * (_max - _min) + _min
    return x


In [58]:
def predict_and_save_results_astgcn(net, data_loader, data_target_tensor, global_step, metric_method,_mean, _std, params_path, type):
    '''

    :param net: nn.Module
    :param data_loader: torch.utils.data.utils.DataLoader
    :param data_target_tensor: tensor
    :param epoch: int
    :param _mean: (1, 1, 3, 1)
    :param _std: (1, 1, 3, 1)
    :param params_path: the path for saving the results
    :return:
    '''
    net.train(False)  # ensure dropout layers are in test mode

    with torch.no_grad():

        data_target_tensor = data_target_tensor.cpu().numpy()

        loader_length = len(data_loader)  # nb of batch

        prediction = []  #storing the batch output

        input = []  #storing the batch input

        for batch_index, batch_data in enumerate(data_loader):

            encoder_inputs, labels = batch_data

            ##Taking only the single input feature
            input.append(encoder_inputs[:, :, 0:1].cpu().numpy())  # (batch, T', 1)

            outputs = net(encoder_inputs)

            prediction.append(outputs.detach().cpu().numpy())

            if batch_index % 100 == 0:
                print('predicting data set batch %s / %s' % (batch_index + 1, loader_length))

        input = np.concatenate(input, 0)

        input = re_normalization(input, _mean, _std)

        prediction = np.concatenate(prediction, 0)  # (batch, T', 1)

        print('input:', input.shape)
        print('prediction:', prediction.shape)
        print('data_target_tensor:', data_target_tensor.shape)
        output_filename = os.path.join(params_path, '3pts04recent_MST_k4output_epoch_%s_%s' % (global_step, type))
        np.savez(output_filename, input=input, prediction=prediction, data_target_tensor=data_target_tensor)


        excel_list = []

        ###prediction length has the shape of feature of a certain node
        prediction_length = prediction.shape[2]

        for i in range(prediction_length):

            ### ensuring number of data samples in target sensor is same as that of prediction tensor
            assert data_target_tensor.shape[0] == prediction.shape[0]
            print('current epoch: %s, predict %s points' % (global_step, i))
            if metric_method == 'mask':

                ## calculating the value of the feature prediction of each node for T timesteps
                mae = masked_mae_test(data_target_tensor[:, :, i], prediction[:, :, i],0.0)
                rmse = masked_rmse_test(data_target_tensor[:, :, i], prediction[:, :, i],0.0)
                mape = masked_mape_np(data_target_tensor[:, :, i], prediction[:, :, i], 0)
            else :
                mae = mean_absolute_error(data_target_tensor[:, :, i], prediction[:, :, i])
                rmse = mean_squared_error(data_target_tensor[:, :, i], prediction[:, :, i]) ** 0.5
                mape = masked_mape_np(data_target_tensor[:, :, i], prediction[:, :, i], 0)
            print('MAE: %.2f' % (mae))
            print('RMSE: %.2f' % (rmse))
            print('MAPE: %.2f' % (mape))
            excel_list.extend([mae, rmse, mape])

        # print overall results
        if metric_method == 'mask':
            mae = masked_mae_test(data_target_tensor.reshape(-1, 1), prediction.reshape(-1, 1), 0.0)
            rmse = masked_rmse_test(data_target_tensor.reshape(-1, 1), prediction.reshape(-1, 1), 0.0)
            mape = masked_mape_np(data_target_tensor.reshape(-1, 1), prediction.reshape(-1, 1), 0)
        else :
            mae = mean_absolute_error(data_target_tensor.reshape(-1, 1), prediction.reshape(-1, 1))
            rmse = mean_squared_error(data_target_tensor.reshape(-1, 1), prediction.reshape(-1, 1)) ** 0.5
            mape = masked_mape_np(data_target_tensor.reshape(-1, 1), prediction.reshape(-1, 1), 0)
        print('all MAE: %.2f' % (mae))
        print('all RMSE: %.2f' % (rmse))
        print('all MAPE: %.2f' % (mape))
        excel_list.extend([mae, rmse, mape])
        print(excel_list)

In [59]:
def predict_main(global_step, data_loader, data_target_tensor,metric_method, _mean, _std, type):
    '''

    :param global_step: int
    :param data_loader: torch.utils.data.utils.DataLoader
    :param data_target_tensor: tensor
    :param mean: (1, 1, 3, 1)
    :param std: (1, 1, 3, 1)
    :param type: string
    :return:
    '''
    params_path = '/content/drive/MyDrive/COMP9491_ASTGCN_Model/MSTGCN_Model_Hourly_07/For_PEMS04/'
    params_filename = os.path.join(params_path, '3ptsk4_04_MSTGCNepoch_%s.params' % global_step)
    print('load weight from:', params_filename)

    model_hour_k4.load_state_dict(torch.load(params_filename))

    predict_and_save_results_astgcn(model_hour_k4, data_loader, data_target_tensor, global_step, metric_method,_mean, _std, params_path, type)

###### Prediction for the Recent Model, when K = 4

In [60]:
predict_main(best_epoch_k4,test_loader, test_target_tensor,'unmask', _mean, _std, 'test' )

load weight from: /content/drive/MyDrive/COMP9491_ASTGCN_Model/MSTGCN_Model_Hourly_07/For_PEMS04/3ptsk4_04_MSTGCNepoch_18.params
predicting data set batch 1 / 54
input: (3396, 307, 1, 12)
prediction: (3396, 307, 3)
data_target_tensor: (3396, 307, 3)
current epoch: 18, predict 0 points
MAE: 18.10
RMSE: 28.91
MAPE: 0.13
current epoch: 18, predict 1 points
MAE: 19.81
RMSE: 31.27
MAPE: 0.14
current epoch: 18, predict 2 points
MAE: 21.26
RMSE: 33.27
MAPE: 0.15
all MAE: 19.72
all RMSE: 31.20
all MAPE: 0.14
[18.097017, 28.905737959991935, 0.12601194, 19.805262, 31.268818552543284, 0.13754585, 21.25624, 33.27334303080038, 0.14808065, 19.719513, 31.200407915627682, 0.13721284]
