## model

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import sys
import numpy as np
import pandas as pd

class nconv(nn.Module):
    def __init__(self):
        super(nconv,self).__init__()

    def forward(self,x, A):
        x = torch.einsum('ncvl,vw->ncwl',(x,A))
        return x.contiguous()

class linear(nn.Module):
    def __init__(self,c_in,c_out):
        super(linear,self).__init__()
        self.mlp = torch.nn.Conv2d(c_in, c_out, kernel_size=(1, 1), padding=(0,0), stride=(1,1), bias=True)

    def forward(self,x):
        return self.mlp(x)

class gcn(nn.Module):
    def __init__(self,c_in,c_out,dropout,support_len=3,order=2):
        super(gcn,self).__init__()
        self.nconv = nconv()
        c_in = (order*support_len+1)*c_in
        self.mlp = linear(c_in,c_out)
        self.dropout = dropout
        self.order = order

    def forward(self,x,support):
        out = [x]
        for a in support:
            x1 = self.nconv(x,a)
            out.append(x1)
            for k in range(2, self.order + 1):
                x2 = self.nconv(x1,a)
                out.append(x2)
                x1 = x2

        h = torch.cat(out,dim=1)
        h = self.mlp(h)
        h = F.dropout(h, self.dropout, training=self.training)
        return h


class gwnet(nn.Module):
    def __init__(self, device, num_nodes, dropout=0.3, supports=None, gcn_bool=True, addaptadj=True, aptinit=None, in_dim=2,
    out_dim=12,residual_channels=32,dilation_channels=32,skip_channels=256,end_channels=512,kernel_size=2,blocks=4,layers=2):
        super(gwnet, self).__init__()
        self.dropout = dropout
        self.blocks = blocks
        self.layers = layers
        self.gcn_bool = gcn_bool
        self.addaptadj = addaptadj
        self.num_nodes = num_nodes

        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()
        self.residual_convs = nn.ModuleList()
        self.skip_convs = nn.ModuleList()
        self.bn = nn.ModuleList()
        self.gconv = nn.ModuleList()

        self.start_conv = nn.Conv2d(in_channels=in_dim,
                                    out_channels=residual_channels,
                                    kernel_size=(1,1))
        self.supports = supports

        receptive_field = 1

        self.supports_len = 0
        if supports is not None:
            self.supports_len += len(supports)

        if gcn_bool and addaptadj:
            if aptinit is None:
                if supports is None:
                    self.supports = []
                self.nodevec1 = nn.Parameter(torch.randn(num_nodes, 10).to(device), requires_grad=True).to(device)
                self.nodevec2 = nn.Parameter(torch.randn(10, num_nodes).to(device), requires_grad=True).to(device)
                self.supports_len +=1
            else:
                if supports is None:
                    self.supports = []
                m, p, n = torch.svd(aptinit)  # aptinit & m & n: [node, node], p: [node]
                initemb1 = torch.mm(m[:, :10], torch.diag(p[:10] ** 0.5)) # 10是为了与随机embedding为10保持一致
                initemb2 = torch.mm(torch.diag(p[:10] ** 0.5), n[:, :10].t())
                self.nodevec1 = nn.Parameter(initemb1, requires_grad=True).to(device)
                self.nodevec2 = nn.Parameter(initemb2, requires_grad=True).to(device)
                self.supports_len += 1


        for b in range(blocks):
            additional_scope = kernel_size - 1
            new_dilation = 1
            for i in range(layers):
                # dilated convolutions
                self.filter_convs.append(nn.Conv2d(in_channels=residual_channels,
                                                   out_channels=dilation_channels,
                                                   kernel_size=(1,kernel_size),dilation=new_dilation))

                self.gate_convs.append(nn.Conv1d(in_channels=residual_channels,
                                                 out_channels=dilation_channels,
                                                 kernel_size=(1, kernel_size), dilation=new_dilation))

                # 1x1 convolution for residual connection
                self.residual_convs.append(nn.Conv1d(in_channels=dilation_channels,
                                                     out_channels=residual_channels,
                                                     kernel_size=(1, 1)))

                # 1x1 convolution for skip connection
                self.skip_convs.append(nn.Conv1d(in_channels=dilation_channels,
                                                 out_channels=skip_channels,
                                                 kernel_size=(1, 1)))
                self.bn.append(nn.BatchNorm2d(residual_channels))
                new_dilation *=2
                receptive_field += additional_scope
                additional_scope *= 2
                if self.gcn_bool:
                    self.gconv.append(gcn(dilation_channels,residual_channels,dropout,support_len=self.supports_len))



        self.end_conv_1 = nn.Conv2d(in_channels=skip_channels,
                                  out_channels=end_channels,
                                  kernel_size=(1,1),
                                  bias=True)

        self.end_conv_2 = nn.Conv2d(in_channels=end_channels,
                                    out_channels=out_dim,
                                    kernel_size=(1,1),
                                    bias=True)

        self.receptive_field = receptive_field



    def forward(self, input):
        # input: [bs, 2, node, 12]
        in_len = input.size(3) 
        if in_len<self.receptive_field:  # 13
            x = nn.functional.pad(input,(self.receptive_field-in_len,0,0,0)) # [bs, 2, node, 13]
        else:
            x = input
        x = self.start_conv(x) # [bs, 2, node, 13]
        skip = 0

        # calculate the current adaptive adj matrix once per iteration
        new_supports = None
        if self.gcn_bool and self.addaptadj and self.supports is not None:
            adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1)
            new_supports = self.supports + [adp]

        # WaveNet layers
        for i in range(self.blocks * self.layers):

            #            |----------------------------------------|     *residual*
            #            |                                        |
            #            |    |-- conv -- tanh --|                |
            # -> dilate -|----|                  * ----|-- 1x1 -- + -->	*input*
            #                 |-- conv -- sigm --|     |
            #                                         1x1
            #                                          |
            # ---------------------------------------> + ------------->	*skip*

            #(dilation, init_dilation) = self.dilations[i]

            #residual = dilation_func(x, dilation, init_dilation, i)
            residual = x
            # dilated convolution
            filter = self.filter_convs[i](residual)
            filter = torch.tanh(filter)
            gate = self.gate_convs[i](residual)
            gate = torch.sigmoid(gate)
            x = filter * gate

            # parametrized skip connection

            s = x
            s = self.skip_convs[i](s)
            try:
                skip = skip[:, :, :,  -s.size(3):] # 当前步的skip加上上一步的skip
            except:
                skip = 0
            skip = s + skip


            if self.gcn_bool and self.supports is not None:
                if self.addaptadj:
                    x = self.gconv[i](x, new_supports)
                else:
                    x = self.gconv[i](x,self.supports)
            else:
                x = self.residual_convs[i](x)

            x = x + residual[:, :, :, -x.size(3):] # 当前步的X加上上一步的x


            x = self.bn[i](x)
        
        x = F.relu(skip) # [bs, skip_channels, node, 1]
        x = F.relu(self.end_conv_1(x))
        x = self.end_conv_2(x)
        return x.view(-1, self.num_nodes)

## utils

In [9]:
import pickle
import numpy as np
import os
import scipy.sparse as sp
import torch
from scipy.sparse import linalg


class DataLoader(object):
    def __init__(self, xs, ys, batch_size, pad_with_last_sample=True):
        """
        :param xs:
        :param ys:
        :param batch_size:
        :param pad_with_last_sample: pad with the last sample to make number of samples divisible to batch_size.
        """
        self.batch_size = batch_size
        self.current_ind = 0
        if pad_with_last_sample:
            num_padding = (batch_size - (len(xs) % batch_size)) % batch_size
            x_padding = np.repeat(xs[-1:], num_padding, axis=0)
            y_padding = np.repeat(ys[-1:], num_padding, axis=0)
            xs = np.concatenate([xs, x_padding], axis=0)
            ys = np.concatenate([ys, y_padding], axis=0)
        self.size = len(xs)
        self.num_batch = int(self.size // self.batch_size)
        self.xs = xs
        self.ys = ys

    def shuffle(self):
        permutation = np.random.permutation(self.size)
        xs, ys = self.xs[permutation], self.ys[permutation]
        self.xs = xs
        self.ys = ys

    def get_iterator(self):
        self.current_ind = 0

        def _wrapper():
            while self.current_ind < self.num_batch:
                start_ind = self.batch_size * self.current_ind
                end_ind = min(self.size, self.batch_size * (self.current_ind + 1))
                x_i = self.xs[start_ind: end_ind, ...]
                y_i = self.ys[start_ind: end_ind, ...]
                yield (x_i, y_i)
                self.current_ind += 1

        return _wrapper()

class StandardScaler():
    """
    Standard the input
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        return (data * self.std) + self.mean



def sym_adj(adj):
    """Symmetrically normalize adjacency matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).astype(np.float32).todense()

def asym_adj(adj):
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1)).flatten()
    d_inv = np.power(rowsum, -1).flatten()
    d_inv[np.isinf(d_inv)] = 0.
    d_mat= sp.diags(d_inv)
    return d_mat.dot(adj).astype(np.float32).todense()

def calculate_normalized_laplacian(adj):
    """
    # L = D^-1/2 (D-A) D^-1/2 = I - D^-1/2 A D^-1/2
    # D = diag(A 1)
    :param adj:
    :return:
    """
    adj = sp.coo_matrix(adj)
    d = np.array(adj.sum(1))
    d_inv_sqrt = np.power(d, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    normalized_laplacian = sp.eye(adj.shape[0]) - adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()
    return normalized_laplacian

def calculate_scaled_laplacian(adj_mx, lambda_max=2, undirected=True):
    if undirected:
        adj_mx = np.maximum.reduce([adj_mx, adj_mx.T])
    L = calculate_normalized_laplacian(adj_mx)
    if lambda_max is None:
        lambda_max, _ = linalg.eigsh(L, 1, which='LM')
        lambda_max = lambda_max[0]
    L = sp.csr_matrix(L)
    M, _ = L.shape
    I = sp.identity(M, format='csr', dtype=L.dtype)
    L = (2 / lambda_max * L) - I
    return L.astype(np.float32).todense()

def masked_mse(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels!=null_val)
    mask = mask.float()
    mask /= torch.mean((mask))
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = (preds-labels)**2
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)

def masked_rmse(preds, labels, null_val=np.nan):
    return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val))


def masked_mae(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels!=null_val)
    mask = mask.float()
    mask /=  torch.mean((mask))
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds-labels)
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def masked_mape(preds, labels, null_val=np.nan):
    if np.isnan(null_val):
        mask = ~torch.isnan(labels)
    else:
        mask = (labels!=null_val)
    mask = mask.float()
    mask /=  torch.mean((mask))
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds-labels)/labels
    loss = loss * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)


def metric(pred, real):
    mae = masked_mae(pred,real,0.0).item()
    mape = masked_mape(pred,real,0.0).item()
    rmse = masked_rmse(pred,real,0.0).item()
    return mae,mape,rmse


In [10]:
import torch.optim as optim

class trainer():
    def __init__(self, scaler, in_dim, seq_length, num_nodes, nhid , dropout, lrate, wdecay, device, supports, gcn_bool, addaptadj, aptinit):
        self.model = gwnet(device, num_nodes, dropout, supports=supports, gcn_bool=gcn_bool, addaptadj=addaptadj, 
                           aptinit=aptinit, in_dim=in_dim, out_dim=1, residual_channels=nhid,
                           dilation_channels=nhid, skip_channels=nhid * 8, end_channels=nhid * 16,
                           kernel_size=4, blocks=8, layers=2)
        self.model.to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay)
        self.loss = masked_mae
        self.scaler = scaler
        self.clip = 5

    def train(self, input, real_val):
        self.model.train()
        self.optimizer.zero_grad()
        input = nn.functional.pad(input,(1,0,0,0))
        output = self.model(input)
        #output = output.transpose(1,3)
        #output = [batch_size,12,num_nodes,1]
        #eal = torch.unsqueeze(real_val,dim=1)
        predict = self.scaler.inverse_transform(output)

        loss = self.loss(predict, real_val, 0.0)
        loss.backward()
        if self.clip is not None:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        self.optimizer.step()
        mape = masked_mape(predict,real_val,10.0).item()
        rmse = masked_rmse(predict,real_val,0.0).item()
        return loss.item(),mape,rmse

    def eval(self, input, real_val):
        self.model.eval()
        input = nn.functional.pad(input,(1,0,0,0))
        output = self.model(input)
        #output = output.transpose(1,3)
        #output = [batch_size,12,num_nodes,1]
        #real = torch.unsqueeze(real_val,dim=1)
        predict = self.scaler.inverse_transform(output)
        loss = self.loss(predict, real_val, 0.0)
        mape = masked_mape(predict,real_val,0.0).item()
        rmse = masked_rmse(predict,real_val,0.0).item()
        return loss.item(),mape,rmse


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

def mean_absolute_percentage_error(y_true, y_pred): 
    y_true1, y_pred1 = np.array(y_true).flatten(), np.array(y_pred).flatten()
    y_true1, y_pred1 = y_true1[np.where(y_true1 > 10)[0]], y_pred1[np.where(y_true1 > 10)[0]]
    return np.mean(np.abs((y_true1 - y_pred1) / y_true1)) * 100

def compute_me(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mape = mean_absolute_percentage_error(y_true[np.where(y_true > 10)[0]], y_pred[np.where(y_true > 10)[0]])
    return mae, rmse, mape

## TaxiSZ

In [11]:
import time 
import numpy

X_train_sz = np.load('./data/taxi_sz/X_train20.npy').astype('float32')
X_val_sz = np.load('./data/taxi_sz/X_val20.npy').astype('float32')
X_test_sz = np.load('./data/taxi_sz/X_test20.npy').astype('float32')
y_train_sz = np.load('./data/taxi_sz/y_train20.npy').astype('float32')
y_val_sz = np.load('./data/taxi_sz/y_val20.npy').astype('float32')
y_test_sz = np.load('./data/taxi_sz/y_test20.npy').astype('float32')

edges_in_sz = np.load('./data/taxi_sz/edges_in_sz.npy')
print(edges_in_sz.shape)

high_similar_poi_sz = pd.read_csv('./data/taxi_sz/high_similar_poi.csv')
high_similar_poi_sz.head(2)

device = torch.device('cuda')

(654, 2)


In [12]:
X_train, y_train = X_train_sz.transpose(1,0,2)[:,np.newaxis,:,:], y_train_sz.T
X_val, y_val = X_val_sz.transpose(1,0,2)[:,np.newaxis,:,:], y_val_sz.T
X_test, y_test = X_test_sz.transpose(1,0,2)[:,np.newaxis,:,:], y_test_sz.T

scaler = StandardScaler(np.mean(X_train.flatten()), np.std(X_train.flatten()))

X_train_std = scaler.transform(X_train)
X_val_std = scaler.transform(X_val)
X_test_std = scaler.transform(X_test)

print(X_train_std.shape, y_train.shape, X_test_std.shape, y_test.shape)

(3672, 1, 101, 24) (3672, 101) (504, 1, 101, 24) (504, 101)


In [13]:
adj_mx = np.zeros((101, 101))

for i in range(101): 
    adj_mx[i,i] = 1
    
for i in range(len(edges_in_sz)):
    adj_mx[edges_in_sz[i,0], edges_in_sz[i,1]] = 1
    
for i in range(len(high_similar_poi_sz)):
    adj_mx[high_similar_poi_sz['i'].iloc[i], high_similar_poi_sz['j'].iloc[i]] = 1
    
adj_mx = torch.FloatTensor(adj_mx).to(device)

In [14]:
device=torch.device('cuda')
trainloader = DataLoader(X_train_std, y_train, 1)
valloader = DataLoader(X_val_std, y_val, 1)
testloader = DataLoader(X_test_std, y_test, 1)
    
engine = trainer(scaler=scaler, in_dim=1, seq_length=24, num_nodes=101, nhid=32, dropout=0.5, lrate=0.01, wdecay=0.0001, 
    device=device, supports=[adj_mx], gcn_bool=True, addaptadj=True, aptinit=adj_mx)


In [17]:
print("start training...",flush=True)
his_loss =[]
val_time = []
train_time = []
for i in range(1,2):
    #if i % 10 == 0:
        #lr = max(0.000002,args.learning_rate * (0.1 ** (i // 10)))
        #for g in engine.optimizer.param_groups:
            #g['lr'] = lr
    train_loss = []
    train_mape = []
    train_rmse = []
    t1 = time.time()
    trainloader.shuffle()
    for iter, (x, y) in enumerate(trainloader.get_iterator()):
        trainx = torch.Tensor(x).to(device)
        trainy = torch.Tensor(y).to(device)
        metrics = engine.train(trainx, trainy)
        train_loss.append(metrics[0])
        train_mape.append(metrics[1])
        train_rmse.append(metrics[2])
        if iter % 50 == 0 :
            log = 'Iter: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}'
            print(log.format(iter, train_loss[-1], train_mape[-1], train_rmse[-1]),flush=True)
    t2 = time.time()
    train_time.append(t2-t1)
    #validation
    valid_loss = []
    valid_mape = []
    valid_rmse = []


    s1 = time.time()
    for iter, (x, y) in enumerate(valloader.get_iterator()):
        testx = torch.Tensor(x).to(device)
        testy = torch.Tensor(y).to(device)
        metrics = engine.eval(testx, testy)
        valid_loss.append(metrics[0])
        valid_mape.append(metrics[1])
        valid_rmse.append(metrics[2])
    s2 = time.time()
    log = 'Epoch: {:03d}, Inference Time: {:.4f} secs'
    print(log.format(i,(s2-s1)))
    val_time.append(s2-s1)
    mtrain_loss = np.mean(train_loss)
    mtrain_mape = np.mean(train_mape)
    mtrain_rmse = np.mean(train_rmse)

    mvalid_loss = np.mean(valid_loss)
    mvalid_mape = np.mean(valid_mape)
    mvalid_rmse = np.mean(valid_rmse)
    his_loss.append(mvalid_loss)

    log = 'Epoch: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}, Valid Loss: {:.4f}, Valid MAPE: {:.4f}, Valid RMSE: {:.4f}, Training Time: {:.4f}/epoch'
    print(log.format(i, mtrain_loss, mtrain_mape, mtrain_rmse, mvalid_loss, mvalid_mape, mvalid_rmse, (t2 - t1)),flush=True)
    torch.save(engine.model.state_dict(), "./compare_with_state_of_art/graph_wavenet_model/epoch-" + str(i) + ".pth")
print("Average Training Time: {:.4f} secs/epoch".format(np.mean(train_time)))
print("Average Inference Time: {:.4f} secs".format(np.mean(val_time)))

#testing
bestid = np.argmin(his_loss)

engine.model.load_state_dict(torch.load("./compare_with_state_of_art/graph_wavenet_model/epoch-" + str(bestid+1) + ".pth"))

outputs = []
realy = torch.Tensor(testloader['ys']).to(device)

for iter, (x, y) in enumerate(testloader.get_iterator()):
    testx = torch.Tensor(x).to(device)
    with torch.no_grad():
        preds = engine.model(testx)
    outputs.append(preds)

yhat = torch.cat(outputs,dim=0)
yhat = yhat[:realy.size(0),...]


print("Training finished")
print("The valid loss on best model is", str(round(his_loss[bestid],4)))


amae = []
amape = []
armse = []
for i in range(12):
    pred = scaler.inverse_transform(yhat[:,:,i])
    real = realy[:,:,i]
    metrics = util.metric(pred,real)
    log = 'Evaluate best model on test data for horizon {:d}, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}'
    print(log.format(i+1, metrics[0], metrics[1], metrics[2]))
    amae.append(metrics[0])
    amape.append(metrics[1])
    armse.append(metrics[2])

log = 'On average over 12 horizons, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}'
print(log.format(np.mean(amae),np.mean(amape),np.mean(armse)))


start training...
Iter: 000, Train Loss: 380.7150, Train MAPE: 33.8767, Train RMSE: 387.4974
Iter: 050, Train Loss: 8.6841, Train MAPE: inf, Train RMSE: 11.6685
Iter: 100, Train Loss: 12.1201, Train MAPE: inf, Train RMSE: 18.3169
Iter: 150, Train Loss: 7.7791, Train MAPE: inf, Train RMSE: 12.4082
Iter: 200, Train Loss: 8.5986, Train MAPE: inf, Train RMSE: 16.0842
Iter: 250, Train Loss: 9.2662, Train MAPE: inf, Train RMSE: 15.0268
Iter: 300, Train Loss: 6.2013, Train MAPE: inf, Train RMSE: 8.6370
Iter: 350, Train Loss: 10.1335, Train MAPE: inf, Train RMSE: 16.0608
Iter: 400, Train Loss: 31.1759, Train MAPE: inf, Train RMSE: 39.9931
Iter: 450, Train Loss: 7.3248, Train MAPE: inf, Train RMSE: 12.9246
Iter: 500, Train Loss: 4.1576, Train MAPE: inf, Train RMSE: 6.5574
Iter: 550, Train Loss: 11.5257, Train MAPE: 0.6091, Train RMSE: 19.7673
Iter: 600, Train Loss: 6.8442, Train MAPE: inf, Train RMSE: 10.5598
Iter: 650, Train Loss: 8.0521, Train MAPE: inf, Train RMSE: 12.0541
Iter: 700, Train L

NameError: name 'dataloader' is not defined

In [50]:
#testing
bestid = np.argmin(his_loss)

engine.model.load_state_dict(torch.load("./compare_with_state_of_art/graph_wavenet_model/epoch-" + str(bestid+1) + ".pth"))

outputs = []

for iter, (x, y) in enumerate(testloader.get_iterator()):
    testx = torch.Tensor(x).to(device)
    with torch.no_grad():
        preds = engine.model(testx)
    outputs.append(preds)

yhat = torch.cat(outputs,dim=0)
pred = scaler.inverse_transform(yhat)    
errors = compute_me(y_test, pred.cpu().numpy())
print(errors[0], errors[1], errors[2])    

In [41]:
errors = compute_me(y_test, pred.cpu().numpy())
print(errors[0], errors[1], errors[2])

9.734826 13.919515 23.610320687294006


## TaxiNY

In [17]:
X_train_ny = np.load('./data/taxi_ny/X_train20.npy')
y_train_ny = np.load('./data/taxi_ny/y_train20.npy')
X_val_ny = np.load('./data/taxi_ny/X_val20.npy')
y_val_ny = np.load('./data/taxi_ny/y_val20.npy')
X_test_ny = np.load('./data/taxi_ny/X_test20.npy')
y_test_ny = np.load('./data/taxi_ny/y_test20.npy')

edges_in_ny = np.load('./data/taxi_ny/edges_manhattan.npy')
ny_high_similar_poi = pd.read_csv('./data/taxi_ny/high_similar_poi.csv')

print(X_train_ny.shape, y_train_ny.shape)

(63, 8856, 24) (63, 8856)


In [18]:
X_train, y_train = X_train_ny.transpose(1,0,2)[:,np.newaxis,:,:], y_train_ny.T
X_val, y_val = X_val_ny.transpose(1,0,2)[:,np.newaxis,:,:], y_val_ny.T
X_test, y_test = X_test_ny.transpose(1,0,2)[:,np.newaxis,:,:], y_test_ny.T

scaler = StandardScaler(np.mean(X_train.flatten()), np.std(X_train.flatten()))

X_train_std = scaler.transform(X_train)
X_val_std = scaler.transform(X_val)
X_test_std = scaler.transform(X_test)

print(X_train_std.shape, y_train.shape, X_test_std.shape, y_test.shape)

(8856, 1, 63, 24) (8856, 63) (1008, 1, 63, 24) (1008, 63)


In [19]:
device = torch.device('cuda')

adj_mx = np.zeros((63, 63))

for i in range(63): 
    adj_mx[i,i] = 1
    
for i in range(len(edges_in_ny)):
    adj_mx[edges_in_ny[i,0], edges_in_ny[i,1]] = 1
    
for i in range(len(high_similar_poi_sz)):
    adj_mx[ny_high_similar_poi['i'].iloc[i], ny_high_similar_poi['j'].iloc[i]] = 1
    
adj_mx = torch.FloatTensor(adj_mx).to(device)

In [20]:
trainloader = DataLoader(X_train_std, y_train, 1)
valloader = DataLoader(X_val_std, y_val, 1)
testloader = DataLoader(X_test_std, y_test, 1)
    
engine = trainer(scaler=scaler, in_dim=1, seq_length=24, num_nodes=63, nhid=32, dropout=0.5, lrate=0.01, wdecay=0.0001, 
    device=device, supports=[adj_mx], gcn_bool=True, addaptadj=True, aptinit=adj_mx)


In [48]:
print("start training...",flush=True)
his_loss =[]
val_time = []
train_time = []
for i in range(1,2):
    #if i % 10 == 0:
        #lr = max(0.000002,args.learning_rate * (0.1 ** (i // 10)))
        #for g in engine.optimizer.param_groups:
            #g['lr'] = lr
    train_loss = []
    train_mape = []
    train_rmse = []
    t1 = time.time()
    trainloader.shuffle()
    for iter, (x, y) in enumerate(trainloader.get_iterator()):
        trainx = torch.Tensor(x).to(device)
        trainy = torch.Tensor(y).to(device)
        metrics = engine.train(trainx, trainy)
        train_loss.append(metrics[0])
        train_mape.append(metrics[1])
        train_rmse.append(metrics[2])
        if iter % 50 == 0 :
            log = 'Iter: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}'
            print(log.format(iter, train_loss[-1], train_mape[-1], train_rmse[-1]),flush=True)
    t2 = time.time()
    train_time.append(t2-t1)
    #validation
    valid_loss = []
    valid_mape = []
    valid_rmse = []


    s1 = time.time()
    for iter, (x, y) in enumerate(valloader.get_iterator()):
        testx = torch.Tensor(x).to(device)
        testy = torch.Tensor(y).to(device)
        metrics = engine.eval(testx, testy)
        valid_loss.append(metrics[0])
        valid_mape.append(metrics[1])
        valid_rmse.append(metrics[2])
    s2 = time.time()
    log = 'Epoch: {:03d}, Inference Time: {:.4f} secs'
    print(log.format(i,(s2-s1)))
    val_time.append(s2-s1)
    mtrain_loss = np.mean(train_loss)
    mtrain_mape = np.mean(train_mape)
    mtrain_rmse = np.mean(train_rmse)

    mvalid_loss = np.mean(valid_loss)
    mvalid_mape = np.mean(valid_mape)
    mvalid_rmse = np.mean(valid_rmse)
    his_loss.append(mvalid_loss)

    log = 'Epoch: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}, Valid Loss: {:.4f}, Valid MAPE: {:.4f}, Valid RMSE: {:.4f}, Training Time: {:.4f}/epoch'
    print(log.format(i, mtrain_loss, mtrain_mape, mtrain_rmse, mvalid_loss, mvalid_mape, mvalid_rmse, (t2 - t1)),flush=True)
    torch.save(engine.model.state_dict(), "./compare_with_state_of_art/graph_wavenet_model/epoch-" + str(i) + ".pth")
print("Average Training Time: {:.4f} secs/epoch".format(np.mean(train_time)))
print("Average Inference Time: {:.4f} secs".format(np.mean(val_time)))

#testing
bestid = np.argmin(his_loss)

engine.model.load_state_dict(torch.load("./compare_with_state_of_art/graph_wavenet_model/epoch-" + str(bestid+1) + ".pth"))



start training...
Iter: 000, Train Loss: 41.8780, Train MAPE: inf, Train RMSE: 65.1953
Iter: 050, Train Loss: 57.7694, Train MAPE: inf, Train RMSE: 76.7200
Iter: 100, Train Loss: 10.0250, Train MAPE: inf, Train RMSE: 15.6046
Iter: 150, Train Loss: 6.7972, Train MAPE: inf, Train RMSE: 7.9593
Iter: 200, Train Loss: 13.6343, Train MAPE: inf, Train RMSE: 19.3703
Iter: 250, Train Loss: 15.2930, Train MAPE: inf, Train RMSE: 23.1272
Iter: 300, Train Loss: 31.1631, Train MAPE: inf, Train RMSE: 46.7367
Iter: 350, Train Loss: 9.6679, Train MAPE: inf, Train RMSE: 13.4549
Iter: 400, Train Loss: 10.1134, Train MAPE: inf, Train RMSE: 18.2023
Iter: 450, Train Loss: 15.3073, Train MAPE: inf, Train RMSE: 27.7086
Iter: 500, Train Loss: 16.3722, Train MAPE: inf, Train RMSE: 21.5574
Iter: 550, Train Loss: 10.6068, Train MAPE: inf, Train RMSE: 16.0670
Iter: 600, Train Loss: 18.0018, Train MAPE: inf, Train RMSE: 30.0206
Iter: 650, Train Loss: 3.7110, Train MAPE: inf, Train RMSE: 4.6394
Iter: 700, Train Loss

FileNotFoundError: [Errno 2] No such file or directory: './compare_with_state_of_art/graph_wavenet_model/NY-epoch-1.pth'

In [51]:
#testing
bestid = np.argmin(his_loss)

engine.model.load_state_dict(torch.load("./compare_with_state_of_art/graph_wavenet_model/epoch-" + str(bestid+1) + ".pth"))

outputs = []

for iter, (x, y) in enumerate(testloader.get_iterator()):
    testx = torch.Tensor(x).to(device)
    with torch.no_grad():
        preds = engine.model(testx)
    outputs.append(preds)

yhat = torch.cat(outputs,dim=0)
pred = scaler.inverse_transform(yhat)    
errors = compute_me(y_test, pred.cpu().numpy())
print(errors[0], errors[1], errors[2])    

4.641262 9.229001 21.61650061607361
