In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error, mean_absolute_error
import dgl
import dgl.function as fn

gcn_msg = fn.copy_src(src='h', out='m')
gcn_reduce = fn.sum(msg='m', out='h')

device = torch.device('cuda')

def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def compute_metric(y_true, y_pred, name):
    mae = mean_absolute_error(y_true, y_pred)
    rmse= np.sqrt(mean_squared_error(y_true, y_pred))
    mape = mean_absolute_percentage_error(y_true[np.where(y_true > 10)[0]], y_pred[np.where(y_true > 10)[0]])
    print("mae " + name, mae)    
    print("rmse " + name, rmse)
    print("mape(>10) " + name, mape)  
    return rmse

def build_graph(src, dst, nb_nodes, device):
    g = dgl.DGLGraph()
    g.add_nodes(nb_nodes)
    g.add_edges(src, dst)
    return g.to(device)

def create_batches(node_features_batch, batch_size, src, dst, nb_nodes, device):
    my_graphs = []
    for i in range(batch_size):
        temp_g = build_graph(src, dst, nb_nodes, device)
        temp_g.ndata['h'] = torch.from_numpy(node_features_batch[:,i,:]).to(device)
        my_graphs.append(temp_g)
    return dgl.batch(my_graphs)
    

class NodeApplyModule(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(NodeApplyModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation
        
    def forward(self, node):
        h = self.linear(node.data['h'])
        h = self.activation(h)
        return {'h' : h}
    

class GCN(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)
        
    def forward(self, g, feature):
        g.ndata['h'] = feature
        g.update_all(gcn_msg, gcn_reduce)
        g.apply_nodes(func=self.apply_mod)
        return g.ndata.pop('h')

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

Using backend: pytorch


In [2]:
class TGCN(nn.Module):
    def __init__(self, in_dim, hidden_dim, batch_size, community_detail, device, nb_neurons_gru=20, nb_neurons_dense=10):
        super().__init__()
        self.batch_size = batch_size
        self.device=device
        self.community_detail = community_detail
        self.nb_nodes = sum([len(_) for _ in community_detail])
        self.grus = nn.ModuleList([nn.GRU(1, 20, 2, batch_first=True) for _ in range(self.nb_nodes)])

        self.gcnlayers1 = nn.ModuleList([
            GCN(in_dim, 32, F.relu),
            GCN(32, hidden_dim, F.relu)
        ])
        
        self.gcnlayers2 = nn.ModuleList([
            GCN(in_dim, 32, F.relu),
            GCN(32, hidden_dim, F.relu)
        ])

        self.fc1 = nn.Linear(40, 1)
        
    def forward(self, g, g2):
        h = g.ndata['h']        
        h = h.unsqueeze(dim=2)

        gru_outputs = [gru(h[range(i, i+self.nb_nodes*self.batch_size, self.nb_nodes),:])[0][:,-1,:]
             for i, gru in enumerate(self.grus)]
        
        h = torch.zeros((self.nb_nodes*self.batch_size, 20)).to(self.device)

        for i in range(len(gru_outputs)):
            h[range(i, i+self.nb_nodes*self.batch_size, self.nb_nodes),:] = gru_outputs[i]

        h2 = h.clone().detach()
        
        for conv in self.gcnlayers1:
            h = conv(g, h)
        g.ndata['h'] = h

        for conv in self.gcnlayers2:
            h2 = conv(g2, h2)
        g2.ndata['h2'] = h2
        
        h3 = torch.cat([h, h2], dim=1)  # (nb_nodes * batch_size, 40)
        h3 = torch.cat([h3[i*self.nb_nodes : (i+1)*self.nb_nodes].unsqueeze(0) for i in range(self.batch_size)], dim=0) #[bs, nb_nodes, 40] 
        X = self.fc1(h3)
        return X.squeeze(-1).T # [nb_nodes, bs]


In [3]:
class SimpleGCN(nn.Module):
    def __init__(self, in_dim, hidden_dim, batch_size, community_detail, device, nb_neurons_gru=20, nb_neurons_dense=10):
        super().__init__()
        self.batch_size = batch_size
        self.device=device
        self.community_detail = community_detail
        self.nb_nodes = sum([len(_) for _ in community_detail])
        
        self.gcnlayers1 = nn.ModuleList([
            GCN(in_dim, 32, F.relu),
            GCN(32, hidden_dim, F.relu)
        ])
        
        self.gcnlayers2 = nn.ModuleList([
            GCN(in_dim, 32, F.relu),
            GCN(32, hidden_dim, F.relu)
        ])

        self.fc1 = nn.Linear(40, 1)
        
    def forward(self, g, g2):
        h = g.ndata['h']        

        h2 = h.clone().detach()
        
        for conv in self.gcnlayers1:
            h = conv(g, h)
        g.ndata['h'] = h

        for conv in self.gcnlayers2:
            h2 = conv(g2, h2)
        g2.ndata['h2'] = h2
        
        h3 = torch.cat([h, h2], dim=1)  # (nb_nodes * batch_size, 40)
        h3 = torch.cat([h3[i*self.nb_nodes : (i+1)*self.nb_nodes].unsqueeze(0) for i in range(self.batch_size)], dim=0) #[bs, nb_nodes, 40] 
        X = self.fc1(h3)
        return X.squeeze(-1).T # [nb_nodes, bs]
 

In [4]:
class SimpleGCN11(nn.Module):
    def __init__(self, in_dim, hidden_dim, batch_size, nb_nodes, device, nb_neurons_gru=20, nb_neurons_dense=10):
        super().__init__()
        self.batch_size = batch_size
        self.device=device
        self.nb_nodes = nb_nodes
        
        self.gcnlayers1 = nn.ModuleList([
            GCN(in_dim, 32, F.relu),
            GCN(32, hidden_dim, F.relu)
        ])
        
        self.gcnlayers2 = nn.ModuleList([
            GCN(in_dim, 32, F.relu),
            GCN(32, hidden_dim, F.relu)
        ])

        self.fcs = nn.ModuleList([nn.Linear(40, 1) for _ in range(nb_nodes)])
        
    def forward(self, g, g2):
        h = g.ndata['h']        

        h2 = h.clone().detach()
        
        for conv in self.gcnlayers1:
            h = conv(g, h)
        g.ndata['h'] = h

        for conv in self.gcnlayers2:
            h2 = conv(g2, h2)
        g2.ndata['h2'] = h2
        
        h3 = torch.cat([h, h2], dim=1)  # (nb_nodes * batch_size, 40)
        h3 = h3.view(-1, self.nb_nodes, 40)
        X = [self.fcs[i](h3[:,i,:]) for i in range(self.nb_nodes)]
        X = torch.cat(X, dim=1).T
        
        return X # [nb_nodes, bs]
 

In [5]:
def train(net, X_train, X_val, y_train, y_val, saved_path, nb_nodes, community_detail, high_similar_poi, edges, batch_size = 72, 
          learning_rate=0.001, epochs=100, device=device):
    
    nb_train, nb_val = y_train.shape[1], y_val.shape[1]
    
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    
    nb_nodes = sum([len(_) for _ in community_detail])
    
    early_stopping = EarlyStopping(patience=7, verbose=False, path=saved_path)
    
    for epoch in range(epochs):
        loss_all = []
        net.train()
        for i in range(nb_train//batch_size):
            batched_graph1 = create_batches(X_train[:,i*batch_size:(i+1)*batch_size,:], batch_size, edges[:,0], edges[:,1], nb_nodes, device)
            batched_graph2 = create_batches(X_train[:,i*batch_size:(i+1)*batch_size,:], batch_size, high_similar_poi['i'], high_similar_poi['j'], nb_nodes, device)
            logits_train = net(batched_graph1, batched_graph2) # [nb_node, bs]
            y_train_temp = torch.from_numpy(y_train[:,i*batch_size:(i+1)*batch_size]).to(device)
            loss = F.mse_loss(logits_train, y_train_temp)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        net.eval()
        pred_val = []
        with torch.no_grad():
            for i in range(nb_val//batch_size):
                batched_graph1 = create_batches(X_val[:,i*batch_size:(i+1)*batch_size,:], batch_size, edges[:,0], edges[:,1], nb_nodes, device)
                batched_graph2 = create_batches(X_val[:,i*batch_size:(i+1)*batch_size,:], batch_size, high_similar_poi['i'], high_similar_poi['j'], nb_nodes, device)
                logits_val = net(batched_graph1, batched_graph2) # [nb_node, bs]
                pred_val.append(logits_val)
            
            pred = torch.cat(pred_val, dim=1)
            rmse_current = compute_metric(y_val.flatten(), pred.cpu().numpy().flatten(), str(epoch))
            print()
        
        early_stopping(rmse_current, net)

        if early_stopping.early_stop:
            print('Early stopping')
            break

    net.load_state_dict(torch.load(saved_path))
    return net
 

In [6]:
def test(model, X_test, y_test, batch_size, edges, high_sim, nb_nodes, device):
    pred_test = []
    nb_test = X_test.shape[1]

    with torch.no_grad():
        for i in range(nb_test//batch_size):
            batched_graph1 = create_batches(X_test[:,i*batch_size:(i+1)*batch_size,:], batch_size, edges[:,0], edges[:,1], nb_nodes, device)
            batched_graph2 = create_batches(X_test[:,i*batch_size:(i+1)*batch_size,:], batch_size, high_sim['i'], high_sim['j'], nb_nodes, device)
            logits_test = model(batched_graph1, batched_graph2) # [nb_node, bs]
            pred_test.append(logits_test)

        pred = torch.cat(pred_test, dim=1)
        rmse_current = compute_metric(y_test.flatten(), pred.cpu().numpy().flatten(), 'test')
    

# TGCN-SZ

In [7]:
X_train_sz = np.load('./data/taxi_sz/X_train20.npy').astype('float32')
X_val_sz = np.load('./data/taxi_sz/X_val20.npy').astype('float32')
X_test_sz = np.load('./data/taxi_sz/X_test20.npy').astype('float32')
y_train_sz = np.load('./data/taxi_sz/y_train20.npy').astype('float32')
y_val_sz = np.load('./data/taxi_sz/y_val20.npy').astype('float32')
y_test_sz = np.load('./data/taxi_sz/y_test20.npy').astype('float32')

sz_8community = np.load('./data/taxi_sz/sz_8community.npy', allow_pickle=True).tolist()
print(sz_8community)

edges_in_sz = np.load('./data/taxi_sz/edges_in_sz.npy')
print(edges_in_sz.shape)

high_similar_poi_sz = pd.read_csv('./data/taxi_sz/high_similar_poi.csv')

print(X_train_sz.shape, y_train_sz.shape)

[[0, 1, 2, 3, 4, 6, 9, 63, 69, 77], [5, 7, 8, 10, 22, 25, 45, 46, 47, 48, 49, 50, 51, 58, 66, 71, 81], [11, 21, 24, 29, 30, 35, 37, 39, 40, 41, 42, 43, 53, 92, 96], [12, 13, 15, 16, 23, 34, 36, 54, 55, 72, 78, 79, 97], [14, 17, 18, 19, 20, 38, 44, 84, 85, 86, 87, 88, 89, 90, 91, 93, 94, 95, 98, 100], [26, 27, 28, 31, 32, 33], [52, 56, 57, 59, 60, 61, 62, 64, 65], [67, 68, 70, 73, 74, 75, 76, 80, 82, 83, 99]]
(654, 2)
(101, 3672, 24) (101, 3672)


In [51]:
test(model_sz, X_test_sz, y_test_sz, 72, edges_in_sz, high_similar_poi_sz, 101, device)

mae test 10.17994
rmse test 15.334211
mape(>10) test 32.654547691345215


In [30]:
nb_test = X_test_sz.shape[1]

model = TGCN(20, 20, nb_test, sz_8community, device).to(device)
model.load_state_dict(torch.load("./saved/tgcn_sz.pt"))

t0 = time.time()
with torch.no_grad():
    batched_graph1 = create_batches(X_test_sz, nb_test, edges_in_sz[:,0], edges_in_sz[:,1], 101, device)
    batched_graph2 = create_batches(X_test_sz, nb_test, high_similar_poi_sz['i'], high_similar_poi_sz['j'], 101, device)
    logits_test = model(batched_graph1, batched_graph2) # [nb_node, bs]
    print('time cost: ', time.time() - t0)


time cost:  1.055283784866333


# TGCN-NY

In [14]:
X_train_ny = np.load('./data/taxi_ny/X_train20.npy')
y_train_ny = np.load('./data/taxi_ny/y_train20.npy')
X_val_ny = np.load('./data/taxi_ny/X_val20.npy')
y_val_ny = np.load('./data/taxi_ny/y_val20.npy')
X_test_ny = np.load('./data/taxi_ny/X_test20.npy')
y_test_ny = np.load('./data/taxi_ny/y_test20.npy')

ny_6community = [[0, 5, 7, 10, 11, 12, 13, 32, 33, 49, 54], [1, 2, 4, 6, 21, 22, 24, 25, 43, 48, 56, 57, 59, 60, 61, 62],
    [3, 8, 9, 15, 16, 28, 29, 30, 34, 55],  [14, 17, 31, 36, 37, 38, 39, 41, 46, 47, 50, 52, 53], [18, 26, 42, 45, 51], 
    [19, 20, 23, 27, 35, 40, 44, 58]]

edges_in_ny = np.load('./data/taxi_ny/edges_manhattan.npy')
ny_high_similar_poi = pd.read_csv('./data/taxi_ny/high_similar_poi.csv')

print(X_train_ny.shape, y_train_ny.shape)

(63, 8856, 24) (63, 8856)


In [10]:
net = TGCN(20, 20, 72, ny_6community, device).to(device)
    
model_ny = train(net, X_train_ny, X_val_ny, y_train_ny, y_val_ny, "./saved/tgcn_ny.pt", 63, ny_6community, ny_high_similar_poi, 
                  edges_in_ny, 72, 0.01, 200, device)

mae 0 12.546094
rmse 0 21.178024
mape(>10) 0 35.97038388252258

mae 1 9.140538
rmse 1 14.032609
mape(>10) 1 29.422244429588318

mae 2 8.970854
rmse 2 13.795502
mape(>10) 2 31.849405169487

mae 3 7.5466223
rmse 3 12.077554
mape(>10) 3 28.25866937637329

mae 4 6.604911
rmse 4 11.034801
mape(>10) 4 25.520598888397217

mae 5 7.3309913
rmse 5 11.769248
mape(>10) 5 27.420124411582947

EarlyStopping counter: 1 out of 7
mae 6 6.3626366
rmse 6 11.05088
mape(>10) 6 24.42053109407425

EarlyStopping counter: 2 out of 7
mae 7 6.253881
rmse 7 10.676598
mape(>10) 7 25.13779103755951

mae 8 5.9095383
rmse 8 10.453227
mape(>10) 8 24.286293983459473

mae 9 5.917006
rmse 9 10.439485
mape(>10) 9 24.715986847877502

mae 10 5.6930623
rmse 10 9.94839
mape(>10) 10 23.994168639183044

mae 11 5.9857697
rmse 11 10.910449
mape(>10) 11 24.98735785484314

EarlyStopping counter: 1 out of 7
mae 12 5.565188
rmse 12 10.199126
mape(>10) 12 23.925909399986267

EarlyStopping counter: 2 out of 7
mae 13 5.612858
rmse 13 10.

In [11]:
test(model_ny, X_test_ny, y_test_ny, 72, edges_in_ny, ny_high_similar_poi, 63, device)

mae test 5.2405906
rmse test 9.777584
mape(>10) test 23.488961160182953


In [32]:
nb_test = X_test_sz.shape[1]

model = TGCN(20, 20, nb_test, ny_6community, device).to(device)
model.load_state_dict(torch.load("./saved/tgcn_ny.pt"))

t0 = time.time()
with torch.no_grad():
    batched_graph1 = create_batches(X_test_ny, nb_test, edges_in_ny[:,0], edges_in_ny[:,1], 63, device)
    batched_graph2 = create_batches(X_test_ny, nb_test, ny_high_similar_poi['i'], ny_high_similar_poi['j'], 63, device)
    logits_test = model(batched_graph1, batched_graph2) # [nb_node, bs]
    print('time cost: ', time.time() - t0)


time cost:  1.0813148021697998


# GCN-SZ

In [22]:
gcn = SimpleGCN11(24, 20, 72, 101, device).to(device)
gcn_sz =  train(gcn, X_train_sz, X_val_sz, y_train_sz, y_val_sz, "./saved/gcn_sz-new.pt", 101, sz_8community, high_similar_poi_sz, 
                  edges_in_sz, batch_size = 72, learning_rate=0.01, epochs=200, device=device)

mae 0 21.572289
rmse 0 35.49498
mape(>10) 0 64.18524980545044

mae 1 19.441395
rmse 1 32.60053
mape(>10) 1 55.56862950325012

mae 2 18.361517
rmse 2 31.149458
mape(>10) 2 48.70470464229584

mae 3 18.175404
rmse 3 32.003975
mape(>10) 3 44.490283727645874

EarlyStopping counter: 1 out of 7
mae 4 15.678753
rmse 4 26.64277
mape(>10) 4 44.238853454589844

mae 5 15.287438
rmse 5 26.217567
mape(>10) 5 44.76290941238403

mae 6 14.913826
rmse 6 25.131935
mape(>10) 6 45.28956115245819

mae 7 14.579987
rmse 7 24.558638
mape(>10) 7 45.28106153011322

mae 8 14.6053505
rmse 8 24.216417
mape(>10) 8 46.15618586540222

mae 9 14.513723
rmse 9 23.905525
mape(>10) 9 45.964252948760986

mae 10 14.2843
rmse 10 23.415873
mape(>10) 10 44.94393765926361

mae 11 14.084131
rmse 11 23.25454
mape(>10) 11 45.20532190799713

mae 12 14.032031
rmse 12 23.09624
mape(>10) 12 44.792240858078

mae 13 13.95388
rmse 13 22.97678
mape(>10) 13 44.734951853752136

mae 14 13.979988
rmse 14 23.076511
mape(>10) 14 45.2906847000122

In [23]:
test(gcn_sz, X_test_sz, y_test_sz, 72, edges_in_sz, high_similar_poi_sz, 101, device)

mae test 14.263112
rmse test 23.16417
mape(>10) test 44.70433592796326


In [13]:
import time

device = torch.device('cuda')
nb_test = X_test_sz.shape[1]

model = SimpleGCN11(24, 20, 72, 101, device).to(device)
model.load_state_dict(torch.load("./saved/gcn_sz-new.pt"))

t0 = time.time()
with torch.no_grad():
    batched_graph1 = create_batches(X_test_sz, nb_test, edges_in_sz[:,0], edges_in_sz[:,1], 101, device)
    batched_graph2 = create_batches(X_test_sz, nb_test, high_similar_poi_sz['i'], high_similar_poi_sz['j'], 101, device)
    logits_test = model(batched_graph1, batched_graph2) # [nb_node, bs]
    print('time cost: ', time.time() - t0)


time cost:  0.8680210113525391


# GCN-NY

In [25]:
gcn = SimpleGCN11(24, 20, 72, 63, device).to(device)
gcn_ny =  train(gcn, X_train_ny, X_val_ny, y_train_ny, y_val_ny, "./saved/gcn_ny-new.pt", 63, ny_6community, ny_high_similar_poi, 
                  edges_in_ny, batch_size = 72, learning_rate=0.01, epochs=200, device=device)

mae 0 14.543928
rmse 0 32.11388
mape(>10) 0 65.01575112342834

mae 1 13.444556
rmse 1 30.307028
mape(>10) 1 59.51480269432068

mae 2 12.633872
rmse 2 27.948252
mape(>10) 2 55.31506538391113

mae 3 12.048494
rmse 3 26.411545
mape(>10) 3 55.09911775588989

mae 4 11.797436
rmse 4 25.836926
mape(>10) 4 55.185818672180176

mae 5 12.167112
rmse 5 27.012894
mape(>10) 5 57.618647813797

EarlyStopping counter: 1 out of 7
mae 6 11.976238
rmse 6 26.159227
mape(>10) 6 57.435542345047

EarlyStopping counter: 2 out of 7
mae 7 11.934835
rmse 7 26.057175
mape(>10) 7 57.16792941093445

EarlyStopping counter: 3 out of 7
mae 8 12.045452
rmse 8 26.242498
mape(>10) 8 55.48233985900879

EarlyStopping counter: 4 out of 7
mae 9 12.550909
rmse 9 27.093061
mape(>10) 9 56.73884153366089

EarlyStopping counter: 5 out of 7
mae 10 12.087004
rmse 10 26.477774
mape(>10) 10 57.126736640930176

EarlyStopping counter: 6 out of 7
mae 11 11.945834
rmse 11 25.905634
mape(>10) 11 55.52185773849487

EarlyStopping counter: 7 

In [27]:
test(gcn_ny, X_test_ny, y_test_ny, 72, edges_in_ny, ny_high_similar_poi, 63, device)

mae test 11.661935
rmse test 25.417847
mape(>10) test 56.45459294319153


In [15]:
nb_test = X_test_sz.shape[1]

model = gcn = SimpleGCN11(24, 20, 72, 63, device).to(device)
model.load_state_dict(torch.load("./saved/gcn_ny-new.pt"))

t0 = time.time()
with torch.no_grad():
    batched_graph1 = create_batches(X_test_ny, nb_test, edges_in_ny[:,0], edges_in_ny[:,1], 63, device)
    batched_graph2 = create_batches(X_test_ny, nb_test, ny_high_similar_poi['i'], ny_high_similar_poi['j'], 63, device)
    logits_test = model(batched_graph1, batched_graph2) # [nb_node, bs]
    print('time cost: ', time.time() - t0)


time cost:  0.8706495761871338
