In [1]:
import torch
print(torch.__version__)

2.0.1+cu118


In [2]:
!pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.1+cu118.html
!pip install torch-geometric
!pip install torch-geometric-temporal

Looking in links: https://data.pyg.org/whl/torch-2.0.1+cu118.html


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
from datetime import datetime
import numpy as np
import os
import pandas as pd
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric_temporal.nn import STConv
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

Handling GPU out of memory

In [14]:
torch.cuda.memory_summary(device=None, abbreviated=False)
torch.cuda.empty_cache()
!nvidia-smi

Tue Oct  3 00:52:45 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   56C    P0    33W /  70W |   1703MiB / 15360MiB |     43%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

STGCN Model

In [6]:
class FullyConnLayer(nn.Module):
    def __init__(self, c):
        super(FullyConnLayer, self).__init__()
        self.conv = nn.Conv2d(c, 1, 1)

    def forward(self, x):
        return self.conv(x)

class OutputLayer(nn.Module):
    def __init__(self, c, T, n):
        super(OutputLayer, self).__init__()
        self.tconv1 = nn.Conv2d(c, c, (T, 1), 1)
        self.ln = nn.LayerNorm([n, c])
        self.tconv2 = nn.Conv2d(c, c, (1, 1), 1)
        self.fc = FullyConnLayer(c)

    def forward(self, x):
        x_t1 = self.tconv1(x)
        x_ln = self.ln(x_t1.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
        x_t2 = self.tconv2(x_ln)

        return self.fc(x_t2)

# Final model with multiple STConv layer before the linear layer
class TrafficModel(nn.Module):
    def __init__(self, device, num_nodes, channel_size_list, num_layers,
                 kernel_size, K, window_size, normalization = 'sym', bias = True):
    # num_nodes = node count of input
    # channel_size_list = 2Darray, representing feature dimension in model
    # num_layers = STConv layers count
    # kernel_size = temporal kernel length
    # K = Chebyshev polynormial spatial convolution size
    # window_size = historical steps to be included

        super(TrafficModel, self).__init__()
        self.layers = nn.ModuleList([])
        # stacking STConv layers
        for l in range(num_layers):
            input_size = channel_size_list[l][0]
            hidden_size = channel_size_list[l][1]
            output_size = channel_size_list[l][2]

            self.layers.append(STConv(num_nodes, input_size, hidden_size,
                                      output_size, kernel_size, K,
                                      normalization, bias))
        # output layer
        self.layers.append(OutputLayer(channel_size_list[-1][-1], \
                                       window_size - 2 * num_layers * (kernel_size - 1), \
                                       num_nodes))
        # CUDA if available
        for layer in self.layers:
            layer = layer.to(device)

    def forward(self, x, edge_index, edge_weight):
        for layer in self.layers[:-1]:
          x = layer(x, edge_index, edge_weight)
        out_layer = self.layers[-1]
        x = x.permute(0, 3, 1, 2)
        x = out_layer(x)
        return x

Training parameters

In [7]:
# Training parameters
channels = np.array([[1, 16, 64], [64, 16, 64]]) # Channels size, start from 1

kernel_size = 3   # temporal size
K = 3             # Chebyshev filter size


learning_rate = 0.001
batch_size = 24
num_epochs = 10
num_layers = 2    # ST-Conv layers count
n_his = 14        # historical steps to be included (lag)
n_pred = 5        # predicted steps
train_prop = 0.6 # train set ratio
val_prop = 0.2   # validation set ratio
test_prop = 0.2  # test set ratio

# Model saving path
model_save_path = os.path.join('drive', 'MyDrive', 'Colab Notebooks', 'STGNN', 'best_model.pt')

Dataloader

In [8]:
def data_transform(data, n_his, n_pred, device):
    # data = $V$ batch
    # n_his = historical steps
    # n_pred = predicted steps

    num_nodes = data.shape[1]
    num_obs = len(data) - n_his - n_pred
    x = np.zeros([num_obs, n_his, num_nodes, 1])
    y = np.zeros([num_obs, num_nodes])

    obs_idx = 0
    for i in range(num_obs):
        head = i
        tail = i + n_his
        x[obs_idx, :, :, :] = data[head: tail].reshape(n_his, num_nodes, 1)
        y[obs_idx] = data[tail + n_pred - 1]
        obs_idx += 1

    return torch.Tensor(x).to(device), torch.Tensor(y).to(device)

# Read CSV
weighted_adj_matrix_path = os.path.join('drive', 'MyDrive', 'Colab Notebooks', 'STGNN', 'data', 'W.csv')
W = pd.read_csv(weighted_adj_matrix_path)

feature_vectors_path = os.path.join('drive', 'MyDrive', 'Colab Notebooks', 'STGNN', 'data', 'V.csv')
V = pd.read_csv(feature_vectors_path)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Graph properties
num_samples, num_nodes = V.shape

# $V$ matrix split
len_train = round(num_samples * train_prop)
len_val = round(num_samples * val_prop)
train = V[ : len_train]
val = V[len_train : len_train + len_val]
test = V[len_train + len_val : len_train + len_val + round(num_samples * test_prop)]

# Normalization
scaler = StandardScaler()
train = np.nan_to_num(scaler.fit_transform(train))
val = np.nan_to_num(scaler.transform(val))
test = np.nan_to_num(scaler.transform(test))

# Data Splitting
x_train, y_train = data_transform(train, n_his, n_pred, device)
x_val, y_val = data_transform(val, n_his, n_pred, device)
x_test, y_test = data_transform(test, n_his, n_pred, device)

# Create trainable torch data
train_data = torch.utils.data.TensorDataset(x_train, y_train)
train_iter = torch.utils.data.DataLoader(train_data, batch_size, shuffle=True)
val_data = torch.utils.data.TensorDataset(x_val, y_val)
val_iter = torch.utils.data.DataLoader(val_data, batch_size)
test_data = torch.utils.data.TensorDataset(x_test, y_test)
test_iter = torch.utils.data.DataLoader(test_data, batch_size)

# convert W matrix to COO format
G = sp.coo_matrix(W)
edge_index = torch.tensor(np.array([G.row, G.col]), dtype=torch.int64).to(device)
edge_weight = torch.tensor(G.data).float().to(device)

Training

In [9]:
# Model initialization
model = TrafficModel(device, num_nodes, channels, num_layers, kernel_size, K,
                     n_his, normalization='sym', bias=True).to(device)

# Define loss
loss = nn.MSELoss()
# Define optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

def evaluate_model(model, loss, data_iter, edge_index, edge_weight, device):
  model.eval()
  l_sum, n = 0.0, 0
  with torch.no_grad():
      for x, y in data_iter:
          y_pred = model(x.to(device), edge_index, edge_weight).view(len(x), -1)
          l = loss(y_pred, y)
          l_sum += l.item() * y.shape[0]
          n += y.shape[0]
      return l_sum / n

# evaluation metric
def evaluate_metric(model, data_iter, scaler, edge_index, edge_weight, device):
    model.eval()
    with torch.no_grad():
        mae, mape, mse = [], [], []
        for x, y in data_iter:
            y = scaler.inverse_transform(y.cpu().numpy()).reshape(-1)
            y_pred = scaler.inverse_transform(model(x.to(device), edge_index,
                                                    edge_weight).view(len(x), -1).cpu().numpy()).reshape(-1)
            d = np.abs(y - y_pred)
            mae += d.tolist()
            mape += (d / y).tolist()
            mse += (d ** 2).tolist()
        MAE = np.array(mae).mean()
        MAPE = np.array(mape).mean()
        RMSE = np.sqrt(np.array(mse).mean())
        return MAE, MAPE, RMSE

In [13]:
#torch.cuda.memory_summary()
!nvidia-smi

Tue Oct  3 00:52:34 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   57C    P0    30W /  70W |  13467MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

Training progress

In [11]:
min_val_loss = np.inf
for epoch in tqdm(range(1, num_epochs + 1), desc = 'Epoch', position = 0):
  l_sum, n = 0.0, 0

  model.train()
  for x, y in tqdm(train_iter, desc = 'Batch', position = 0):
    # get model predictions and compute loss
    y_pred = model(x.to(device), edge_index, edge_weight).view(len(x), -1)
    l = loss(y_pred, y)
    # backpropogation
    optimizer.zero_grad()
    l.backward()
    optimizer.step()

    l_sum += l.item() * y.shape[0]
    n += y.shape[0]

  # validation loss
  val_loss = evaluate_model(model, loss, val_iter, edge_index, edge_weight, device)
  # save the validation loss if it is less than before
  if val_loss < min_val_loss:
      min_val_loss = val_loss
      torch.save(model.state_dict(), model_save_path)
  print("epoch", epoch, ", train loss:", l_sum / n, ", validation loss:", val_loss)

Batch: 100%|██████████| 146/146 [11:26<00:00,  4.70s/it]
Epoch:  10%|█         | 1/10 [13:31<2:01:43, 811.45s/it]

epoch 1 , train loss: 0.7090181779574446 , validation loss: 53.805734305174454


Batch: 100%|██████████| 146/146 [11:21<00:00,  4.67s/it]
Epoch:  20%|██        | 2/10 [26:58<1:47:50, 808.79s/it]

epoch 2 , train loss: 0.6387800365199534 , validation loss: 53.65498481294383


Batch: 100%|██████████| 146/146 [11:20<00:00,  4.66s/it]
Epoch:  30%|███       | 3/10 [40:25<1:34:14, 807.82s/it]

epoch 3 , train loss: 0.6208631705079526 , validation loss: 53.542776412860206


Batch: 100%|██████████| 146/146 [11:20<00:00,  4.66s/it]
Epoch:  40%|████      | 4/10 [53:51<1:20:44, 807.41s/it]

epoch 4 , train loss: 0.6131729662469616 , validation loss: 53.612729209091356


Batch: 100%|██████████| 146/146 [11:19<00:00,  4.66s/it]
Epoch:  50%|█████     | 5/10 [1:07:17<1:07:13, 806.77s/it]

epoch 5 , train loss: 0.6080551965423217 , validation loss: 53.59725861808528


Batch: 100%|██████████| 146/146 [11:19<00:00,  4.66s/it]
Epoch:  60%|██████    | 6/10 [1:20:41<53:43, 805.86s/it]  

epoch 6 , train loss: 0.6027576799343499 , validation loss: 53.57252295338589


Batch: 100%|██████████| 146/146 [11:20<00:00,  4.66s/it]
Epoch:  70%|███████   | 7/10 [1:34:06<40:16, 805.41s/it]

epoch 7 , train loss: 0.6001863518564843 , validation loss: 53.56776592389397


Batch: 100%|██████████| 146/146 [11:20<00:00,  4.66s/it]
Epoch:  80%|████████  | 8/10 [1:47:32<26:51, 805.72s/it]

epoch 8 , train loss: 0.5970026020437915 , validation loss: 53.495495057416996


Batch: 100%|██████████| 146/146 [11:20<00:00,  4.66s/it]
Epoch:  90%|█████████ | 9/10 [2:00:57<13:25, 805.54s/it]

epoch 9 , train loss: 0.5937144785530077 , validation loss: 53.68756136821664


Batch: 100%|██████████| 146/146 [11:20<00:00,  4.66s/it]
Epoch: 100%|██████████| 10/10 [2:14:22<00:00, 806.24s/it]

epoch 10 , train loss: 0.5960824955463 , validation loss: 53.56787288510281





Evaluation

In [12]:
# Load the best model with lowest validation loss
best_model = TrafficModel(device, num_nodes, channels, num_layers, kernel_size, K, \
                     n_his, normalization = 'sym', bias = True).to(device)
best_model.load_state_dict(torch.load(model_save_path))

# Evaluation on test dataset
l = evaluate_model(best_model, loss, test_iter, edge_index, edge_weight, device)
MAE, MAPE, RMSE = evaluate_metric(best_model, test_iter, scaler, edge_index, edge_weight, device)
print("test loss:", l, "\nMAE:", MAE, ", MAPE:", MAPE, ", RMSE:", RMSE)

  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
  mape += (d / y).tolist()
 

test loss: 214.88216129966403 
MAE: 0.021061429854225294 , MAPE: inf , RMSE: 0.04684220839264269
