In [1]:
import numpy as np
import torch

SEED = 3

np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [2]:
import sys
import os

sys.path.append(os.path.dirname(os.getcwd()))

In [3]:
from datetime import date

from torchvision import transforms

from util.dataset import GraphPassengerFlowDataset
from util.transform import GraphToTensor, GraphRollExogenousFeatures

transform = transforms.Compose([
    GraphToTensor(),
    GraphRollExogenousFeatures()
])

train_data = GraphPassengerFlowDataset(
    min_date=date(2022, 1, 1),
    max_date=date(2023, 1, 1),
    transform=transform
)
validation_data = GraphPassengerFlowDataset(
    min_date=date(2023, 1, 1),
    max_date=date(2023, 4, 1),
    transform=transform)
test_data = GraphPassengerFlowDataset(
    min_date=date(2023, 4, 1),
    max_date=date(2023, 7, 1),
    transform=transform)

LOADING DATA: 100%|██████████| 14/14 [00:53<00:00,  3.79s/it]
LOADING DATA: 100%|██████████| 14/14 [00:16<00:00,  1.17s/it]
LOADING DATA: 100%|██████████| 14/14 [00:16<00:00,  1.15s/it]


In [4]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
validation_loader = DataLoader(validation_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

In [5]:
data = train_data._data[['passengers']]
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,passengers
datetime,origin,destination,Unnamed: 3_level_1
2022-01-01 00:00:00,12,19,159.0
2022-01-01 00:00:00,12,LM,6.0
2022-01-01 00:00:00,12,OW,25.0
2022-01-01 00:00:00,16,24,78.0
2022-01-01 00:00:00,16,CC,82.0
...,...,...,...
2022-12-31 23:00:00,WD,ED,17.0
2022-12-31 23:00:00,WP,NC,14.0
2022-12-31 23:00:00,WP,PC,35.0
2022-12-31 23:00:00,WS,FM,121.0


In [6]:
import numpy as np
import pandas as pd

station_connections = data.index.droplevel(0).unique()

physical_graph = np.zeros((station_connections.size, station_connections.size), dtype=np.float64)
for idx, con in enumerate(station_connections):
    # Retrieve connections such that `this.dest == that.origin`
    neighbours = station_connections[station_connections.get_loc(con[1])]
    neighbours = neighbours[neighbours.map(lambda index: index[1] != con[0])]

    # Set the edge weight for all neighbours
    for n in neighbours:
        physical_graph[idx, station_connections.get_loc(n)] = 1.0 / neighbours.size

pd.DataFrame(physical_graph, columns=station_connections.to_series().map(lambda con: f'{con[0]}->{con[1]}')).set_index(station_connections.to_series().map(lambda con: f'{con[0]}->{con[1]}'))

Unnamed: 0,12->19,12->LM,12->OW,16->24,16->CC,19->12,19->MA,24->16,24->GP,AN->PC,...,UC->FM,UC->SH,WC->LF,WC->PH,WD->CV,WD->ED,WP->NC,WP->PC,WS->FM,WS->ML
12->19,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12->LM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12->OW,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16->24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16->CC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WD->ED,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
WP->NC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
WP->PC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
WS->FM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
from models.stgan import STGAN
from torch.optim.lr_scheduler import ExponentialLR
from torch.nn import MSELoss
from torch.optim import AdamW

device = torch.device('cuda:0')

model = STGAN(
        order=(23, 1, 0),
        seasonal_lag=24,
        seasonal_order=(7, 0, 0),
        static_features=7,
        exogenous_features=14,
        exogenous_window=(-2, 4),
        k_steps=4
).to(device)

criterion = MSELoss()
optimizer = AdamW(model.parameters(), lr=1.0e-3, eps=1.0e-4)
scheduler = ExponentialLR(optimizer, 0.9)

In [8]:
ADJACENCY_MATRIX_NORMALIZATION = False

adj_mx = np.stack([physical_graph], axis=-1)
if ADJACENCY_MATRIX_NORMALIZATION:
    adj_mx = adj_mx / (adj_mx.sum(axis=0) + 1e-18)
print('Edge features:', adj_mx.shape)

src, dst = adj_mx.sum(axis=-1).nonzero()
edge_index = torch.tensor(np.array([src, dst], dtype=np.int_), dtype=torch.long, device=device)
edge_attr = torch.tensor(adj_mx[adj_mx.sum(axis=-1) != 0],
                         dtype=torch.float,
                         device=device)

Edge features: (102, 102, 1)


In [None]:
import wandb

# Log run to Weights and Biases
wandb.init(project='passenger-flow-forecasting',
           config={'model': 'STGAN'})

In [10]:
from tqdm.notebook import tqdm
from torch.nn.utils import clip_grad_norm_
import math
from sklearn import metrics
import os

EPOCHS = 50  # at least 50, preferably 200
last_state = (-1, None)

# Create weights directories
os.makedirs('weights/stgan/checkpoints', exist_ok=True)

tqdm_epoch = tqdm(desc='EPOCH', total=EPOCHS)
tqdm_batch = tqdm(desc='TRAIN', total=0)
for epoch in range(EPOCHS):
    # Set the correct mode for training
    model.train()

    # Keep track of the training loss
    y_true = []
    y_pred = []

    # Loop over training data in batches
    tqdm_batch.desc = f'TRAIN #{epoch:03d}'
    tqdm_batch.reset(len(train_loader))
    for batch in train_loader:
        # Move the data to the same device as the model
        # Data format: [batch, time, node, feature]
        history, horizon = tuple(t.to(device) for t in batch)

        # Select views of data
        y = horizon[:, 0, :, 0].squeeze()

        # Clear the gradients
        optimizer.zero_grad()

        # Compute outputs using a forward pass
        outputs = model(history, horizon, edge_index).squeeze()

        # Compute the loss of this batch
        loss = criterion(outputs, y)

        # Perform a backward pass to update weights
        loss.backward()
        clip_grad_norm_(model.parameters(), 5)
        optimizer.step()

        y_true += y.cpu().numpy().tolist()
        y_pred += outputs.cpu().detach().numpy().tolist()

        tqdm_batch.update()

    y_true = np.array(y_true).flatten()
    y_pred = np.array(y_pred).flatten()
    train_mse = metrics.mean_squared_error(y_true, y_pred)
    train_mae = metrics.mean_absolute_error(y_true, y_pred)
    y_true = []
    y_pred = []

    # We don't need gradients during validation
    with torch.no_grad():
        model.eval()
        # Loop over validation data in batches
        tqdm_batch.desc = f'VALIDATE #{epoch:03d}'
        tqdm_batch.reset(len(train_loader))
        for batch in train_loader:
            # Move the data to the same device as the model
            # Data format: [batch, time, node, feature]
            history, horizon = tuple(t.to(device) for t in batch)

            # Select views of data
            y = horizon[:, 0, :, 0].squeeze()

            # Clear the gradients
            optimizer.zero_grad()

            # Compute outputs using a forward pass
            outputs = model(history, horizon, edge_index).squeeze()

            y_true += y.cpu().numpy().tolist()
            y_pred += outputs.cpu().detach().numpy().tolist()

            tqdm_batch.update()

    y_true = np.array(y_true).flatten()
    y_pred = np.array(y_pred).flatten()
    validation_mse = metrics.mean_squared_error(y_true, y_pred)
    validation_mae = metrics.mean_absolute_error(y_true, y_pred)

    print(f'#{epoch:2d}    RMSE: {math.sqrt(validation_mse):6.2f}    MAE: {validation_mae:6.2f}')

    # Decrease learning rate
    scheduler.step()

    # Log metrics to Weights and Biases
    wandb.log({
        'train_mse': train_mse,
        'train_mae': train_mae,
        'validation_mse': validation_mse,
        'validation_mae': validation_mae
    }, commit=epoch < EPOCHS)

    last_state = epoch, model.state_dict()

    if epoch < 5 or epoch % 5 == 0:
        torch.save(model.state_dict(), f'weights/stgan/checkpoints/{epoch:2d}.pt')

    tqdm_epoch.update()

tqdm_batch.close()

EPOCH:   0%|          | 0/50 [00:00<?, ?it/s]

TRAIN: 0it [00:00, ?it/s]

# 0    RMSE: 135.73    MAE:  67.09
# 1    RMSE: 126.69    MAE:  56.69
# 2    RMSE: 127.43    MAE:  59.62
# 3    RMSE: 122.19    MAE:  53.08
# 4    RMSE: 120.95    MAE:  52.28
# 5    RMSE: 118.94    MAE:  50.90
# 6    RMSE: 118.64    MAE:  50.63
# 7    RMSE: 117.73    MAE:  50.09
# 8    RMSE: 117.45    MAE:  50.20
# 9    RMSE: 118.83    MAE:  52.93
#10    RMSE: 117.52    MAE:  51.47
#11    RMSE: 117.15    MAE:  50.31
#12    RMSE: 116.35    MAE:  49.87
#13    RMSE: 115.20    MAE:  48.58
#14    RMSE: 115.09    MAE:  48.63
#15    RMSE: 114.71    MAE:  48.53
#16    RMSE: 116.08    MAE:  50.78
#17    RMSE: 114.43    MAE:  48.52
#18    RMSE: 114.21    MAE:  48.37
#19    RMSE: 114.00    MAE:  48.98


In [11]:
from datetime import datetime

# Save the trained model
now = datetime.now()
datestring = f'{now.year}{str(now.month).zfill(2)}{str(now.day).zfill(2)}-{str(now.hour).zfill(2)}{str(now.minute).zfill(2)}'
torch.save(last_state[1], f'weights/stgan/{datestring}--{last_state[0]}.pt')
torch.save(last_state[1], f'weights/stgan/seed-{SEED}.pt')
torch.save(last_state[1], f'weights/stgan/latest.pt')

In [12]:
from os.path import exists

if exists(f'weights/stgan/latest.pt'):
    model.load_state_dict(torch.load(f'weights/stgan/latest.pt'))

In [13]:
# Keep track of the loss
y_true = []
y_pred = []

# We don't need to keep track of gradients while testing
with torch.no_grad():
    model.eval()

    # Loop over data in batches
    for batch in tqdm(test_loader, desc='TEST'):
        # Move the data to the same device as the model
        # Data format: [batch, time, node, feature]
        history, horizon = tuple(t.to(device) for t in batch)

        # Select views of data
        y = horizon[:, 0, :, 0].squeeze()

        # Clear the gradients
        optimizer.zero_grad()

        # Compute outputs using a forward pass
        outputs = model(history, horizon, edge_index).squeeze()

        y_true += y.cpu().numpy().tolist()
        y_pred += outputs.cpu().detach().numpy().tolist()

y_true = np.array(y_true).flatten()
y_pred = np.array(y_pred).flatten()

# Cast results to integers
y_true = y_true.astype('int')
y_pred = y_pred.astype('int')

# Drop results where y_true == 0
mask = y_true > 0
y_true = y_true[mask]
y_pred = y_pred[mask]

test_mse = metrics.mean_squared_error(y_true, y_pred)
test_rmse = metrics.mean_squared_error(y_true, y_pred, squared=False)
test_mae = metrics.mean_absolute_error(y_true, y_pred)
test_mape = metrics.mean_absolute_percentage_error(y_true, y_pred)

# Log metrics to Weights and Biases
wandb.log({
    'mse': test_mse,
    'rmse': test_rmse,
    'mae': test_mae,
    'mape': test_mape
}, commit=True)

print(f'MSE: {test_mse:.2f}')
print(f'RMSE: {test_rmse:.2f}')
print(f'MAE: {test_mae:.2f}')
print(f'MAPE: {test_mape:.2f}')

In [14]:
wandb.finish()