In [3]:
# Here we import the libraries we will need later
import torch    # Basic PyTorch library for tensor operations
import torch.nn as nn   # Building Neural Networks
import torch.nn.functional as F # Activation functions and other utilities
from torch_geometric_temporal.nn.recurrent import A3TGCN    # See below
from torch_geometric_temporal.dataset import METRLADatasetLoader    # Load the dataset
from torch_geometric.loader import DataLoader  # For batching and loading data
from tqdm import tqdm   # The visualization of processing progress
import matplotlib.pyplot as plt # For basic visualization
import seaborn as sns  # For advanced visualization
import numpy as np  # For numerical operations
import pandas as pd  # For data manipulation and analysis that i am familiar with

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the METR-LA traffic dataset and inspect what a single sample looks like.
loader = METRLADatasetLoader()
# Each snapshot pairs 12 input time steps with 12 target time steps
# (the "60 min before -> 60 min after" window, assuming 5-minute readings -- TODO confirm).
dataset = loader.get_dataset(num_timesteps_in=12, num_timesteps_out=12)

# Peek at the structure of the first snapshot only; `break` stops after one item.
for snapshot in dataset:
    print(snapshot)
    break

Data(x=[207, 2, 12], edge_index=[2, 1722], edge_attr=[1722], y=[207, 12])


#### Here we will configure a simple GNN model

#### In this version, one important concept is added:
There are different types of "edges" in traffic conditions:

    Free flow: the traffic moves forward. This is a "Downstream" edge.
    
    Congestion: a traffic shock wave travels backward. This is an "Upstream" edge.


In [5]:
class TemporalGNN(nn.Module):
    """Two-direction temporal GNN built from a pair of A3TGCN layers.

    One A3TGCN layer receives the edges in their original direction
    ("downstream"), the other receives the same edges reversed ("upstream").
    The two node embeddings are summed, passed through ReLU and projected
    to the forecast horizon by a linear layer.

    Args:
        node_features: number of input features per node (A3TGCN ``in_channels``).
        periods: number of input time steps handed to A3TGCN.
        hidden_channels: width of each A3TGCN output embedding
            (defaults to 32, the value that used to be hard-coded).
        out_steps: number of values predicted per node
            (defaults to 12, the value that used to be hard-coded).
    """

    def __init__(self, node_features, periods, hidden_channels=32, out_steps=12):
        super().__init__()
        # DIFFERENT from before: two A3TGCN layers, one per edge direction,
        # whose outputs are combined in forward().
        self.tgnn_down = A3TGCN(in_channels=node_features,
                                out_channels=hidden_channels,
                                periods=periods)
        self.tgnn_up = A3TGCN(in_channels=node_features,
                              out_channels=hidden_channels,
                              periods=periods)

        # Maps the combined embedding to one value per forecast step
        self.linear = nn.Linear(hidden_channels, out_steps)

    def forward(self, x, edge_index, edge_weight):
        """Predict ``out_steps`` values per node.

        Args:
            x: node features as expected by A3TGCN
                (presumably [num_nodes, node_features, periods] -- TODO confirm).
            edge_index: [2, num_edges] tensor of (source, target) node indices.
            edge_weight: one weight per edge; reused unchanged for both directions.

        Returns:
            The linear projection of the ReLU-activated sum of both embeddings.
        """
        # The edges as given are treated as the "downstream" (forward) direction
        edge_index_down = edge_index

        # "Upstream" = the same edges reversed: swap the source and target rows.
        # Edge i stays at position i, so edge_weight still lines up with it.
        edge_index_up = torch.stack([edge_index[1], edge_index[0]], dim=0)

        h_down = self.tgnn_down(x, edge_index_down, edge_weight)
        h_up = self.tgnn_up(x, edge_index_up, edge_weight)

        # Simple sum aggregation of the two directions
        h = h_up + h_down

        # ReLU adds the non-linearity: a purely linear model cannot follow the
        # rapid, sharp changes (phase transitions) seen in traffic conditions.
        h = F.relu(h)

        # Final prediction
        return self.linear(h)

In [6]:
# Instead of training over and over again while the error fluctuates without decreasing,
# the training loop should also learn when to stop.

class EarlyStopper:
    """Tell the training loop when the validation loss has stopped improving.

    Args:
        patience: how many non-improving checks are tolerated before
            `early_stop` returns True.
        min_delta: tolerance above the best loss seen so far; a check only
            counts as "no improvement" when the loss is greater than
            best + min_delta.
    """

    # __init__ is the fixed name of the constructor (initialization);
    # `self` is the instance being initialized.
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta

        # Consecutive checks without improvement
        self.counter = 0

        # Best (lowest) validation loss observed so far
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        """Record one validation loss and report whether training should stop.

        Args:
            validation_loss: the validation loss of the epoch just finished.

        Returns:
            True once the loss has been worse than best + min_delta for
            `patience` checks since the last improvement, otherwise False.
        """
        # The model improved: remember the new best and reset the counter
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0    # an improving step is not a "wasted" step
            return False

        # The model got worse than the best by more than the tolerance
        if validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True

        # Within tolerance of the best: neither progress nor a strike
        return False

#### After the model is initially configured, we will train it. 

In [7]:
# Choose the compute device: Apple's Metal backend (MPS) when present, CPU otherwise
if torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

# Flag for the data loaders' pin_memory option
PIN_MEMORY = True

In [8]:
# We can no longer train on every sample: part of the data is held out
# so the model can be evaluated on snapshots it has never seen.
full_data = list(dataset)

# Split logic: the first 80% of the snapshots (in dataset order) are used
# for training, the remaining 20% for validation.
TRAIN_FRACTION = 0.8
BATCH_SIZE = 16

split_index = int(len(full_data) * TRAIN_FRACTION)
train_dataset = full_data[:split_index]
val_dataset = full_data[split_index:]

# Training batches are reshuffled every epoch.
# pin_memory follows the PIN_MEMORY flag from the device-setup cell.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          pin_memory=PIN_MEMORY)

# Validation is NOT shuffled: a fixed batch composition keeps the averaged
# validation loss comparable from epoch to epoch, which early stopping relies on.
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        pin_memory=PIN_MEMORY)



In [10]:
# Hyper-parameters of this training run
NUM_EPOCHS = 50         # upper bound; early stopping may end the run sooner
LEARNING_RATE = 0.01    # step size of the Adam optimizer
GRAD_CLIP_NORM = 1.0    # maximum gradient norm allowed before the optimizer step

# Free cached MPS memory left over from earlier runs.
# Only do this when we really run on MPS, since `device` may have fallen back to CPU.
if device.type == 'mps':
    torch.mps.empty_cache()

model = TemporalGNN(node_features=2, periods=12).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = torch.nn.L1Loss()    # mean absolute error (MAE)

early_stopper = EarlyStopper()

print(f'Starting training on {len(train_dataset)} samples, validating on {len(val_dataset)} samples...')


for epoch in range(1, NUM_EPOCHS + 1):
    # ---------------- training phase ----------------
    model.train()
    epoch_loss = 0
    step = 0
    progressor = tqdm(train_loader, desc=f'Epoch {epoch:02d}', leave=False)
    for batch in progressor:
        batch = batch.to(device)

        optimizer.zero_grad()

        # Prediction of the model for this batch
        y_hat = model(batch.x, batch.edge_index, batch.edge_attr)

        # Loss between the prediction and the ground truth.
        # The first trial used MSE; L1 (MAE) is used now.
        loss = loss_fn(y_hat, batch.y)
        loss.backward()

        # Gradient clipping to prevent exploding gradients
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=GRAD_CLIP_NORM)

        optimizer.step()
        epoch_loss += loss.item()
        step += 1

    # max(..., 1) avoids a ZeroDivisionError if the loader yielded no batch
    avg_loss = epoch_loss / max(step, 1)

    # ---------------- validation phase ----------------
    model.eval()
    val_loss = 0
    val_step = 0
    progressor2 = tqdm(val_loader, desc=f'Epoch {epoch:02d}', leave=False)
    with torch.no_grad():   # no gradients are needed for evaluation
        # Iterate through the progress bar itself so it actually advances
        for batch in progressor2:
            batch = batch.to(device)
            y_hat = model(batch.x, batch.edge_index, batch.edge_attr)
            loss = loss_fn(y_hat, batch.y)

            val_loss += loss.item()
            val_step += 1

    avg_val_loss = val_loss / max(val_step, 1)

    print(f'Epoch {epoch:02d} | Train MAE: {avg_loss:.4f} | Val MAE: {avg_val_loss:.4f}')

    # Stop as soon as the validation loss has stalled for too long
    if early_stopper.early_stop(avg_val_loss):
        print(f'Early stopping is triggered.\nThe training stops at Epoch {epoch}')
        break


print('Training process finished.')

Starting training on 27399 samples, validating on 6850 samples...


                                                           

KeyboardInterrupt: 

#### Stage review
What did we change?
- The treatment of the adjacency matrix. We learnt how to deal with a heterogeneous GNN, which in this case means two kinds of edge type: "upstream" and "downstream".

- The evaluation system. In the previous raw version, the error kept fluctuating and did not decrease for a while. After studying, I learnt about the "early stopping" mechanism. So here we design the early-stopping class and apply a train-validation split to the dataset.

Why does this matter?
- In our traffic prediction, we are also trying to avoid overfitting -- when the training loss is decreasing but the validation loss is increasing. We have to tell the model how to stop in time.