In [None]:
# Module imports.
import torch
import time
import pandas as pd
import networkx as nx
from torch.utils.data import random_split
from torch_geometric.loader import DataLoader
from torchmetrics import MeanAbsolutePercentageError as MAPE
from torch_geometric.utils import to_networkx
from od_gnn_cls.gnn_dataset import *
from od_gnn_cls.gnn_gcn import *
from od_gnn_cls.gnn_gat import *

In [None]:
# Device check: use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Import dataset: InMemory Dataset, PyG graph data is already prepared.
str_dir_dataset_root = "dataset_history_pyg_inMemory"       # Just include root directory.
# Import InMemory dataset. The root is passed through the variable above so the
# dataset location is configured in exactly one place.
dataset_od_flow = od_flow_graphs_inMemory(
    root= str_dir_dataset_root,
    lst_path_graphs= []
)
# Sample data to extract dimension info.
data_sample = dataset_od_flow[0]
int_dim_node_features = int(data_sample.num_node_features)  # Node feature dimension.
int_dim_node_out = int(data_sample.y.shape[1])              # Node output value dimension.

In [None]:
# Let's check our graph: display the sample graph object taken from the dataset.
data_sample

In [None]:
# What the graph looks like: convert the PyG sample into an undirected
# NetworkX graph and draw it.
g = to_networkx(data_sample, to_undirected= True)
nx.draw(g)

In [None]:
# Check the input OD vectors, stored as node features (author's unit note: [nrVehs]).
data_sample.x

In [None]:
# Check the output link flows, stored as node outputs (author's unit note: [nrVehs/hr]).
data_sample.y

In [None]:
# Other characteristics of the sample graph, printed one value per line in this
# order: node count, node feature count, edge count, feature count, then the
# undirected / self-loop / isolated-node checks.
print(
    data_sample.num_nodes,
    data_sample.num_node_features,
    data_sample.num_edges,
    data_sample.num_features,
    data_sample.is_undirected(),
    data_sample.has_self_loops(),
    data_sample.has_isolated_nodes(),
    sep="\n",
)

In [None]:
#  Define size of datasets for test and train
int_size_dataset = len(dataset_od_flow)
float_rat_train = 0.8     # Sum of ratios should be 1.
float_rat_test = 0.2      # Nominal test ratio; the actual test size is the remainder below.
int_size_train = int(int_size_dataset*float_rat_train)
# The test split takes every graph not used for training. Truncating both sizes
# independently can leave their sum short of the dataset size (e.g. 11 graphs
# -> 8 + 2), and random_split rejects lengths that do not add up to the total.
int_size_test = int_size_dataset - int_size_train

# Split original dataset into test and train datasets.
# A seeded generator keeps the train/test membership identical across kernel
# restarts, so results from separate runs are evaluated on the same test graphs.
int_seed_split = 42
dataset_train, dataset_test = random_split(
    dataset_od_flow,
    [int_size_train, int_size_test],
    generator= torch.Generator().manual_seed(int_seed_split)
)

# Print size information.
print("Graph sets have been split.")
print("Total Graphs: {}".format(int_size_dataset))
print("   Train Graphs: {}".format(int_size_train))
print("   Test Graphs: {}".format(int_size_test))

In [None]:
# Let's have batched dataset (surely from PyG, not basic Pytorch)
int_size_batch = 32 # Some number as 2^x... (e.g. 32,64 ..)
# Training batches are reshuffled every epoch.
loaded_train = DataLoader(dataset_train, batch_size= int_size_batch, shuffle= True)
# Evaluation gains nothing from shuffling; a fixed order keeps the composition
# of the test batches the same from one epoch to the next.
loaded_test = DataLoader(dataset_test, batch_size= int_size_batch, shuffle= False)
# Print out process.
print("Data has been loaded. Batch Size: {}".format(int_size_batch))

In [None]:
# Before using the main model, let's check that graph convolution is working.
stupiud_test_model = gnn_GCN_CONV_test(14,2,14).to(device)
# Call the module instance instead of .forward() so that any registered hooks
# run as well; this is the same calling style the training loop uses.
stupiud_test_model(data_sample.to(device))
# As GCN utilizes spectral convolution based on the Laplacian matrix,
# there should be minus values and also diagonal elements shouldn't be all zero.

In [None]:
# Loop for training. Parameters should be reset before training.
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``.

    Args:
        dataloader: Iterable of batches exposing ``.dataset``; each batch must
            provide ``.to(device)``, ``.y`` and ``.num_graphs`` (as PyG batches do).
        model: Module called as ``model(batch)`` to produce predictions.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        None. Progress is printed on every 10th batch.
    """
    model.train() # Activate drop-out layers.

    size = len(dataloader.dataset)
    # Move batches to the device the model lives on so both always agree; the
    # notebook-level `device` is only used for a model without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device
    nr_used_data = 0    # Graphs processed so far in this epoch.

    for batch, data in enumerate(dataloader):
        # Prediction from forward calculation.
        # Loss term calculation.
        data = data.to(run_device)
        pred = model(data)
        loss = loss_fn(pred, data.y)
        # Back-propagation and optimization.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Count graphs directly instead of deriving the number from the node
        # count and the feature dimension, which only matches when every graph
        # has exactly as many nodes as there are node features.
        nr_used_data += int(data.num_graphs)
        # Reporting.
        if batch % 10 == 0 : # For each 10 batches.
            loss_val = loss.item()
            print("Loss: {loss:>.5f}  [{current:>5d}/{size:>5d}]".format(loss=loss_val, current= nr_used_data, size= size))

# Loop for test.
@torch.no_grad()    # Gradient tracking is disabled for the whole evaluation.
def test_loop(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and report MAPE and average loss.

    Args:
        dataloader: Iterable of batches supporting ``len()``; each batch must
            provide ``.to(device)`` and ``.y``.
        model: Module called as ``model(batch)`` to produce predictions.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.

    Returns:
        ``[mape_percent, test_loss]`` where ``mape_percent`` is the MAPE metric
        value multiplied by 100 and ``test_loss`` is the mean of the per-batch
        loss values (a Python float).
    """
    model.eval() # Deactivate drop-out layer.

    num_batches = len(dataloader)
    # Evaluate on the device the model lives on so both always agree; the
    # notebook-level `device` is only used for a model without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device
    clc_mape = MAPE().to(run_device)
    test_loss = 0

    for data in dataloader:
        data = data.to(run_device)
        pred = model(data)
        test_loss += loss_fn(pred, data.y).item()
        # Accumulate inside the metric so the final MAPE covers every test
        # element; averaging per-batch MAPE values would over-weight a smaller
        # final batch.
        clc_mape.update(pred, data.y)

    test_loss /= num_batches
    mape = clc_mape.compute()

    print(f"Test Error: \n MAPE: {(mape*100):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    return [mape*100, test_loss]

# Function to run training.
def run_training(in_model, in_tot_epoch:int = 300, in_lr:float = 0.0005,
                 in_loader_train=None, in_loader_test=None) -> pd.DataFrame:
    """Reset ``in_model``, train it with Adam on MSE loss and record each epoch.

    Args:
        in_model: Model to train; must provide ``reset_parameters()``.
        in_tot_epoch: Total number of epochs. NOTE: RECOMMEND ABOVE 300.
        in_lr: Learning rate handed to the Adam optimizer.
        in_loader_train: Training data loader. When None, the notebook-level
            ``loaded_train`` is used.
        in_loader_test: Test data loader. When None, the notebook-level
            ``loaded_test`` is used.

    Returns:
        DataFrame with one row per epoch and the columns ``Iteration``
        (1-based), ``Time`` (whole seconds elapsed since training started),
        ``MSE_Loss`` and ``MAPE`` (both as returned by ``test_loop``).
    """
    # Fall back to the notebook-level loaders so existing calls keep working.
    loader_train = loaded_train if in_loader_train is None else in_loader_train
    loader_test = loaded_test if in_loader_test is None else in_loader_test

    # Let your machine work.
    start_time = time.time()                # Timer starts.
    in_model.reset_parameters()             # Reset all parameters in the model.
    loss_fn = torch.nn.MSELoss()            # Loss function: MSE
    # Optimizer init.
    optimizer = torch.optim.Adam(in_model.parameters(), lr=in_lr)
    # Empty lists for stamps.
    lst_mape = []
    lst_loss = []
    lst_time = []

    # Training Starts!
    for t in range(in_tot_epoch):
        print(f"Epoch {t+1}\n-------------------------------")
        train_loop(loader_train, in_model, loss_fn, optimizer)
        mape, loss = test_loop(loader_test, in_model, loss_fn)
        time_epoch = time.time() - start_time   # Cumulative, not per-epoch.
        lst_time.append(int(time_epoch))
        lst_mape.append(float(mape))
        lst_loss.append(float(loss))
    print("Done!")

    # Keep training record.
    df_hist_learn_tmp = pd.DataFrame({
        "Iteration" : range(1, len(lst_loss) + 1),
        "Time": lst_time,
        "MSE_Loss" : lst_loss,
        "MAPE" : lst_mape
    })

    # Return training record.
    return df_hist_learn_tmp

In [None]:
# The sanity-check model shows the GCNConv layer is doing its job,
# so now build a less trivial model.

# GNN MODEL IMPORT
# FIRST TRIAL: 2GCN + 1LIN layers, No BatchNorm, No Dropout.
# The hidden width is set equal to the node feature dimension.
model_2GCN_1LIN = gnn_GCN_CONV_LIN(
    in_dim_x=int_dim_node_features,
    in_dim_y=int_dim_node_out,
    in_dim_hid=int_dim_node_features,
    in_num_layers=2,
    in_lc_norm=False,
    in_lc_dropout=False,
)
model_2GCN_1LIN = model_2GCN_1LIN.to(device)

# Print-out model spec.
# Actual layer structure is not same as printed results!
print(model_2GCN_1LIN)

In [None]:
# Train the first-trial model for 10 epochs and keep the per-epoch history.
# A good GPU helps (author's note: run on a personal RTX3060 12GB).
df_hist_learn_2GCN_1LIN = run_training(model_2GCN_1LIN, in_tot_epoch=10)

In [None]:
# Check your learning history via the last 5 rows of the dataframe.
df_hist_learn_2GCN_1LIN.tail(5)

In [None]:
# GNN MODEL IMPORT
# SECOND TRIAL: 2GAT + 1LIN layers, No BatchNorm, No Dropout.
# The hidden width is set equal to the node feature dimension.
model_2GAT_1LIN = gnn_GAT_CONV_LIN(
    in_dim_x=int_dim_node_features,
    in_dim_y=int_dim_node_out,
    in_dim_hid=int_dim_node_features,
    in_num_layers=2,
    in_lc_norm=False,
    in_lc_dropout=False,
)
model_2GAT_1LIN = model_2GAT_1LIN.to(device)

# Print-out model spec.
# Actual layer structure is not same as printed results!
print(model_2GAT_1LIN)

In [None]:
# Train the second-trial model for 10 epochs and keep the per-epoch history.
# A good GPU helps.
df_hist_learn_2GAT_1LIN = run_training(model_2GAT_1LIN, in_tot_epoch=10)

In [None]:
# Check the result: last 5 rows of the second-trial learning history.
df_hist_learn_2GAT_1LIN.tail(5)

In [None]:
# GNN MODEL IMPORT
# THIRD TRIAL: 2GAT + 1LIN layers 0.2 Negative Slope, No BatchNorm, No Dropout.
# Same arguments as the second trial, plus an explicit negative slope.
model_2GAT_1LIN_NegSlope = gnn_GAT_CONV_LIN(
    in_dim_x=int_dim_node_features,
    in_dim_y=int_dim_node_out,
    in_dim_hid=int_dim_node_features,
    in_neg_slope=0.2,
    in_num_layers=2,
    in_lc_norm=False,
    in_lc_dropout=False,
)
model_2GAT_1LIN_NegSlope = model_2GAT_1LIN_NegSlope.to(device)

# Print-out model spec.
# Actual layer structure is not same as printed results!
print(model_2GAT_1LIN_NegSlope)

In [None]:
# Train the third-trial model for 10 epochs and keep the per-epoch history.
# A good GPU helps.
df_hist_learn_2GAT_1LIN_NegSlope = run_training(model_2GAT_1LIN_NegSlope, in_tot_epoch=10)

In [None]:
# Check the result: last 5 rows of the third-trial learning history.
df_hist_learn_2GAT_1LIN_NegSlope.tail(5)

In [None]:
# GNN MODEL IMPORT
# FOURTH TRIAL: 2GCN + 2LIN layers, No BatchNorm, No Dropout.
# The hidden width is set equal to the node feature dimension.
model_2GCN_2LIN = gnn_GCN_CONV_LIN2(
    in_dim_x=int_dim_node_features,
    in_dim_y=int_dim_node_out,
    in_dim_hid=int_dim_node_features,
    in_num_layers=2,
    in_lc_norm=False,
    in_lc_dropout=False,
)
model_2GCN_2LIN = model_2GCN_2LIN.to(device)

# Print-out model spec.
# Actual layer structure is not same as printed results!
print(model_2GCN_2LIN)

In [None]:
# Train the fourth-trial model for 10 epochs and keep the per-epoch history.
# A good GPU helps.
df_hist_learn_2GCN_2LIN = run_training(model_2GCN_2LIN, in_tot_epoch=10)

In [None]:
# Check the result: last 5 rows of the fourth-trial learning history.
df_hist_learn_2GCN_2LIN.tail(5)

In [None]:
# GNN MODEL IMPORT
# FIFTH TRIAL: 2GATv2 + 1LIN layers, No BatchNorm, No Dropout.
# The hidden width is set equal to the node feature dimension.
model_2GATv2_1LIN = gnn_GATv2_CONV_LIN(
    in_dim_x=int_dim_node_features,
    in_dim_y=int_dim_node_out,
    in_dim_hid=int_dim_node_features,
    in_num_layers=2,
    in_lc_norm=False,
    in_lc_dropout=False,
)
model_2GATv2_1LIN = model_2GATv2_1LIN.to(device)

# Print-out model spec.
# Actual layer structure is not same as printed results!
print(model_2GATv2_1LIN)

In [None]:
# Train the fifth-trial model for 10 epochs and keep the per-epoch history.
# A good GPU helps.
df_hist_learn_2GATv2_1LIN = run_training(model_2GATv2_1LIN, in_tot_epoch=10)

In [None]:
# Check the result: last 5 rows of the fifth-trial learning history.
df_hist_learn_2GATv2_1LIN.tail(5)

In [None]:
# GNN MODEL IMPORT
# SIXTH TRIAL (going by the model name): 3GATv2 + 1LIN layers, No BatchNorm, No Dropout.
# Same arguments as the 2-layer GATv2 model, with in_num_layers raised to 3.
model_3GATv2_1LIN = gnn_GATv2_CONV_LIN(
    in_dim_x=int_dim_node_features,
    in_dim_y=int_dim_node_out,
    in_dim_hid=int_dim_node_features,
    in_num_layers=3,
    in_lc_norm=False,
    in_lc_dropout=False,
)
model_3GATv2_1LIN = model_3GATv2_1LIN.to(device)

# Print-out model spec.
# Actual layer structure is not same as printed results!
print(model_3GATv2_1LIN)

In [None]:
# Train the 3-layer GATv2 model for 10 epochs and keep the per-epoch history.
# A good GPU helps.
df_hist_learn_3GATv2_1LIN = run_training(model_3GATv2_1LIN, in_tot_epoch=10)

In [None]:
# Check the result: last 5 rows of the 3-layer GATv2 learning history.
df_hist_learn_3GATv2_1LIN.tail(5)