In [None]:
import graph_generator as gg
import numpy as np
from torch_geometric.data import DataLoader
from torch_geometric.nn import NNConv
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
from tqdm import trange
import copy

%load_ext autoreload
%autoreload 2

In [None]:
# CUDA availability is probed first, but the result is then unconditionally replaced:
# the notebook is pinned to the CPU no matter what the probe found.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device = torch.device('cpu')
print("PyTorch has version {}".format(torch.__version__))
print('Using device:', device)

In [None]:
# m, n and num are forwarded positionally to gg.generate_examples
# (presumably graph sizes and the number of examples -- TODO confirm against graph_generator).
m, n, num = 8, 8, 10

# Extra keyword arguments passed through to gg.generate_examples.
config = {
    'graph_type': 'GEOM',
    'threshold': 0.5,
    'scaling': 1 / np.sqrt(2),
}

# Two separately generated example sets, each wrapped in a shuffling DataLoader.
# Note that despite the *_dataset names these variables hold loaders.
train_dataset = DataLoader(
    gg.generate_examples(num, m, n, [0.75] * m, **config),
    shuffle=True,
)
test_dataset = DataLoader(
    gg.generate_examples(num, m, n, [0.75] * m, **config),
    shuffle=True,
)

In [None]:
class OBM_NNConv(torch.nn.Module):
    """
    Two NNConv message-passing layers followed by a linear read-out that emits one scalar per node.

    Part of the code definition is inspired by Colab 2:
    https://colab.research.google.com/drive/1xHmpjVO-Z74NK-dH3qoUBTf-tKUPfOKW?usp=sharing

    The convolution operator is NNConv from the "Dynamic Edge-Conditioned Filters in Convolutional Neural
    Networks on Graphs" <https://arxiv.org/abs/1704.02901> paper.

    Note: a BatchNorm1d layer is created and reset alongside the other layers, but forward() does not apply it.
    """

    def __init__(self, input_dim, output_dim, edge_feature_dim, args):
        """
        Builds the layers of the network.
        Args:
            input_dim: dimension of node features
            output_dim: output dimension of the second convolution (input width of the regression head)
            edge_feature_dim: dimension of the edge features
            args: object with attributes hidden_dim (width of the first convolution) and dropout
                (dropout probability applied between the two convolutions)
        """
        super().__init__()

        hidden = args.hidden_dim
        self.dropout = args.dropout

        # Each NNConv is given an edge network mapping an edge feature vector to
        # in_channels * out_channels values.
        edge_net_in = nn.Linear(edge_feature_dim, input_dim * hidden)
        self.forward_conv = NNConv(input_dim, hidden, edge_net_in, aggr='max')

        edge_net_out = nn.Linear(edge_feature_dim, output_dim * hidden)
        self.backward_conv = NNConv(hidden, output_dim, edge_net_out, aggr='max')

        self.batch_norm = nn.BatchNorm1d(hidden)
        self.regression_head = torch.nn.Linear(output_dim, 1)

    def reset_parameters(self):
        """Re-initializes every sub-layer by delegating to its own reset_parameters()."""
        layers = (self.forward_conv, self.backward_conv, self.batch_norm, self.regression_head)
        for layer in layers:
            layer.reset_parameters()

    def forward(self, x, edge_index, edge_attr):
        """
        Runs conv -> ReLU -> dropout -> conv -> ReLU -> linear head.
        Args:
            x: node feature tensor
            edge_index: graph connectivity passed to both convolutions
            edge_attr: edge features passed to both convolutions

        Returns:
            Output of the regression head (last dimension of size 1).
        """
        hidden = F.relu(self.forward_conv(x, edge_index, edge_attr))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        out = F.relu(self.backward_conv(hidden, edge_index, edge_attr))
        return self.regression_head(out)

In [None]:
class MaskedMSELoss(nn.Module):
    """Mean squared error evaluated only on the prediction rows selected by a mask."""

    def forward(self, pred, value_to_go, neighbor_mask):
        """
        Computes MSE over neighbors of the arriving node.
        Args:
            pred: model output; rows are selected with neighbor_mask and dimension 1 is then squeezed
                (presumably shape (num_nodes, 1) -- confirm against the model)
            value_to_go: target values, compared against the selected and squeezed predictions
            neighbor_mask: mask used to index pred (neighbors of the arriving node)

        Returns:
            Masked mean square error.
        """
        masked_pred = pred[neighbor_mask]
        return F.mse_loss(masked_pred.squeeze(dim=1), value_to_go)
    

def build_optimizer(args, params):
    """
    Builds an optimizer, and optionally a learning-rate scheduler, according to the given parameters.

    Args:
        args: attribute-style object providing opt, opt_scheduler, lr and weight_decay. The 'step'
            scheduler additionally reads opt_decay_step and opt_decay_rate; the 'cos' scheduler reads
            opt_restart.
        params: iterable of parameters; only those with requires_grad set are given to the optimizer.

    Returns:
        Tuple (scheduler, optimizer). scheduler is None when args.opt_scheduler is 'none'.

    Raises:
        ValueError: if args.opt or args.opt_scheduler is not one of the supported names.
    """

    weight_decay = args.weight_decay
    # Materialize the trainable parameters once instead of passing a one-shot filter iterator around.
    trainable = [p for p in params if p.requires_grad]

    if args.opt == 'adam':
        optimizer = optim.Adam(trainable, lr = args.lr, weight_decay = weight_decay)
    elif args.opt == 'sgd':
        optimizer = optim.SGD(trainable, lr = args.lr, momentum = 0.95, weight_decay = weight_decay)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(trainable, lr = args.lr, weight_decay = weight_decay)
    elif args.opt == 'adagrad':
        optimizer = optim.Adagrad(trainable, lr = args.lr, weight_decay = weight_decay)
    else:
        # Previously an unknown name fell through and failed later with an UnboundLocalError.
        raise ValueError(
            "Unknown optimizer {!r}; expected one of 'adam', 'sgd', 'rmsprop', 'adagrad'".format(args.opt)
        )

    if args.opt_scheduler == 'none':
        return None, optimizer
    if args.opt_scheduler == 'step':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = args.opt_decay_step, gamma = args.opt_decay_rate)
    elif args.opt_scheduler == 'cos':
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = args.opt_restart)
    else:
        raise ValueError(
            "Unknown scheduler {!r}; expected one of 'none', 'step', 'cos'".format(args.opt_scheduler)
        )
    return scheduler, optimizer


class objectview:
    """
    Exposes the entries of a dict as attributes (view.key instead of d['key']).

    The dict is adopted as the instance's __dict__ rather than copied, so attribute writes on the
    view and item writes on the dict are visible through both.
    """

    def __init__(self, d):
        # Alias, do not copy: the instance namespace *is* the supplied dict.
        self.__dict__ = d

# args holds the model and training hyperparameters, readable as attributes (args.lr, args.epochs, ...).
# hidden_dim and dropout are read by OBM_NNConv, epochs by train, and opt / opt_scheduler /
# weight_decay / lr by build_optimizer (opt_restart only when the 'cos' scheduler is selected).
# batch_size and heads are not read anywhere in the visible cells.
args = objectview({
    'batch_size': 1,
    'hidden_dim': 32,
    'heads': 4,
    'dropout': 0.5,
    'epochs': 100,
    'opt': 'adam',
    'opt_scheduler': 'none',
    'opt_restart': 0,
    'weight_decay': 5e-3,
    'lr': 0.001,
})

In [None]:
def test(loader, test_model, loss_fn):
    """
    Evaluates a model on every batch of a loader and returns the dataset-averaged loss.

    Args:
        loader: iterable of batches that also exposes a `dataset` attribute; each batch must provide
            x, edge_index, edge_attr, hint, neighbors and num_graphs.
        test_model: model to evaluate. It is switched to eval mode and moved to the module-level
            `device`; it is left in eval mode on return.
        loss_fn: callable invoked as loss_fn(pred, batch.hint, batch.neighbors).

    Returns:
        Python float: the per-batch losses weighted by batch.num_graphs, divided by len(loader.dataset).
    """
    test_model.eval()
    test_model.to(device)
    total_loss = 0.0

    # Evaluation only: no gradients are needed for any batch.
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            pred = test_model(batch.x, batch.edge_index, batch.edge_attr)
            loss = loss_fn(pred, batch.hint, batch.neighbors)
            # .item() keeps the accumulator a plain float, matching how train() accumulates its
            # loss, so the two loss histories have the same type and no tensors are retained.
            total_loss += loss.item() * batch.num_graphs

    return total_loss / len(loader.dataset)

In [None]:
def train(train_loader: DataLoader, test_loader: DataLoader, args):
    """
    Trains an OBM_NNConv model, periodically testing it and accumulating loss values.

    Args:
        train_loader: loader yielding training batches; each batch must provide x, edge_index,
            edge_attr, hint, neighbors and num_graphs.
        test_loader: loader yielding held-out batches with the same attributes.
        args: attribute-style hyperparameter object (not a plain dict). This function reads
            args.epochs; the model and build_optimizer read the remaining fields.

    Returns:
        Tuple (train_losses, test_losses, best_model, best_loss):
            train_losses: one dataset-averaged training loss (float) per epoch.
            test_losses: one float per epoch; the test loss is measured when epoch % 10 == 0 and
                the latest measurement is repeated for the epochs in between.
            best_model: deep copy of the model at the evaluation with the lowest test loss
                (None if args.epochs is 0).
            best_loss: that lowest test loss as a float (None if args.epochs is 0).
    """

    # Positional arguments are input_dim=4, output_dim=1, edge_feature_dim=1 (see OBM_NNConv.__init__):
    # four features per node, one feature per edge, and a width-1 output from the second convolution.
    model = OBM_NNConv(4, 1, 1, args)
    loss_fn = MaskedMSELoss()

    # Keep the scheduler: it is None when args.opt_scheduler is 'none', otherwise it must be stepped.
    scheduler, opt = build_optimizer(args, model.parameters())
    model.to(device)

    # accumulate model performance for plotting
    train_losses = []
    test_losses = []
    best_loss = None
    best_model = None

    for epoch in trange(args.epochs, desc = "Training", unit = "Epochs"):
        total_loss = 0
        # test() leaves the model in eval mode, so training mode is restored every epoch.
        model.train()

        for batch in train_loader:
            batch = batch.to(device)
            opt.zero_grad()
            pred = model(batch.x, batch.edge_index, batch.edge_attr)
            loss = loss_fn(pred, batch.hint, batch.neighbors)
            loss.backward()
            opt.step()

            total_loss += loss.item() * batch.num_graphs
        total_loss /= len(train_loader.dataset)
        train_losses.append(total_loss)

        # Advance the learning-rate schedule once per epoch, after that epoch's optimizer steps.
        # Previously the scheduler was discarded, so args.opt_scheduler had no effect.
        if scheduler is not None:
            scheduler.step()

        if epoch % 10 == 0:
            # float() normalizes the result whether test() returns a tensor or a float, so both
            # loss histories have the same element type.
            test_loss = float(test(test_loader, model, loss_fn))
            test_losses.append(test_loss)
            if best_loss is None or test_loss < best_loss:
                best_loss = test_loss
                # Snapshot the weights; training continues to mutate `model`.
                best_model = copy.deepcopy(model)
        else:
            # Epoch 0 is always evaluated, so a previous entry exists here.
            test_losses.append(test_losses[-1])

    return train_losses, test_losses, best_model, best_loss

In [None]:
# Run the full training loop; as the last expression of the cell, the returned
# (train_losses, test_losses, best_model, best_loss) tuple is shown as the cell output.
train(train_loader=train_dataset, test_loader=test_dataset, args=args)