# Evaluating Siamese GNNs on Synthetic Graph Edit Distance Tasks

## Initialize and load model

We first load a particular checkpoint of interest.

In [49]:
from model import GraphMatchingNetwork, GraphConvolutionNetwork
import torch

# Checkpoint to evaluate. Other checkpoints that were tried previously:
#   ./checkpoints/gcn_frosty-tree-7.pth.tar
#   ./checkpoints/gmn_glad-bird-6.pth.tar
filename = './checkpoints/gmn_rose-durian-8.pth.tar'

# Deserialize straight onto the CPU. The model built below is never moved to a
# GPU, so mapping the storages to CUDA first would only add a device round trip
# (load_state_dict copies the values into the model's own parameters anyway).
# NOTE: torch.load unpickles Python objects (the checkpoint also carries the
# config object), so only load checkpoint files from a trusted source.
checkpoint = torch.load(filename, map_location='cpu')

# Retrieve the weights and the configuration stored alongside them
state_dict = checkpoint['state_dict']
cfg = checkpoint['config']

# Initialize the architecture named in the config
model_classes = {
    'gmn': GraphMatchingNetwork,
    'gcn': GraphConvolutionNetwork,
}
if cfg.model.name not in model_classes:
    raise ValueError("Model should be either GMN or GCN")
model = model_classes[cfg.model.name](cfg)

# Load state dict (kept as the last expression so the cell displays the result)
model.load_state_dict(state_dict)  # should output "<All keys matched successfully>"

<All keys matched successfully>

## Fixed Triplet Dataset

In [7]:
from utils import load_pickle

# Settings that identify which pre-generated dataset file to read
# (they are only used here to build the file name).
num_nodes, kp, kn, pe = 10, 1, 2, 0.2

# Read the pickled dataset from disk
dataset_path = f"./data/FixedDatasetGED_nodes={num_nodes}_kp={kp}_kn={kn}_pe={pe}.pickle"
dataset = load_pickle(dataset_path)

## Triplet accuracy

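Here we compute the triplet accuracy: the fraction of triplets for which the embedding distance of the first graph pair is smaller than that of the second graph pair (i.e. the relative distance is negative).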


In [22]:
from utils import reshape_and_split_tensors
from loss import triplet_loss
from metrics import euclidean_distance
from tqdm.auto import tqdm
import numpy as np

# Set model to eval mode
model.eval()

for num_nodes in [8]:

    # Fixed dataset settings (used to build the dataset file name)
    kp = 1
    kn = 2
    pe = 0.2

    # Load dataset and keep only the first 100 triplets for a quick check
    dataset = load_pickle(f"./data/FixedDatasetGED_nodes={num_nodes}_kp={kp}_kn={kn}_pe={pe}.pickle")
    dataset = dataset[:100]

    # Per-triplet results, stored as plain Python floats
    rel_distances = []
    losses = []

    # Inference only: without no_grad every stored distance would keep its
    # autograd graph alive for the whole loop.
    with torch.no_grad():
        for triplet in tqdm(dataset, total=len(dataset)):

            # Prepare the data
            edge_index = triplet['edge_index']  # edge index
            node_feats = torch.ones(triplet.num_nodes, cfg.model.node_dim)  # node features to all-ones
            edge_feats = torch.ones(triplet.num_edges, cfg.model.edge_dim)  # edge features to all-ones
            batch_id = triplet['order']  # assigns each node to one of the graphs in the triplet

            # Feedforward, then split the stacked graph embeddings into 4 parts
            _, graph_feats = model(edge_index, x1=node_feats, x2=None,
                                   edge_feats=edge_feats, batch=batch_id)
            graph_feats = reshape_and_split_tensors(graph_feats, 4)

            # Relative distance: first pair minus second pair.
            # A negative value counts as a correct triplet below.
            # NOTE(review): presumably (0, 1) is the similar pair and (2, 3) the
            # dissimilar one - confirm against the dataset layout.
            drel = euclidean_distance(graph_feats[0], graph_feats[1]) \
                - euclidean_distance(graph_feats[2], graph_feats[3])
            rel_distances.extend(drel.reshape(-1).tolist())

            # Triplet loss for this sample
            loss = triplet_loss(*graph_feats, cfg)
            losses.extend(loss.reshape(-1).tolist())

    # Aggregate over everything that was actually evaluated
    losses = np.mean(losses)
    corrects = np.sum(np.array(rel_distances) < 0)
    accuracy = corrects / len(rel_distances)

    print(f"Performance (nodes={num_nodes} kp={kp} kn={kn} pe={pe}):\n"
          f"\tAccuracy:\t{accuracy}\n"
          f"\tLosses:\t\t{losses}\n")

  0%|          | 0/100 [00:00<?, ?it/s]

Performance (nodes=8 kp=1 kn=2 pe=0.2):
	Accuracy:	1.0
	Losses:		0.0



**This looks suspicious: a perfect accuracy of 1.0 together with a loss of exactly 0.0. Why are the scores so off?**

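We will repeat the evaluation using the other data pipeline: a `TripletDatasetGED` dataset wrapped in a PyTorch Geometric `DataLoader`. Note that the cell below also loads a different checkpoint (`gmn_decent-smoke-17`), so its numbers are not directly comparable to the ones above.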

In [67]:
from data import TripletDatasetGED
from torch_geometric.loader import DataLoader

# Checkpoint to evaluate. Other checkpoints that were tried previously:
#   ./checkpoints/gcn_frosty-tree-7.pth.tar
#   ./checkpoints/gmn_glad-bird-6.pth.tar
#   ./checkpoints/gmn_rose-durian-8.pth.tar
filename = './checkpoints/gmn_decent-smoke-17.pth.tar'

# Deserialize straight onto the CPU: the model is never moved to a GPU and the
# inputs below are created on the CPU, so mapping storages to CUDA first would
# only add a device round trip.
# NOTE: torch.load unpickles Python objects (the checkpoint also carries the
# config object), so only load checkpoint files from a trusted source.
checkpoint = torch.load(filename, map_location='cpu')

# Retrieve the weights and the configuration stored alongside them
state_dict = checkpoint['state_dict']
cfg = checkpoint['config']

# Initialize the architecture named in the config
model_classes = {
    'gmn': GraphMatchingNetwork,
    'gcn': GraphConvolutionNetwork,
}
if cfg.model.name not in model_classes:
    raise ValueError("Model should be either GMN or GCN")
model = model_classes[cfg.model.name](cfg)

# Load state dict and switch to eval mode once (both are loop-invariant)
model.load_state_dict(state_dict)
model.eval()

# Dataloader and size
# NOTE(review): `size` = ceil(N / bs) is passed as the dataset size AND the
# loader batches by `bs` again; the recorded run shows 125 batches of 20,
# i.e. 2500 triplets rather than N = 50000. Confirm which one is intended.
N = 50000
bs = 20
size = int(np.ceil(N / bs))

# Evaluation settings as (num_nodes, kp, kn, pe)
eval_settings = [
    (20, 1, 2, 0.2),
    (20, 2, 3, 0.2),
    (50, 1, 2, 0.2),
    (50, 2, 3, 0.2),
    (20, 1, 2, 0.5),
    (20, 2, 3, 0.5),
    (50, 1, 2, 0.5),
    (50, 2, 3, 0.5),
]

for num_nodes, kp, kn, pe in eval_settings:

    # Initialize dataset and loader
    dataset = TripletDatasetGED(size=size, num_nodes=num_nodes,
                                kp=kp, kn=kn, pe=pe,
                                permute=True)
    dataloader = DataLoader(dataset, batch_size=bs, shuffle=True)

    # Per-triplet results, stored as plain Python floats
    rel_distances = []
    losses = []

    # Inference only: no autograd bookkeeping needed
    with torch.no_grad():
        for batch in tqdm(dataloader, total=len(dataloader)):

            # Prepare the data
            edge_index = batch['edge_index']  # edge index
            node_feats = torch.ones(batch.num_nodes, cfg.model.node_dim)  # node features to all-ones
            edge_feats = torch.ones(batch.num_edges, cfg.model.edge_dim)  # edge features to all-ones
            # Give each graph a unique id across the mini-batch:
            # 4 graph slots per sample, offset by the sample index.
            batch_id = batch['order'] + 4 * batch['batch']

            # Feedforward, then split the stacked graph embeddings into 4 parts
            _, graph_feats = model(edge_index, x1=node_feats, x2=None,
                                   edge_feats=edge_feats, batch=batch_id)
            graph_feats = reshape_and_split_tensors(graph_feats, 4)

            # Relative distance: first pair minus second pair.
            # A negative value counts as a correct triplet below.
            # NOTE(review): presumably (0, 1) is the similar pair and (2, 3) the
            # dissimilar one - confirm against the dataset layout.
            d_first = euclidean_distance(graph_feats[0], graph_feats[1])
            d_second = euclidean_distance(graph_feats[2], graph_feats[3])
            drel = d_first - d_second
            rel_distances.extend(drel.reshape(-1).tolist())

            # Compute and append the per-triplet loss
            loss = triplet_loss(*graph_feats, cfg)
            losses.extend(loss.reshape(-1).tolist())

    # Aggregate over everything that was actually evaluated, so the accuracy
    # stays correct even if the dataset length and the number of collected
    # distances ever disagree.
    losses = np.mean(losses)
    corrects = np.sum(np.array(rel_distances) < 0)
    accuracy = corrects / len(rel_distances)

    print(f"Train:\t----- "
          f"N={cfg.data.num_nodes}"
          f"\tkp={cfg.data.kp}"
          f"\tkn={cfg.data.kn}"
          f"\tpe={cfg.data.pe}")
    print(f"Eval:\t----- "
          f"N={num_nodes}"
          f"\tkp={kp}"
          f"\tkn={kn}"
          f"\tpe={pe}")
    print(f"Performance:\n"
          f"\tAcc:\t{accuracy:.5f}\n"
          f"\tLoss:\t{losses:.5f}\n")

  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[20, 50]	kp=1	kn=2	pe=[0.2, 0.5]
Eval:	----- N=20	kp=1	kn=2	pe=0.2
Performance:
	Acc:	0.61160
	Loss:	1.05898



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[20, 50]	kp=1	kn=2	pe=[0.2, 0.5]
Eval:	----- N=20	kp=2	kn=3	pe=0.2
Performance:
	Acc:	0.61880
	Loss:	1.03965



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[20, 50]	kp=1	kn=2	pe=[0.2, 0.5]
Eval:	----- N=50	kp=1	kn=2	pe=0.2
Performance:
	Acc:	0.87640
	Loss:	0.43942



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[20, 50]	kp=1	kn=2	pe=[0.2, 0.5]
Eval:	----- N=50	kp=2	kn=3	pe=0.2
Performance:
	Acc:	0.87440
	Loss:	0.45939



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[20, 50]	kp=1	kn=2	pe=[0.2, 0.5]
Eval:	----- N=20	kp=1	kn=2	pe=0.5
Performance:
	Acc:	0.85800
	Loss:	0.42660



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[20, 50]	kp=1	kn=2	pe=[0.2, 0.5]
Eval:	----- N=20	kp=2	kn=3	pe=0.5
Performance:
	Acc:	0.87480
	Loss:	0.36041



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[20, 50]	kp=1	kn=2	pe=[0.2, 0.5]
Eval:	----- N=50	kp=1	kn=2	pe=0.5
Performance:
	Acc:	0.45560
	Loss:	1.00548



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[20, 50]	kp=1	kn=2	pe=[0.2, 0.5]
Eval:	----- N=50	kp=2	kn=3	pe=0.5
Performance:
	Acc:	0.45680
	Loss:	1.00446

