# Evaluating Siamese GNNs on Synthetic Graph Edit Distance Tasks

## Initialize and load model

We first load a particular checkpoint of interest.

In [8]:
from model import GraphMatchingNetwork, GraphConvolutionNetwork
import torch

# Checkpoint to evaluate. Other runs that can be swapped in:
#   ./checkpoints/gcn_frosty-tree-7.pth.tar
#   ./checkpoints/gmn_glad-bird-6.pth.tar
#   ./checkpoints/gmn_rose-durian-8.pth.tar
#   ./checkpoints/gcn_gentle-universe-26.pth.tar
filename = f'./checkpoints/gmn_ancient-wildflower-24.pth.tar'


def _to_available_device(storage, loc):
    """Put a deserialized storage on the GPU when CUDA is available, otherwise on the CPU."""
    return storage.cuda() if torch.cuda.is_available() else storage.cpu()


checkpoint = torch.load(filename, map_location=_to_available_device)

# The checkpoint carries both the model weights and the config object
cfg = checkpoint['config']
state_dict = checkpoint['state_dict']

# Build the architecture named in the config
model_classes = {'gmn': GraphMatchingNetwork, 'gcn': GraphConvolutionNetwork}
if cfg.model.name not in model_classes:
    raise ValueError("Model should be either GMN or GCN")
model = model_classes[cfg.model.name](cfg)

# To inspect the config:
#   from omegaconf import OmegaConf
#   print(OmegaConf.create(cfg))

# Restore the weights; kept as the last expression so the cell displays
# "<All keys matched successfully>"
model.load_state_dict(state_dict)

<All keys matched successfully>

## Fixed Triplet Dataset

In [7]:
from utils import load_pickle

# Settings that identify which pre-generated triplet dataset to read
num_nodes, kp, kn, pe = 10, 1, 2, 0.2

# The settings are encoded in the pickle's file name
fixed_dataset_path = f"./data/FixedDatasetGED_nodes={num_nodes}_kp={kp}_kn={kn}_pe={pe}.pickle"
dataset = load_pickle(fixed_dataset_path)

## Triplet accuracy

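Here we compute the triplet accuracy: the fraction of triplets for which the distance between the first pair of graph embeddings is smaller than the distance between the second pair.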


In [22]:
from utils import reshape_and_split_tensors
from loss import triplet_loss
from metrics import euclidean_distance
from tqdm.auto import tqdm
import numpy as np

# Set model to eval mode
model.eval()

for num_nodes in [8]:

    # Fixed dataset settings
    kp = 1
    kn = 2
    pe = 0.2

    # Load dataset and keep only the first 100 triplets
    dataset = load_pickle(f"./data/FixedDatasetGED_nodes={num_nodes}_kp={kp}_kn={kn}_pe={pe}.pickle")
    dataset = dataset[:100]

    # Per-triplet results
    rel_distances = []  # d(g0, g1) - d(g2, g3); negative means the triplet is ranked correctly
    losses = []

    # Pure evaluation: disable autograd so no graph is built for each forward pass
    # (consistent with the DataLoader-based evaluation further down).
    with torch.no_grad():
        for triplet in tqdm(dataset, total=len(dataset)):

            # Prepare the data
            edge_index = triplet['edge_index']  # edge index
            node_feats = torch.ones(triplet.num_nodes, cfg.model.node_dim)  # node features to all-ones
            edge_feats = torch.ones(triplet.num_edges, cfg.model.edge_dim)  # edge features to all-ones
            batch_id = triplet['order']  # graph index of every node within the triplet

            # Feedforward: one embedding per graph, split into the 4 graphs of the triplet
            _, graph_feats = model(edge_index, x1=node_feats, x2=None, edge_feats=edge_feats, batch=batch_id)
            graph_feats = reshape_and_split_tensors(graph_feats, 4)

            # Get accuracy measure
            rel_distances.append(euclidean_distance(graph_feats[0], graph_feats[1])
                                 - euclidean_distance(graph_feats[2], graph_feats[3]))

            # Get loss
            losses.append(triplet_loss(*graph_feats, cfg).detach().numpy())

    # print(rel_distances)
    mean_loss = np.mean(losses)
    # Count negative relative distances directly instead of rebuilding a tensor from a list of tensors
    corrects = sum(int(torch.sum(rel_distance < 0)) for rel_distance in rel_distances)
    # Guard against an empty dataset rather than dividing by zero
    accuracy = corrects / len(dataset) if len(dataset) else float('nan')

    print(f"Performance (nodes={num_nodes} kp={kp} kn={kn} pe={pe}):\n"
          f"\tAccuracy:\t{accuracy}\n"
          f"\tLosses:\t\t{mean_loss}\n")

  0%|          | 0/100 [00:00<?, ?it/s]

Performance (nodes=8 kp=1 kn=2 pe=0.2):
	Accuracy:	1.0
	Losses:		0.0



**This looks suspicious: why are the scores perfect (accuracy 1.0, loss 0.0)?**

We will repeat the evaluation using the other data loader (`TripletDatasetGED` wrapped in a `DataLoader`).

In [5]:
from utils import reshape_and_split_tensors
from loss import triplet_loss
from metrics import euclidean_distance
from tqdm.auto import tqdm
import numpy as np
from data import TripletDatasetGED
from torch_geometric.loader import DataLoader
from utils import AverageMeter
from model import GraphMatchingNetwork, GraphConvolutionNetwork
import torch

filename = './checkpoints/gcn_feasible-wave-29.pth.tar'
checkpoint = torch.load(filename, map_location=lambda storage,
                        loc: storage.cuda() if torch.cuda.is_available() else storage.cpu())

# Retrieve state dict and the config stored alongside it
state_dict = checkpoint['state_dict']
cfg = checkpoint['config']

# Initialize model
if cfg.model.name == 'gmn':
    model = GraphMatchingNetwork(cfg)
elif cfg.model.name == 'gcn':
    model = GraphConvolutionNetwork(cfg)
else:
    raise ValueError("Model should be either GMN or GCN")

# Load state dict
model.load_state_dict(state_dict)  # should output "<All keys matched successfully>"

# To inspect the config:
#   from omegaconf import OmegaConf
#   print(OmegaConf.create(cfg))

# Dataloader and size (identical for every configuration, so computed once)
N = 50000
bs = 20
size = int(np.ceil(N / bs))
# NOTE(review): `size` is used as the dataset size and the loader then batches by `bs`,
# so the recorded runs show 125 batches per configuration — confirm this is the intended
# number of evaluated triplets.

# Set model to eval mode
model.eval()

# Evaluation configurations: (num_nodes, kp, kn, pe)
# NOTE(review): in the recorded run the DataLoader raised while fetching from the dataset
# at [50, 1, 2, 0.5]; the cause is not visible in the truncated traceback.
eval_settings = [[10, 1, 2, 0.2],
                 [10, 2, 3, 0.2],
                 [20, 1, 2, 0.2],
                 [20, 2, 3, 0.2],
                 [50, 1, 2, 0.2],
                 [50, 2, 3, 0.2],
                 [20, 1, 2, 0.5],
                 [20, 2, 3, 0.5],
                 [50, 1, 2, 0.5],
                 [50, 2, 3, 0.5]]

for num_nodes, kp, kn, pe in eval_settings:

    # Initialize dataset and loader
    dataset = TripletDatasetGED(size=size, num_nodes=num_nodes,
                                kp=kp, kn=kn, pe=pe,
                                permute=True)
    dataloader = DataLoader(dataset, batch_size=bs, shuffle=True)

    with torch.no_grad():

        # Running averages over the batches of this configuration
        val_loss = AverageMeter()
        val_accs = AverageMeter()

        for batch in tqdm(dataloader, total=len(dataloader)):

            # Prepare the data
            edge_index = batch['edge_index']  # edge index
            node_feats = torch.ones(batch.num_nodes, cfg.model.node_dim)  # node features to all-ones
            edge_feats = torch.ones(batch.num_edges, cfg.model.edge_dim)  # edge features to all-ones
            # Give every graph in the batch a unique id: 4 graphs per triplet
            batch_id = batch['order'] + 4 * batch['batch']

            # Feedforward
            _, graph_feats = model(edge_index, x1=node_feats, x2=None, edge_feats=edge_feats, batch=batch_id)
            graph_feats = reshape_and_split_tensors(graph_feats, 4)

            # Compute and append loss
            losses = triplet_loss(*graph_feats, cfg)

            loss = losses.mean()
            val_loss.update(loss.item())

            # Performance (accuracy)
            rel_distance = euclidean_distance(graph_feats[0], graph_feats[1]) \
                - euclidean_distance(graph_feats[2], graph_feats[3])
            corrects = torch.sum(rel_distance < 0)
            # Divide by the number of triplets actually in this batch: the last batch may be
            # smaller than `bs`, in which case dividing by `bs` would under-report accuracy.
            val_accs.update(corrects.item() / rel_distance.numel())

        print(f"Train:\t----- "
              f"N={cfg.data.num_nodes}"
              f"\tkp={cfg.data.kp}"
              f"\tkn={cfg.data.kn}"
              f"\tpe={cfg.data.pe}")
        print(f"Eval:\t----- "
              f"N={num_nodes}"
              f"\tkp={kp}"
              f"\tkn={kn}"
              f"\tpe={pe}")
        print(f"Performance:\n"
              f"\tAcc:\t{val_accs.avg:.5f}\n"
              f"\tLoss:\t{val_loss.avg:.5f}\n")

  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[10, 20]	kp=1	kn=2	pe=0.2
Eval:	----- N=10	kp=1	kn=2	pe=0.2
Performance:
	Acc:	0.76640
	Loss:	0.56921



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[10, 20]	kp=1	kn=2	pe=0.2
Eval:	----- N=10	kp=2	kn=3	pe=0.2
Performance:
	Acc:	0.60560
	Loss:	1.10243



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[10, 20]	kp=1	kn=2	pe=0.2
Eval:	----- N=20	kp=1	kn=2	pe=0.2
Performance:
	Acc:	0.90520
	Loss:	0.24722



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[10, 20]	kp=1	kn=2	pe=0.2
Eval:	----- N=20	kp=2	kn=3	pe=0.2
Performance:
	Acc:	0.74880
	Loss:	0.60317



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[10, 20]	kp=1	kn=2	pe=0.2
Eval:	----- N=50	kp=1	kn=2	pe=0.2
Performance:
	Acc:	0.74640
	Loss:	0.93708



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[10, 20]	kp=1	kn=2	pe=0.2
Eval:	----- N=50	kp=2	kn=3	pe=0.2
Performance:
	Acc:	0.64640
	Loss:	0.93997



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[10, 20]	kp=1	kn=2	pe=0.2
Eval:	----- N=20	kp=1	kn=2	pe=0.5
Performance:
	Acc:	0.70600
	Loss:	0.75921



  0%|          | 0/125 [00:00<?, ?it/s]

Train:	----- N=[10, 20]	kp=1	kn=2	pe=0.2
Eval:	----- N=20	kp=2	kn=3	pe=0.5
Performance:
	Acc:	0.59760
	Loss:	0.88581



  0%|          | 0/125 [00:00<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "c:\Users\caspervanengel\Anaconda3\envs\pytorch-geom\lib\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\caspervanengel\AppData\Local\Temp\ipykernel_12956\1906423538.py", line 66, in <module>
    for batch in tqdm(dataloader, total=len(dataloader)):
  File "c:\Users\caspervanengel\Anaconda3\envs\pytorch-geom\lib\site-packages\tqdm\notebook.py", line 254, in __iter__
    for obj in it:
  File "c:\Users\caspervanengel\Anaconda3\envs\pytorch-geom\lib\site-packages\tqdm\std.py", line 1178, in __iter__
    for obj in iterable:
  File "c:\Users\caspervanengel\Anaconda3\envs\pytorch-geom\lib\site-packages\torch\utils\data\dataloader.py", line 628, in __next__
    data = self._next_data()
  File "c:\Users\caspervanengel\Anaconda3\envs\pytorch-geom\lib\site-packages\torch\utils\data\dataloader.py", line 671, in _next_data
    data = self._dataset_fetcher.fetch