Benchmarks: QM9, MUTAG, ZINC

MUTAG
- Classification: the best model so far is CatBoost (primes to paths).

In [None]:
import numpy as np
import pandas as pd
from scipy import sparse

import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.utils import from_scipy_sparse_matrix
from torch_geometric.data import Data
from torch_geometric.transforms import RandomNodeSplit
from torch_geometric.nn import GATConv

In [None]:
# 🟦 YOUR DATA -----------------------------------------------------------------
node_dict       = ...  # {node_id: DataFrame with feature columns + 'y' column}
adjacency       = ...  # 2-D numpy array or DataFrame with 0/1 values
# -----------------------------------------------------------------------------


# 1 a)  Gather features and labels in a reproducible node order.
#       Only the first row of each node's DataFrame is used.
node_ids   = np.array(sorted(node_dict.keys()))          # [N]
features   = np.vstack([node_dict[n].drop('y', axis=1).values[0]
                        for n in node_ids])              # [N, F]
raw_labels = np.array([node_dict[n]['y'].iloc[0] for n in node_ids])  # [N]

# CrossEntropyLoss expects class indices in [0, num_classes).  Raw labels
# such as {-1, 1} or {1, 2, 3} would break training, so map them onto
# 0..C-1.  `classes[i]` is the original label of encoded class `i`; labels
# that are already 0..C-1 are left unchanged by this mapping.
classes, labels = np.unique(raw_labels, return_inverse=True)
labels = labels.reshape(-1)                              # [N]

num_nodes, num_features = features.shape
num_classes  = len(classes)

# 1 b)  Build the edge list  (PyG wants COO format)
# NOTE(review): row/column i of `adjacency` is assumed to correspond to
# node_ids[i] (sorted order) -- confirm against how the matrix was built.
adj = np.asarray(adjacency)                              # in case it's a DF
if adj.shape != (num_nodes, num_nodes):
    raise ValueError(
        f"adjacency has shape {adj.shape}, expected "
        f"({num_nodes}, {num_nodes}) to match node_dict")
edge_index, _ = from_scipy_sparse_matrix(
    sparse.csr_matrix(adj))                              # shape [2, E]

# 1 c)  Create the PyG Data object
data = Data(
    x=torch.tensor(features, dtype=torch.float),
    y=torch.tensor(labels,    dtype=torch.long),
    edge_index=edge_index
)

# 1 d)  Add train/val/test masks  (80 / 10 / 10 split here)
# data = RandomNodeSplit(num_val=0.1, num_test=0.1)(data)
# print(data)

In [None]:
def sequential_split(data, test_ratio: float = 0.10):
    """
    Puts the *last* `test_ratio` fraction of nodes into `data.test_mask`
    and the rest into `data.train_mask`.  (No validation set created.)

    Args:
        data: Object exposing `num_nodes`; the boolean masks `train_mask`
            and `test_mask` (each of length `num_nodes`) are set on it.
        test_ratio: Fraction of nodes assigned to the test set.  The
            resulting count is rounded to the nearest integer and clamped
            to the range [0, num_nodes].

    Returns:
        The same `data` object, modified in place.
    """
    N = data.num_nodes
    num_test = min(max(int(round(N * test_ratio)), 0), N)

    # Use an explicit boundary index instead of negative slicing: with
    # num_test == 0, `mask[-0:]` selects *every* node and `mask[:-0]`
    # selects none, which would silently swap the two sets.
    boundary = N - num_test

    train_mask = torch.zeros(N, dtype=torch.bool)
    train_mask[:boundary] = True            # everything before the boundary

    data.train_mask = train_mask
    data.test_mask  = ~train_mask           # the trailing `num_test` nodes
    return data


# --- usage: hold out the trailing 10 % of nodes, then show both set sizes ---
data = sequential_split(data, test_ratio=0.10)
print(*(mask.sum() for mask in (data.train_mask, data.test_mask)))

In [None]:
class GAT(torch.nn.Module):
    """Two-layer graph attention network producing per-node class logits.

    The first `GATConv` layer uses `heads` attention heads, so the second
    layer is built with `hidden_channels * heads` input channels.  The
    second layer uses a single head and emits `out_channels` values per
    node.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Output channels per attention head in layer 1.
        out_channels: Number of output values (classes) per node.
        heads: Number of attention heads in the first layer.
        dropout: Dropout probability applied between the two layers
            (only active in training mode).
    """

    def __init__(self,
                 in_channels:  int,
                 hidden_channels: int,
                 out_channels: int,
                 heads: int = 8,
                 dropout: float = 0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads)
        self.conv2 = GATConv(hidden_channels * heads,
                             out_channels,
                             heads=1)

    def forward(self, x, edge_index):
        """Return raw (un-normalised) logits for every node.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to both conv layers.
        """
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return x

# Network: 16 hidden channels per head, 8 heads, one logit per class.
model = GAT(num_features, 16, num_classes, heads=8)

# Adam with L2 regularisation via weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Cross-entropy over the raw logits returned by the model.
criterion = torch.nn.CrossEntropyLoss()

In [None]:
@torch.no_grad()
def accuracy(data, mask):
    """Return the fraction of nodes selected by `mask` that the global
    `model` classifies correctly.

    Switches the model to evaluation mode and runs a full forward pass
    over the graph with gradient tracking disabled.
    """
    model.eval()
    logits = model(data.x, data.edge_index)
    predicted = logits.argmax(dim=1)
    hits = predicted[mask].eq(data.y[mask])
    return hits.float().mean().item()

def train(data):
    """Run one optimisation step on the training nodes.

    Uses the global `model`, `optimizer` and `criterion`.  The forward
    pass covers the whole graph; only nodes in `data.train_mask`
    contribute to the loss.  Returns the loss as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    logits = model(data.x, data.edge_index)
    step_loss = criterion(logits[train_nodes], data.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

# ---- main loop ----
# `sequential_split` only creates train/test masks, so `data.val_mask` may
# not exist; reading it unconditionally raises AttributeError.  Validation
# accuracy is therefore reported only when a validation mask is present
# (e.g. after using RandomNodeSplit instead).
has_val = getattr(data, 'val_mask', None) is not None

for epoch in range(1, 201):
    loss = train(data)

    # Evaluation needs a full forward pass per mask, so only do it on the
    # epochs that are actually reported.
    if epoch % 10 == 0 or epoch == 1:
        if has_val:
            val_acc  = accuracy(data, data.val_mask)
            test_acc = accuracy(data, data.test_mask)
            print(f'Epoch {epoch:03d} | loss {loss:.4f} | '
                  f'val {val_acc:.3f} | test {test_acc:.3f}')
        else:
            test_acc = accuracy(data, data.test_mask)
            print(f'Epoch {epoch:03d} | loss {loss:.4f} | '
                  f'test {test_acc:.3f}')