In [25]:
!pip install torch torch-geometric scikit-learn numpy



In [26]:
import torch
import torch.nn.functional as F
import torch_geometric
from torch_geometric.nn import GCNConv, GATConv, GINConv
from torch_geometric.datasets import Planetoid, DeezerEurope
from torch_geometric.transforms import RandomLinkSplit
from torch_geometric.utils import train_test_split_edges, negative_sampling, subgraph
from sklearn.model_selection import train_test_split

# Load dataset (replace 'CoraFull' with 'DeezerEurope' for the second dataset)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dataset_name = "CoraFull" # "DeezerEurope"
if dataset_name == "CoraFull":
    dataset = Planetoid(root='CoraFull', name='Cora')
elif dataset_name == "DeezerEurope":
    dataset = DeezerEurope(root='DeezerEurope')

data = dataset[0]
train_ratio, val_ratio, test_ratio = 0.6, 0.2, 0.2
hidden_dim = 64
epochs = 100
learning_rate = 0.01

In [27]:
# Assume `data` is the Data object from your torch geometric dataset.
# train_test_split splits the graph by its nodes.
# The below example code sets test set size to 50% of the graph.
# The split from the doc was 60:20:20. You may or may not keep a validation portion,
# in which case you could have a test set of size 40%.
num_nodes = data.num_nodes
train_nodes, test_nodes = train_test_split(range(num_nodes), test_size=test_ratio, random_state=42)
train_nodes, val_nodes = train_test_split(train_nodes, test_size=val_ratio / (train_ratio + val_ratio), random_state=42)


# The below code produces the induced subgraph of train set nodes.
# In the process, it also produces a separated edge index.
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_nodes] = True
val_mask = torch.zeros(num_nodes, dtype=torch.bool)
val_mask[val_nodes] = True
test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[test_nodes] = True

train_edge_index, _ = subgraph(train_nodes, data.edge_index, relabel_nodes=True)

# If you need the features from only the nodes in the training set,
# use the train mask.
feats = data.x[train_mask]


# The code below will sample non-neighbours from the train portion of the graph.
# The non-neighbour set will be the same size as the training edge index.
# The mask ensures that only the nodes in the training set are used.
neg_edge_index = negative_sampling(edge_index=train_edge_index,
                                   num_nodes=data.num_nodes,
                                   num_neg_samples=train_edge_index.size(1))


# During inference, build a subset of nodes Q from `test_nodes`.
# In this set, each node should be part of atleast one triangle.
# Then you pair each node in Q with each other node in the test set.
# Score and rank for result retrieval.

In [28]:
# GNN Model Definition
class GNNModel(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, gnn_type="GCN"):
        super(GNNModel, self).__init__()
        if gnn_type == "GCN":
            self.conv1 = GCNConv(in_channels, hidden_channels)
            self.conv2 = GCNConv(hidden_channels, out_channels)
        elif gnn_type == "GAT":
            self.conv1 = GATConv(in_channels, hidden_channels)
            self.conv2 = GATConv(hidden_channels, out_channels)
        elif gnn_type == "GIN":
            self.conv1 = GINConv(torch.nn.Sequential(
                torch.nn.Linear(in_channels, hidden_channels),
                torch.nn.ReLU(),
                torch.nn.Linear(hidden_channels, hidden_channels)
            ))
            self.conv2 = GINConv(torch.nn.Sequential(
                torch.nn.Linear(hidden_channels, out_channels),
                torch.nn.ReLU(),
                torch.nn.Linear(out_channels, out_channels)
            ))
        else:
            raise ValueError("Unsupported GNN type")

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return x

In [29]:
def auc_loss(model, data, train_edge_index, neg_edge_index, margin=1.0):
    node_embeddings = model(data.x, train_edge_index)

    # Positive edge scores
    pos_u, pos_v = train_edge_index
    pos_scores = F.cosine_similarity(node_embeddings[pos_u], node_embeddings[pos_v])

    # Negative edge scores
    neg_u, neg_v = neg_edge_index
    neg_scores = F.cosine_similarity(node_embeddings[neg_u], node_embeddings[neg_v])

    # Margin-based ranking loss
    loss = F.relu(margin + neg_scores - pos_scores).mean()
    return loss

In [30]:
def train(model, data, optimizer, train_edge_index):
    model.train()
    optimizer.zero_grad()
    neg_edge_index = negative_sampling(
        edge_index=train_edge_index,
        num_nodes=data.num_nodes,
        num_neg_samples=train_edge_index.size(1),
    )
    loss = auc_loss(model, data, train_edge_index, neg_edge_index)
    loss.backward()
    optimizer.step()
    return loss.item()

In [31]:
def evaluate(edge_index, mask):
    model.eval()
    with torch.no_grad():
        node_embeddings = model(data.x, edge_index)
        u, v = edge_index[:, mask]
        pos_scores = F.cosine_similarity(node_embeddings[u], node_embeddings[v])
        neg_u, neg_v = neg_edge_index
        neg_scores = F.cosine_similarity(node_embeddings[neg_u], node_embeddings[neg_v])
        scores = torch.cat([pos_scores, neg_scores])
        labels = torch.cat([torch.ones(pos_scores.size(0)), torch.zeros(neg_scores.size(0))])
        return F.binary_cross_entropy_with_logits(scores, labels).item()

In [32]:
model = GNNModel(data.num_features, hidden_dim, hidden_dim, gnn_type="GCN").to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Main Training
for epoch in range(1, epochs + 1):
    loss = train(model, data, optimizer, train_edge_index)
    if epoch % 10 == 0:
    #    val_loss = evaluate(data.edge_index, val_mask)
        print(f"Epoch {epoch:2d}, Loss: {loss:.4f}") #, Val Loss: {val_loss:.4f}")

# Final Test
#test_loss = evaluate(data.edge_index, test_mask)
#print(f"Final Test Loss: {test_loss:.4f}")

Epoch 10, Loss: 0.2198
Epoch 20, Loss: 0.1577
Epoch 30, Loss: 0.1316
Epoch 40, Loss: 0.1167
Epoch 50, Loss: 0.1052
Epoch 60, Loss: 0.0949
Epoch 70, Loss: 0.0858
Epoch 80, Loss: 0.0825
Epoch 90, Loss: 0.0755
Epoch 100, Loss: 0.0686
