# Graph Neural Network

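Build a graph neural network (GNN) for link prediction: learnable node embeddings are refined by two edge-conditioned `NNConv` layers, and each candidate edge is scored by the dot product of its endpoint embeddings.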

In [11]:
# import libraries
import os.path as osp
import time

import torch
import torch.nn.functional as F
from torch.nn import Parameter
from tqdm import tqdm

from torch_geometric.datasets import RelLinkPredDataset
from torch_geometric.nn import GAE, GCNConv
import torch
from torch_geometric.nn import NNConv
from sklearn.metrics import roc_auc_score
from torch_geometric.utils import negative_sampling

Select the compute device: the GPU when CUDA is available, otherwise the CPU.

In [12]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('device:', device)

device: cuda


In [13]:
# Location of the serialized graph object
server_graph_data_filepath = './data/server_graph_data.pth'

# The file holds a pickled Python object rather than a plain tensor/state
# dict, so it needs the full unpickler. `weights_only=False` is passed
# explicitly so the behaviour does not depend on the PyTorch version's default
# (relying on the implicit default emits a warning).
# SECURITY: full unpickling can execute arbitrary code -- only load files that
# come from a trusted source.
# `map_location='cpu'` lets the file load on a machine without CUDA even if it
# was saved from a GPU; the tensors are moved to `device` explicitly later.
graph_data = torch.load(
    server_graph_data_filepath,
    map_location='cpu',
    weights_only=False,
)
graph_data

  graph_data = torch.load(server_graph_data_filepath)


Data(edge_index=[2, 2830503], edge_attr=[2830503, 47], num_nodes=19129)

In [14]:
class Net(torch.nn.Module):
    """Edge-conditioned GNN encoder paired with a dot-product link decoder.

    Nodes have no input features of their own: every node owns a learnable
    embedding, which two ``NNConv`` layers then refine using the edge
    attributes.

    Args:
        num_nodes: Number of rows in the node embedding table.
        edge_feature_dim: Number of attributes per edge.
        hidden_channels: Width of the initial embeddings and of the first
            layer's output.
        out_channels: Width of the node embeddings produced by ``encode``.
    """

    def __init__(self, num_nodes, edge_feature_dim, hidden_channels, out_channels):
        super().__init__()
        # One trainable vector per node, used as the input node features
        self.embedding = torch.nn.Embedding(num_nodes, hidden_channels)

        # Two stacked edge-conditioned layers: hidden -> hidden -> out
        self.conv1 = self._edge_conv(edge_feature_dim, hidden_channels, hidden_channels)
        self.conv2 = self._edge_conv(edge_feature_dim, hidden_channels, out_channels)

    @staticmethod
    def _edge_conv(edge_feature_dim, in_channels, out_channels):
        """Build one mean-aggregating ``NNConv`` layer.

        The layer's ``nn`` argument is a single linear map from the edge
        attributes to ``in_channels * out_channels`` values.
        """
        edge_net = torch.nn.Sequential(
            torch.nn.Linear(edge_feature_dim, in_channels * out_channels)
        )
        return NNConv(
            in_channels=in_channels,
            out_channels=out_channels,
            nn=edge_net,
            aggr='mean',  # Other supported choices include 'add' and 'max'
        )

    def encode(self, edge_index, edge_attr):
        """Compute node embeddings from the graph structure and edge attributes.

        Args:
            edge_index: Edge list passed to both convolution layers.
            edge_attr: Per-edge attributes passed to both convolution layers.

        Returns:
            The output of the second convolution layer (one row per node).
        """
        # Start from the full embedding table: [num_nodes, hidden_channels]
        h = self.embedding.weight
        # Layer 1 followed by ReLU; layer 2 is left without an activation
        h = torch.relu(self.conv1(h, edge_index, edge_attr))
        return self.conv2(h, edge_index, edge_attr)

    def decode(self, z, edge_label_index):
        """Score node pairs by the dot product of their embeddings.

        Args:
            z: Node embeddings, indexed by node id along the first dimension.
            edge_label_index: Row 0 holds the first node id of each pair,
                row 1 the second.

        Returns:
            One raw score (logit) per pair.
        """
        first = z[edge_label_index[0]]
        second = z[edge_label_index[1]]
        return (first * second).sum(dim=-1)

    def decode_all(self, z):
        """Return every node pair whose dot-product score is positive.

        Builds the dense ``num_nodes x num_nodes`` score matrix, so memory
        use grows quadratically with the number of nodes.

        Returns:
            A tensor with two rows: the row and column indices of the
            positive-score entries.
        """
        scores = torch.matmul(z, z.t())
        positive = scores > 0
        return positive.nonzero(as_tuple=False).t()

In [15]:

# Copy the loaded graph's connectivity and edge attributes to the compute device
num_nodes = graph_data.num_nodes
edge_attr = graph_data.edge_attr.to(device)
edge_index = graph_data.edge_index.to(device)

# Every edge present in the graph is used as a positive example
pos_edge_index = edge_index

# Request as many negative (absent) node pairs as there are positive edges
neg_edge_index = negative_sampling(
    edge_index,
    num_nodes=num_nodes,
    num_neg_samples=edge_index.size(1),
    method='sparse',
)
neg_edge_index = neg_edge_index.to(device)

In [16]:
# Candidate edges to score: all positives first, then all negatives
edge_label_index = torch.cat((pos_edge_index, neg_edge_index), dim=1).to(device)

# Targets in the same order: start from all zeros (negatives), then mark the
# leading block of positive edges with 1
edge_label = torch.zeros(edge_label_index.size(1), device=device)
edge_label[:pos_edge_index.size(1)] = 1.0

In [17]:

# Model hyperparameters
hidden_channels = 32  # Width of the initial embeddings and the first layer
out_channels = 32     # Width of the final node embeddings
edge_feature_dim = graph_data.edge_attr.shape[1]  # Attribute columns per edge

# Build the model on the compute device, together with its optimizer and loss
model = Net(
    num_nodes=num_nodes,
    edge_feature_dim=edge_feature_dim,
    hidden_channels=hidden_channels,
    out_channels=out_channels,
)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()  # Takes raw logits, not probabilities

In [None]:
# Training process: full-batch optimisation. Every epoch re-encodes the whole
# graph and scores the same fixed set of positive and negative edges.
model.train()
epochs = 1000  # Adjust the number of epochs as needed
for epoch in range(epochs):
    # Clear the gradients accumulated by the previous step
    optimizer.zero_grad()

    # Encode node embeddings from the full edge list and edge attributes
    z = model.encode(edge_index, edge_attr)

    # Decode one raw score (logit) per labelled edge, flattened to 1-D
    edge_scores = model.decode(z, edge_label_index).view(-1)

    # Binary cross-entropy on logits against the 1/0 edge labels
    loss = criterion(edge_scores, edge_label)

    # Backpropagate and apply one optimizer update
    loss.backward()
    optimizer.step()

    # Report with a 1-based epoch counter
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

In [21]:
# Evaluation
from sklearn.metrics import roc_auc_score

model.eval()

# Score the labelled edges without tracking gradients
with torch.no_grad():
    z = model.encode(edge_index, edge_attr)
    edge_scores = model.decode(z, edge_label_index).view(-1)
    edge_probs = edge_scores.sigmoid()

# scikit-learn works on NumPy arrays, so copy labels and scores to host memory
edge_label_cpu = edge_label.cpu().numpy()
edge_probs_cpu = edge_probs.cpu().numpy()

# ROC AUC of the predicted probabilities against the 1/0 edge labels.
# NOTE(review): these are the same labelled edges the training loss uses, so
# this is not a held-out estimate.
auc = roc_auc_score(y_true=edge_label_cpu, y_score=edge_probs_cpu)
print(f'ROC AUC: {auc:.4f}')

ROC AUC: 0.5238
