# Install dependencies

In [2]:
!pip install torch torchvision torchaudio
!pip install torch-geometric

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

# Importing Libraries

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import RandomLinkSplit
from torch_geometric.nn import GCNConv

* torch → deep learning framework

* torch_geometric → graph neural network (GNN) library on PyTorch

* Planetoid → loads Cora, Citeseer, PubMed dataset

* RandomLinkSplit → splits graph edges into train, val, test for link prediction

* GCNConv → the GCN layer

# Loading the Cora dataset

In [4]:
dataset = Planetoid(root="data/Cora", name="Cora")
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


* Downloads the Cora citation network

* `data` contains:

    * node features `x`

    * edges `edge_index`

    * labels (not used here)

    * etc.

# Splitting data for link prediction

In [5]:
transform = RandomLinkSplit(
    num_val=0.05,
    num_test=0.1,
    is_undirected=True,
    add_negative_train_samples=True
)
train_data, val_data, test_data = transform(data)


This takes the original graph and produces:

Train:


* positive edges
* negative edges
* `edge_label_index`
* `edge_label`


Val/Test:

* same as above, but without training edges

This prepares the graph for **link prediction**, not node classification.

# GCN Encoder (creates node embeddings)

In [6]:
class GCNEncoder(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

Two-layer GCN:


1. First layer → hidden representation
2. Second layer → final embedding `z`


**Forward pass**

In [7]:
def forward(self, x, edge_index):
    x = F.relu(self.conv1(x, edge_index))
    x = self.conv2(x, edge_index)
    return x


* Apply GCN
* Apply ReLU
* Compute final embeddings


Output: `z` where each node becomes a vector.

# Link Predictor (decoder)

In [8]:
class DotProductPredictor(nn.Module):
    def forward(self, z, edge_index):
        src = z[edge_index[0]]
        dst = z[edge_index[1]]
        return (src * dst).sum(dim=1)

Given:


* embeddings `z`
* node pairs in edge_index


It computes:

In [9]:
score = dot(z_src, z_dst)

NameError: name 'dot' is not defined

High score → likely link
Low score → no link

# Initialize model & optimizer

In [10]:
model = GCNEncoder(dataset.num_node_features, 64, 32)
predictor = DotProductPredictor()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

* Encoder: 64-dim hidden, 32-dim embedding

* Optimizer: Adam

# Loss function

In [11]:
def get_loss(z, edge_label_index, edge_label):
    pred = predictor(z, edge_label_index)
    return F.binary_cross_entropy_with_logits(pred, edge_label.float())

* pred is raw dot-product score (logit)

* Use binary cross-entropy for link prediction

* Labels: 1 (edge exists), 0 (no edge)

# Evaluation (AUC and AP metrics)

In [None]:
@torch.no_grad()
def evaluate(data, z):
    pred = torch.sigmoid(predictor(z, data.edge_label_index))
    labels = data.edge_label


* Computes predictions on val/test edges
* sigmoid converts logits → probabilities
* AUC, AP from sklearn

# Training loop

In [12]:
for epoch in range(1, 201):
    model.train()
    optimizer.zero_grad()

    z = model(train_data.x, train_data.edge_index)

NotImplementedError: Module [GCNEncoder] is missing the required "forward" function

1. Compute node embeddings
2. Compute los
3. sBackprop & update weights

In [None]:
    loss = get_loss(
        z,
        train_data.edge_label_index,
        train_data.edge_label
    )


In [None]:
    loss.backward()
    optimizer.step()


Every 20 epochs, print validation metrics:

In [None]:
if epoch % 20 == 0:
    model.eval()
    z = model(train_data.x, train_data.edge_index)

    val_auc, val_ap = evaluate(val_data, z)

    print(...)

# Final testing

In [None]:
model.eval()
z = model(train_data.x, train_data.edge_index)

test_auc, test_ap = evaluate(test_data, z)

This evaluates on test edges that the model never saw.

# Final output