<a href="https://colab.research.google.com/github/apester/Awesome-Graph-Neural-Networks/blob/master/GNN_CNN_RNN_on_Cora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Setup: Cora + Message Passing GNN
This uses PyTorch + PyTorch Geometric.

In [None]:
!pip install torch torchvision torchaudio
!pip install torch-geometric torch-scatter torch-sparse torch-cluster

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch_geometric.datasets import Planetoid
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops

# 1) Load the Cora citation network (a Planetoid dataset holding one graph)
dataset = Planetoid(name="Cora", root="data/Planetoid")
data = dataset[0]  # the single graph, with train/val/test masks attached

# Fields carried by `data`:
#   data.x          -> [N, F] node features
#   data.edge_index -> [2, E] edges (source, target indices)
#   data.y          -> [N] labels (7 classes)
print(data)


2. Custom Message-Passing Layer
This is the core didactic part: message → aggregate → update.


In [None]:
class SimpleGNNLayer(MessagePassing):
    """Minimal message-passing layer: linear transform -> sum over neighbors -> activation.

    Every node first projects its own features with a shared linear map, then
    receives the projected features of its neighbors (and of itself, via an
    added self-loop), sums them, and finally applies ``activation``.

    Args:
        in_channels: size of each input node feature vector.
        out_channels: size of each output node feature vector.
        activation: callable applied to the aggregated messages. Defaults to
            ``F.relu`` (the original behavior). Pass ``None`` to return the
            raw aggregated values, e.g. when the layer is used as the final
            layer and its output is fed to a loss that expects unconstrained
            logits.
    """

    def __init__(self, in_channels, out_channels, activation=F.relu):
        # 'add' aggregation: sums messages from neighbors
        super().__init__(aggr="add")
        self.lin = nn.Linear(in_channels, out_channels)
        self.activation = activation

    def forward(self, x, edge_index):
        """
        x: [N, F_in] node features
        edge_index: [2, E] graph edges (source -> target)

        Returns the updated node features, [N, F_out].
        """
        # Add self-loops so each node also receives its own (transformed)
        # features; without this a node's new state would depend on its
        # neighbors only.
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
        # 1) Linear transform (local feature update before messaging)
        x = self.lin(x)
        # 2) Start message passing: calls message -> aggregate -> update
        return self.propagate(edge_index=edge_index, x=x)

    def message(self, x_j):
        """
        x_j: features of neighbor nodes j for each edge (i <- j)
        This defines the "message" sent along each edge.
        """
        return x_j  # simplest choice: just send neighbor features

    def update(self, aggr_out):
        """
        aggr_out: aggregated messages for each node (sum over neighbors).
        This defines how we combine the aggregated messages into new node states.
        """
        # activation=None means "identity": hand back the raw sums.
        if self.activation is None:
            return aggr_out
        return self.activation(aggr_out)


3. Full GNN Model + Training
Two layers of message passing; node classification on Cora.

In [None]:
class GNN(torch.nn.Module):
    """Two stacked SimpleGNNLayer blocks for node classification.

    The first layer maps input features to ``hidden_channels``; the second
    maps them to ``out_channels`` scores per node. Dropout (p=0.5) is applied
    between the two layers while the module is in training mode.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SimpleGNNLayer(in_channels, hidden_channels)
        self.conv2 = SimpleGNNLayer(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return per-node class scores, shape [N, out_channels]."""
        # First round of message passing: each node sees its 1-hop neighbors.
        hidden = self.conv1(x, edge_index)
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        # Second round: information from 2 hops away reaches each node.
        # NOTE(review): SimpleGNNLayer.update ends in a ReLU, so the scores
        # returned here are non-negative rather than unconstrained logits.
        return self.conv2(hidden, edge_index)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Keep the graph on the same device as the model.
data = data.to(device)

model = GNN(
    in_channels=dataset.num_node_features,
    hidden_channels=16,
    out_channels=dataset.num_classes,
)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), weight_decay=5e-4, lr=0.01)

def train():
    """Run one full-batch optimization step on the GNN and return the loss.

    The forward pass covers every node, but only the nodes selected by
    ``data.train_mask`` contribute to the cross-entropy loss.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    scores = model(data.x, data.edge_index)
    objective = F.cross_entropy(scores[train_nodes], data.y[train_nodes])

    objective.backward()
    optimizer.step()
    return objective.item()

@torch.no_grad()
def test():
    """Evaluate the GNN and return ``[train_acc, val_acc, test_acc]``.

    Predictions are the argmax over the class dimension; each accuracy is the
    fraction of correctly predicted nodes within the corresponding mask.
    """
    model.eval()
    predicted = model(data.x, data.edge_index).argmax(dim=-1)
    # Compare once over all nodes, then slice the result per split.
    hits = predicted == data.y

    def _accuracy(mask):
        return float(hits[mask].sum()) / int(mask.sum())

    splits = (data.train_mask, data.val_mask, data.test_mask)
    return [_accuracy(mask) for mask in splits]

# Train the GNN for 200 epochs, reporting metrics every 20th epoch.
for epoch in range(1, 201):
    loss = train()
    # Evaluate only on the epochs that are actually reported: test() runs a
    # full forward pass, and its results are not used on the other epochs.
    if epoch % 20 == 0:
        train_acc, val_acc, test_acc = test()
        print(f"Epoch {epoch:03d} | Loss {loss:.3f} | "
              f"Train {train_acc:.3f} | Val {val_acc:.3f} | Test {test_acc:.3f}")


4. “What if I use CNN or RNN on the same data?”
Same data = the same node features data.x and labels data.y, but the edges (data.edge_index) are ignored.

---


4.1. MLP / “CNN-ish” baseline (no graph, only features)
Treat each node's feature vector as an independent “flat image”, classified without looking at its neighbors:

In [None]:
class MLPBaseline(nn.Module):
    """Graph-agnostic baseline: a two-layer perceptron over raw node features.

    Architecture: Linear -> ReLU -> Dropout(0.5) -> Linear. Each node is
    classified from its own feature vector only; no edges are used.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Layers are created in forward order so parameter initialization
        # (and the Sequential indices 0..3) stay the same.
        stack = [
            nn.Linear(in_channels, hidden_channels),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_channels, out_channels),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Map node features [N, in_channels] to class scores [N, out_channels]."""
        scores = self.net(x)
        return scores

# Same hidden size and optimizer settings as the GNN, for a like-for-like comparison.
mlp = MLPBaseline(
    in_channels=dataset.num_node_features,
    hidden_channels=16,
    out_channels=dataset.num_classes,
).to(device)
optimizer_mlp = torch.optim.Adam(mlp.parameters(), weight_decay=5e-4, lr=0.01)

def train_mlp():
    """Run one full-batch optimization step on the MLP and return the loss.

    Only the nodes selected by ``data.train_mask`` contribute to the
    cross-entropy loss; the graph structure is not used.
    """
    mlp.train()
    optimizer_mlp.zero_grad()

    train_nodes = data.train_mask
    scores = mlp(data.x)
    objective = F.cross_entropy(scores[train_nodes], data.y[train_nodes])

    objective.backward()
    optimizer_mlp.step()
    return objective.item()

@torch.no_grad()
def test_mlp():
    """Evaluate the MLP and return ``[train_acc, val_acc, test_acc]``.

    Predictions are the argmax over the class dimension; each accuracy is the
    fraction of correctly predicted nodes within the corresponding mask.
    """
    mlp.eval()
    predicted = mlp(data.x).argmax(dim=-1)
    # Compare once over all nodes, then slice the result per split.
    hits = predicted == data.y

    def _accuracy(mask):
        return float(hits[mask].sum()) / int(mask.sum())

    splits = (data.train_mask, data.val_mask, data.test_mask)
    return [_accuracy(mask) for mask in splits]

# Train the MLP for 200 epochs, reporting metrics every 20th epoch.
for epoch in range(1, 201):
    loss = train_mlp()
    # Evaluate only on the epochs that are actually reported: test_mlp() runs
    # a full forward pass, and its results are not used on the other epochs.
    if epoch % 20 == 0:
        train_acc, val_acc, test_acc = test_mlp()
        print(f"[MLP] Epoch {epoch:03d} | Loss {loss:.3f} | "
              f"Train {train_acc:.3f} | Val {val_acc:.3f} | Test {test_acc:.3f}")


4.2. RNN baseline (conceptual)
You can force an RNN on this data, but it’s awkward:
You’d have to order nodes or their neighbors arbitrarily and feed them as sequences.


*   That ordering is artificial and usually harmful.
*   The RNN sees a sequence, not a combinatorial graph with permutation invariance.


Sketch (no training loop, just the idea):

In [None]:
class RNNBaseline(nn.Module):
    """Conceptual sketch: a GRU forced onto node features as one long sequence.

    All N nodes are fed, in whatever order they are stored, as the time steps
    of a single sequence. The final hidden state is classified, so the model
    emits ONE prediction for the whole graph, not one per node.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.rnn = nn.GRU(
            input_size=in_channels,
            hidden_size=hidden_channels,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_channels, out_channels)

    def forward(self, x):
        # x: [N, F] node features
        # Fake a batch of one sequence whose N "time steps" are the nodes.
        sequence = x.unsqueeze(0)  # [1, N, F]
        # Only the final hidden state is kept: [num_layers=1, batch=1, hidden]
        _, last_hidden = self.rnn(sequence)
        # Drop the layer axis -> [1, hidden]; the result scores the whole
        # graph ([1, out_channels]), not individual nodes!
        return self.fc(last_hidden[0])


Why CNNs/RNNs are misaligned with this data:


*   CNN: assumes local grid and translation invariance.
*   RNN: assumes sequence order and temporal dependence.


*   GNN: assumes arbitrary graph and permutation invariance → matches Cora / molecules / social networks / knowledge graphs, etc.







