In [3]:
# Commands to install all dependencies in a mamba environment
# !pip3 install torch
# !mamba install pyg -c pyg



In [1]:
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv

In [2]:
from torch_geometric.transforms import GDC

# Graph Diffusion Convolution (GDC) transform used later to diffuse the graph:
#   * a self-loop of weight 1 is added to every node,
#   * the input transition matrix is normalized with "sym" and the
#     sparsified output with "col",
#   * diffusion uses the "ppr" method (alpha=0.15),
#   * sparsification uses the "threshold" method with eps=1e-4,
#   * exact=True requests the exact (rather than approximate) diffusion matrix.
gdc = GDC(
    exact=True,
    self_loop_weight=1,
    normalization_in="sym",
    normalization_out="col",
    diffusion_kwargs={"method": "ppr", "alpha": 0.15, "eps": 1e-4},
    sparsification_kwargs={"method": "threshold", "eps": 1e-4},
)

In [3]:
# ----------------------------------------------------------------------
# 2) Load dataset (GDC is applied explicitly in a later cell)
# ----------------------------------------------------------------------
# The dataset is loaded WITHOUT `transform=gdc`, so `data` below is the
# original, un-diffused Cora graph. Passing `transform=gdc` to Planetoid
# would instead apply the diffusion on every access to `dataset[0]`.
dataset = Planetoid(name="Cora", root="data/Planetoid")

# Cora consists of a single graph; take it out of the dataset.
data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [5]:
from torch_sparse import spmm

In [6]:
# 1) Apply GDC to get a diffused adjacency.
#    Start from a fresh, untransformed copy of the graph (`dataset[0]`) rather
#    than from `data`: this cell overwrites `data.x` at the end, so reading the
#    features back from `data` would diffuse already-diffused features every
#    time the cell is re-run. It also guarantees that `data.edge_index` is left
#    untouched regardless of whether the transform mutates its input.
data_raw = dataset[0]
data_diffused = gdc(data_raw)

# data_diffused.edge_index is the "diffused" adjacency structure and
# data_diffused.edge_attr holds the corresponding edge weights.
edge_index = data_diffused.edge_index
edge_weight = data_diffused.edge_attr

# 2) Multiply the diffused adjacency by the raw node features.
X = data_raw.x
num_nodes = X.size(0)

# spmm performs sparse-dense matrix multiplication: X_diffused = A @ X,
# where the (num_nodes x num_nodes) matrix A is encoded by
# (edge_index, edge_weight).
X_diffused = spmm(edge_index, edge_weight, num_nodes, num_nodes, X)

# Diffusion mixes rows of X but must not change the feature matrix shape.
assert X_diffused.shape == X.shape, "diffused features must keep the shape of X"

# Store the diffused features on `data` (used by the model cells below).
# `.to(...)` is a no-op on first run and keeps devices consistent if this cell
# is re-run after `data` has been moved to another device.
data.x = X_diffused.to(data.x.device)

In [7]:
# ----------------------------------------------------------------------
# 3) Define a simple two-layer GCN (trained below on the diffused node features)
# ----------------------------------------------------------------------
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Size of the hidden representation produced by the
            first convolution.
        out_channels: Number of output classes.
        dropout: Dropout probability applied between the two convolutions
            (only active in training mode). Defaults to 0.5.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, dropout=0.5):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        self.dropout = dropout

    def forward(self, x, edge_index, edge_weight=None):
        """Compute per-node log-probabilities over the output classes.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to both convolutions.
            edge_weight: Optional per-edge weights (e.g. the weights produced
                by a diffusion transform). ``None`` (the default) treats the
                graph as unweighted, which matches the two-argument call.

        Returns:
            Tensor of log-softmax scores, normalized over dimension 1.
        """
        x = self.conv1(x, edge_index, edge_weight)
        x = F.relu(x)
        # Dropout is disabled automatically when the module is in eval mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index, edge_weight)
        return F.log_softmax(x, dim=1)


# Hyper-parameters for the model and optimizer, gathered in one place so they
# are easy to tune.
SEED = 42
HIDDEN_CHANNELS = 64
LEARNING_RATE = 0.01
WEIGHT_DECAY = 5e-4

# Seed PyTorch before the model is built so that weight initialization and
# dropout are reproducible across "Restart Kernel -> Run All".
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GCN(dataset.num_features, HIDDEN_CHANNELS, dataset.num_classes).to(device)
data = data.to(device)  # model and graph must live on the same device
optimizer = torch.optim.Adam(
    model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
)

In [8]:
# ----------------------------------------------------------------------
# 4) Training loop
# ----------------------------------------------------------------------
def train():
    """Run one full-batch optimization step and return the training loss.

    Uses the notebook-level ``model``, ``optimizer`` and ``data``. The loss is
    the negative log-likelihood restricted to the nodes in ``data.train_mask``.

    Returns:
        The loss value of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    log_probs = model(data.x, data.edge_index)
    loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()  # evaluation needs no gradients; avoids building the autograd graph
def test():
    """Evaluate the notebook-level ``model`` on the train/val/test splits.

    Puts the model in eval mode, predicts a class for every node in ``data``
    and measures accuracy separately on ``data.train_mask``,
    ``data.val_mask`` and ``data.test_mask``.

    Returns:
        A list ``[train_acc, val_acc, test_acc]`` of Python floats.
    """
    model.eval()
    preds = model(data.x, data.edge_index).argmax(dim=1)

    accs = []
    for mask in (data.train_mask, data.val_mask, data.test_mask):
        num_correct = (preds[mask] == data.y[mask]).sum().item()
        accs.append(num_correct / mask.sum().item())
    return accs


NUM_EPOCHS = 200  # total number of full-batch training steps
LOG_EVERY = 20    # evaluate and report metrics every LOG_EVERY epochs

for epoch in range(1, NUM_EPOCHS + 1):
    loss = train()
    # Evaluation results are only used for logging, so skip the extra forward
    # pass on the epochs that do not print. The last epoch is a logging epoch,
    # so train_acc / val_acc / test_acc hold the final metrics after the loop.
    if epoch % LOG_EVERY == 0:
        train_acc, val_acc, test_acc = test()
        print(
            f"Epoch: {epoch:03d}, Loss: {loss:.4f}, "
            f"Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}"
        )

Epoch: 020, Loss: 0.0677, Train Acc: 1.0000, Val Acc: 0.7880, Test Acc: 0.8050
Epoch: 040, Loss: 0.0209, Train Acc: 1.0000, Val Acc: 0.7960, Test Acc: 0.7990
Epoch: 060, Loss: 0.0269, Train Acc: 1.0000, Val Acc: 0.7960, Test Acc: 0.8120
Epoch: 080, Loss: 0.0254, Train Acc: 1.0000, Val Acc: 0.7780, Test Acc: 0.8040
Epoch: 100, Loss: 0.0229, Train Acc: 1.0000, Val Acc: 0.7840, Test Acc: 0.8110
Epoch: 120, Loss: 0.0201, Train Acc: 1.0000, Val Acc: 0.7840, Test Acc: 0.8070
Epoch: 140, Loss: 0.0173, Train Acc: 1.0000, Val Acc: 0.7780, Test Acc: 0.8070
Epoch: 160, Loss: 0.0159, Train Acc: 1.0000, Val Acc: 0.7860, Test Acc: 0.8100
Epoch: 180, Loss: 0.0183, Train Acc: 1.0000, Val Acc: 0.7840, Test Acc: 0.8160
Epoch: 200, Loss: 0.0125, Train Acc: 1.0000, Val Acc: 0.7840, Test Acc: 0.8130
