In this work, you are required to build a GNN training pipline. Then you can truly use the Graph Neural Network.

In [432]:
# !pip install  dgl -f https://data.dgl.ai/wheels/repo.html
# !pip install torch_geometric
# !pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.1+cpu.html

First, we need to download the dataset and load data.

In [433]:
import torch_geometric.transforms as T
import torch.nn.functional as F
import torch.nn as nn
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import MessagePassing


dataset = Planetoid("./", "Cora", transform=T.NormalizeFeatures())
data = dataset[0]

x = data.x
edge_index = data.edge_index
edge_weight = data.edge_weight

Then, you need to implement a GNN model. You may copy the GCNConv from your work two weeks ago, and build the model with the convolution layers.

然后，您需要实现一个GNN模型。您可以从两周前的工作中复制GCNConv，并使用卷积层构建模型。

In [434]:
class PyG_GCNConv(MessagePassing):
    def __init__(self, in_features, out_features):
        super(PyG_GCNConv, self).__init__(aggr='add')
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x, edge_index):
        return self.propagate(edge_index, x=x, size=None)

    def message(self, x_j):
        return x_j

    def update(self, aggr_out):
        return F.relu(self.linear(aggr_out))

class PyG_GCN(nn.Module):
    def __init__(self, in_features, hidden_features, num_classes):
        super(PyG_GCN, self).__init__()
        self.conv1 = PyG_GCNConv(in_features, hidden_features)
        self.conv2 = PyG_GCNConv(hidden_features, num_classes)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return x

In [435]:
from numpy import double
from torch_geometric.logging import log
import torch

hidden_dim = 13
lr = 0.001
epochs = 100
model = PyG_GCN(dataset.num_features, hidden_dim, dataset.num_classes)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def test():
    model.eval()
    pred = model(data.x, data.edge_index).argmax(dim=-1)
    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        accs.append( (double(pred[mask] == data.y[mask]).sum()) / (int(mask.sum())) )
    return accs

best_val_acc = 0
test_acc = 0
epochs = 300
for epoch in range (epochs):
    loss = train()
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    if epoch%50==0:
        log(Epoch=epoch, Loss=loss, Train=train_acc, Val=val_acc, Test=test_acc)

print(f"Best validation accuracy: {best_val_acc:.4f}")
print(f"Test accuracy: {test_acc:.4f}")

Epoch: 000, Loss: 1.9529, Train: 0.2071, Val: 0.1120, Test: 0.1180
Epoch: 050, Loss: 1.4418, Train: 0.6286, Val: 0.4380, Test: 0.4270
Epoch: 100, Loss: 1.1817, Train: 0.6714, Val: 0.4400, Test: 0.4510
Epoch: 150, Loss: 1.0134, Train: 0.7000, Val: 0.4700, Test: 0.4550
Epoch: 200, Loss: 0.9003, Train: 0.7000, Val: 0.4740, Test: 0.4560
Epoch: 250, Loss: 0.8224, Train: 0.7000, Val: 0.4740, Test: 0.4560
Best validation accuracy: 0.4740
Test accuracy: 0.4560


# 使用DGL完成节点分类  

Now, you can train the GCN model with PyG. Next, you may try using the DGL to implement the similiar function.  

In [436]:
import argparse

import dgl
import dgl.nn as dglnn

import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import AddSelfLoop
from dgl.data import CoraGraphDataset

In [437]:
class DGL_GCNConv(nn.Module):
    def __init__(self, in_features, out_features):
        super(DGL_GCNConv, self).__init__()
        self.weight=torch.randn(in_features,out_features)
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, g, feature):
        with g.local_scope():
            g.ndata['h'] = feature
            g.update_all(dgl.function.copy_u('h', 'm'), dgl.function.sum('m', 'h'))
            h = g.ndata['h']
            return self.linear(h)
            # return F.relu(self.linear(h))

class DGL_GCN(nn.Module):
    def __init__(self, in_features, hidden_features, num_classes):
        super(DGL_GCN, self).__init__()
        self.conv1 = DGL_GCNConv(in_features, hidden_features)
        self.conv2 = DGL_GCNConv(hidden_features, num_classes)

    def forward(self, g, features):
        h = F.relu(self.conv1(g, features))
        h = self.conv2(g, h)
        return h

In [438]:
epochs=5

def train(g, features, labels, masks, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(epochs):
        model.train()
        logits = model(g, features)
        loss = nn.CrossEntropyLoss()(logits[masks[0]], labels[masks[0]])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print("{:d}: {:.4f}".format(epoch,loss.item()))

def evaluate(g, features, labels, mask, model):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)

In [439]:
transform = (
    AddSelfLoop()
)
data = CoraGraphDataset(transform=transform)
g = data[0]
features = g.ndata["feat"]
labels = g.ndata["label"]
masks = g.ndata["train_mask"], g.ndata["val_mask"], g.ndata["test_mask"]

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [440]:
# 原始s
# model = DGL_GCN(features.shape[1], 14, data.num_classes)
# print("Training...")
# train(g, features, labels, masks, model)

# # test the model
# print("Testing...")
# acc = evaluate(g, features, labels, masks[2], model)
# print("Test accuracy {:.4f}".format(acc))

In [441]:
model = DGL_GCN(features.shape[1], 14, data.num_classes)
for i in range (5):
    print("Training...")
    train(g, features, labels, masks, model)

    # test the model
    print("Testing...")
    acc = evaluate(g, features, labels, masks[2], model)
    print("Test accuracy {:.4f}".format(acc))

Training...
0: 1.9506


1: 1.7972
2: 1.6448
3: 1.5222
4: 1.4259
Testing...
Test accuracy 0.5130
Training...
0: 1.3433
1: 1.2446
2: 1.1485
3: 1.0578
4: 0.9727
Testing...
Test accuracy 0.7130
Training...
0: 0.8914
1: 0.8062
2: 0.7255
3: 0.6505
4: 0.5819
Testing...
Test accuracy 0.7280
Training...
0: 0.5188
1: 0.4585
2: 0.4041
3: 0.3557
4: 0.3126
Testing...
Test accuracy 0.7450
Training...
0: 0.2744
1: 0.2384
2: 0.2067
3: 0.1789
4: 0.1544
Testing...
Test accuracy 0.7500


If you find it hard to implement, you may refer to the official implementation of the GNN training, like [PyG](https://github.com/pyg-team/pytorch_geometric/blob/master/examples/gcn.py) and [DGL](https://github.com/dmlc/dgl/blob/master/examples/pytorch/gcn/train.py).