In [1]:
import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import dgl.data
from dgl.nn import GraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load DGL's built-in Cora dataset object and report how many label classes
# it exposes via `num_classes`.
dataset = dgl.data.CoraGraphDataset()
print(f'Num categories: {dataset.num_classes}')

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Num categories: 7


In [3]:
# Pull out the graph stored at index 0 of the dataset; every later cell works on it.
g = dataset[0]

In [4]:
# Show the graph summary (node/edge counts and the node/edge data schemes).
g

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [5]:
class GCN(nn.Module):
    """Two stacked ``GraphConv`` layers with a ReLU in between.

    Args:
        in_feats: Width of the input node features.
        h_feats: Width of the hidden representation produced by ``conv1``.
        num_classes: Width of the output produced by ``conv2``.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        # Layer order matters: conv1 maps input -> hidden, conv2 maps hidden -> output.
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        """Run both layers on graph ``g`` and return the second layer's raw output.

        No activation is applied after ``conv2``; the caller receives whatever
        ``conv2`` returns.
        """
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)

In [6]:
# Size the network from the data: input width is the per-node feature length,
# output width is the number of classes; 16 is the chosen hidden width.
model = GCN(
    in_feats=g.ndata["feat"].shape[1],
    h_feats=16,
    num_classes=dataset.num_classes,
)

In [7]:
# Display the graph summary again (same object as shown in the earlier cell).
g

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [8]:
def train(g, model, num_epochs=100, lr=0.01, log_every=5):
    """Fit ``model`` on the nodes selected by the graph's training mask.

    Each epoch runs one full-graph forward pass, takes an Adam step on the
    cross-entropy loss over the training nodes, and measures accuracy on the
    validation and test nodes using that same forward pass (i.e. before the
    step is applied). The test accuracy observed at the epoch with the best
    validation accuracy so far is tracked and reported alongside it.

    Args:
        g: Graph whose ``ndata`` provides ``'feat'``, ``'label'``,
            ``'train_mask'``, ``'val_mask'`` and ``'test_mask'``.
        model: Module called as ``model(g, features)`` that returns one row of
            class scores per node.
        num_epochs: Number of optimisation steps to run (default 100).
        lr: Learning rate passed to Adam (default 0.01).
        log_every: Print a progress line on every epoch divisible by this
            value (default 5). A value of 0 or ``None`` disables printing.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Kept as plain floats so the trackers never hold on to tensors.
    best_val_acc = 0.0
    best_test_acc = 0.0

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']

    def masked_accuracy(pred, mask):
        # Fraction of the masked nodes whose predicted class equals the label.
        return (pred[mask] == labels[mask]).float().mean().item()

    for epoch in range(num_epochs):
        logits = model(g, features)
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Metrics are bookkeeping only; keep them out of the autograd graph.
        with torch.no_grad():
            pred = logits.argmax(1)
            val_acc = masked_accuracy(pred, val_mask)
            test_acc = masked_accuracy(pred, test_mask)

        # Model selection is driven by validation accuracy; the test accuracy
        # is only recorded at the epochs where validation improves.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if log_every and epoch % log_every == 0:
            print(f'Epoch {epoch}, loss: {loss.item():.3f}, val acc: {val_acc:.3f} (best {best_val_acc:.3f}), test acc: {test_acc:.3f} (best {best_test_acc:.3f})')

In [33]:
# Build a fresh model in this cell so that re-running it restarts training
# from newly initialised weights rather than continuing from a trained model.
model = GCN(
    in_feats=g.ndata['feat'].shape[1],
    h_feats=16,
    num_classes=dataset.num_classes,
)
train(g, model)

Epoch 0, loss: 1.945, val acc: 0.188 (best 0.188), test acc: 0.148 (best 0.000)
Epoch 5, loss: 1.888, val acc: 0.556 (best 0.556), test acc: 0.547 (best 0.000)
Epoch 10, loss: 1.805, val acc: 0.664 (best 0.668), test acc: 0.667 (best 0.000)
Epoch 15, loss: 1.699, val acc: 0.666 (best 0.668), test acc: 0.662 (best 0.000)
Epoch 20, loss: 1.572, val acc: 0.686 (best 0.686), test acc: 0.685 (best 0.000)
Epoch 25, loss: 1.425, val acc: 0.704 (best 0.704), test acc: 0.705 (best 0.000)
Epoch 30, loss: 1.264, val acc: 0.706 (best 0.706), test acc: 0.707 (best 0.000)
Epoch 35, loss: 1.097, val acc: 0.710 (best 0.710), test acc: 0.722 (best 0.000)
Epoch 40, loss: 0.932, val acc: 0.722 (best 0.722), test acc: 0.735 (best 0.000)
Epoch 45, loss: 0.778, val acc: 0.730 (best 0.730), test acc: 0.745 (best 0.000)
Epoch 50, loss: 0.641, val acc: 0.748 (best 0.748), test acc: 0.760 (best 0.000)
Epoch 55, loss: 0.523, val acc: 0.754 (best 0.754), test acc: 0.764 (best 0.000)
Epoch 60, loss: 0.426, val acc

In [11]:
# Grab the node feature tensor and show its shape; the recorded output is
# (2708, 1433), matching the graph's num_nodes and the 'feat' scheme width.
in_feat = g.ndata['feat']
in_feat.shape

torch.Size([2708, 1433])

In [13]:
# Run only the model's first layer on the full graph and show the output shape;
# the second dimension (16 in the recorded output) is the hidden width the
# model was built with.
h = model.conv1(g, in_feat)
h.shape

torch.Size([2708, 16])