In [1]:
import dgl.data

DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


Using backend: pytorch


## Cora Dataset Description

The Cora dataset consists of 2708 scientific publications classified into one of seven classes. The citation network consists of 5429 links. Each publication in the dataset is described by a 0/1-valued word vector indicating the absence/presence of the corresponding word from the dictionary. The dictionary consists of 1433 unique words.

Source: https://relational.fit.cvut.cz/dataset/CORA

In [5]:
# Load the Cora citation dataset through DGL; a summary of its statistics is printed on load.
dataset = dgl.data.CoraGraphDataset()

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [7]:
# List every attribute of the dataset object to see what it exposes.
dir(dataset)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_download',
 '_force_reload',
 '_g',
 '_get_hash',
 '_graph',
 '_hash',
 '_hash_key',
 '_load',
 '_name',
 '_num_classes',
 '_raw_dir',
 '_reverse_edge',
 '_save_dir',
 '_url',
 '_urls',
 '_verbose',
 'download',
 'features',
 'graph',
 'has_cache',
 'hash',
 'labels',
 'load',
 'name',
 'num_classes',
 'num_labels',
 'process',
 'raw_dir',
 'raw_path',
 'reverse_edge',
 'save',
 'save_dir',
 'save_name',
 'save_path',
 'test_mask',
 'train_mask',
 'url',
 'val_mask',
 'verbose']

In [8]:
# Inspect the dataset's `graph` attribute (displayed below as a networkx DiGraph).
dataset.graph



<networkx.classes.digraph.DiGraph at 0x7fe02c54e1d0>

In [9]:
# Number of items in the dataset (a single one here).
len(dataset)

1

In [10]:
# Take the dataset's only item: the graph used for the rest of the notebook.
g = dataset[0]

In [11]:
# Show the graph summary: node/edge counts and the schemes of the stored node/edge data.
g

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [12]:
# Per-node data: features, labels, and the train/val/test boolean masks.
g.ndata

{'feat': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]), 'label': tensor([3, 4, 4,  ..., 3, 3, 3]), 'test_mask': tensor([False, False, False,  ...,  True,  True,  True]), 'train_mask': tensor([ True,  True,  True,  ..., False, False, False]), 'val_mask': tensor([False, False, False,  ..., False, False, False])}

In [15]:
# Feature matrix shape: one row per node, one column per feature.
g.ndata['feat'].shape

torch.Size([2708, 1433])

In [25]:
# Pull out the feature vector of node 3 and print only its non-zero entries.
# NOTE(review): all printed values are identical, which suggests the features
# are row-normalised rather than raw 0/1 indicators — confirm.
features = g.ndata['feat'][3]
print(features[features.nonzero()])

tensor([[0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476],
        [0.0476]])


In [26]:
# Per-edge data (empty for this graph).
g.edata

{}

In [27]:
from dgl.nn import GraphConv

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [30]:
class GCN(nn.Module):
    """Two stacked ``GraphConv`` layers with a ReLU in between.

    Args:
        in_feats: Size of each node's input feature vector.
        h_feats: Size of the hidden representation produced by the first layer.
        num_classes: Size of the output produced by the second layer
            (one score per class).
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        # Layers are created in the same order as they are applied in forward().
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        """Run both graph convolutions on ``g`` and return the raw class scores.

        Args:
            g: Graph passed to both convolution layers.
            in_feat: Input node features fed to the first layer.

        Returns:
            The second layer's output; no softmax or other activation is applied.
        """
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)

In [31]:
# Build the GCN: input width = number of feature columns, 16 hidden units,
# and one output per class in the dataset.
model = GCN(g.ndata['feat'].shape[1], 16, dataset.num_classes)

In [35]:
def train(g, model, num_epochs=100, lr=0.01, log_every=5):
    """Train ``model`` full-batch on graph ``g`` and track its accuracy.

    Every epoch runs one forward pass over the whole graph, computes the
    cross-entropy loss on the training nodes only, and takes one Adam step.
    Train/validation/test accuracy are measured from that same forward pass,
    i.e. with the weights as they were *before* the epoch's update.

    Args:
        g: Graph whose ``ndata`` provides the entries 'feat', 'label',
            'train_mask', 'val_mask' and 'test_mask'.
        model: Module called as ``model(g, features)`` that returns one row
            of class scores per node.
        num_epochs: Number of training epochs (default 100).
        lr: Learning rate handed to Adam (default 0.01).
        log_every: Print a progress line every this many epochs (default 5);
            ``0`` or ``None`` disables printing.

    Returns:
        Tuple ``(best_val_acc, best_test_acc)`` of Python floats: the highest
        validation accuracy seen and the test accuracy measured in that same
        epoch. Both are 0.0 if validation accuracy never rose above zero.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_val_acc = 0.0
    best_test_acc = 0.0

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']

    def masked_accuracy(pred, mask):
        # Fraction of correct predictions among the nodes selected by `mask`,
        # converted to a plain float so no tensors are kept across epochs.
        return (pred[mask] == labels[mask]).float().mean().item()

    for e in range(num_epochs):
        logits = model(g, features)
        pred = logits.argmax(1)

        # Only the training nodes contribute to the loss.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        train_acc = masked_accuracy(pred, train_mask)
        val_acc = masked_accuracy(pred, val_mask)
        test_acc = masked_accuracy(pred, test_mask)

        # Model selection is driven by validation accuracy; the test accuracy
        # is only recorded for the epoch that wins on validation.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if log_every and e % log_every == 0:
            print(f"Epoch {e}: loss: {loss.item()} train acc: {train_acc} val_acc: {val_acc} (best: {best_val_acc}) test_acc: {test_acc} (best: {best_test_acc})")

    return best_val_acc, best_test_acc

In [36]:
# Train the model on the Cora graph; progress lines are printed periodically.
train(g, model)

Epoch 0: loss: 1.946557641029358 train acc: 0.09285714477300644 val_acc: 0.23999999463558197 (best: 0.23999999463558197) test_acc: 0.24400000274181366 (best: 0.24400000274181366)
Epoch 5: loss: 1.8985185623168945 train acc: 0.44285714626312256 val_acc: 0.4259999990463257 (best: 0.44600000977516174) test_acc: 0.44200000166893005 (best: 0.46000000834465027)
Epoch 10: loss: 1.823298692703247 train acc: 0.5142857432365417 val_acc: 0.5040000081062317 (best: 0.5040000081062317) test_acc: 0.503000020980835 (best: 0.503000020980835)
Epoch 15: loss: 1.725233554840088 train acc: 0.800000011920929 val_acc: 0.6240000128746033 (best: 0.6240000128746033) test_acc: 0.6349999904632568 (best: 0.6349999904632568)
Epoch 20: loss: 1.6064339876174927 train acc: 0.8571428656578064 val_acc: 0.6639999747276306 (best: 0.6660000085830688) test_acc: 0.6809999942779541 (best: 0.6769999861717224)
Epoch 25: loss: 1.4691616296768188 train acc: 0.8928571343421936 val_acc: 0.6819999814033508 (best: 0.6819999814033508)