In [1]:
import dgl
from dgl.data import register_data_args
from dgl.data import CitationGraphDataset
# from dgl.data import CoraGraphDataset, CiteseerGraphDataset

dataset = CitationGraphDataset('cora')
# dataset = CoraGraphDataset()
g = dataset[0]

print(dataset.num_classes)
print(g)

Using backend: pytorch


  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
7
Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'train_mask': Scheme(shape=(), dtype=torch.bool), 'label': Scheme(shape=(), dtype=torch.int64), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'feat': Scheme(shape=(1433,), dtype=torch.float32)}
      edata_schemes={})


In [2]:
n_classes = dataset.num_classes

train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
labels = g.ndata['label']
features = g.ndata['feat']

In [3]:
import time
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from dgl.nn import GATConv

class GAT(nn.Module):
    def __init__(self,
                 g,
                 num_layers,
                 in_dim,
                 num_hidden,
                 num_classes,
                 heads,
                 activation,
                 feat_drop,
                 attn_drop,
                 negative_slope,
                 residual):
        super(GAT, self).__init__()
        self.g = g
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        
        # input projection (no residual)
        self.gat_layers.append(GATConv(
            in_dim, num_hidden, heads[0],
            feat_drop, attn_drop, negative_slope, False, self.activation))
        
        # hidden layers
        for l in range(1, num_layers):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(GATConv(
                num_hidden * heads[l-1], num_hidden, heads[l],
                feat_drop, attn_drop, negative_slope, residual, self.activation))
        
        # output projection
        self.gat_layers.append(GATConv(
            num_hidden * heads[-2], num_classes, heads[-1],
            feat_drop, attn_drop, negative_slope, residual, None))

    def forward(self, inputs):
        h = inputs
        for l in range(self.num_layers):
            h = self.gat_layers[l](self.g, h).flatten(1)
        # output projection
        logits = self.gat_layers[-1](self.g, h).mean(1)
        return logits

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [5]:
num_heads = 8
num_out_heads = 1
num_layers = 1

heads = ([num_heads] * num_layers) + [num_out_heads]
heads

[8, 1]

In [6]:
model = GAT(g,
            in_dim = features.shape[1],
            num_classes = n_classes,
            heads = heads,
            activation = F.elu,
            num_layers = 1,
            num_hidden = 8,
            feat_drop = 0.6,
            attn_drop = 0.6,
            negative_slope = 0.2,
            residual = False)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.005, weight_decay = 5e-4)
criterion = nn.CrossEntropyLoss()
print(model)

GAT(
  (gat_layers): ModuleList(
    (0): GATConv(
      (fc): Linear(in_features=1433, out_features=64, bias=False)
      (feat_drop): Dropout(p=0.6, inplace=False)
      (attn_drop): Dropout(p=0.6, inplace=False)
      (leaky_relu): LeakyReLU(negative_slope=0.2)
    )
    (1): GATConv(
      (fc): Linear(in_features=64, out_features=7, bias=False)
      (feat_drop): Dropout(p=0.6, inplace=False)
      (attn_drop): Dropout(p=0.6, inplace=False)
      (leaky_relu): LeakyReLU(negative_slope=0.2)
    )
  )
)


In [7]:
def evaluate(model, features, mask):
    model.eval()
    with torch.no_grad():
        out = model(features)
        out = out[mask]
        return out

In [8]:
dur = []

for epoch in range(200):
    model.train()
    
    if epoch >= 3:
        t0 = time.time()
        
    out = model(features)
    loss = criterion(out[train_mask], labels[train_mask])
        
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch >= 3:
        dur.append(time.time() - t0)
       
    pred = out.argmax(1)
    train_correct = (pred[train_mask] == labels[train_mask]).sum().item()
    train_acc = train_correct / len(labels[train_mask])

    # val_acc = evaluate(model, features, labels, val_mask)
    val_out = evaluate(model, features, val_mask)
    val_correct = (val_out.argmax(1) == labels[val_mask]).sum().item()
    val_acc = val_correct / len(labels[val_mask])
    
    if epoch % 10 == 0:
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} | ValAcc {:.4f}".
          format(epoch, np.mean(dur), loss.item(), train_acc, val_acc))

Epoch 00000 | Time(s) nan | Loss 1.9445 | TrainAcc 0.1000 | ValAcc 0.2140


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 00010 | Time(s) 0.1063 | Loss 1.8741 | TrainAcc 0.5000 | ValAcc 0.6920
Epoch 00020 | Time(s) 0.1070 | Loss 1.8062 | TrainAcc 0.5643 | ValAcc 0.6320
Epoch 00030 | Time(s) 0.1074 | Loss 1.7147 | TrainAcc 0.6500 | ValAcc 0.7860
Epoch 00040 | Time(s) 0.1083 | Loss 1.5846 | TrainAcc 0.6929 | ValAcc 0.7560
Epoch 00050 | Time(s) 0.1095 | Loss 1.4498 | TrainAcc 0.6929 | ValAcc 0.7880
Epoch 00060 | Time(s) 0.1105 | Loss 1.4195 | TrainAcc 0.6643 | ValAcc 0.7940
Epoch 00070 | Time(s) 0.1115 | Loss 1.3081 | TrainAcc 0.7000 | ValAcc 0.8020
Epoch 00080 | Time(s) 0.1119 | Loss 1.2070 | TrainAcc 0.7214 | ValAcc 0.7900
Epoch 00090 | Time(s) 0.1124 | Loss 1.1139 | TrainAcc 0.7143 | ValAcc 0.7920
Epoch 00100 | Time(s) 0.1129 | Loss 1.1664 | TrainAcc 0.7357 | ValAcc 0.8000
Epoch 00110 | Time(s) 0.1132 | Loss 1.0778 | TrainAcc 0.7214 | ValAcc 0.7960
Epoch 00120 | Time(s) 0.1136 | Loss 1.0406 | TrainAcc 0.6714 | ValAcc 0.8000
Epoch 00130 | Time(s) 0.1141 | Loss 0.9896 | TrainAcc 0.7000 | ValAcc 0.8000

In [9]:
model.eval()
out = model(features)
pred = out.argmax(1)
test_correct = (pred[test_mask] == labels[test_mask]).sum().item()
test_acc = test_correct / len(labels[test_mask])
print("Test Accuracy {:.4f}".format(test_acc))

Test Accuracy 0.8130
