In [2]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F

### Cora dataset

In [3]:
# Cora dataset = a single citation-network graph
#        nodes = papers
#        edges = connectivity (citation) between papers

import dgl.data

dataset=dgl.data.CoraGraphDataset()
g=dataset[0]

# train_mask, val_mask, test_mask = boolean indices selecting the train, val, test nodes
train_mask=g.ndata['train_mask']
val_mask=g.ndata['val_mask']
test_mask=g.ndata['test_mask']

# Count the selected nodes with Tensor.sum(): one vectorised call per mask, whereas the
# Python built-in sum() would walk the mask one element at a time.
print(f"num_train_nodes: {train_mask.sum().item()} | num_val_nodes: {val_mask.sum().item()} | num_test_nodes: {test_mask.sum().item()}")

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
num_train_nodes: 140 | num_val_nodes: 500 | num_test_nodes: 1000


### DGL framework
DGL updates node features via 3 steps:

1. **message_func(edges)**: 
Each edge has attributes edge=[src, dst, data]. This function sends info $src \rightarrow dst$. It stores everything needed to do the node-feature update in a dict called "mailbox". 
2. **reduce_func(nodes)**: Update the node features by the update equation. All info in "mailbox" (obtained from message_func) will be used in this step.
3. **update_all(message_func,reduce_func)**: send messages through all edges (by message_func) and update features of all nodes (by reduce_func)

Additionally if the update equations involve edge updates, use **apply_edges(func)** to update edge features.

## 1. GraphSage
GraphSage is a convolution layer in a graph neural network (GNN) which updates as follows 
\begin{align*}
h_i^{(l+1)}&= W^{(l)}\cdot\text{concat}\left(h_i^{(l)},h_{N(i)}^{(l+1)}\right)+b^{(l)} \quad \text{with} \\
 h_{N(i)}^{(l+1)}&=\text{Mean}\left(\left\{h_j^{(l)}: j\in N(i)\right\}\right)
\end{align*}

Recall: update equation for GCN
$$h_i^{(l+1)}=W^{(l)}\sum_{j\in N(i)}\dfrac{1}{c_{ji}}h_j^{(l)}+b^{(l)}.$$


In [8]:
import dgl.function as fn

class sageconv(nn.Module):
    """Hand-written GraphSage layer with mean aggregation.

    Each node's own feature vector is concatenated with the mean of the feature
    vectors sent to it along its incoming edges, and the result is passed through
    a single linear layer.

    Args:
        in_dim: size of each input node feature vector.
        out_dim: size of each output node feature vector.
    """
    def __init__(self,in_dim,out_dim):
        super().__init__()
        # the linear map consumes [h_i ; h_N(i)], i.e. two in_dim-sized halves
        self.linear=nn.Linear(in_dim+in_dim,out_dim)

    def forward(self,g,h):
        """Apply the layer to graph ``g`` with input node features ``h``."""
        # local_scope: the 'h' / 'h_N' entries written below are not left on g afterwards
        with g.local_scope():
            g.ndata['h']=h
            # message: copy the source node's 'h' onto the edge as 'm';
            # reduce: average the received 'm' values into a new node feature 'h_N'
            g.update_all(fn.copy_u('h','m'),fn.mean('m','h_N'))
            neigh_mean=g.ndata['h_N'] # h_N(i)
            return self.linear(torch.cat((h,neigh_mean),dim=-1))


In [9]:
# hyperparameters
hidden_dim=64                      # width of the hidden layer
out_dim=dataset.num_classes        # number of classes reported by the dataset
in_dim=g.ndata["feat"].size(-1)    # length of each node's input feature vector

In [10]:
# GNN: input -> sageconv1 -> relu -> sageconv2 -> classification

class sageconv_net(nn.Module):
    """Two-layer GraphSage network: sageconv -> ReLU -> sageconv.

    The output of the second layer is returned without any further activation.

    Args:
        in_dim: size of the input node features.
        hidden_dim: size of the hidden representation between the two layers.
        out_dim: size of the per-node output.
    """
    def __init__(self,in_dim,hidden_dim,out_dim):
        super().__init__()
        self.conv1=sageconv(in_dim,hidden_dim)   # input features -> hidden representation
        self.conv2=sageconv(hidden_dim,out_dim)  # hidden representation -> output

    def forward(self,g,h):
        """Return the per-node outputs for graph ``g`` with input features ``h``."""
        hidden=F.relu(self.conv1(g,h))
        return self.conv2(g,hidden)
    
# build the network and report how many parameters it has
net=sageconv_net(in_dim=in_dim,hidden_dim=hidden_dim,out_dim=out_dim)
print(f"{sum(p.numel() for p in net.parameters())/1e6} million parameters")

# sanity check: one gradient-free forward pass over the whole graph
with torch.no_grad():
    out=net(g,g.ndata["feat"])
print(out.shape)

0.184391 million parameters
torch.Size([2708, 7])


## 2. Train and Test

In [13]:
# train loop
def train(model,graph,loss_fn,optimizer):
    """Run one full-graph optimisation step and report metrics on the training nodes.

    Args:
        model: module called as ``model(graph, features)`` that returns per-node logits.
        graph: graph whose ``ndata`` holds 'feat', 'label' and a boolean 'train_mask'.
        loss_fn: callable ``loss_fn(logits, labels)`` returning the loss to minimise.
        optimizer: optimizer that updates the parameters of ``model``.

    Returns:
        (loss, acc): the training loss, detached from the autograd graph, and the
        accuracy on the training nodes. Both come from the logits computed before
        the parameter update.
    """
    model.train()
    features=graph.ndata['feat']
    labels=graph.ndata['label']
    # Read the mask from the graph that was passed in, not from a notebook global,
    # so the function does not depend on hidden kernel state.
    train_mask=graph.ndata['train_mask']

    # forward and backward
    optimizer.zero_grad()
    logits=model(graph,features) # prediction on the whole graph
    # loss on train nodes only
    loss=loss_fn(logits[train_mask],labels[train_mask])
    loss.backward()
    optimizer.step()

    # compute accuracy on train nodes
    preds=logits.argmax(dim=-1)
    acc=(preds[train_mask]==labels[train_mask]).float().mean()

    # detach so the returned value carries no reference to the autograd graph
    return loss.detach(), acc

# evaluation loop
@torch.no_grad()
def evaluate(model, graph, loss_fn, mask_key='val_mask'):
    """Compute loss and accuracy on one node split without tracking gradients.

    Args:
        model: module called as ``model(graph, features)`` that returns per-node logits.
        graph: graph whose ``ndata`` holds 'feat', 'label' and the boolean mask
            named by ``mask_key``.
        loss_fn: callable ``loss_fn(logits, labels)`` returning the loss.
        mask_key: name of the ``ndata`` entry selecting the nodes to score.
            Defaults to 'val_mask'; pass 'test_mask' to score the test split.

    Returns:
        (loss, acc): loss and accuracy over the selected nodes.
    """
    # torch.no_grad is applied with parentheses above: that form is accepted by
    # every PyTorch release, whereas the bare decorator needs a recent one.
    model.eval()
    features=graph.ndata['feat']
    labels=graph.ndata['label']
    # Read the mask from the graph that was passed in, not from a notebook global.
    mask=graph.ndata[mask_key]
    # forward
    logits=model(graph,features) # prediction on the whole graph
    loss=loss_fn(logits[mask],labels[mask])
    # compute acc
    preds=logits.argmax(dim=-1)
    acc=(preds[mask]==labels[mask]).float().mean()

    return loss,acc

In [14]:
# seed the RNG before the model is built so its initial weights are repeatable
torch.manual_seed(1442)

# model: the two-layer network built from the hand-written sageconv layer
model=sageconv_net(in_dim=in_dim,hidden_dim=hidden_dim,out_dim=out_dim)

# loss: cross-entropy on the class logits
loss_fn=F.cross_entropy

# optimizer: AdamW over all model parameters
optimizer=torch.optim.AdamW(params=model.parameters(),lr=0.01)

num_epochs=30

# each epoch = one full-graph training step followed by a validation pass
for epoch in range(num_epochs):
    train_loss,train_acc=train(model=model,graph=g,loss_fn=loss_fn,optimizer=optimizer)
    val_loss,val_acc=evaluate(model=model,graph=g,loss_fn=loss_fn)
    print(f"Epoch: {epoch+1} | train_loss: {train_loss:.4f} | train_acc: {train_acc*100:.2f}% | val_loss: {val_loss:.4f} | val_acc: {val_acc*100:.2f}% |")


Epoch: 1 | train_loss: 1.9479 | train_acc: 14.29% | val_loss: 1.9358 | val_acc: 11.40% |
Epoch: 2 | train_loss: 1.9331 | train_acc: 20.00% | val_loss: 1.9284 | val_acc: 16.80% |
Epoch: 3 | train_loss: 1.9049 | train_acc: 39.29% | val_loss: 1.9184 | val_acc: 40.40% |
Epoch: 4 | train_loss: 1.8689 | train_acc: 97.14% | val_loss: 1.9050 | val_acc: 53.60% |
Epoch: 5 | train_loss: 1.8249 | train_acc: 99.29% | val_loss: 1.8872 | val_acc: 52.00% |
Epoch: 6 | train_loss: 1.7717 | train_acc: 98.57% | val_loss: 1.8640 | val_acc: 52.60% |
Epoch: 7 | train_loss: 1.7087 | train_acc: 98.57% | val_loss: 1.8348 | val_acc: 57.00% |
Epoch: 8 | train_loss: 1.6358 | train_acc: 99.29% | val_loss: 1.7991 | val_acc: 61.00% |
Epoch: 9 | train_loss: 1.5530 | train_acc: 99.29% | val_loss: 1.7567 | val_acc: 64.60% |
Epoch: 10 | train_loss: 1.4608 | train_acc: 99.29% | val_loss: 1.7082 | val_acc: 66.20% |
Epoch: 11 | train_loss: 1.3600 | train_acc: 99.29% | val_loss: 1.6544 | val_acc: 69.00% |
Epoch: 12 | train_l

## 3. GraphSage with built-in function
SAGEConv(in_feats, out_feats, aggregator_type, feat_drop=0.0, bias=True, norm=None, activation=None)

In [15]:
from dgl.nn import SAGEConv

# model: input -> sageconv1 -> relu -> sageconv2 -> classification

class SageConv_Net(nn.Module):
    """Two-layer GraphSage network built from DGL's SAGEConv with the 'mean' aggregator.

    Pipeline: SAGEConv -> ReLU -> SAGEConv; the second layer's output is returned
    without any further activation.

    Args:
        in_dim: size of the input node features.
        hidden_dim: size of the hidden representation between the two layers.
        out_dim: size of the per-node output.
    """
    def __init__(self, in_dim, hidden_dim,out_dim):
        super().__init__()
        self.conv1=SAGEConv(in_dim,hidden_dim,aggregator_type='mean')   # input -> hidden
        self.conv2=SAGEConv(hidden_dim,out_dim,aggregator_type='mean')  # hidden -> output

    def forward(self,g,h):
        """Return the per-node outputs for graph ``g`` with input features ``h``."""
        hidden=F.relu(self.conv1(g,h))
        return self.conv2(g,hidden)
    
# build the built-in-layer network and report how many parameters it has
model2=SageConv_Net(in_dim=in_dim,hidden_dim=hidden_dim,out_dim=out_dim)
print(f"{sum(p.numel() for p in model2.parameters())/1e6} million parameters")

0.184391 million parameters


In [16]:
# model: a fresh SageConv_Net instance, so training starts from newly initialised weights
model2=SageConv_Net(in_dim=in_dim,hidden_dim=hidden_dim,out_dim=out_dim)
# loss: cross-entropy on the class logits
loss_fn=F.cross_entropy
# optimizer: AdamW over the parameters of model2
optimizer=torch.optim.AdamW(params=model2.parameters(),lr=0.01)

num_epochs=30

# each epoch = one full-graph training step followed by a validation pass
for epoch in range(num_epochs):
    train_loss,train_acc=train(model=model2,graph=g,loss_fn=loss_fn,optimizer=optimizer)
    val_loss,val_acc=evaluate(model=model2,graph=g,loss_fn=loss_fn)
    print(f"Epoch: {epoch+1} | train_loss: {train_loss:.4f} | train_acc: {train_acc*100:.2f}% | val_loss: {val_loss:.4f} | val_acc: {val_acc*100:.2f}% |")


Epoch: 1 | train_loss: 1.9485 | train_acc: 15.00% | val_loss: 1.9308 | val_acc: 24.00% |
Epoch: 2 | train_loss: 1.8670 | train_acc: 67.86% | val_loss: 1.8802 | val_acc: 61.80% |
Epoch: 3 | train_loss: 1.7599 | train_acc: 99.29% | val_loss: 1.8169 | val_acc: 71.20% |
Epoch: 4 | train_loss: 1.6320 | train_acc: 99.29% | val_loss: 1.7495 | val_acc: 70.80% |
Epoch: 5 | train_loss: 1.4933 | train_acc: 99.29% | val_loss: 1.6821 | val_acc: 70.60% |
Epoch: 6 | train_loss: 1.3481 | train_acc: 99.29% | val_loss: 1.6144 | val_acc: 72.40% |
Epoch: 7 | train_loss: 1.1980 | train_acc: 99.29% | val_loss: 1.5471 | val_acc: 74.20% |
Epoch: 8 | train_loss: 1.0470 | train_acc: 99.29% | val_loss: 1.4804 | val_acc: 76.60% |
Epoch: 9 | train_loss: 0.8995 | train_acc: 99.29% | val_loss: 1.4144 | val_acc: 78.20% |
Epoch: 10 | train_loss: 0.7596 | train_acc: 99.29% | val_loss: 1.3488 | val_acc: 78.20% |
Epoch: 11 | train_loss: 0.6306 | train_acc: 99.29% | val_loss: 1.2839 | val_acc: 78.60% |
Epoch: 12 | train_l