In [2]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F

# Implement GraphSAGE

In [3]:
import dgl.function as fn

In [45]:
class SAGEConv(nn.Module):
    """Graph convolution module used by the GraphSAGE model.

    Parameters
    ----------
    in_feat : int
        Input feature size.
    out_feat : int
        Output feature size.
    """
    def __init__(self, in_feat, out_feat):
        super(SAGEConv, self).__init__()
        self.linear = nn.Linear(in_feat * 2, out_feat)
        
    def forward(self, g, h):
        """Forward computation

        Parameters
        ----------
        g : Graph
            The input graph.
        h : Tensor
            The input node feature.
            
        The central piece in this code is the g.update_all function, 
        which gathers and averages the neighbor features. There are three concepts here:
        1. Message function fn.copy_u('h', 'm') that copies the node feature under name 'h' as messages sent to neighbors.

        2. Reduce function fn.mean('m', 'h_N') that averages all the received messages under name 'm' and saves the result as a new node feature 'h_N'.

        3. update_all tells DGL to trigger the message and reduce functions for all the nodes and edges.
        """
        with g.local_scope():
            g.ndata['h'] = h
            # update_all is a message passing API
            g.update_all(message_func=fn.copy_u('h', 'm'), reduce_func=fn.mean('m', 'h_N'))
            h_N = g.ndata['h_N']
            # cancat h (self node) with h_N (message passed from neighbours)
            h_total = torch.cat([h, h_N], dim=1)
            return self.linear(h_total)

# Build a multi-layer GNN model

In [49]:
class GModel(nn.Module):
    def __init__(self, in_feats, h_feats, nlayers, nclasses):
        super(GModel, self).__init__()
        self.convs = []
        self.convs.append(SAGEConv(in_feats, h_feats))
        for i in range(1, nlayers):
            self.convs.append(SAGEConv(h_feats, h_feats))
        self.outlayer = nn.Linear(h_feats, nclasses)
        
    def forward(self, g, h):
        for i in range(len(self.convs)):
            layer = self.convs[i]
            h = layer(g, h)
            h = F.relu(h)
        return self.outlayer(h)

# Training

In [50]:
import dgl.data

In [40]:
dataset = dgl.data.CoraGraphDataset()
g = dataset[0]

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [51]:
def train(g, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    all_logits = []
    best_val_acc = 0
    best_test_acc = 0
    
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    nepoches = 20
    for e in range(nepoches):
        logits = model(g, features)
        pred = torch.argmax(F.softmax(logits, dim=1), dim=1)
        # Compute loss
        # Note that we should only compute the losses of the nodes in the training set,
        # i.e. with train_mask 1.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Compute accuracy on training/validation/test
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        all_logits.append(logits.detach())

        if e % 5 == 0:
            print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
                e, loss, val_acc, best_val_acc, test_acc, best_test_acc))

nlayers = 3
model = GModel(g.ndata['feat'].shape[1], 16, nlayers, dataset.num_classes)
train(g, model)
        



In epoch 0, loss: 1.954, val acc: 0.072 (best 0.072), test acc: 0.091 (best 0.091)
In epoch 5, loss: 1.954, val acc: 0.072 (best 0.072), test acc: 0.091 (best 0.091)
In epoch 10, loss: 1.953, val acc: 0.072 (best 0.072), test acc: 0.091 (best 0.091)
In epoch 15, loss: 1.952, val acc: 0.072 (best 0.072), test acc: 0.091 (best 0.091)


In [10]:
labels = torch.tensor([0, 0, 1, 1])
logits = torch.tensor([[0.1, -0.5], [0.1, -0.5], [0.2, 0.4], [0.2, 0.6]])

In [11]:
F.cross_entropy(nn.softmax(logits), labels)

tensor(0.4965)

In [14]:
F.cross_entropy(F.softmax(logits, dim=1), labels)

tensor(0.5900)

In [26]:
F.softmax(logits, dim=1)

tensor([[0.6457, 0.3543],
        [0.6457, 0.3543],
        [0.4502, 0.5498],
        [0.4013, 0.5987]])

In [27]:
torch.argmax(F.softmax(logits, dim=1), dim=1)

tensor([0, 0, 1, 1])

In [24]:
from sklearn.metrics import accuracy_score
accuracy_score(labels.numpy(), F.softmax(logits, dim=1).numpy())

ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets

In [15]:
F.softmax(logits, dim=1)

tensor([[0.6457, 0.3543],
        [0.6457, 0.3543],
        [0.4502, 0.5498],
        [0.4013, 0.5987]])

In [21]:
import math
math.exp(0.2)/(math.exp(0.2)+math.exp(0.6))

0.401312339887548