In [1]:
import numpy as np
import os
import math
import matplotlib.pyplot as plt
import networkx as nx
import torch as th
import dgl

Using backend: pytorch


## Different ways to create a DGL graph

In [None]:
# Build a DGL graph from an existing NetworkX graph object.
g_nx = nx.petersen_graph()
g_dgl = dgl.DGLGraph(g_nx)

# Left panel: the original NetworkX graph.
ax_left = plt.subplot(1, 2, 1)
nx.draw(g_nx, ax=ax_left, with_labels=True)
# Right panel: the DGL graph, converted back to NetworkX so it can be drawn.
ax_right = plt.subplot(1, 2, 2)
nx.draw(g_dgl.to_networkx(), ax=ax_right, with_labels=True)
plt.show()

In [None]:
# Build a graph from a pair of torch tensors: `u` holds source node IDs and
# `v` holds destination node IDs.
# NOTE(review): `u` has one element while `v` has five, so this presumably relies
# on DGLGraph broadcasting the single source ID across all destinations —
# confirm that the installed DGL version supports this.
# `v` is reused by the next cell, so keep its name and contents stable.
u = th.tensor([0])
v = th.tensor([1, 2, 3, 4, 5])
star1 = dgl.DGLGraph((u, v))
# Visualize the graph.
nx.draw(star1.to_networkx(), with_labels=True)
plt.show()

In [None]:
# Build a graph incrementally: start empty, add 10 nodes, then add edges.
g_toy = dgl.DGLGraph()
g_toy.add_nodes(10)
# Add edges from node 0 to every ID in `v` (defined in the previous cell),
# visiting the IDs in reversed order. `v.flip(0)` is the explicit tensor
# operation for reversing a 1-D tensor; it replaces a direct call to the
# dunder method `v.__reversed__()`, which should not be invoked by hand.
g_toy.add_edges(0, v.flip(0))
nx.draw(g_toy.to_networkx(), with_labels=True)
plt.show()

In [None]:
# Total number of nodes; read as the global `N` by the PageRank functions below.
N = g_toy.number_of_nodes()

# Assign data values to nodes: a uniform initial PageRank value ('pv') and
# each node's out-degree ('deg') stored as floats.
uniform_pv = th.ones(N) / N
g_toy.ndata['pv'] = uniform_pv
g_toy.ndata['deg'] = g_toy.out_degrees().float()

In [None]:
# Message function: defines what every edge carries away from its source node.
def pagerank_message_func(edges):
    """Compute the per-edge PageRank message.

    Args:
        edges: a `dgl.udf.EdgeBatch`; only its `src` feature mapping is read,
            through the keys 'pv' and 'deg'.

    Returns:
        A dict with the single key 'pv', holding the source nodes' 'pv'
        divided by the source nodes' 'deg'.
    """
    src_feats = edges.src
    outgoing_share = src_feats['pv'] / src_feats['deg']
    return {'pv': outgoing_share}

# Reduce function: defines how a node combines all the messages it received.
def pagerank_reduce_func(nodesBatch):
    """Aggregate received messages into a new PageRank value.

    Reads the module-level globals `DAMP` and `N`.

    Args:
        nodesBatch: a `dgl.udf.NodeBatch`; only `mailbox['pv']` is read.

    Returns:
        A dict with the single key 'pv', holding
        `(1 - DAMP) / N + DAMP * <mailbox 'pv' summed along dim 1>`.
    """
    inbox = nodesBatch.mailbox['pv']
    received_total = inbox.sum(dim=1)
    updated_pv = (1 - DAMP) / N + DAMP * received_total
    return {'pv': updated_pv}

# Damping factor, read as a global by `pagerank_reduce_func` and `pagerank_builtin`.
DAMP = 0.5
# Register both user-defined functions on the graph; the later `send()` and
# `recv()` calls in this notebook pass no functions explicitly.
g_toy.register_message_func(pagerank_message_func)
g_toy.register_reduce_func(pagerank_reduce_func)
# Display the current `pv` values (no message passing has been run in this cell).
g_toy.ndata['pv']

In [None]:
# Run one round of message passing. No arguments are given, so this presumably
# applies the registered message/reduce functions to all edges and nodes —
# confirm against the installed DGL version.
g_toy.send()
g_toy.recv()
# now the `pv` values have been updated
g_toy.ndata['pv']

In [None]:
# equivalently, use `update_all()` method
# use dgl built-in functions to handle msg
import dgl.function as fn
def pagerank_builtin(g):
    """Run one PageRank update on `g` using DGL built-in message/reduce functions.

    Reads the module-level globals `DAMP` and `N`, and the node features
    'pv' and 'deg' of `g`. Overwrites `g.ndata['pv']` in place and leaves the
    aggregated messages in `g.ndata['m_sum']`. Returns nothing.
    """
    # Built-ins: copy the source node's 'pv' into message 'm', then sum the
    # incoming 'm' messages into node feature 'm_sum'.
    copy_pv_to_msg = fn.copy_src(src='pv', out='m')
    sum_incoming = fn.sum(msg='m', out='m_sum')

    # Pre-divide by the degree so the copied message is already pv / deg.
    g.ndata['pv'] = g.ndata['pv'] / g.ndata['deg']
    g.update_all(message_func=copy_pv_to_msg, reduce_func=sum_incoming)

    base_term = (1 - DAMP) / N
    g.ndata['pv'] = base_term + DAMP * g.ndata['m_sum']

## Graph Classification Demo

In [None]:
from dgl.data import MiniGCDataset
import matplotlib.pyplot as plt
import networkx as nx
# A dataset with 80 samples; each graph has between 10 and 20 nodes.
dataset = MiniGCDataset(80, 10, 20)

In [None]:
# Take a look at one specific example: draw sample 33 and show its class
# label in the figure title.
graph, label = dataset[33] # each sample unpacks into a (graph, label) pair
fig, ax = plt.subplots()
# Convert to NetworkX for drawing, on the axes created above.
nx.draw(graph.to_networkx(), ax=ax)
# The `{:d}` format spec means `label` must be formattable as an integer.
ax.set_title('Class: {:d}'.format(label))
plt.show()

In [None]:
# Collate function: merges several samples into one batch for the DataLoader.
def collate(samples):
    """Combine a list of (graph, label) pairs into a single batch.

    Args:
        samples: a list of pairs `(graph, label)`.

    Returns:
        A tuple `(batched_graph, labels)`: the result of `dgl.batch` on all
        graphs, and a tensor built from all labels.
    """
    # Transpose the list of pairs into one tuple of graphs and one of labels.
    graph_seq, label_seq = zip(*samples)
    merged_graph = dgl.batch(list(graph_seq))
    label_tensor = th.tensor(list(label_seq))
    return merged_graph, label_tensor

from dgl.nn.pytorch import GraphConv
import torch.nn as nn
import torch.nn.functional as F

class Classifier(nn.Module):
    """Graph classifier: two GraphConv layers, a mean-node readout and a linear head."""

    def __init__(self, in_dim, hidden_dim, n_classes):
        """Create the layers.

        Args:
            in_dim: size of the input node feature.
            hidden_dim: size of the hidden node representation.
            n_classes: number of output classes.
        """
        super().__init__()
        # Graph convolutions: in_dim -> hidden_dim -> hidden_dim.
        self.conv1 = GraphConv(in_dim, hidden_dim)
        self.conv2 = GraphConv(hidden_dim, hidden_dim)
        # Linear head mapping the graph representation to class scores.
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g):
        """Return class scores for the (batched) graph `g`.

        Side effect: stores the final node representations in `g.ndata['h']`.
        """
        # The node in-degree is the initial node feature, reshaped to one column
        # per node. For undirected graphs the in-degree equals the out-degree.
        degree_feat = g.in_degrees().view(-1, 1).float()
        # Two rounds of graph convolution, each followed by ReLU.
        hidden = F.relu(self.conv1(g, degree_feat))
        hidden = F.relu(self.conv2(g, hidden))
        g.ndata['h'] = hidden
        # Graph representation: the average of all node representations.
        graph_repr = dgl.mean_nodes(g, 'h')
        return self.classify(graph_repr)

In [None]:
import torch.optim as optim
from torch.utils.data import DataLoader

# Create training and test sets.
# NOTE(review): `testset` is created here but is not used by any visible cell
# of this notebook.
trainset = MiniGCDataset(320, 10, 20)
testset = MiniGCDataset(80, 10, 20)
# Use PyTorch's DataLoader and the collate function defined before.
data_loader = DataLoader(trainset, batch_size=32, shuffle=True,
                         collate_fn=collate)

# Create model
# in_dim is 1 because `Classifier.forward` uses the node degree, reshaped to a
# single column, as the only input feature.
model = Classifier(1, 256, trainset.num_classes)
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Put the model in training mode before the training loop.
model.train()

# Train for 80 epochs, recording the mean minibatch loss of every epoch.
epoch_losses = []
# Number of minibatches per epoch, used to average the loss. It is taken from
# the loader rather than from a loop counter, so the average does not depend
# on a leftover loop variable and the builtin `iter` is no longer shadowed.
num_batches = len(data_loader)
for epoch in range(80):
    epoch_loss = 0
    # `bg` is the batched graph and `label` the label tensor built by `collate`.
    for bg, label in data_loader:
        prediction = model(bg)
        loss = loss_func(prediction, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # `.item()` already returns a plain Python number detached from autograd.
        epoch_loss += loss.item()
    epoch_loss /= num_batches
    print('Epoch {}, loss {:.4f}'.format(epoch, epoch_loss))
    epoch_losses.append(epoch_loss)

In [None]:
# Plot the per-epoch mean loss recorded by the training loop (one point per epoch).
plt.plot(epoch_losses)
plt.title('cross entropy averaged over minibatches')
plt.show()