# Semi-supervised node classification using Heterogenous Graph Neural Networks


In this tutorial, you will learn:

* Build a relational graph neural network model, a popular GNN architecture proposed by [Schlichtkrull et al.](https://arxiv.org/abs/1703.06103)
* Train the model and understand the result.

In [23]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import itertools
import numpy as np
import scipy.sparse as sp
import pandas as pd
import random

We first load the graph and node labels as is covered in the [last session](./1_load_data.ipynb). Here, we have provided you a function for loading the data.

In [24]:
from dgl.data.rdf import AMDataset



In [31]:
node_features = pd.read_csv("mag_small/node-feat.csv").values[:,1:].squeeze()

node_labels = pd.read_csv("mag_small/node-label.csv").values[:,1:].squeeze()

author_write_paper = pd.read_csv("mag_small/author_write_paper_edge.csv")
author_affiliated_with_institution = pd.read_csv("mag_small/author_affiliated_with_institution_edge.csv")
paper_cites_paper = pd.read_csv("mag_small/paper_cites_paper_edge.csv")
paper_has_topic_field_of_study = pd.read_csv("mag_small/paper_has_topic_field_of_study_edge.csv")

edges = {
    ('author', 'affiliated_with', 'institution'): list(author_affiliated_with_institution.itertuples(index=False)),
    ('author', 'writes', 'paper'): list(author_write_paper.itertuples(index=False)),
    ('paper', 'cites', 'paper'): list(paper_cites_paper.itertuples(index=False)),
    ('paper', 'has_topic', 'field_of_study'): list(paper_has_topic_field_of_study.itertuples(index=False)),
}
g = dgl.heterograph(edges)
print(g)
gpu=-1
category = 'paper'
num_classes = len(np.unique(node_labels))
train_pct=0.9
ids=list(range(len(node_labels)))
random.shuffle(ids)
node_features=torch.tensor(node_features).float()
train_idx = torch.tensor(ids[:round(train_pct*len(ids))])
test_idx = torch.tensor(ids[round(train_pct*len(ids)):])
trans_id={}
c=0
for i in node_labels:
    if i not in trans_id:
        trans_id[i]=c
        c+=1
n_labels=[]
for i in node_labels:
    n_labels.append(trans_id[i])
node_labels=n_labels
labels = torch.tensor(node_labels)
category_id = len(g.ntypes)
for i, ntype in enumerate(g.ntypes):
        if ntype == category:
            category_id = i

# split dataset into train, validate, test
val_idx = train_idx[:len(train_idx) // 8]
train_idx = train_idx[len(train_idx) // 8:]

# check cuda
use_cuda = gpu >= 0 and torch.cuda.is_available()
if use_cuda:
        torch.cuda.set_device(gpu)
        g = g.to('cuda:%d' % gpu)
        labels = labels.cuda()
        train_idx = train_idx.cuda()
        test_idx = test_idx.cuda()

Graph(num_nodes={'author': 1579, 'field_of_study': 584, 'institution': 421, 'paper': 99},
      num_edges={('author', 'affiliated_with', 'institution'): 2312, ('author', 'writes', 'paper'): 1790, ('paper', 'cites', 'paper'): 197, ('paper', 'has_topic', 'field_of_study'): 959},
      metagraph=[('author', 'institution', 'affiliated_with'), ('author', 'paper', 'writes'), ('paper', 'paper', 'cites'), ('paper', 'field_of_study', 'has_topic')])


## Define a HeteroGraphConv model

HeteroGraphConv is a module-level encapsulation to run DGL NN module on heterogeneous graphs. The implementation logic is the same as message passing level API multi_update_all(), including:

DGL NN module within each relation 𝑟.

Reduction that merges the results on the same node type from multiple relations.

$$
h_{dst}^{(l+1)} = \underset{r\in\mathcal{R}, r_{dst}=dst}{AGG} (f_r(g_r, h_{r_{src}}^l, h_{r_{dst}}^l))$$

https://docs.dgl.ai/guide/nn-heterograph.html?highlight=heterogenous%20graphs


If your graph is heterogeneous, you may want to gather message from neighbors along all edge types. You can use the module dgl.nn.pytorch.HeteroGraphConv (also available in MXNet and Tensorflow) to perform message passing on all edge types, then combining different graph convolution modules for each edge type.

The following code will define a heterogeneous graph convolution module that first performs a separate graph convolution on each edge type, then sums the message aggregations on each edge type as the final result for all node types.

dgl.nn.HeteroGraphConv takes in a dictionary of node types and node feature tensors as input, and returns another dictionary of node types and node features.



In [32]:
# ----------- 2. create model -------------- #
# build a two-layer RGCN model
import dgl.nn as dglnn

class RGCN(nn.Module):
    def __init__(self, in_feats, hid_feats, out_feats, rel_names):
        super().__init__()

        self.conv1 = dglnn.HeteroGraphConv({
            rel: dglnn.GraphConv(in_feats, hid_feats)
            for rel in rel_names}, aggregate='sum')
        self.conv2 = dglnn.HeteroGraphConv({
            rel: dglnn.GraphConv(hid_feats, out_feats)
            for rel in rel_names}, aggregate='sum')

    def forward(self, graph, inputs):
        # inputs are features of nodes
        h = self.conv1(graph, inputs)
        h = {k: F.relu(v) for k, v in h.items()}
        h = self.conv2(graph, h)
        return h
class NodeEmbed(nn.Module):
    def __init__(self, num_nodes, embed_size):
        super(NodeEmbed, self).__init__()
        self.embed_size = embed_size
        self.node_embeds = nn.ParameterDict()
        self.num_nodes=num_nodes
        for ntype in num_nodes:
            embed = nn.Parameter(torch.Tensor(g.number_of_nodes(ntype), self.embed_size))
            nn.init.xavier_uniform_(embed, gain=nn.init.calculate_gain('relu'))
            self.node_embeds[ntype] = embed
    
    def forward(self):

        return self.node_embeds
    



In [33]:
num_nodes = {ntype: g.number_of_nodes(ntype) for ntype in g.ntypes}
device = "cuda" if torch.cuda.is_available() else "cpu"
device="cpu"
h_embed=128
embed = NodeEmbed(num_nodes, h_embed).to(device)
ntype_with_features='paper'
h_hidden=16
model = RGCN(h_embed, h_hidden, num_classes,g.etypes).to(device)
opt = torch.optim.Adam(model.parameters())
print(model)

RGCN(
  (conv1): HeteroGraphConv(
    (mods): ModuleDict(
      (affiliated_with): GraphConv(in=128, out=16, normalization=both, activation=None)
      (writes): GraphConv(in=128, out=16, normalization=both, activation=None)
      (cites): GraphConv(in=128, out=16, normalization=both, activation=None)
      (has_topic): GraphConv(in=128, out=16, normalization=both, activation=None)
    )
  )
  (conv2): HeteroGraphConv(
    (mods): ModuleDict(
      (affiliated_with): GraphConv(in=16, out=46, normalization=both, activation=None)
      (writes): GraphConv(in=16, out=46, normalization=both, activation=None)
      (cites): GraphConv(in=16, out=46, normalization=both, activation=None)
      (has_topic): GraphConv(in=16, out=46, normalization=both, activation=None)
    )
  )
)


In [34]:
# ----------- 3. set up loss and optimizer -------------- #
# in this case, loss will in training loop
optimizer = torch.optim.Adam(itertools.chain(model.parameters(), embed.parameters()), lr=0.01)

# ----------- 4. training -------------------------------- #
all_logits = []
for e in range(50):
    # forward
    # Get node embeddings for node types that don't have input features and copy to gpu
    embeddings = {ntype: node_embedding.cuda().to(device) for ntype, node_embedding in embed().items() if ntype!=ntype_with_features}
            
    # Get input features for node type 'paper' which has input features
    embeds = {'paper': node_features.to(device)}
            
    # Merge feature inputs with input that has features
    embeds.update(embeddings)
    logits= model(g,embeds)[category]
    
    # compute loss
    loss = F.cross_entropy(logits[train_idx], labels[train_idx])
    
    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    all_logits.append(logits.detach())
    
    if e % 5 == 0:
        train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx]).item() / len(train_idx)
        val_loss = F.cross_entropy(logits[val_idx], labels[val_idx])
        val_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx]).item() / len(val_idx)
        print("Epoch {:05d} | Train Acc: {:.4f} | Train Loss: {:.4f} | Valid Acc: {:.4f} | Valid loss: {:.4f}".
              format(e, train_acc, loss.item(), val_acc, val_loss.item()))

Epoch 00000 | Train Acc: 0.0128 | Train Loss: 3.8620 | Valid Acc: 0.0000 | Valid loss: 3.8624
Epoch 00005 | Train Acc: 0.2564 | Train Loss: 3.1242 | Valid Acc: 0.0909 | Valid loss: 4.1033
Epoch 00010 | Train Acc: 0.3974 | Train Loss: 2.2899 | Valid Acc: 0.0909 | Valid loss: 4.7368
Epoch 00015 | Train Acc: 0.5897 | Train Loss: 1.5503 | Valid Acc: 0.0000 | Valid loss: 6.0581
Epoch 00020 | Train Acc: 0.7949 | Train Loss: 1.0077 | Valid Acc: 0.0000 | Valid loss: 7.9902
Epoch 00025 | Train Acc: 0.8205 | Train Loss: 0.7557 | Valid Acc: 0.0000 | Valid loss: 10.1148
Epoch 00030 | Train Acc: 0.8462 | Train Loss: 0.6441 | Valid Acc: 0.0000 | Valid loss: 12.1000
Epoch 00035 | Train Acc: 0.8462 | Train Loss: 0.6067 | Valid Acc: 0.0000 | Valid loss: 13.7175
Epoch 00040 | Train Acc: 0.8462 | Train Loss: 0.5891 | Valid Acc: 0.0000 | Valid loss: 14.9389
Epoch 00045 | Train Acc: 0.8462 | Train Loss: 0.5781 | Valid Acc: 0.0000 | Valid loss: 15.7831


In [35]:
# ----------- 5. check results ------------------------ #
    model.eval()
    embed.eval()
    embeds = embed()
    logits= model.forward(g,embeds)[category]
    test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
    test_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx)
    print("Test Acc: {:.4f} | Test loss: {:.4f}".format(test_acc, test_loss.item()))
    print()

Test Acc: 0.0000 | Test loss: 12.4099

