In [1]:
import pandas as pd
import numpy as np

import os, sys
from copy import deepcopy

import torch as th
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
%matplotlib inline

from itertools import combinations
import networkx as nx

os.environ['DGLBACKEND'] = 'pytorch'
import dgl
from dgl import function as fn

In [2]:
from dgl.nn import GraphConv, HeteroGraphConv  # type: ignore 

## dgl.heterograph(data_dict, num_nodes_dict=None, idtype=None, device=None)
Create a heterogeneous graph and return it.

Parameters
data_dict (graph data) –

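The dictionary data for constructing a heterogeneous graph. The keys are string triplets of the form (src_type, edge_type, dst_type), specifying the source node type, the edge type, and the destination node type. The values are graph data in the form of (U, V), where (U[i], V[i]) forms the edge with ID i. The examples below pass U and V as a pair of NumPy integer arrays or as a pair of Python lists of node IDs; see the DGL documentation for the full list of allowed graph data formats.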

In [24]:
# EXAMPLE: build a synthetic user/item heterogeneous graph with random edges,
# node features, labels and training masks.

# Seed both random number generators so that Restart & Run All reproduces the
# same edges, features, labels and masks every time.
SEED = 0
np.random.seed(SEED)
th.manual_seed(SEED)

n_users = 1000
n_items = 500
n_follows = 3000
n_clicks = 5000
n_dislikes = 500
n_hetero_features = 10
n_user_classes = 5
n_max_clicks = 10

# Random edge endpoints: node IDs are drawn uniformly from [0, n_users) or
# [0, n_items), so duplicate edges and user->same-user follows are possible.
follow_src = np.random.randint(0, n_users, n_follows)
follow_dst = np.random.randint(0, n_users, n_follows)
click_src = np.random.randint(0, n_users, n_clicks)
click_dst = np.random.randint(0, n_items, n_clicks)
dislike_src = np.random.randint(0, n_users, n_dislikes)
dislike_dst = np.random.randint(0, n_items, n_dislikes)

# Every relation is paired with a reverse relation built from the same
# endpoint arrays with source and destination swapped.
hetero_graph = dgl.heterograph({
    ('user', 'follow', 'user'): (follow_src, follow_dst),
    ('user', 'followed-by', 'user'): (follow_dst, follow_src),
    ('user', 'click', 'item'): (click_src, click_dst),
    ('item', 'clicked-by', 'user'): (click_dst, click_src),
    ('user', 'dislike', 'item'): (dislike_src, dislike_dst),
    ('item', 'disliked-by', 'user'): (dislike_dst, dislike_src)})

hetero_graph.nodes['user'].data['feature'] = th.randn(n_users, n_hetero_features)
hetero_graph.nodes['item'].data['feature'] = th.randn(n_items, n_hetero_features)
hetero_graph.nodes['user'].data['label'] = th.randint(0, n_user_classes, (n_users,))
# th.randint's upper bound is exclusive, so click labels lie in
# 1 .. n_max_clicks - 1 (the value n_max_clicks itself is never drawn).
hetero_graph.edges['click'].data['label'] = th.randint(1, n_max_clicks, (n_clicks,)).float()
# randomly generate training masks on user nodes and click edges
# (each entry is True with probability 0.6)
hetero_graph.nodes['user'].data['train_mask'] = th.zeros(n_users, dtype=th.bool).bernoulli(0.6)
hetero_graph.edges['click'].data['train_mask'] = th.zeros(n_clicks, dtype=th.bool).bernoulli(0.6)

In [38]:
# Fraction of user nodes flagged for training; the mask was drawn with
# probability 0.6, so the value should be close to 0.6.
hetero_graph.nodes['user'].data['train_mask'].float().mean()

tensor(0.6190)

In [41]:
# Names of the node data fields that were attached to the graph above.
hetero_graph.ndata.keys()

dict_keys(['feature', 'label', 'train_mask'])

In [42]:
# Relation (edge type) names only, without source/destination node types.
hetero_graph.etypes

['clicked-by', 'disliked-by', 'click', 'dislike', 'follow', 'followed-by']

In [61]:
# Full (source type, edge type, destination type) triplet for every relation.
hetero_graph.canonical_etypes

[('item', 'clicked-by', 'user'),
 ('item', 'disliked-by', 'user'),
 ('user', 'click', 'item'),
 ('user', 'dislike', 'item'),
 ('user', 'follow', 'user'),
 ('user', 'followed-by', 'user')]

In [62]:
# Select the single-relation graph for user -click-> item.
hetero_graph['user','click','item']

Graph(num_nodes={'user': 1000, 'item': 500},
      num_edges={('user', 'click', 'item'): 5000},
      metagraph=[('user', 'item', 'click')])

### HeteroGraphConv

In [67]:
# Node type names present in the graph.
hetero_graph.ntypes

['item', 'user']

In [64]:
# Node features grouped by node type: a dict mapping node type -> feature tensor.
hetero_graph.ndata['feature']

{'item': tensor([[-1.3902,  1.2786, -0.4489,  ..., -0.3543,  0.2739,  1.1909],
         [ 0.7319, -0.8361,  0.7062,  ..., -1.2054, -1.0964,  1.2348],
         [-1.4620,  1.5981,  0.8273,  ...,  0.1122, -0.6016, -0.3839],
         ...,
         [-0.8961, -0.7903,  0.1389,  ..., -0.0477, -0.0211, -0.7609],
         [ 0.8304,  0.2174, -0.2034,  ..., -2.0784,  0.0956,  1.5663],
         [ 1.2691, -0.6357,  0.1227,  ...,  0.7385, -0.3411, -0.3693]]),
 'user': tensor([[ 0.0059, -0.0183, -0.3146,  ...,  0.7428,  0.4005,  1.4041],
         [-1.0220,  1.1870,  1.3856,  ..., -0.3799,  0.5323, -0.3904],
         [ 0.4420,  0.6147,  0.7522,  ..., -0.0505, -0.5075, -2.4520],
         ...,
         [ 1.5425, -0.4597, -0.3241,  ..., -1.1273, -1.2750, -0.9491],
         [-2.5829, -0.4557,  1.5716,  ..., -0.1300, -1.3180, -1.8190],
         [ 0.1802,  0.3871, -0.1857,  ..., -0.1854,  0.5063, -0.4800]])}

In [68]:
# Relation names again (same inspection as earlier in the notebook); these are
# the keys used for the per-relation modules of HeteroGraphConv below.
hetero_graph.etypes

['clicked-by', 'disliked-by', 'click', 'dislike', 'follow', 'followed-by']

In [69]:
# Canonical (source type, edge type, destination type) triplets again
# (same inspection as earlier in the notebook).
hetero_graph.canonical_etypes

[('item', 'clicked-by', 'user'),
 ('item', 'disliked-by', 'user'),
 ('user', 'click', 'item'),
 ('user', 'dislike', 'item'),
 ('user', 'follow', 'user'),
 ('user', 'followed-by', 'user')]

In [70]:
# Indexing with an explicit canonical-etype tuple; the recorded output matches
# the one from hetero_graph['user','click','item'].
hetero_graph[('user', 'click', 'item')]

Graph(num_nodes={'user': 1000, 'item': 500},
      num_edges={('user', 'click', 'item'): 5000},
      metagraph=[('user', 'item', 'click')])

In [85]:
# Destination node types; for this graph the result is the same list as ntypes.
hetero_graph.dsttypes

['item', 'user']

In [86]:
# Source node types; for this graph the result is the same list as ntypes.
hetero_graph.srctypes

['item', 'user']

In [100]:
# Width (number of columns) of the 'feature' matrix for each node type.
feat_dim = dict(
    (ntype, feat_matrix.size(1))
    for ntype, feat_matrix in hetero_graph.ndata['feature'].items()
)
feat_dim

{'item': 10, 'user': 10}

In [81]:
# Feature widths of the two node types.
# NOTE(review): `out_feats` holds the *item* feature width, not a layer output
# size -- the name may be misleading; confirm intent.
in_feats = hetero_graph.nodes['user'].data['feature'].size(1)
out_feats = hetero_graph.nodes['item'].data['feature'].size(1)

In [103]:
# One GraphConv (in_feats -> 2) per relation name, wrapped in a single
# HeteroGraphConv created with aggregate='sum'.
conv1 = HeteroGraphConv(
    dict((rel, GraphConv(in_feats, 2)) for rel in hetero_graph.etypes),
    aggregate='sum',
)

In [104]:
# Run the layer on the whole graph, feeding each node type its own features.
conv1(
    hetero_graph,
    inputs={
        ntype: hetero_graph.ndata['feature'][ntype]
        for ntype in ('user', 'item')
    },
)

{'item': tensor([[ 2.7066e+00,  8.5032e-01],
         [-6.3377e-01, -1.2559e+00],
         [-2.9275e-01, -7.5024e-01],
         [ 1.9024e+00, -2.5612e-01],
         [ 7.4768e-01,  5.7439e-01],
         [-5.0223e-01, -2.6807e-01],
         [-7.0489e-01, -3.3054e-01],
         [-1.8597e-01,  1.2710e-01],
         [ 9.1076e-01, -1.7512e+00],
         [ 1.6524e-01, -5.7117e-01],
         [ 1.6481e+00, -9.0393e-01],
         [ 5.4161e-01, -1.5691e-01],
         [-1.8374e-01, -9.0231e-01],
         [ 2.6233e-01, -1.1478e-01],
         [-3.8885e-01,  1.0290e+00],
         [ 1.5064e+00,  5.9041e-01],
         [-4.7293e-01,  1.2410e+00],
         [ 1.9546e+00,  1.1443e+00],
         [-4.8884e-01,  3.6058e-01],
         [-1.3837e+00,  1.1200e-01],
         [-9.9973e-01, -1.4367e-01],
         [ 1.8729e+00, -1.4430e+00],
         [ 6.2640e-01, -1.1634e+00],
         [ 7.3308e-01, -9.6441e-01],
         [-5.4120e-02,  3.3317e-01],
         [ 4.0032e-02, -1.0339e-01],
         [-2.0861e-01,  6.6239

In [43]:
class RGCN(nn.Module):
    """Two-layer relational graph convolution network for heterogeneous graphs.

    Each layer is a ``HeteroGraphConv`` holding one ``GraphConv`` per relation
    name and constructed with ``aggregate='sum'``.

    Parameters
    ----------
    in_feats : int
        Input feature size passed to every first-layer ``GraphConv``.
    hid_feats : int
        Output size of the first layer and input size of the second.
    out_feats : int
        Output feature size of every second-layer ``GraphConv``.
    rel_names : iterable of str
        Relation names; one ``GraphConv`` is created per name in each layer.
    """

    def __init__(self, in_feats, hid_feats, out_feats, rel_names):
        super().__init__()

        # Build the first layer's modules before the second layer's so the
        # order in which parameters are created stays the same.
        first_layer = {}
        for rel in rel_names:
            first_layer[rel] = GraphConv(in_feats, hid_feats)
        self.conv1 = HeteroGraphConv(first_layer, aggregate='sum')

        second_layer = {}
        for rel in rel_names:
            second_layer[rel] = GraphConv(hid_feats, out_feats)
        self.conv2 = HeteroGraphConv(second_layer, aggregate='sum')

    def forward(self, graph, inputs):
        """Apply conv1, a ReLU on every returned tensor, then conv2.

        ``inputs`` is the dictionary of node features handed to ``conv1``;
        the dictionary produced by ``conv2`` is returned unchanged.
        """
        hidden = self.conv1(graph, inputs)
        activated = {}
        for ntype, feat in hidden.items():
            activated[ntype] = F.relu(feat)
        return self.conv2(graph, activated)

In [45]:
# Instantiate the model: input width = node feature width, 20 hidden units,
# one output per user class, one GraphConv per relation in the graph.
model = RGCN(
    in_feats=n_hetero_features,
    hid_feats=20,
    out_feats=n_user_classes,
    rel_names=hetero_graph.etypes,
)

# Per-type input features.
user_feats = hetero_graph.nodes['user'].data['feature']
item_feats = hetero_graph.nodes['item'].data['feature']

# User-node labels and training mask.
labels = hetero_graph.nodes['user'].data['label']
train_mask = hetero_graph.nodes['user'].data['train_mask']

In [46]:
# Forward pass over the whole graph; the result is indexed by node type.
node_features = dict(user=user_feats, item=item_feats)
h_dict = model(hetero_graph, node_features)
h_user, h_item = h_dict['user'], h_dict['item']

In [48]:
# Shape of the item outputs: one row per item node, one column per model output
# (the model was built with out_feats = n_user_classes = 5).
h_dict['item'].shape

torch.Size([500, 5])

In [58]:
# Number of 'item' nodes; matches the row count of h_dict['item'].
hetero_graph.number_of_nodes('item')

500

In [49]:
# Shape of the user outputs: one row per user node, one column per model output.
h_dict['user'].shape

torch.Size([1000, 5])

In [None]:
# Standalone copy of the first RGCN layer.
# `hid_feats` and `rel_names` exist only as RGCN.__init__ parameters, so they
# are bound here to the values used when `model` was built (20 hidden units,
# every relation in the graph); without this the cell raises NameError.
# Note: this rebinds the notebook-level name `conv1`.
hid_feats = 20
rel_names = hetero_graph.etypes
conv1 = HeteroGraphConv({
            rel: GraphConv(in_feats, hid_feats)
            for rel in rel_names}, aggregate='sum')

In [107]:
# Case 2: bipartite graph between node types _U and _V with one relation in
# each direction: _U -_E-> _V and the reversed _V -_Z-> _U.
u, v = [0, 1, 0, 0, 1], [0, 1, 2, 3, 2]

g = dgl.heterograph({
    ('_U', '_E', '_V'): (u, v),
    ('_V', '_Z', '_U'): (v, u),
})

# Random features: 2 _U nodes with 10 columns, 4 _V nodes with 5 columns.
u_fea = th.rand(2, 10)
v_fea = th.rand(4, 5)

g.nodes['_V'].data['feature'] = v_fea
g.nodes['_U'].data['feature'] = u_fea

In [108]:
# Features of the small bipartite graph, grouped by node type (_U and _V).
g.ndata['feature']

{'_U': tensor([[0.6178, 0.0380, 0.2018, 0.0545, 0.0629, 0.6908, 0.4895, 0.3024, 0.9532,
          0.2960],
         [0.8435, 0.7506, 0.3210, 0.5559, 0.9857, 0.1722, 0.1887, 0.7096, 0.7330,
          0.6542]]),
 '_V': tensor([[0.1088, 0.3623, 0.4310, 0.9225, 0.7733],
         [0.5949, 0.6548, 0.0333, 0.5481, 0.7550],
         [0.1579, 0.1468, 0.3998, 0.5246, 0.5955],
         [0.8718, 0.3238, 0.1801, 0.9991, 0.5964]])}

In [117]:
# Per-relation modules for the two-way bipartite graph:
#   _E (_U -> _V): 10-column _U features in, 5 columns out
#   _Z (_V -> _U): 5-column _V features in, 1 column out
conv = HeteroGraphConv(
    dict(_E=GraphConv(10, 5), _Z=GraphConv(5, 1)),
    aggregate='sum',
)

In [118]:
# Apply both relation modules; the result is a dict with one tensor per node
# type (recorded output: 1 column for _U, 5 columns for _V).
conv(g, {'_U': u_fea,'_V':v_fea})

{'_U': tensor([[1.1912],
         [0.4831]], grad_fn=<SumBackward1>),
 '_V': tensor([[-0.1232,  0.5348, -0.3034, -0.3246,  0.3499],
         [-0.0290,  0.2389, -0.2739, -0.7397,  0.3943],
         [-0.1077,  0.5471, -0.4082, -0.7526,  0.5262],
         [-0.1232,  0.5348, -0.3034, -0.3246,  0.3499]], grad_fn=<SumBackward1>)}

In [19]:
# Case 2 (variant): unidirectional bipartite graph with the single relation
# _U -_E-> _V. This rebinds g, u_fea and v_fea.
u, v = [0, 1, 0, 0, 1], [0, 1, 2, 3, 2]

g = dgl.heterograph({('_U', '_E', '_V'): (u, v)})

# Both node types get 5-column random features here.
u_fea = th.rand(2, 5)
v_fea = th.rand(4, 5)

In [20]:
# Only the '_E' relation exists in this graph.
g.etypes

['_E']

In [22]:
# Call the module stored under '_Z' directly on g, passing a
# (source features, destination features) pair.
# NOTE(review): g contains only the '_E' relation, and the recorded output has
# 3 columns although '_Z' is defined in this notebook as GraphConv(5, 1) --
# the output appears to come from an earlier definition of `conv`; re-run to
# confirm.
conv.mods['_Z'](g, (u_fea,v_fea))

tensor([[ 0.2883,  0.0416,  0.1496],
        [-0.0770, -0.5018, -0.3699],
        [ 0.1494, -0.3254, -0.1558],
        [ 0.2883,  0.0416,  0.1496]], grad_fn=<AddBackward0>)

In [28]:
# NOTE(review): the recorded output of this cell is `KeyError: '_U'`. The
# execution counts suggest it ran against a different `conv` than the one
# defined in this notebook -- confirm. With `conv` as defined here, '_E' is
# GraphConv(10, 5) while u_fea now has 5 columns, so a fresh run presumably
# fails differently (shape mismatch). If the error is the point of the demo,
# catch and display it; otherwise build a conv that matches this graph.
conv(g, {'_U': u_fea,'_V':v_fea})

KeyError: '_U'