# Learning to use DGL for Graphs

In [70]:
import dgl
import torch

## (Directed) Homogeneous Graph using DGL

In [71]:
# DGL node IDs are 0-based. A graph is described by two parallel ID sequences:
# edge i runs from src_node_ids[i] to dst_node_ids[i].
directed_edge_pairs = [(0, 1), (0, 2), (0, 4), (1, 2), (1, 3), (2, 3), (2, 4), (3, 4)]
src_id_list, dst_id_list = zip(*directed_edge_pairs)

src_node_ids = torch.tensor(src_id_list)
dst_node_ids = torch.tensor(dst_id_list)

# dgl.graph takes the (source IDs, destination IDs) pair of tensors
edge_endpoints = (src_node_ids, dst_node_ids)
dir_h_graph = dgl.graph(edge_endpoints)

print(dir_h_graph)

Graph(num_nodes=5, num_edges=8,
      ndata_schemes={}
      edata_schemes={})


In [72]:
# checking for nodes and edges
dir_h_graph.nodes()


tensor([0, 1, 2, 3, 4])

In [73]:
dir_h_graph.edges()

(tensor([0, 0, 0, 1, 1, 2, 2, 3]), tensor([1, 2, 4, 2, 3, 3, 4, 4]))

In [74]:
dir_h_graph.edges(form='all') # also return the edge IDs alongside the endpoints

# result is (source IDs, destination IDs, edge IDs): the third tensor holds the edge IDs

(tensor([0, 0, 0, 1, 1, 2, 2, 3]),
 tensor([1, 2, 4, 2, 3, 3, 4, 4]),
 tensor([0, 1, 2, 3, 4, 5, 6, 7]))

---
## Undirected (or Bidirected) Graph

In [75]:
# An undirected graph is stored as a directed one in which every edge u->v
# is paired with its reverse v->u.
forward_src = torch.tensor([0, 0, 0, 1, 1, 2, 2, 3])
forward_dst = torch.tensor([1, 2, 4, 2, 3, 3, 4, 4])

# first half: the original edges; second half: the same edges reversed
src_node_ids = torch.cat([forward_src, forward_dst])
dst_node_ids = torch.cat([forward_dst, forward_src])

# dgl.graph takes the (source IDs, destination IDs) pair of tensors
undir_h_graph = dgl.graph((src_node_ids, dst_node_ids))

print(undir_h_graph)

Graph(num_nodes=5, num_edges=16,
      ndata_schemes={}
      edata_schemes={})


In [76]:
# dgl.to_bidirected builds the bidirected version of a directed graph.
# Here the 8-edge dir_h_graph becomes a 16-edge graph, the same edge count
# as the undir_h_graph that was assembled by hand.

undir_h_graph_new = dgl.to_bidirected(dir_h_graph)
print(undir_h_graph_new)

Graph(num_nodes=5, num_edges=16,
      ndata_schemes={}
      edata_schemes={})


---
## idtype

In [77]:
undir_h_graph.idtype # 32-bit or 64-bit tensors (default = 64-bit integer tensors)

torch.int64

In [78]:
# torch.int32 is a signed 32-bit integer, so node/edge IDs can range from 0 to 2^31 - 1
# if the number of nodes or edges exceeds this limit (2^31 - 1), we have to use 64-bit integers
# the default is 64-bit integers (it is good practice to choose the idtype according to the size of the graph)

In [79]:
# Changing the ID type of a graph

# .int() gives a graph whose idtype is 32-bit
undir_h_graph_32 = undir_h_graph.int()

# idtype confirms the conversion
undir_h_graph_32.idtype

torch.int32

In [80]:
undir_h_graph_64 = undir_h_graph_32.long()

undir_h_graph_64.idtype

torch.int64

It is good practice to use 32-bit integer IDs whenever the graph is small enough, because they are faster to work with. We only use 64-bit integers when we really need them!

## Creating DGL Graphs using Python Iterables & Numpy Arrays

In [81]:
import numpy

# dgl.graph also accepts NumPy arrays for the source/destination ID sequences
src_node_ids = numpy.array([0, 0, 0, 1, 1, 2, 2, 3])
dst_node_ids = numpy.array([1, 2, 4, 2, 3, 3, 4, 4])

graph_numpy = dgl.graph((src_node_ids, dst_node_ids))

print(graph_numpy)

Graph(num_nodes=5, num_edges=8,
      ndata_schemes={}
      edata_schemes={})


In [82]:
graph_numpy.nodes() 

tensor([0, 1, 2, 3, 4])

The node IDs come back as a torch tensor, even though the graph was built from NumPy arrays.

In [83]:
graph_numpy.edges()

(tensor([0, 0, 0, 1, 1, 2, 2, 3]), tensor([1, 2, 4, 2, 3, 3, 4, 4]))

Even if we pass src and dst as NumPy arrays, the `dgl.graph` function converts them to torch tensors internally.

Tensor types are generally preferred throughout DGL due to:
- efficient internal storage in C, and
- explicit data type and device context information.


### Python List

In [84]:
# dgl.graph also accepts plain Python lists for the source/destination ID sequences
src_node_ids = [0, 0, 0, 1, 1, 2, 2, 3] # Python List
dst_node_ids = [1, 2, 4, 2, 3, 3, 4, 4] # Python List

graph_python = dgl.graph((src_node_ids, dst_node_ids))

print(graph_python)

Graph(num_nodes=5, num_edges=8,
      ndata_schemes={}
      edata_schemes={})


---

## Isolated Nodes in the Graphs

In [85]:
# How do we create a graph that has isolated nodes (nodes without any edges)?

In [86]:
src_node_ids = torch.tensor([0, 0, 0, 1, 1, 2, 2, 3])
dst_node_ids = torch.tensor([1, 2, 4, 2, 3, 3, 4, 4])

# Given only the edge lists, DGL sizes the graph from the largest node ID that
# appears in an edge (4 here -> 5 nodes), and every one of those nodes has an edge.
graph = dgl.graph((src_node_ids, dst_node_ids))

# To get isolated nodes, state the node count explicitly with num_nodes:
# IDs 5, 6 and 7 exist in this graph but have no incident edges.
graph_with_isolated = dgl.graph((src_node_ids, dst_node_ids), num_nodes=8)
assert graph_with_isolated.num_nodes() == 8
assert graph_with_isolated.num_edges() == graph.num_edges()

print(graph)

Graph(num_nodes=5, num_edges=8,
      ndata_schemes={}
      edata_schemes={})


---
## Adding Node and Edge Features

ndata and edata schemes

In [87]:
# Small directed graph used for the node/edge feature examples below.
# Edges: 0->2, 1->0, 1->2, 2->3 and 3->3 (the last one is a self-loop).
src_node = torch.tensor([0, 1, 1, 2, 3])
dst_node = torch.tensor([2, 0, 2, 3, 3])

graph = dgl.graph((src_node, dst_node))

graph

Graph(num_nodes=4, num_edges=5,
      ndata_schemes={}
      edata_schemes={})

In [88]:
graph.ndata # empty as we didn't add any data to the nodes

{}

In [89]:
graph.nodes()

tensor([0, 1, 2, 3])

In [90]:
# One feature row per node: the graph has 4 nodes, so there are 4 rows,
# each holding a 3-element vector.
node_vector_rows = [
    [1, 2, 3],
    [2, 4, 5],
    [3, 5, 6],
    [7, 8, 9],
]

# stored under a new ndata entry named 'node_vectors'
graph.ndata['node_vectors'] = torch.tensor(node_vector_rows)


In [91]:
graph

Graph(num_nodes=4, num_edges=5,
      ndata_schemes={'node_vectors': Scheme(shape=(3,), dtype=torch.int64)}
      edata_schemes={})

In [92]:
graph.ndata['node_vectors']

tensor([[1, 2, 3],
        [2, 4, 5],
        [3, 5, 6],
        [7, 8, 9]])

In [93]:
graph.ndata['node_vectors'].shape

torch.Size([4, 3])

Edge Features

In [94]:
graph.edges(form = 'all')

(tensor([0, 1, 1, 2, 3]), tensor([2, 0, 2, 3, 3]), tensor([0, 1, 2, 3, 4]))

In [95]:
graph.edata # empty for now

{}

In [96]:
# one integer weight per edge: the graph has 5 edges (IDs 0-4), so the tensor has 5 entries
graph.edata['edge_weights'] = torch.tensor([100, 200, 300, 400, 500])

In [97]:
graph

Graph(num_nodes=4, num_edges=5,
      ndata_schemes={'node_vectors': Scheme(shape=(3,), dtype=torch.int64)}
      edata_schemes={'edge_weights': Scheme(shape=(), dtype=torch.int64)})

In [98]:
graph.edata['edge_weights']

tensor([100, 200, 300, 400, 500])

---

# Heterogeneous Graph

In [99]:
# A heterogeneous graph holds several node types and edge types at once.
# Each relation is keyed by a triplet (src_type, edge_type, dst_type) and maps to
# a pair (src_node_id_tensor, dst_node_id_tensor):
#   {(src_type, edge_type, dst_type): (src_node_id_tensor, dst_node_id_tensor), ...}
# The graph described here is directed: only user->movie and director->movie.

watches_endpoints = (torch.tensor([0, 0, 1, 2]), torch.tensor([0, 1, 0, 1]))
directs_endpoints = (torch.tensor([0, 1]), torch.tensor([1, 0]))

data_dict = {
    ('user', 'watches', 'movie'): watches_endpoints,
    ('director', 'directs', 'movie'): directs_endpoints,
}

data_dict



{('user', 'watches', 'movie'): (tensor([0, 0, 1, 2]), tensor([0, 1, 0, 1])),
 ('director', 'directs', 'movie'): (tensor([0, 1]), tensor([1, 0]))}

In [100]:
hetero_graph = dgl.heterograph(data_dict)
hetero_graph

Graph(num_nodes={'director': 2, 'movie': 2, 'user': 3},
      num_edges={('director', 'directs', 'movie'): 2, ('user', 'watches', 'movie'): 4},
      metagraph=[('director', 'movie', 'directs'), ('user', 'movie', 'watches')])

---
## UnDirected (BiDirectional) Heterogeneous Graph

In [101]:
hetero_graph.ntypes

['director', 'movie', 'user']

In [102]:
hetero_graph.nodes(ntype = 'user')

tensor([0, 1, 2])

In [103]:
hetero_graph.nodes(ntype = 'movie')

tensor([0, 1])

In [104]:
hetero_graph.nodes(ntype = 'director')

tensor([0, 1])

In [105]:
hetero_graph.etypes

['directs', 'watches']

In [106]:
hetero_graph.edges(etype = 'watches')

(tensor([0, 0, 1, 2]), tensor([0, 1, 0, 1]))

In [107]:
# Endpoints of the two forward relations
user_ids, watched_movie_ids = torch.tensor([0, 0, 1, 2]), torch.tensor([0, 1, 0, 1])
director_ids, directed_movie_ids = torch.tensor([0, 1]), torch.tensor([1, 0])

# Every forward relation gets a reverse relation with source and destination
# swapped, which makes the heterogeneous graph bidirectional.
bidir_data_dict = {
    ('user', 'watches', 'movie'): (user_ids, watched_movie_ids),
    ('movie', 'watched_by', 'user'): (watched_movie_ids, user_ids),
    ('director', 'directs', 'movie'): (director_ids, directed_movie_ids),
    ('movie', 'directed_by', 'director'): (directed_movie_ids, director_ids),
}

bidir_data_dict

{('user', 'watches', 'movie'): (tensor([0, 0, 1, 2]), tensor([0, 1, 0, 1])),
 ('movie', 'watched_by', 'user'): (tensor([0, 1, 0, 1]), tensor([0, 0, 1, 2])),
 ('director', 'directs', 'movie'): (tensor([0, 1]), tensor([1, 0])),
 ('movie', 'directed_by', 'director'): (tensor([1, 0]), tensor([0, 1]))}

In [108]:
bidirected_hetero_graph = dgl.heterograph(bidir_data_dict)

bidirected_hetero_graph

Graph(num_nodes={'director': 2, 'movie': 2, 'user': 3},
      num_edges={('director', 'directs', 'movie'): 2, ('movie', 'directed_by', 'director'): 2, ('movie', 'watched_by', 'user'): 4, ('user', 'watches', 'movie'): 4},
      metagraph=[('director', 'movie', 'directs'), ('movie', 'director', 'directed_by'), ('movie', 'user', 'watched_by'), ('user', 'movie', 'watches')])

In [109]:
# Total edge count = sum of the edge counts of the individual edge types
total_num_edges = sum(
    bidirected_hetero_graph.num_edges(etype=relation)
    for relation in bidirected_hetero_graph.etypes
)

print(total_num_edges)

12


---
## Adding Node and Edge Features to Heterogeneous Graphs

In [110]:
hetero_graph.ntypes

['director', 'movie', 'user']

In [111]:

# Assigning a dict to ndata[<feature name>] sets that feature for each node type.
# The number of rows matches the number of nodes of the type; the vector width
# differs between types here (2 for user and director, 3 for movie).
hetero_graph.ndata['node_vector'] = {
    # 3 user nodes, one 2-dim vector each
    'user': torch.ones(3, 2),
    # 2 movie nodes, one 3-dim vector each
    'movie': torch.ones(2, 3),
    # 2 director nodes, one 2-dim vector each
    'director': torch.ones(2, 2)
}


In [112]:
hetero_graph.ndata['node_vector']

{'director': tensor([[1., 1.],
         [1., 1.]]),
 'movie': tensor([[1., 1., 1.],
         [1., 1., 1.]]),
 'user': tensor([[1., 1.],
         [1., 1.],
         [1., 1.]])}

In [113]:
hetero_graph.etypes

['directs', 'watches']

In [114]:
hetero_graph.num_edges('watches')

4

In [115]:
hetero_graph.num_edges('directs')

2

In [116]:
# Edge features are assigned per edge type in the same way:
# 2 'directs' edges and 4 'watches' edges, one 1-dim weight each.
hetero_graph.edata['edge_weight'] = {'directs': torch.ones(2, 1),
                                    'watches': torch.ones(4, 1)}

In [117]:
hetero_graph.edata['edge_weight']

{('director',
  'directs',
  'movie'): tensor([[1.],
         [1.]]),
 ('user',
  'watches',
  'movie'): tensor([[1.],
         [1.],
         [1.],
         [1.]])}

## Using Nodes and Edges to add features 
### (Method 2)

In [118]:
del hetero_graph.ndata['node_vector']

In [119]:
del hetero_graph.edata['edge_weight']

In [120]:
hetero_graph.nodes(ntype='user')

tensor([0, 1, 2])

In [121]:
hetero_graph.nodes['user'] # a NodeSpace view of the 'user' node type, showing its feature data

NodeSpace(data={})

In [122]:
# Feature dict of the 'user' nodes only: here we pick the node type first and the
# feature name second. It is empty because the features were deleted above.
hetero_graph.nodes['user'].data

{}

In [123]:
# ndata is indexed by FEATURE name, not by node type. 'user' is a node type and no
# feature called 'user' exists, so this lookup comes back empty.
# The feature-first counterpart of nodes['user'].data[<feature>] is ndata[<feature>]['user'].
hetero_graph.ndata['user']

{}

In [124]:
# Method 2: attach the feature one node type at a time through nodes[<type>].data.
# Each shape is (number of nodes of that type, vector width).
node_vector_shapes = {
    'user': (3, 2),
    'movie': (2, 3),
    'director': (2, 2),
}
for node_type, (num_rows, width) in node_vector_shapes.items():
    hetero_graph.nodes[node_type].data['node_vectors'] = torch.ones(num_rows, width)

# the feature-first view now lists 'node_vectors' for all three node types
hetero_graph.ndata

defaultdict(<class 'dict'>, {'node_vectors': {'director': tensor([[1., 1.],
        [1., 1.]]), 'movie': tensor([[1., 1., 1.],
        [1., 1., 1.]]), 'user': tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])}})

In [125]:
hetero_graph.ndata['node_vectors']

{'director': tensor([[1., 1.],
         [1., 1.]]),
 'movie': tensor([[1., 1., 1.],
         [1., 1., 1.]]),
 'user': tensor([[1., 1.],
         [1., 1.],
         [1., 1.]])}

In [126]:
# Method 2 for edges: attach the feature one edge type at a time through edges[<type>].data.
# There are 4 'watches' edges and 2 'directs' edges, each given a 1-dim weight.
hetero_graph.edges['watches'].data['edge_weight'] = torch.ones(4, 1)
hetero_graph.edges['directs'].data['edge_weight'] = torch.ones(2, 1)

In [127]:
hetero_graph.edata['edge_weight']

{('director',
  'directs',
  'movie'): tensor([[1.],
         [1.]]),
 ('user',
  'watches',
  'movie'): tensor([[1.],
         [1.],
         [1.],
         [1.]])}

In [129]:
hetero_graph.nodes['user'].data['node_vectors']

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [131]:
hetero_graph.edges['watches'].data['edge_weight']

tensor([[1.],
        [1.],
        [1.],
        [1.]])