In [3]:
from sklearn.metrics import roc_auc_score

In [4]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F

import itertools
import numpy as np
import scipy.sparse as sp

Using backend: pytorch


In [5]:
dataset = dgl.data.CoraGraphDataset()
g = dataset[0]

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [76]:
sum(g.ndata['feat'][0])

tensor(1.)

In [10]:
u,v = g.edges()

In [5]:
g.edges()

(tensor([   0,    0,    0,  ..., 2707, 2707, 2707]),
 tensor([ 633, 1862, 2582,  ...,  598, 1473, 2706]))

In [6]:
eids = np.arange(g.number_of_edges())
np.random.shuffle(eids)

In [7]:
test_size = int(len(eids) * 0.1)
train_size = g.number_of_edges() - test_size

In [8]:
train_size, test_size

(9501, 1055)

In [12]:
u, v

(tensor([   0,    0,    0,  ..., 2707, 2707, 2707]),
 tensor([ 633, 1862, 2582,  ...,  598, 1473, 2706]))

In [13]:
eids[test_size:]

array([ 652, 9877, 7477, ..., 2150, 5298, 9587])

In [9]:
test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]]
train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]]


NameError: name 'u' is not defined

In [10]:
len(train_pos_u), len(test_pos_u)

(9501, 1055)

In [14]:
adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())))

In [15]:
adj.todense()

matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 1., 0.]])

In [13]:
sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())))

<2708x2708 sparse matrix of type '<class 'numpy.float64'>'
	with 10556 stored elements in COOrdinate format>

In [14]:
np.ones(len(u))

array([1., 1., 1., ..., 1., 1., 1.])

In [16]:
adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes())

In [18]:
adj_neg


matrix([[0., 1., 1., ..., 1., 1., 1.],
        [1., 0., 0., ..., 1., 1., 1.],
        [1., 0., 0., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 0., 1., 1.],
        [1., 1., 1., ..., 1., 0., 0.],
        [1., 1., 1., ..., 1., 0., 0.]])

In [16]:
len(adj_neg[0][0]), sum(adj_neg[0])

(1, matrix([[0., 1., 1., ..., 1., 1., 1.]]))

In [17]:
neg_u, neg_v = np.where(adj_neg != 0)


In [18]:
neg_u, neg_v

(array([   0,    0,    0, ..., 2707, 2707, 2707]),
 array([   1,    2,    3, ..., 2703, 2704, 2705]))

In [19]:
train_pos_u, train_pos_v

(tensor([1705,  880, 1966,  ..., 1358, 1868,  651]),
 tensor([1624, 1013, 1964,  ...,  156,  766,  885]))

In [20]:
train_pos_v[train_pos_u == 0]

tensor([2582, 1862,  633])

In [21]:
neg_v[neg_u==0]

array([   1,    2,    3, ..., 2705, 2706, 2707])

In [22]:
len(neg_u)

7320000

In [23]:
neg_eids = np.random.choice(len(neg_u), g.number_of_edges() // 2)


In [24]:
neg_eids

array([3896589, 2414289, 3228207, ..., 1732409, 1255468, 6776960])

In [25]:
test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]]
train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]]

In [26]:
len(train_pos_u), len(train_pos_v), len(train_neg_u), len(train_neg_v)

(9501, 9501, 4223, 4223)

In [27]:
g

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [28]:
train_g = dgl.remove_edges(g, eids[:test_size])

In [29]:
train_g

Graph(num_nodes=2708, num_edges=9501,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [30]:
from dgl.nn import SAGEConv

In [31]:
class GraphSage(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GraphSage, self).__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, 'mean')
        self.conv2 = SAGEConv(h_feats, h_feats, 'mean')
        
    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h

In [32]:
train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes())
train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes())

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes())
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes())

In [33]:
train_pos_g, train_neg_g

(Graph(num_nodes=2708, num_edges=9501,
       ndata_schemes={}
       edata_schemes={}),
 Graph(num_nodes=2708, num_edges=4223,
       ndata_schemes={}
       edata_schemes={}))

In [34]:
test_pos_g, test_neg_g

(Graph(num_nodes=2708, num_edges=1055,
       ndata_schemes={}
       edata_schemes={}),
 Graph(num_nodes=2708, num_edges=1055,
       ndata_schemes={}
       edata_schemes={}))

In [35]:
import dgl.function as fn

In [36]:
class DotPredictor(nn.Module):
    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            g.edata['score'] = F.sigmoid(g.edata['score'])
            return g.edata['score'][:, 0]
        

In [37]:
model = GraphSage(train_g.ndata['feat'].shape[1], 16)

In [38]:
pred = DotPredictor()

In [39]:
def compute_loss(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score])
    labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])])
    return F.binary_cross_entropy_with_logits(scores, labels)

In [40]:
def compute_auc(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score]).numpy()
    labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy()
    return roc_auc_score(labels, scores)

In [41]:
optimizer = torch.optim.Adam(itertools.chain(model.parameters(), pred.parameters()), lr=0.01)

In [44]:
for e in range(100):
    h = model(train_g, train_g.ndata['feat'])
    pos_score = pred(train_pos_g, h)
    neg_score = pred(train_neg_g, h)
    
    loss = compute_loss(pos_score, neg_score)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if e%5 == 0:
        print('In epoch {}, loss: {}'.format(e, loss))

In epoch 0, loss: 0.6926150918006897
In epoch 5, loss: 0.6198869943618774
In epoch 10, loss: 0.6056978106498718
In epoch 15, loss: 0.5833507776260376
In epoch 20, loss: 0.5607094168663025
In epoch 25, loss: 0.5276073813438416
In epoch 30, loss: 0.4741060733795166
In epoch 35, loss: 0.41713669896125793
In epoch 40, loss: 0.38543450832366943
In epoch 45, loss: 0.35289448499679565
In epoch 50, loss: 0.3290104568004608
In epoch 55, loss: 0.30139583349227905
In epoch 60, loss: 0.2778523564338684
In epoch 65, loss: 0.25896456837654114
In epoch 70, loss: 0.24114464223384857
In epoch 75, loss: 0.2232905924320221
In epoch 80, loss: 0.2067914456129074
In epoch 85, loss: 0.19017000496387482
In epoch 90, loss: 0.17374178767204285
In epoch 95, loss: 0.15748628973960876


In [60]:
def compute_auc(pos_score, neg_score):
    scores = F.sigmoid(torch.cat([pos_score, neg_score])).numpy()
    labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy()
    
#     print(scores[0], labels[0])
    return roc_auc_score(labels, scores)

In [61]:
with torch.no_grad():
    pos_score = pred(test_pos_g, h)
    neg_score = pred(test_neg_g, h)
    print('AUC', compute_auc(pos_score, neg_score))

AUC 0.8471422474787179




In [65]:
model

GraphSage(
  (conv1): SAGEConv(
    (feat_drop): Dropout(p=0.0, inplace=False)
    (fc_self): Linear(in_features=1433, out_features=16, bias=False)
    (fc_neigh): Linear(in_features=1433, out_features=16, bias=False)
  )
  (conv2): SAGEConv(
    (feat_drop): Dropout(p=0.0, inplace=False)
    (fc_self): Linear(in_features=16, out_features=16, bias=False)
    (fc_neigh): Linear(in_features=16, out_features=16, bias=False)
  )
)

In [3]:
import torch as th

In [4]:
g = dgl.heterograph({
   ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
   ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
   ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))
})
g

Graph(num_nodes={'disease': 3, 'drug': 3, 'gene': 4},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2, ('drug', 'treats', 'disease'): 1},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts'), ('drug', 'disease', 'treats')])

In [24]:
g.nodes['drug'].data

{}

In [6]:
sub_g = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'),
                                   ('drug', 'interacts', 'gene')])

In [7]:
sub_g

Graph(num_nodes={'drug': 3, 'gene': 4},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts')])

In [17]:
sub_g.

<bound method DGLHeteroGraph.num_nodes of Graph(num_nodes={'drug': 3, 'gene': 4},
      num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2},
      metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts')])>

In [None]:
g = dgl.heterograph({
   ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
   ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
   ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))
})
sub_g = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'),
                                   ('drug', 'interacts', 'gene')])
h_sub_g = dgl.to_homogeneous(sub_g)
h_sub_g