In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [23]:
!pip install dgl



In [24]:
import dgl
from dgl import DGLGraph

# Load Pytorch as backend
dgl.load_backend('pytorch')

Using backend: pytorch


Load the rest of the necessary libraries.

In [25]:
import numpy as np

In [None]:
from dgl.nn.pytorch import conv as dgl_conv
import torch.nn.functional as F

In [53]:


class GCNModel(nn.Module):
    def __init__(self,in_feats,out_dim):
        super(GCNModel, self).__init__()
        self.layers = nn.ModuleList()

        # input layer
        self.layer1 = dgl_conv.GraphConv(in_feats, 32)
        self.dropout1 = nn.Dropout(0.3)
        self.layer2 = dgl_conv.GraphConv(32, 16)
        self.dropout2 = nn.Dropout(0.3)
        self.layer3 = dgl_conv.GraphConv(16, out_dim)

    def forward(self, g, features):
        h = self.dropout1(F.relu(self.layer1(g,features)))
        h1 = self.dropout2(F.relu(self.layer2(g,h)))
        h2 = self.layer3(g,h1)
        return h2

In [None]:
class GraphSAGEModel(nn.Module):
    def __init__(self,
                 in_feats,
                 n_hidden,
                 out_dim,
                 n_layers,
                 activation,
                 dropout,
                 aggregator_type):
        super(GraphSAGEModel, self).__init__()
        self.layers = nn.ModuleList()

        # input layer
        self.layers.append(dgl_conv.SAGEConv(in_feats, n_hidden, aggregator_type,
                                         feat_drop=dropout, activation=activation))
        
        self.layers.append(dgl_conv.SAGEConv(n_hidden, 502, aggregator_type,
                                             feat_drop=dropout, activation=activation))
        # output layer
        self.layers.append(dgl_conv.SAGEConv(502, 20, aggregator_type,
                                         feat_drop=dropout, activation=None))

    def forward(self, g, features):
        h = features
        for layer in self.layers:
            h = layer(g, h)
        return h

In [27]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
import networkx as nx

In [29]:
raw_edge_list = open('/content/drive/My Drive/Colab Notebooks/DSLab/yeast.edgelist','r')
G = nx.Graph()
G = nx.parse_edgelist(raw_edge_list, delimiter='\t', create_using=G,nodetype=str, data=(('weight', float),))
# Get graph edges and nodes from networkx graph object
nodes = G.nodes
edges = G.edges

print("Graph's Nodes : {} / Edges : {}".format(len(nodes), len(edges)))

Graph's Nodes : 6526 / Edges : 532180


In [30]:
G.remove_edges_from(nx.selfloop_edges(G))

In [None]:
!pip install stellargraph

Collecting stellargraph
[?25l  Downloading https://files.pythonhosted.org/packages/74/78/16b23ef04cf6fb24a7dea9fd0e03c8308a56681cc5efe29f16186210ba04/stellargraph-1.2.1-py3-none-any.whl (435kB)
[K     |████████████████████████████████| 440kB 3.3MB/s 
Installing collected packages: stellargraph
Successfully installed stellargraph-1.2.1


In [None]:
from stellargraph import StellarGraph
from stellargraph.data import EdgeSplitter

In [None]:
es_test = EdgeSplitter(G)
graph_test, examples_test, link_labels_test = es_test.train_test_split(
    p=0.25, method="global"
)

** Sampled 132623 positive and 132623 negative edges. **


In [None]:
from sklearn.model_selection import train_test_split
es_train = EdgeSplitter(graph_test, G)
graph_train, examples_train, link_labels_train = es_train.train_test_split(
    p=0.25, method="global"
)
examples_train,examples_val, link_labels_train, link_label_val = train_test_split(examples_train,link_labels_train,test_size=0.1)

** Sampled 99468 positive and 99468 negative edges. **


In [31]:
g = DGLGraph()
g.from_networkx(G)
g.readonly()

## Link prediction

In [32]:
adj = nx.adjacency_matrix(G)

In [37]:
features =np.matrix([[i, i] for i in range(adj.shape[0])])
#features = np.identity(adj.shape[0])
#features = Variable(torch.FloatTensor(features), requires_grad=True)

In [38]:
features

matrix([[   0,    0],
        [   1,    1],
        [   2,    2],
        ...,
        [6523, 6523],
        [6524, 6524],
        [6525, 6525]])

In [39]:
in_feats = features.shape[0]

In [40]:
# NCE loss
def NCE_loss(pos_score, neg_score, neg_sample_size):
    pos_score = F.logsigmoid(pos_score)
    neg_score = F.logsigmoid(-neg_score).reshape(-1, neg_sample_size)
    return -pos_score - torch.sum(neg_score, dim=1)

class LinkPrediction(nn.Module):
    def __init__(self, gconv_model):
        super(LinkPrediction, self).__init__()
        self.gconv_model = gconv_model

    def forward(self, g, features, neg_sample_size):
        emb = self.gconv_model(g, features)
        pos_g, neg_g = edge_sampler(g, neg_sample_size, return_false_neg=False)
        pos_score = score_func(pos_g, emb)
        neg_score = score_func(neg_g, emb)
        return torch.mean(NCE_loss(pos_score, neg_score, neg_sample_size))

In [None]:
class LinkPrediction2(nn.Module):
    def __init__(self, gconv_model):
        super(LinkPrediction2, self).__init__()
        self.gconv_model = gconv_model
        self.linear = nn.Linear(20,1)

    def forward(self, g, features, node_pairs,labels):
        emb = self.gconv_model(g, features)
        input_edges = calculate_cost(emb,node_pairs,labels)
        x = torch.sigmoid(self.linear(input_edges))
        x = x.view(x.shape[0],)
        return x

        #loss = F.binary_cross_entropy(preds,torch.FloatTensor(labels))
        #return loss

In [None]:
!pip install numpy_indexed

Collecting numpy_indexed
  Downloading https://files.pythonhosted.org/packages/4c/90/fe830d577400954db57a88f7022efef095745e1df4256ca5171d659d4177/numpy_indexed-0.3.5-py2.py3-none-any.whl
Installing collected packages: numpy-indexed
Successfully installed numpy-indexed-0.3.5


In [None]:
import numpy_indexed as npi

In [None]:
def calculate_cost(emb,node_pairs, labels):
  src_nodes = node_pairs[:,0]
  dst_nodes = node_pairs[:,1]

  nodes = np.asarray(graph_train.nodes)
  
  src_nid = npi.indices(nodes,src_nodes)
  dst_nid = npi.indices(nodes,dst_nodes)
  
  emb_src = emb[src_nid]
  emb_dst = emb[dst_nid]

  pos_emb_src = emb_src[np.where(labels==1)]
  pos_emb_src = emb_dst[np.where(labels==1)]

  neg_emb_src = emb_src[np.where(labels==0)]
  neg_emb_dst = emb_dst[np.where(labels==0)]



  
  
  #Hamdamard Product
  #emb_prod = torch.mul(emb_src,emb_dst)
  #emb_prod = torch.sum(torch.tensor(emb_prod), dim=1)

  #preds = torch.sigmoid(torch.tensor(emb_prod))

  #emb_score = torch.sum(emb_src * emb_dst,dim=1)
  emb_score = (emb_src+emb_dst)/2.0

  #emb_score = emb_score.view(emb_score.shape[0],1)

  #neg_score = torch.sum(neg_src,neg_dst,dim=1)
  return torch.sum(pos_emb_src*pos_emb_dst,dim=1),torch.sum(neg_emb_src*neg_emb_dst,dim=1)

In [41]:
def edge_sampler(g, neg_sample_size, edges=None, return_false_neg=True):
    sampler = dgl.contrib.sampling.EdgeSampler(g, batch_size=int(g.number_of_edges()/10),
                                               seed_edges=edges,
                                               neg_sample_size=neg_sample_size,
                                               negative_mode='tail',
                                               shuffle=True,
                                               return_false_neg=return_false_neg)
    sampler = iter(sampler)
    return next(sampler)

In [42]:
def score_func(g, emb):
    src_nid, dst_nid = g.all_edges(order='eid')
    # Get the node Ids in the parent graph.
    src_nid = g.parent_nid[src_nid]
    dst_nid = g.parent_nid[dst_nid]
    # Read the node embeddings of the source nodes and destination nodes.
    pos_heads = emb[src_nid]
    pos_tails = emb[dst_nid]
    # cosine similarity
    return torch.sum(pos_heads * pos_tails, dim=1)

In [43]:
from sklearn.metrics import accuracy_score

In [44]:
def LPEvaluate(gconv_model, g, features, eval_eids, neg_sample_size):
    gconv_model.eval()
    with torch.no_grad():
        emb = gconv_model(g, features)
        
        pos_g, neg_g = edge_sampler(g, neg_sample_size, eval_eids, return_false_neg=True)
        pos_score = score_func(pos_g, emb)
        neg_score = score_func(neg_g, emb).reshape(-1, neg_sample_size)
        filter_bias = neg_g.edata['false_neg'].reshape(-1, neg_sample_size)

        pos_score = F.logsigmoid(pos_score)
        neg_score = F.logsigmoid(neg_score)
        neg_score -= filter_bias.float()
        pos_score = pos_score.unsqueeze(1)
        rankings = torch.sum(neg_score >= pos_score, dim=1) + 1
        return np.mean(1.0/rankings.cpu().numpy())

In [None]:
def gcn_evaluate(link_pred_model, g, features, node_pairs_eval,labels):
    link_pred_model.eval()
    with torch.no_grad():
        val_preds = link_pred_model(g, features, node_pairs_eval,labels)
        #emb = gconv_model(g, features)
        #val_preds = calculate_cost(emb,node_pairs_eval,labels)
        val_preds = (val_preds > 0.5)
        val_preds = 1*val_preds

        return accuracy_score(labels,val_preds)

In [45]:
eids = np.random.permutation(g.number_of_edges())
train_eids = eids[:int(len(eids) * 0.8)]
valid_eids = eids[int(len(eids) * 0.8):int(len(eids) * 0.9)]
test_eids = eids[int(len(eids) * 0.9):]
train_g = g.edge_subgraph(train_eids, preserve_nodes=True)

In [54]:
#Model hyperparameters
out_dim = 2
dropout = 0.3

# create GraphSAGE model
gconv_model = GCNModel(in_feats,out_dim=out_dim)

In [None]:
#Model hyperparameters
n_hidden = 1632
n_layers = 1
dropout = 0.5
aggregator_type = 'gcn'

# create GraphSAGE model
gconv_model = GraphSAGEModel(in_feats,
                             n_hidden,
                             n_hidden,
                             n_layers,
                             F.relu,
                             dropout,
                             aggregator_type)

The training loop

In [55]:

# Model for link prediction
model = LinkPrediction(gconv_model)

# Training hyperparameters
weight_decay = 0.1
n_epochs = 30
lr = 0.005
neg_sample_size = 3

# use optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# initialize graph
dur = []
for epoch in range(n_epochs):
    model.train()
    loss = model(train_g, torch.FloatTensor(features), neg_sample_size)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    acc = LPEvaluate(gconv_model, g, torch.FloatTensor(features), valid_eids, neg_sample_size)
    print("Epoch {:05d} | Loss {:.4f} | MRR {:.4f}".format(epoch+1, loss.item(), acc))

RuntimeError: ignored

In [None]:
# Let's save the trained node embeddings.
acc = LPEvaluate(gconv_model, g, torch.FloatTensor(features), test_eids, neg_sample_size)
print("Test MRR {:.4f}".format(acc))

In [None]:
from torch.autograd import Variable
# Model for link prediction
model = LinkPrediction2(gconv_model)

# Training hyperparameters
weight_decay = 5e-4
n_epochs = 30
lr = 0.01
neg_sample_size = 100

dgl_graph_train = DGLGraph()
dgl_graph_train.from_networkx(graph_train)

# use optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# initialize graph
dur = []
for epoch in range(n_epochs):
    model.train()
    logits = model(dgl_graph_train, torch.FloatTensor(features), examples_train,link_labels_train)
    train_preds = (logits > 0.5)
    train_preds = 1*train_preds
    train_acc = accuracy_score(torch.FloatTensor(link_labels_train),train_preds)

    #loss = Variable(loss, requires_grad = True)
    loss = F.binary_cross_entropy(logits,torch.FloatTensor(link_labels_train))

    optimizer.zero_grad()
    loss.backward()
    
    
    optimizer.step()
    acc = gcn_evaluate(model, dgl_graph_train, torch.FloatTensor(features), examples_val,link_label_val)
    print("Epoch = {}|Loss = {:.4f}|train_accuracy = {:.4f}|val_accuracy = {:.4f}".format(epoch,loss.item(),train_acc,acc))

Epoch = 0|Loss = 0.6926|train_accuracy = 0.5170|val_accuracy = 0.4999
Epoch = 1|Loss = 0.6941|train_accuracy = 0.5000|val_accuracy = 0.4995
Epoch = 2|Loss = 0.6931|train_accuracy = 0.4998|val_accuracy = 0.5186


KeyboardInterrupt: ignored

In [None]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.0007, -0.0002, -0.0004,  ...,  0.0003,  0.0004,  0.0008],
         [-0.0004,  0.0005,  0.0005,  ..., -0.0006, -0.0005,  0.0006],
         [ 0.0006,  0.0001,  0.0005,  ..., -0.0004,  0.0004, -0.0004],
         ...,
         [ 0.0005,  0.0003,  0.0005,  ...,  0.0004,  0.0005, -0.0004],
         [-0.0003,  0.0005,  0.0005,  ..., -0.0006, -0.0005, -0.0001],
         [-0.0003, -0.0005,  0.0005,  ..., -0.0005,  0.0005, -0.0004]],
        requires_grad=True), Parameter containing:
 tensor([-0.0315,  0.0166, -0.0063,  ...,  0.0132, -0.0182, -0.0281],
        requires_grad=True), Parameter containing:
 tensor([[-2.4854e-03, -1.6238e-04, -2.9228e-04,  ...,  5.9227e-04,
           1.0550e-03,  4.1011e-04],
         [-1.0371e-03,  1.9347e-03,  1.1684e-03,  ..., -9.8494e-04,
           2.9430e-03,  5.2897e-04],
         [ 5.7473e-05,  4.6104e-04, -3.0452e-04,  ...,  4.8839e-04,
          -1.6132e-03, -3.8341e-04],
         ...,
         [ 3.3559e-05,  1.3606e-03,

In [None]:
link_labels_train

array([1, 1, 0, ..., 0, 0, 1])

In [None]:
# Let's save the trained node embeddings.
acc = LPEvaluate(gconv_model, g, torch.FloatTensor(features), test_eids, neg_sample_size)
print("Test MRR {:.4f}".format(acc))

Test MRR 0.1409
