In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import networkx as nx
import os

try:
  import dgl
  from dgl import DGLGraph
  dgl.load_backend('pytorch')
  from dgl.nn.pytorch import conv as dgl_conv
except ImportError as e:
  os.system('pip install dgl')

try:
  from stellargraph import StellarGraph
  from stellargraph.data import EdgeSplitter
except ImportError as e:
  os.system('pip install stellargraph')

try:
  import numpy_indexed as npi
except ImportError as e:
  os.system('pip install numpy_indexed')

DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch
Using backend: pytorch


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


In [27]:
class GCNModel(nn.Module):
    def __init__(self,in_feats,n_hidden,out_dim,layer_dim,hidden_layer_dim):
        super(GCNModel, self).__init__()
        

        self.gcn_layer_1 = dgl_conv.GraphConv(in_feats, n_hidden)

        self.gcn_layer_2 = dgl_conv.GraphConv(n_hidden, out_dim)

        self.lin_layer_1 = nn.Linear(layer_dim,hidden_layer_dim)

        self.lin_layer_2 = nn.Linear(hidden_layer_dim,2)

    def forward(self, g, features, node_pairs, link_labels):
        x = features
        x = self.gcn_layer_1(g, features)
        x = F.relu(x)

        x = self.gcn_layer_2(g,x)
        x = F.relu(x)

        src = node_pairs[:,0]
        dst = node_pairs[:,1]

        emb_src = x[src]
        emb_dst = x[dst]

        z_tensor = torch.mul(emb_src,emb_dst)
        z_tensor = self.lin_layer_1(z_tensor)
        z_tensor = F.relu(z_tensor)
        z_tensor = self.lin_layer_2(z_tensor)

        return z_tensor
    
    def calculate_loss(self, features, g, node_pairs, link_labels):
      pred = self.forward(features, g, node_pairs, link_labels)
      return F.cross_entropy(pred,link_labels)
    
    def predict(self, features, g, node_pairs, link_labels):
        pred = self.forward(features, g, node_pairs, link_labels)
        loss = F.cross_entropy(pred, link_labels)
        return loss, pred


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [4]:
raw_edge_list = open('/content/drive/My Drive/Colab Notebooks/DSLab/yeast.edgelist','r')
G = nx.Graph()
G = nx.parse_edgelist(raw_edge_list, delimiter='\t', create_using=G,nodetype=str, data=(('weight', float),))
# Get graph edges and nodes from networkx graph object
nodes = G.nodes
edges = G.edges

print("Graph's Nodes : {} / Edges : {}".format(len(nodes), len(edges)))

Graph's Nodes : 6526 / Edges : 532180


In [5]:
G.remove_edges_from(nx.selfloop_edges(G))

In [6]:
G = nx.relabel.convert_node_labels_to_integers(G, first_label=0, ordering='default', label_attribute=None)

In [9]:
es_test = EdgeSplitter(G)
graph_test, examples_test, link_labels_test = es_test.train_test_split(p=0.25, method="global")

** Sampled 132623 positive and 132623 negative edges. **


In [10]:
es_train = EdgeSplitter(graph_test, G)
graph_train, examples_train, link_labels_train = es_train.train_test_split(
    p=0.25, method="global")
examples_train,examples_val, link_labels_train, link_label_val = train_test_split(examples_train,link_labels_train,test_size=0.033)

** Sampled 99468 positive and 99468 negative edges. **


In [11]:
train_nodes = list(graph_train.nodes)

In [12]:
adj = nx.adjacency_matrix(graph_train)

In [13]:
#features =np.matrix([[i, i] for i in range(adj.shape[0])])
features = np.identity(adj.shape[0])
#features = Variable(torch.FloatTensor(features), requires_grad=True)
features

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [14]:
in_feats = features.shape[0]
n_hidden = 502
out_dim =  50
weight_decay = 5e-4
n_epochs = 30
lr = 0.01
layer_dim = 50
hidden_layer_dim = 32

In [15]:
from torch.utils.data import Dataset,DataLoader

In [16]:
class EdgeDataset(Dataset):
    def __init__(self, x, y):
        super(EdgeDataset, self).__init__()
        assert x.shape[0] == y.shape[0] # assuming shape[0] = dataset size
        self.x = x
        self.y = y
    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

In [17]:
type(examples_train)

numpy.ndarray

In [18]:
traindata = EdgeDataset(examples_train, link_labels_train)
edge_loader = DataLoader(traindata, batch_size=6500, shuffle=True)

In [19]:
def get_edge_batch(loader=edge_loader):
  samples = []
  for i,data in enumerate(loader):
    samples.append(data)
  num_batches = len(samples)
  return num_batches,samples

In [28]:
from torch.autograd import Variable
g = DGLGraph()
g.from_networkx(graph_train)
g.readonly()
model = GCNModel(in_feats,n_hidden, out_dim,layer_dim,hidden_layer_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
num_epochs,data_batches = get_edge_batch()
print("Training for {} epochs.".format(num_epochs))


losses = []
train_accs = []
val_losses = []
val_accs = []

for epoch in range(num_epochs):
  

  pairs, edges = data_batches[epoch]
  pairs = Variable(pairs)
  edges = Variable(edges)
  
  optimizer.zero_grad()
  loss, pred = model.predict(g,torch.FloatTensor(features), pairs, edges)

  losses.append(loss)
  
  loss.backward()
  optimizer.step()

  pred = F.log_softmax(pred, dim=1)
  pred = pred.detach().to("cpu").numpy()
  pred = np.argmax(pred, axis=1)
  y_pred = []
  y_pred = np.append(y_pred, pred)
  train_acc = accuracy_score(edges.detach().to("cpu").numpy(), y_pred)
  train_accs.append(train_acc)

  #validation scores
  val_loss, val_pred = model.predict(g,torch.FloatTensor(features), torch.tensor(examples_val), torch.tensor(link_label_val))
  val_pred = F.log_softmax(val_pred, dim=1)
  val_pred = val_pred.detach().to("cpu").numpy()
  val_pred = np.argmax(val_pred, axis=1)
  val_y_pred = []
  val_y_pred = np.append(val_y_pred, val_pred)
  val_acc = accuracy_score(link_label_val, val_y_pred)
  val_accs.append(val_acc)



  print("epoch = {} train_loss = {:.4f} train_accuracy = {:.4f} val_loss = {:.4f} val_accuracy = {:.4f}".format(epoch+1,loss,train_acc,val_loss,val_acc))

Training for 30 epochs.
epoch = 1 train_loss = 0.6937 train_accuracy = 0.4975 val_loss = 0.6929 val_accuracy = 0.4973
epoch = 2 train_loss = 0.6929 train_accuracy = 0.5000 val_loss = 0.6900 val_accuracy = 0.5176
epoch = 3 train_loss = 0.6902 train_accuracy = 0.5080 val_loss = 0.6783 val_accuracy = 0.7831
epoch = 4 train_loss = 0.6779 train_accuracy = 0.7908 val_loss = 0.6625 val_accuracy = 0.6363
epoch = 5 train_loss = 0.6629 train_accuracy = 0.6318 val_loss = 0.6375 val_accuracy = 0.5944
epoch = 6 train_loss = 0.6365 train_accuracy = 0.5932 val_loss = 0.6213 val_accuracy = 0.5726
epoch = 7 train_loss = 0.6169 train_accuracy = 0.5749 val_loss = 0.6305 val_accuracy = 0.5813
epoch = 8 train_loss = 0.6334 train_accuracy = 0.5762 val_loss = 0.5979 val_accuracy = 0.6306
epoch = 9 train_loss = 0.5931 train_accuracy = 0.6386 val_loss = 0.5851 val_accuracy = 0.7412
epoch = 10 train_loss = 0.5809 train_accuracy = 0.7463 val_loss = 0.5860 val_accuracy = 0.7867
epoch = 11 train_loss = 0.5865 trai