In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import networkx as nx
import os

try:
  import dgl
  from dgl import DGLGraph
  dgl.load_backend('pytorch')
  from dgl.nn.pytorch import conv as dgl_conv
except ImportError as e:
  os.system('pip install dgl')

try:
  from stellargraph import StellarGraph
  from stellargraph.data import EdgeSplitter
except ImportError as e:
  os.system('pip install stellargraph')

try:
  import numpy_indexed as npi
except ImportError as e:
  os.system('pip install numpy_indexed')

Using backend: pytorch
Using backend: pytorch


In [2]:
class GCNModel(nn.Module):
    def __init__(self,in_feats,n_hidden,out_dim,layer_dim,hidden_layer_dim):
        super(GCNModel, self).__init__()
        

        self.gcn_layer_1 = dgl_conv.GraphConv(in_feats, n_hidden)

        self.gcn_layer_2 = dgl_conv.GraphConv(n_hidden, out_dim)

        self.lin_layer_1 = nn.Linear(layer_dim,hidden_layer_dim)

        self.lin_layer_2 = nn.Linear(hidden_layer_dim,2)

    def forward(self, g, features, node_pairs, link_labels):
        x = features
        x = self.gcn_layer_1(g, features)
        x = F.relu(x)

        x = self.gcn_layer_2(g,x)
        x = F.relu(x)

        src = node_pairs[:,0]
        dst = node_pairs[:,1]

        emb_src = x[src]
        emb_dst = x[dst]

        z_tensor = torch.mul(emb_src,emb_dst)
        z_tensor = self.lin_layer_1(z_tensor)
        z_tensor = F.relu(z_tensor)
        z_tensor = self.lin_layer_2(z_tensor)

        return z_tensor
    
    def calculate_loss(self, features, g, node_pairs, link_labels):
      pred = self.forward(features, g, node_pairs, link_labels)
      return F.cross_entropy(pred,link_labels)
    
    def predict(self, features, g, node_pairs, link_labels):
        pred = self.forward(features, g, node_pairs, link_labels)
        loss = F.cross_entropy(pred, link_labels)
        return loss, pred


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
raw_edge_list = open('/content/drive/My Drive/Colab Notebooks/DSLab/yeast.edgelist','r')
G = nx.Graph()
G = nx.parse_edgelist(raw_edge_list, delimiter='\t', create_using=G,nodetype=str, data=(('weight', float),))
# Get graph edges and nodes from networkx graph object
nodes = G.nodes
edges = G.edges

print("Graph's Nodes : {} / Edges : {}".format(len(nodes), len(edges)))

Graph's Nodes : 6526 / Edges : 532180


In [5]:
G.remove_edges_from(nx.selfloop_edges(G))

In [6]:
G = nx.relabel.convert_node_labels_to_integers(G, first_label=0, ordering='default', label_attribute=None)

In [7]:
es_test = EdgeSplitter(G)
graph_test, examples_test, link_labels_test = es_test.train_test_split(p=0.25, method="global")

** Sampled 132623 positive and 132623 negative edges. **


In [8]:
es_train = EdgeSplitter(graph_test, G)
graph_train, examples_train, link_labels_train = es_train.train_test_split(
    p=0.25, method="global")
examples_train,examples_val, link_labels_train, link_label_val = train_test_split(examples_train,link_labels_train,test_size=0.033)

** Sampled 99468 positive and 99468 negative edges. **


In [9]:
train_nodes = list(graph_train.nodes)

In [10]:
adj = nx.adjacency_matrix(graph_train)

In [11]:
#features =np.matrix([[i, i] for i in range(adj.shape[0])])
features = np.identity(adj.shape[0])
#features = Variable(torch.FloatTensor(features), requires_grad=True)
features

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [12]:
in_feats = features.shape[0]
n_hidden = 502
out_dim =  50
weight_decay = 5e-4
n_epochs = 30
lr = 0.01
layer_dim = 50
hidden_layer_dim = 32

In [13]:
from torch.utils.data import Dataset,DataLoader

In [14]:
class EdgeDataset(Dataset):
    def __init__(self, x, y):
        super(EdgeDataset, self).__init__()
        assert x.shape[0] == y.shape[0] # assuming shape[0] = dataset size
        self.x = x
        self.y = y
    def __len__(self):
        return self.y.shape[0]

    def __getitem__(self, index):
        return self.x[index], self.y[index]

In [15]:
type(examples_train)

numpy.ndarray

In [16]:
traindata = EdgeDataset(examples_train, link_labels_train)
edge_loader = DataLoader(traindata, batch_size=6500, shuffle=True)

In [17]:
def get_edge_batch(loader=edge_loader):
  samples = []
  for i,data in enumerate(loader):
    samples.append(data)
  num_batches = len(samples)
  return num_batches,samples

In [None]:
from torch.autograd import Variable
g = DGLGraph()
g.from_networkx(graph_train)
g.readonly()
model = GCNModel(in_feats,n_hidden, out_dim,layer_dim,hidden_layer_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
num_epochs,data_batches = get_edge_batch()
print("Training for {} epochs.".format(num_epochs))


losses = []
train_accs = []
val_losses = []
val_accs = []

for epoch in range(num_epochs):
  

  pairs, edges = data_batches[epoch]
  pairs = Variable(pairs)
  edges = Variable(edges)
  optimizer.zero_grad()
  loss, pred = model.predict(g,torch.FloatTensor(features), pairs, edges)
  losses.append(loss)
  
  loss.backward()
  optimizer.step()

  pred = F.log_softmax(pred, dim=1)
  pred = pred.detach().to("cpu").numpy()
  pred = np.argmax(pred, axis=1)
  y_pred = []
  y_pred = np.append(y_pred, pred)
  train_acc = accuracy_score(edges.detach().to("cpu").numpy(), y_pred)
  train_accs.append(train_acc)

  #validation scores
  #val_loss, val_pred = model.predict(g,torch.FloatTensor(features), examples_val, torch.Tensor(link_label_val))
  #val_pred = F.log_softmax(val_pred, dim=1)
  #val_pred = val_pred.detach().to("cpu").numpy()
  #val_pred = np.argmax(val_pred, axis=1)
  #val_y_pred = []
  #val_y_pred = np.append(val_y_pred, val_pred)
  #val_acc = accuracy_score(link_label_val, val_y_pred)
  #val_accs.append(val_acc)



  print("epoch = {} train_loss = {:.4f} train_accuracy = {:.4f}".format(epoch+1,loss,train_acc))

Training for 30 epochs.
epoch = 1 train_loss = 0.6940 train_accuracy = 0.4934
epoch = 2 train_loss = 0.6927 train_accuracy = 0.5009
epoch = 3 train_loss = 0.6890 train_accuracy = 0.5009
epoch = 4 train_loss = 0.6756 train_accuracy = 0.4934
epoch = 5 train_loss = 0.6424 train_accuracy = 0.4969
epoch = 6 train_loss = 0.6399 train_accuracy = 0.4968
epoch = 7 train_loss = 0.6152 train_accuracy = 0.5232
epoch = 8 train_loss = 0.6109 train_accuracy = 0.5657
epoch = 9 train_loss = 0.6049 train_accuracy = 0.6252
epoch = 10 train_loss = 0.5970 train_accuracy = 0.6751
epoch = 11 train_loss = 0.5849 train_accuracy = 0.6680
epoch = 12 train_loss = 0.5849 train_accuracy = 0.6660
epoch = 13 train_loss = 0.5719 train_accuracy = 0.7077
epoch = 14 train_loss = 0.5560 train_accuracy = 0.7632
epoch = 15 train_loss = 0.5588 train_accuracy = 0.7795
epoch = 16 train_loss = 0.5403 train_accuracy = 0.7797
epoch = 17 train_loss = 0.5236 train_accuracy = 0.7792
epoch = 18 train_loss = 0.5365 train_accuracy = 0.