In [63]:
import numpy as np 
import torch 
import torch.nn as nn 
import torch.optim as optim 
import scipy.sparse as sp 
import dgl
from dgl.nn import GraphConv
import dgl.function as fn 
import matplotlib.pyplot as plt 
import itertools

from sklearn.metrics import roc_auc_score

In [53]:
edges_file = f'dataset/0.edges'
feats_file = f'dataset/0.feat'

edges_u, edges_v = [], []

'''
For example of edges file:
    uid iid
    1   1
    2   10
    3   10
    3   12
      ...
'''

with open(edges_file) as f:
    for l in f:
        src, dst = tuple(int(x) - 1 for x in l.split())
        edges_u.append(src)
        edges_v.append(dst)

edges_u = np.array(edges_u)
edges_v = np.array(edges_v)

num_nodes = 0 
feats = []

'''
For example of feats file:
    uid feats
    1   0 0 0 0 0 0 1 0 0
    2   0 0 0 1 0 1 0 0 0
      ...
'''

with open(feats_file) as f :
    for l in f:
        num_nodes += 1 
        a = [int(x) for x in l.split()[1:]]
        feats.append(torch.tensor(a, dtype=torch.float))

feats = torch.stack(feats) 

g = dgl.graph((edges_u, edges_v))
g.ndata['feat'] = feats

In [74]:
import networkx as nx 
G = nx.Graph()
G.add_nodes_from(set(list(edges_u) + list(edges_v)))
G.add_edges_from(zip(edges_u, edges_v))

# Training/Testing data construction

In [75]:
u, v = g.edges()

eids = np.arange(g.number_of_edges())
eids = np.random.permutation(eids)
test_size = int(len(eids) * 0.3)
train_size = g.number_of_edges() - test_size 

# get positive edges for test and train 
test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]]
train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]]

# Find all negative edges
adj_mat = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy()))) # (u, v) 인 값만 1로 채웁니다.
adj_neg = 1 - adj_mat.todense() - np.eye(g.number_of_nodes()) # negative 인 값만 1, 아닌 값은 0으로 반환합니다. 
neg_u, neg_v = np.where(adj_neg != 0)

# split the negative edges for training and testing 
neg_eids = np.random.choice(len(neg_u), g.number_of_edges())
test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]]
train_neg_u, train_neg_v =neg_v[neg_eids[test_size:]], neg_v[neg_eids[test_size:]] 

train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes())
train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes())

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes())
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes())

train_g = dgl.remove_edges(g, eids[:test_size])
train_g = dgl.add_self_loop(train_g)

# Model definition

In [79]:
class GCN(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, h_feats)
        self.relu = nn.ReLU()
    
    def forward(self, g, in_feats):
        h = self.conv1(g, in_feats)
        h = self.relu(h)
        h = self.conv2(g, h)
        
        return h 

class DotPredictor(nn.Module):
    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h 
            g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            # u_dot_v returns a 1-element vector for each edge so you need to squeeze it.
            return g.edata['score'][:, 0]

In [82]:
model = GCN(train_g.ndata['feat'].shape[1], 16)
criterion = nn.BCEWithLogitsLoss()
predictor = DotPredictor()

def compute_loss(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score])
    labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])])
    return criterion(scores, labels)

def compute_auc(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score]).numpy()
    labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy()
    return roc_auc_score(labels, scores)

optimizer = optim.Adam(itertools.chain(model.parameters(), predictor.parameters()), lr=0.01)
num_epochs = 100 

all_logits = []
model.train()
for epoch in range(1, num_epochs+1):
    h = model(train_g, train_g.ndata['feat'])
    
    pos_score = predictor(train_pos_g, h)
    neg_score = predictor(train_neg_g, h)
    loss = compute_loss(pos_score, neg_score)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if epoch % 5 == 0:
        print(f'In epoch [{epoch}/{num_epochs}], loss: {loss.item():.4f}')

model.eval()    
with torch.no_grad():
    pos_score = predictor(test_pos_g, h)
    neg_score = predictor(test_neg_g, h)
    print(f'AUC: {compute_auc(pos_score, neg_score):.4f}')



In epoch [5/100], loss: 0.6621
In epoch [10/100], loss: 0.6565
In epoch [15/100], loss: 0.6556
In epoch [20/100], loss: 0.6547
In epoch [25/100], loss: 0.6540
In epoch [30/100], loss: 0.6537
In epoch [35/100], loss: 0.6534
In epoch [40/100], loss: 0.6532
In epoch [45/100], loss: 0.6530
In epoch [50/100], loss: 0.6529
In epoch [55/100], loss: 0.6528
In epoch [60/100], loss: 0.6527
In epoch [65/100], loss: 0.6526
In epoch [70/100], loss: 0.6525
In epoch [75/100], loss: 0.6525
In epoch [80/100], loss: 0.6524
In epoch [85/100], loss: 0.6523
In epoch [90/100], loss: 0.6523
In epoch [95/100], loss: 0.6522
In epoch [100/100], loss: 0.6522
AUC: 0.8430
