In [1]:
from torch_geometric.datasets import Twitch
import os.path as osp

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling

In [2]:
# Choose the compute backend in order of preference: CUDA, then Apple's MPS
# (only probed when this torch build exposes `torch.backends.mps`), then CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

In [3]:
# Load the English Twitch graph (stored under data/Twitch) and print a short
# summary: the single graph object first, then dataset-level statistics.
dataset = Twitch(root='data/Twitch', name='EN')
print(dataset[0])
print()
for summary_line in (
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
):
    print(summary_line)

Downloading https://graphmining.ai/datasets/ptg/twitch/EN.npz


Data(x=[7126, 128], edge_index=[2, 77774], y=[7126])

Dataset: Twitch():
Number of graphs: 1
Number of features: 128
Number of classes: 2


Processing...
Done!


In [11]:
from torch_geometric.utils import train_test_split_edges


# Split the graph's edges into train / validation / test sets; the returned
# object exposes the positive and negative edge lists reported below.
data = train_test_split_edges(dataset[0])

# Each edge list is a [2, num_edges] tensor, so size(1) is its edge count.
for label, attr in (
    ('Train edges:', 'train_pos_edge_index'),
    ('Validation edges (positive):', 'val_pos_edge_index'),
    ('Validation edges (negative):', 'val_neg_edge_index'),
    ('Test edges (positive):', 'test_pos_edge_index'),
    ('Test edges (negative):', 'test_neg_edge_index'),
):
    print(label, getattr(data, attr).size(1))

print(data)



Train edges: 60052
Validation edges (positive): 1766
Validation edges (negative): 1766
Test edges (positive): 3532
Test edges (negative): 3532
Data(x=[7126, 128], y=[7126], val_pos_edge_index=[2, 1766], test_pos_edge_index=[2, 3532], train_pos_edge_index=[2, 60052], train_neg_adj_mask=[7126, 7126], val_neg_edge_index=[2, 1766], test_neg_edge_index=[2, 3532])


In [67]:
class Net(torch.nn.Module):
    """Two-layer GCNConv encoder paired with a dot-product edge decoder."""

    def __init__(self, in_channels, hidden_channels, out_channels):
        """Create the two graph-convolution layers.

        Args:
            in_channels: size of each input node feature vector.
            hidden_channels: output size of the first convolution.
            out_channels: output size of the second convolution, i.e. the
                dimensionality of the node embeddings returned by `encode`.
        """
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def encode(self, x, edge_index):
        """Return node embeddings: conv1 -> ReLU -> conv2 over `edge_index`."""
        hidden = torch.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

    def decode(self, z, edge_label_index):
        """Score each candidate edge as the dot product of its endpoint embeddings.

        Row 0 of `edge_label_index` selects the first endpoint of every edge
        and row 1 the second; one raw score (logit) is returned per column.
        """
        first_endpoint, second_endpoint = edge_label_index[0], edge_label_index[1]
        return torch.sum(z[first_endpoint] * z[second_endpoint], dim=-1)

    def decode_all(self, z):
        """Return, as a [2, K] index tensor, every node pair whose score is > 0."""
        pairwise_scores = torch.matmul(z, z.t())
        return torch.nonzero(pairwise_scores > 0, as_tuple=False).t()


# Build the link-prediction model (128-d inputs on this dataset -> 64 hidden
# -> 32-d embeddings) on the selected device, with its optimizer and loss.
model = Net(
    in_channels=dataset.num_features,
    hidden_channels=64,
    out_channels=32,
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# The decoder emits raw scores, so use the logits variant of BCE.
criterion = torch.nn.BCEWithLogitsLoss()
print(device)

cpu


In [53]:
def train():
    """Run one full-batch optimisation step and return the loss as a float.

    The loss is the sum of two BCE-with-logits terms: one over the training
    positive edges (target 1) and one over an equal number of freshly sampled
    negative edges (target 0).
    """
    model.train()
    optimizer.zero_grad()

    # The training edges serve both as the message-passing graph and as the
    # positive examples, so move them to the device once and reuse them.
    train_edges = data.train_pos_edge_index.to(device)
    z = model.encode(data.x.to(device), train_edges)

    pos_logits = model.decode(z, train_edges)
    pos_targets = torch.ones(pos_logits.size(0), device=device)
    pos_loss = criterion(pos_logits, pos_targets)

    # Draw as many negatives as there are positives; sampling runs on the
    # tensor stored in `data` and the result is then moved to the device.
    sampled_negatives = negative_sampling(
        edge_index=data.train_pos_edge_index,
        num_nodes=data.num_nodes,
        num_neg_samples=train_edges.size(1),
    ).to(device)
    neg_logits = model.decode(z, sampled_negatives)
    neg_targets = torch.zeros(neg_logits.size(0), device=device)
    neg_loss = criterion(neg_logits, neg_targets)

    total_loss = pos_loss + neg_loss
    total_loss.backward()
    optimizer.step()
    return total_loss.item()

In [54]:
def test(pos_edge_index, neg_edge_index):
    """Evaluate the model on one set of positive and negative edges.

    Node embeddings are computed from the training edges only; the supplied
    edges are then scored with the decoder.

    Args:
        pos_edge_index: [2, P] tensor of edges labelled 1.
        neg_edge_index: [2, N] tensor of edges labelled 0.

    Returns:
        (loss, acc): BCE-with-logits loss over all P + N edges, and the
        fraction of edges classified correctly when predicting "edge"
        whenever sigmoid(score) > 0.5.
    """
    model.eval()
    with torch.no_grad():
        z = model.encode(data.x.to(device), data.train_pos_edge_index.to(device))

    pos_logits = model.decode(z, pos_edge_index.to(device))
    neg_logits = model.decode(z, neg_edge_index.to(device))

    # Positives first, negatives second, in both the scores and the labels.
    logits = torch.cat([pos_logits, neg_logits])
    labels = torch.cat([
        torch.ones(pos_logits.size(0), device=device),
        torch.zeros(neg_logits.size(0), device=device),
    ])

    loss = criterion(logits, labels).item()
    predicted_edge = torch.sigmoid(logits) > 0.5
    num_correct = predicted_edge.eq(labels).sum().item()
    return loss, num_correct / labels.size(0)

In [68]:
# Train for 100 epochs, scoring the validation and test splits after every
# optimisation step and logging one line per epoch.
for epoch in range(1, 101):
    loss = train()
    # Validation is evaluated first, then test (tuple elements run in order).
    (val_loss, val_acc), (test_loss, test_acc) = (
        test(data.val_pos_edge_index, data.val_neg_edge_index),
        test(data.test_pos_edge_index, data.test_neg_edge_index),
    )
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}, Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Loss: 1.4171, Val Loss: 0.6230, Val Acc: 0.6186, Test Loss: 0.6214, Test Acc: 0.6266
Epoch: 002, Loss: 1.1859, Val Loss: 0.6426, Val Acc: 0.5445, Test Loss: 0.6487, Test Acc: 0.5409
Epoch: 003, Loss: 1.2423, Val Loss: 0.6212, Val Acc: 0.5640, Test Loss: 0.6254, Test Acc: 0.5607
Epoch: 004, Loss: 1.1838, Val Loss: 0.6029, Val Acc: 0.6022, Test Loss: 0.6035, Test Acc: 0.5951
Epoch: 005, Loss: 1.1319, Val Loss: 0.6005, Val Acc: 0.6240, Test Loss: 0.5996, Test Acc: 0.6246
Epoch: 006, Loss: 1.1157, Val Loss: 0.5937, Val Acc: 0.6435, Test Loss: 0.5935, Test Acc: 0.6379
Epoch: 007, Loss: 1.0977, Val Loss: 0.5822, Val Acc: 0.6535, Test Loss: 0.5846, Test Acc: 0.6437
Epoch: 008, Loss: 1.0706, Val Loss: 0.5740, Val Acc: 0.6526, Test Loss: 0.5796, Test Acc: 0.6427
Epoch: 009, Loss: 1.0568, Val Loss: 0.5693, Val Acc: 0.6540, Test Loss: 0.5769, Test Acc: 0.6462
Epoch: 010, Loss: 1.0435, Val Loss: 0.5659, Val Acc: 0.6602, Test Loss: 0.5737, Test Acc: 0.6549
Epoch: 011, Loss: 1.0366, Val 

In [18]:
# Keep a handle on the original, un-split graph: unlike the split `data`
# object printed earlier, it still carries the full `edge_index`.
gData = dataset[0]
print(gData)

Data(x=[7126, 128], edge_index=[2, 77774], y=[7126])


In [21]:
def get_neighbor_count(data, node_index):
    """Count a node's neighbors and the neighbors of those neighbors.

    Edges are read as directed (row 0 = source, row 1 = target) from
    `data.edge_index`.

    Args:
        data: object exposing `edge_index` (a [2, E] integer tensor) and
            `num_nodes`.
        node_index: id of the node to inspect; must satisfy
            0 <= node_index < data.num_nodes.

    Returns:
        (first_hop, second_hop) as Python ints. `first_hop` is the number of
        edges leaving `node_index`. `second_hop` is the total number of edges
        leaving those neighbors; nothing is de-duplicated and the query node
        itself is not excluded, so it is a sum of degrees rather than a count
        of distinct nodes.

    Raises:
        ValueError: if `node_index` is outside the valid node range.
    """
    if node_index < 0 or node_index >= data.num_nodes:
        raise ValueError("exceed the dataset")
    edge_index = data.edge_index
    sources, targets = edge_index[0], edge_index[1]
    neighbors = targets[sources == node_index]
    # One pass over the edge list gives every node's out-degree; summing the
    # neighbors' degrees replaces a full edge-list scan per neighbor.
    out_degree = torch.bincount(sources, minlength=data.num_nodes)
    neighbor_of_neighbor_count = int(out_degree[neighbors].sum())
    return neighbors.size(0), neighbor_of_neighbor_count

In [26]:
# Print the neighborhood sizes for every ordered pair of distinct nodes among
# the first 50 nodes.
# Each line reads: node i, node j, counts for node i, counts for node j,
# where "counts" is the tuple
# (number of neighbors of the node, total neighbors of those neighbors).
# The counts depend only on the node, so compute them once per node up front
# instead of twice for every printed pair.
neighbor_counts = [get_neighbor_count(gData, node) for node in range(50)]
for i in range(50):
    for j in range(50):
        if i != j:
            print(i, j, neighbor_counts[i], neighbor_counts[j])

0 1 (2, 6) (27, 801)
0 2 (2, 6) (2, 339)
0 3 (2, 6) (8, 133)
0 4 (2, 6) (2, 81)
0 5 (2, 6) (5, 833)
0 6 (2, 6) (11, 194)
0 7 (2, 6) (3, 13)
0 8 (2, 6) (2, 13)
0 9 (2, 6) (13, 1840)
0 10 (2, 6) (2, 6)
0 11 (2, 6) (5, 103)
0 12 (2, 6) (3, 9)
0 13 (2, 6) (13, 290)
0 14 (2, 6) (11, 648)
0 15 (2, 6) (6, 551)
0 16 (2, 6) (6, 96)
0 17 (2, 6) (7, 131)
0 18 (2, 6) (3, 474)
0 19 (2, 6) (10, 237)
0 20 (2, 6) (8, 176)
0 21 (2, 6) (8, 329)
0 22 (2, 6) (3, 732)
0 23 (2, 6) (59, 2283)
0 24 (2, 6) (123, 5005)
0 25 (2, 6) (2, 76)
0 26 (2, 6) (92, 4983)
0 27 (2, 6) (2, 5)
0 28 (2, 6) (3, 80)
0 29 (2, 6) (4, 392)
0 30 (2, 6) (38, 1803)
0 31 (2, 6) (3, 138)
0 32 (2, 6) (10, 377)
0 33 (2, 6) (4, 49)
0 34 (2, 6) (12, 755)
0 35 (2, 6) (14, 272)
0 36 (2, 6) (7, 837)
0 37 (2, 6) (5, 1161)
0 38 (2, 6) (3, 798)
0 39 (2, 6) (5, 134)
0 40 (2, 6) (5, 810)
0 41 (2, 6) (8, 191)
0 42 (2, 6) (5, 77)
0 43 (2, 6) (14, 333)
0 44 (2, 6) (6, 114)
0 45 (2, 6) (2, 23)
0 46 (2, 6) (10, 1675)
0 47 (2, 6) (2, 46)
0 48 (2, 6) (11

In [69]:
def predict_edge(model, node_index1, node_index2, z=None):
    """Return the predicted probability of an edge between two nodes.

    Args:
        model: network exposing `encode(x, edge_index)` and
            `decode(z, edge_label_index)`.
        node_index1: id of the first endpoint.
        node_index2: id of the second endpoint.
        z: optional precomputed node embeddings. When omitted, the embeddings
            are computed here from the training edges; pass them in when
            scoring many pairs to avoid re-running the encoder on every call.

    Returns:
        float: sigmoid of the decoder score for the pair.
    """
    model.eval()
    with torch.no_grad():
        if z is None:
            z = model.encode(data.x.to(device), data.train_pos_edge_index.to(device))
        # A single candidate edge, shaped [2, 1] as the decoder expects.
        edge_label_index = torch.tensor([[node_index1], [node_index2]], device=device)
        prediction = torch.sigmoid(model.decode(z, edge_label_index))
        return prediction.item()


# NOTE(review): these two names are not used in this cell; kept in case a
# later cell reads them.
node_index1 = 45
node_index2 = 17

# The embeddings are identical for every pair, so run the encoder once and
# reuse the result for all 380 ordered pairs below.
model.eval()
with torch.no_grad():
    node_embeddings = model.encode(data.x.to(device), data.train_pos_edge_index.to(device))

for i in range(20):
    for j in range(20):
        if i != j:
            prediction = predict_edge(model, i, j, z=node_embeddings)
            print(f"predict node {i} and node {j} probability that has an edge: {prediction}")

predict node 0 and node 1 probability that has an edge: 0.44241249561309814
predict node 0 and node 2 probability that has an edge: 0.5531735420227051
predict node 0 and node 3 probability that has an edge: 0.4851957857608795
predict node 0 and node 4 probability that has an edge: 0.6019557118415833
predict node 0 and node 5 probability that has an edge: 0.5103330612182617
predict node 0 and node 6 probability that has an edge: 0.4123033285140991
predict node 0 and node 7 probability that has an edge: 0.6074438095092773
predict node 0 and node 8 probability that has an edge: 0.5103007555007935
predict node 0 and node 9 probability that has an edge: 0.4361571669578552
predict node 0 and node 10 probability that has an edge: 0.5611250400543213
predict node 0 and node 11 probability that has an edge: 0.5041846036911011
predict node 0 and node 12 probability that has an edge: 0.3853101432323456
predict node 0 and node 13 probability that has an edge: 0.3839006721973419
predict node 0 and n