# Graph AutoEncoder Link prediction
---

In [1]:
# Shell escape: ask the NVIDIA driver which GPUs (if any) are attached to this runtime.
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [1]:
# Detect Google Colab by trying to import its helper package.
# Only a failed import means "not on Colab"; any other error (for example a
# failed Drive mount) is allowed to surface instead of being silently
# reported as "not using Google CoLab".
try:
    from google.colab import drive
except ImportError:
    print("Note: not using Google CoLab")
    COLAB = False
else:
    # Mount Google Drive so later cells can reach files under /content/drive.
    drive.mount('/content/drive', force_remount=True)
    COLAB = True
    print("Note: using Google CoLab")

Mounted at /content/drive
Note: using Google CoLab


In [2]:
import os
import torch
# Export the installed torch version string so the shell commands below can
# expand it as ${TORCH} when building the wheel-index URL.
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# Install the PyG companion packages from the wheel index matching this torch
# build, then PyTorch Geometric itself from the GitHub repository.
# NOTE(review): none of these installs is version-pinned, so a later re-run
# may pull different code than the run recorded in this notebook.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

1.11.0+cu113


In [3]:
# The Drive-backed project folder only exists on Colab (see the COLAB flag set
# by the mount cell). Elsewhere, stay in the current working directory instead
# of failing on a path that is not there.
# NOTE(review): outside Colab the working directory presumably has to contain
# the local `data` module imported below - confirm.
if COLAB:
    os.chdir('/content/drive/MyDrive/data/aesthetics')

In [4]:
import torch
import os.path as osp
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges
from data import get_data, get_data_split
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score

from torch_geometric.utils import negative_sampling
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

In [5]:
# Load the full graph and the project's train/val/test graphs, casting every
# edge index to int64.
path = ""
data = get_data(path)
data.edge_index = data.edge_index.to(torch.long)

train_data, val_data, test_data = get_data_split(path)
for _split in (train_data, val_data, test_data):
    _split.edge_index = _split.edge_index.to(torch.long)

# Rebind `data` to the split version; later cells read its
# train_pos_edge_index and {val,test}_{pos,neg}_edge_index attributes.
# NOTE(review): this split is independent of get_data_split() above, while the
# encoder propagates over train_data.edge_index - confirm that the two splits
# are meant to be combined.
data = train_test_split_edges(data)



In [6]:
# Pick the compute device: CUDA when a GPU is visible to torch, CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: {}'.format(device))

Device: cpu


In [7]:
class SimpleGAE(torch.nn.Module):
    """Two-layer GCN encoder with an inner-product decoder for link prediction.

    Args:
        in_channels: Width of the input node features. ``None`` (default)
            reads it from the notebook-level ``data.x``.
        hidden_channels: Output width of the first GCN layer (default 128).
        out_channels: Width of the node embeddings (default 64).
    """

    def __init__(self, in_channels=None, hidden_channels=128, out_channels=64):
        super().__init__()
        if in_channels is None:
            # Size of the last dimension of x; unlike indexing row 1 this also
            # works when the graph has a single node.
            in_channels = data.x.size(-1)
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def encode(self, x=None, edge_index=None):
        """Compute node embeddings.

        Args:
            x: Node feature tensor. Defaults to the notebook-level ``data.x``.
            edge_index: Edges used for message passing. Defaults to the
                notebook-level ``train_data.edge_index``.

        Returns:
            The output of the second GCN layer (one embedding row per node).
        """
        if x is None:
            x = data.x
        if edge_index is None:
            edge_index = train_data.edge_index
        # `train_data` is never moved to the compute device by the notebook,
        # so align the edge index with the features to avoid a CPU/GPU mismatch.
        edge_index = edge_index.to(x.device)

        hidden = self.conv1(x, edge_index).relu()  # convolution 1 + ReLU
        return self.conv2(hidden, edge_index)  # convolution 2

    def decode(self, z, pos_edge_index, neg_edge_index):
        """Score candidate edges with the dot product of their endpoint embeddings.

        Args:
            z: Node embeddings, indexed by node id along dim 0.
            pos_edge_index: ``[2, P]`` tensor of positive node pairs.
            neg_edge_index: ``[2, N]`` tensor of negative node pairs.

        Returns:
            A tensor of ``P + N`` raw logits, positives first, then negatives.
        """
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def decode_all(self, z):
        """Return every node pair whose inner-product score is positive.

        A positive logit is the same as a sigmoid probability above 0.5. All
        ordered pairs are considered, including ``(i, i)``.

        Returns:
            A ``[2, E]`` tensor of predicted edges.
        """
        prob_adj = z @ z.t()  # N x N matrix of pairwise scores
        return (prob_adj > 0).nonzero(as_tuple=False).t()


In [8]:
# Build the model on the selected device and move the split graph alongside it.
model = SimpleGAE().to(device)
data = data.to(device)
# Adam over all model parameters with a learning rate of 0.01.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [13]:
def get_link_labels(pos_edge_index, neg_edge_index):
    """Build the binary target vector for a batch of candidate edges.

    Args:
        pos_edge_index: ``[2, P]`` tensor of positive edges.
        neg_edge_index: ``[2, N]`` tensor of negative edges.

    Returns:
        A float tensor ``[1, ..., 1, 0, ..., 0]`` with ``P`` ones followed by
        ``N`` zeros, on the same device as ``pos_edge_index``.
    """
    num_pos = pos_edge_index.size(1)
    num_neg = neg_edge_index.size(1)
    # Take the device from the input rather than a notebook-level global, so
    # the labels always sit next to the logits they are compared with.
    link_labels = torch.zeros(num_pos + num_neg, dtype=torch.float,
                              device=pos_edge_index.device)
    link_labels[:num_pos] = 1.
    return link_labels


def train():
    """Run one full-batch optimisation step on the training edges.

    Uses the notebook-level ``model``, ``data`` and ``optimizer``.

    Returns:
        The binary cross-entropy loss tensor for this step.
    """
    model.train()
    optimizer.zero_grad()

    pos_edges = data.train_pos_edge_index
    # Sample as many negative edges as there are positive training edges.
    neg_edges = negative_sampling(
        edge_index=pos_edges,
        num_nodes=data.num_nodes,
        num_neg_samples=pos_edges.size(1))

    embeddings = model.encode()
    logits = model.decode(embeddings, pos_edges, neg_edges)
    targets = get_link_labels(pos_edges, neg_edges)

    loss = F.binary_cross_entropy_with_logits(logits, targets)
    loss.backward()
    optimizer.step()
    return loss


@torch.no_grad()
def test():
    """Evaluate link prediction on the validation and test edges.

    Uses the notebook-level ``model`` and ``data``.

    Returns:
        ``[val_auc, test_auc]``: ROC-AUC scores over the positive and negative
        edges of each split.
    """
    model.eval()
    # The embeddings do not depend on which split is being scored, so encode
    # once instead of once per split.
    z = model.encode()

    perfs = []
    for prefix in ["val", "test"]:
        pos_edge_index = data[f'{prefix}_pos_edge_index']
        neg_edge_index = data[f'{prefix}_neg_edge_index']

        link_logits = model.decode(z, pos_edge_index, neg_edge_index)
        link_probs = link_logits.sigmoid()
        link_labels = get_link_labels(pos_edge_index, neg_edge_index)

        perfs.append(roc_auc_score(link_labels.cpu(), link_probs.cpu()))
    return perfs

In [14]:
# Train for 100 epochs with model selection on validation AUC.
# In the log line, "Val" is the best validation AUC seen so far and "Test" is
# the test AUC recorded at that same epoch.
log = 'Epoch: {:03d}, Loss: {:.4f}, Val: {:.4f}, Test: {:.4f}'
best_val_perf = test_perf = 0
for epoch in range(1, 101):
    train_loss = train()
    val_perf, tmp_test_perf = test()
    if val_perf > best_val_perf:
        best_val_perf, test_perf = val_perf, tmp_test_perf
    if epoch % 10 != 0:
        continue  # report every tenth epoch only
    print(log.format(epoch, train_loss, best_val_perf, test_perf))


Epoch: 010, Loss: 0.6158, Val: 0.7656, Test: 0.6851
Epoch: 020, Loss: 0.6112, Val: 0.8125, Test: 0.6920
Epoch: 030, Loss: 0.6314, Val: 0.8125, Test: 0.6920
Epoch: 040, Loss: 0.5745, Val: 0.8125, Test: 0.6920
Epoch: 050, Loss: 0.5864, Val: 0.8125, Test: 0.6920
Epoch: 060, Loss: 0.5764, Val: 0.8125, Test: 0.6920
Epoch: 070, Loss: 0.5666, Val: 0.8125, Test: 0.6920
Epoch: 080, Loss: 0.5938, Val: 0.8125, Test: 0.6920
Epoch: 090, Loss: 0.5719, Val: 0.8125, Test: 0.6920
Epoch: 100, Loss: 0.5496, Val: 0.8125, Test: 0.6920


In [16]:
# Final inference: predict edges for every node pair from the trained model.
# Eval mode is set explicitly so the cell does not rely on the last test()
# call, and autograd is disabled because no gradients are needed here.
model.eval()
with torch.no_grad():
    z = model.encode()
    final_edge_index = model.decode_all(z)

In [17]:
# Display the predicted edge list: each column is one node pair, with the two
# node indices in rows 0 and 1.
final_edge_index

tensor([[ 0,  0,  0,  ..., 43, 43, 43],
        [ 0,  1,  3,  ..., 41, 42, 43]])