# Graph Auto Encoder with PyG

In [1]:
import argparse
import os
import time

import torch

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid

from torch_geometric.nn import GAE, GCNConv

In [2]:
device = torch.device('cpu')

In [3]:
DATASET_NAME="Cora"

In [4]:
transform = T.Compose([
    T.NormalizeFeatures(),
    T.RandomLinkSplit(num_val=0., num_test=0.1, is_undirected=True,
                      split_labels=True, add_negative_train_samples=False),
])
# path = os.path.join("/home/deusebio/Personal/graph_machine_learning/data", 'data')
path = os.path.join(os.getcwd(), 'data')
dataset = Planetoid(path, DATASET_NAME, transform=transform)
train_data, val_data, test_data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [5]:
print(f"Train edges (positive): {train_data.pos_edge_label_index.shape[1]}")
print(f"Test edges (positive): {test_data.pos_edge_label_index.shape[1]}")
print(f"Test edges (negative): {test_data.neg_edge_label_index.shape[1]}")

Train edges (positive): 4751
Test edges (positive): 527
Test edges (negative): 527


In [6]:
class GCNEncoder(torch.nn.Module):
    def __init__(self, num_node_features, num_embedding):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, 2 * num_embedding)
        self.conv2 = GCNConv(2 * num_embedding, num_embedding)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)

In [7]:
n_features = dataset.num_features
n_embeddings = 20

In [8]:
model = GAE(GCNEncoder(n_features, n_embeddings))

In [9]:
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [10]:
for epoch in range(20):  # loop over the dataset multiple times

    model.train()

    # zero the parameter gradients
    optimizer.zero_grad()

    z = model.encode(train_data.x, train_data.edge_index)
    loss = model.recon_loss(z, train_data.pos_edge_label_index)

    loss.backward()
    optimizer.step()
    
    # Test/Evaluate
    model.eval()
    z = model.encode(test_data.x, test_data.edge_index)
    auc, ap = model.test(z, test_data.pos_edge_label_index, test_data.neg_edge_label_index)
    
    print(f"Performance on validation set => AUC: {auc} AP: {ap}")

Performance on validation set => AUC: 0.7031746774733644 AP: 0.7408616667192883
Performance on validation set => AUC: 0.7003589830374215 AP: 0.7384835516148203
Performance on validation set => AUC: 0.7003733855665054 AP: 0.7387001033670365
Performance on validation set => AUC: 0.6996730625897907 AP: 0.738915861615899
Performance on validation set => AUC: 0.6991419693298143 AP: 0.7400909179536861
Performance on validation set => AUC: 0.6968951747926936 AP: 0.7390546559098561
Performance on validation set => AUC: 0.6940110683436012 AP: 0.7372227705695125
Performance on validation set => AUC: 0.6927454461003353 AP: 0.7363419854717088
Performance on validation set => AUC: 0.6914996273345599 AP: 0.7352590765202852
Performance on validation set => AUC: 0.6898955456578175 AP: 0.7340942320131665
Performance on validation set => AUC: 0.6872166752481736 AP: 0.732318689778636
Performance on validation set => AUC: 0.6841309333919037 AP: 0.7299620245428802
Performance on validation set => AUC: 0.68