In [31]:
import numpy as np
import torch
from torch_geometric.datasets import Planetoid

In [32]:
seed = 42
datasets = ["Cora", "Citeseer", "Pubmed"]
datasets_root = "./data"
default_tensor_data_type = torch.float32

In [33]:
torch.manual_seed(seed)
np.random.seed(seed)
torch.set_default_dtype(default_tensor_data_type)

if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## Data preparation

In [34]:
import torch

dataset_name = 'Cora'

# Download the dataset
dataset = Planetoid(root='./data', name=dataset_name)

# Access the data
data = dataset[0]

# Print some information about the dataset
print(f'Dataset: {dataset_name}')
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Number of features: {data.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# Save the dataset
torch.save(data, f'./data/{dataset_name.lower()}_dataset.pt')

Dataset: Cora
Number of nodes: 2708
Number of edges: 10556
Number of features: 1433
Number of classes: 7


In [35]:
def create_A(edge_index: torch.Tensor):
    # assuming edge_index has shape (2, num_edges)
    assert edge_index.shape[0] == 2

    num_nodes = torch.max(edge_index).item() + 1
    
    A = torch.zeros((num_nodes, num_nodes))
    A[edge_index[0], edge_index[1]] = 1.0
    
    return A

In [36]:
train_mask = data.train_mask
val_mask = data.test_mask
num_features = data.x.shape[1]
num_labels = len(torch.unique(data.y))
A = create_A(data.edge_index).to(device=device, dtype= default_tensor_data_type)
X = data.x.to(device=device, dtype= default_tensor_data_type)
y = data.y.to(device=device, dtype= default_tensor_data_type)

## GCN

In [43]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def normalize_A(A: torch.Tensor):
    degree = torch.diag(torch.sum(A, dim=1))

    # convert degree to torch.float32 for inverse to work
    degree_hat = torch.sqrt(
        # degree
        torch.inverse(
            degree.to(device=device, dtype=torch.float32)
        )
    ).to(device=device, dtype=default_tensor_data_type)
    
    return degree_hat @ A @ degree_hat

def add_self_loops(A:torch.Tensor, factor: int):
    num_nodes = A.shape[0]
    self_loops_mask: torch.Tensor = torch.diag(torch.ones(num_nodes)) * factor
    return A + self_loops_mask.to(device=device, dtype=default_tensor_data_type)

class GraphConvolutionLayer(nn.Module):
    def __init__(self, in_features: int, out_features: int):
        super(GraphConvolutionLayer, self).__init__()
        self.weight = nn.Parameter(
            torch.Tensor(in_features, out_features).to(
                device=device, dtype=default_tensor_data_type
            )
        )
        torch.manual_seed(seed)
        nn.init.xavier_uniform_(self.weight)

    def forward(self, X: torch.Tensor, A: torch.Tensor):
        A = A.to(device=device, dtype= default_tensor_data_type)
        X = X.to(device=device, dtype= default_tensor_data_type)
        self.weight = self.weight.to(device=device, dtype= default_tensor_data_type)
        return A @ X @ self.weight

class GCN(nn.Module):
    def __init__(self, in_features, hidden_features, output_classes, dropout_prob=0.5):
        super().__init__()
        self.layer_1 = GraphConvolutionLayer(in_features, hidden_features)
        self.layer_2 = GraphConvolutionLayer(hidden_features, output_classes)
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, X, A) -> torch.Tensor:
        A = normalize_A(A)
        A = add_self_loops(A, 2)
        H = F.relu(
            self.dropout(
                self.layer_1(X, A)
            )
        )
        Y = self.layer_2(H, A)
        Y = F.log_softmax(Y, dim=1)
        return Y

## Evaluation

In [38]:
def compute_accuracy(y_true: torch.Tensor, y_pred: torch.Tensor) -> float:
    predicted_labels = torch.argmax(y_pred, dim=1)
    correct_predictions = (predicted_labels == y_true).sum().item()
    accuracy = correct_predictions / len(y_true)
    return accuracy

def compute_loss(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
    loss_function = torch.nn.NLLLoss()
    
    # target to torch.long
    return loss_function(y_pred, y_true.to(dtype=torch.long))

In [45]:
def norm_A(A):
    degrees = torch.sum(A, 1)
    A = A / degrees
    return A

def top_k_graph(scores, A, H, k):
    num_nodes = A.shape[0]
    values, idx = torch.topk(scores, max(2, int(k*num_nodes)))
    new_H = H[idx, :]
    values = torch.unsqueeze(values, -1)
    new_H = torch.mul(new_H, values)
    
    un_A = A.bool().float().to(default_tensor_data_type)
    un_A = torch.matmul(un_A, un_A).bool().float().to(default_tensor_data_type)
    un_A = un_A[idx, :]
    un_A = un_A[:, idx]
    
    A = norm_A(un_A)
    
    return A, new_H, idx

class Pool(nn.Module):
    def __init__(self, k, in_dim, p):
        super(Pool, self).__init__()
        self.k = k
        self.sigmoid = nn.Sigmoid()
        self.proj = nn.Linear(in_dim, 1)
        self.drop = nn.Dropout(p=p) if p > 0 else nn.Identity()

    def forward(self, A, H):
        Z = self.drop(H)
        weights = self.proj(Z).squeeze()
        scores = self.sigmoid(weights)
        return top_k_graph(scores, A, H, self.k)


class Unpool(nn.Module):
    def forward(self, A, H, idx):
        num_nodes = A.shape[0]
        num_features = H.shape[1]
        new_H = torch.zeros(
            [num_nodes, num_features],
            device=H.device
        )
        new_H[idx] = H
        return A, new_H

class GraphUnet(nn.Module):
    def __init__(self, in_features, hidden_features, output_classes, dropout_prob=0.8):
        super().__init__()
        k = 0.5
        self.layer_1 = GraphConvolutionLayer(in_features, hidden_features)
        self.pool = Pool(k=k, in_dim=hidden_features, p =dropout_prob)
        self.middle_gcn = GraphConvolutionLayer(hidden_features, hidden_features)
        self.unpool = Unpool()
        self.layer_2 = GraphConvolutionLayer(hidden_features, output_classes)
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, X, A) -> torch.Tensor:
        A = normalize_A(A)
        A = add_self_loops(A, 2)
        H = F.relu(self.dropout(self.layer_1(X, A)))
        
        small_A, small_H, idx = self.pool(A, H)
        
        small_H = F.relu(self.dropout(self.middle_gcn(small_H, small_A)))

        A, H_FINAL = self.unpool(A, small_H, idx)
        
        Y = F.log_softmax(self.layer_2(H + H_FINAL, A), dim=1)
        return Y

In [46]:
learning_rate = 0.01
num_epoch = 200
hidden_features = 1000

model = GraphUnet(num_features, hidden_features, num_labels).to(device)
# set optimizer
optimizer = torch.optim.Adam(
    model.parameters(),
    lr = learning_rate
)
loss_train_history = []
loss_val_history = []
acc_train_history = []
acc_val_history = []

for epoch in range(1, num_epoch+1):
    # allow model parameters to be learned   
    model.train()         

    y_pred = model(X, A)

    # we will compute the loss only with respect to train data
    y_true_train: torch.Tensor = y[train_mask] 
    y_pred_train: torch.Tensor = y_pred[train_mask]
    loss_train = compute_loss(y_true_train, y_pred_train)

    acc_train = compute_accuracy(y_true_train, y_pred_train)
    
    loss_train_history.append(loss_train.item())
    acc_train_history.append(acc_train)


    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()

    loss_train.detach()
    
    ## model performance on validation data
    with torch.no_grad():
        y_true_val = y[val_mask]
        y_pred_val = y_pred[val_mask]

        loss_val = compute_loss(y_true_val, y_pred_val)
        acc_val = compute_accuracy(y_true_val, y_pred_val)
        loss_val_history.append(loss_val.item())
        acc_val_history.append(acc_val)

        # just making sure
        loss_val.detach()

    if epoch % 10 == 0:
        print(f'Epoch {epoch:3d} - ', end='')
        print()
        print(f'Train loss: {loss_train:0.4f}\tTrain acc: {acc_train:0.4f}')
        print(f'Val loss: {loss_val:0.4f}\tTest acc: {acc_val:0.4f}')
        print()


Epoch  10 - 
Train loss: 0.0822	Train acc: 0.9786
Val loss: 2.7001	Test acc: 0.5330

Epoch  20 - 
Train loss: 0.0000	Train acc: 1.0000
Val loss: 2.2176	Test acc: 0.6770

Epoch  30 - 
Train loss: 0.0022	Train acc: 1.0000
Val loss: 2.1165	Test acc: 0.7180

Epoch  40 - 
Train loss: 0.0000	Train acc: 1.0000
Val loss: 2.3103	Test acc: 0.7160

Epoch  50 - 
Train loss: 0.0003	Train acc: 1.0000
Val loss: 2.3399	Test acc: 0.7150

Epoch  60 - 
Train loss: 0.0000	Train acc: 1.0000
Val loss: 2.1251	Test acc: 0.7200

Epoch  70 - 
Train loss: 0.0000	Train acc: 1.0000
Val loss: 2.3146	Test acc: 0.7220

Epoch  80 - 
Train loss: 0.0001	Train acc: 1.0000
Val loss: 2.1181	Test acc: 0.7240

Epoch  90 - 
Train loss: 0.0121	Train acc: 0.9929
Val loss: 2.3027	Test acc: 0.7120

Epoch 100 - 
Train loss: 0.0000	Train acc: 1.0000
Val loss: 2.2652	Test acc: 0.7230

Epoch 110 - 
Train loss: 0.0000	Train acc: 1.0000
Val loss: 2.3172	Test acc: 0.7240

Epoch 120 - 
Train loss: 0.0000	Train acc: 1.0000
Val loss: 2.434