In [1]:
import random
import wandb
from tqdm import tqdm
import networkx as nx
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import torch_geometric
import torch_geometric.nn as pyg_nn
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from functools import partial
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
from torch_geometric.datasets import Planetoid
# Build the Cora dataset through the Planetoid loader, using the "public" split
# and data/cora as the dataset root directory.
dataset = Planetoid(
    root='data/cora',
    name='Cora',
    split="public",
)
# Entry 0 of the dataset is the graph object the later cells read
# (features, edge index, labels and the train/val/test masks).
data = dataset[0]

In [3]:
# Summarise the split sizes (each mask is summed to count its selected nodes)
# and the total node count, one line per figure.
split_report = [
    f'Number of training nodes: {data.train_mask.sum()}',
    f'Number of validation nodes: {data.val_mask.sum()}',
    f'Number of test nodes: {data.test_mask.sum()}',
    f'Number of nodes: {data.num_nodes}',
]
print('\n'.join(split_report))

Number of training nodes: 140
Number of validation nodes: 500
Number of test nodes: 1000
Number of nodes: 2708


In [44]:
class GNN_Refined(nn.Module):
    """Graph-convolution stack followed by a linear classification head.

    The network applies ``gnn_conv`` layers one after another, with a ReLU
    after every layer except the last one, then maps the result through a
    linear layer and returns log-softmax scores.
    """

    def __init__(self, input_dim: int, hidden_dim: int, num_classes: int,
                 num_gnn_layers: int = 1, gnn_conv: nn.Module = pyg_nn.GCNConv):
        """Create the convolution layers and the classification head.

        Args:
            input_dim: Input size handed to the first convolution layer.
            hidden_dim: Output size of every convolution layer and input size
                of the linear head.
            num_classes: Output size of the linear head.
            num_gnn_layers: Number of convolution layers. One layer is always
                created, so values below 1 still yield a single layer.
            gnn_conv: Layer factory, invoked as ``gnn_conv(in_dim, out_dim)``.
        """
        super().__init__()

        # Input width of each convolution: the first layer consumes the raw
        # features, every further layer consumes hidden_dim features.
        conv_input_dims = [input_dim] + [hidden_dim] * (num_gnn_layers - 1)
        self.gnn_layers = nn.ModuleList(
            [gnn_conv(in_dim, hidden_dim) for in_dim in conv_input_dims]
        )
        self.activation = nn.ReLU()
        self.logits = nn.Linear(hidden_dim, num_classes)

    def forward(self, x: torch.Tensor, edge_index: torch.Tensor):
        """Run the layer stack and return log-softmax scores over dim 1.

        Args:
            x: Node feature tensor (presumably ``(num_nodes, input_dim)`` --
                TODO confirm against the caller).
            edge_index: Graph connectivity, forwarded unchanged to every
                convolution layer.

        Returns:
            Output of the linear head after ``log_softmax`` along dim 1.
        """
        *inner_convs, last_conv = self.gnn_layers

        # Every convolution but the last is followed by the activation.
        for conv in inner_convs:
            x = self.activation(conv(x, edge_index))

        # The last convolution feeds the linear head directly (no activation).
        x = last_conv(x, edge_index)
        return F.log_softmax(self.logits(x), dim=1)

In [47]:
def calculate_trained_gnn_accuracy(model, x, y, mask, edge_index):
    """Return the fraction of selected nodes whose predicted class equals the label.

    The model is switched to evaluation mode (and left in it) and is called
    as ``model(x, edge_index)`` with gradient tracking disabled. The predicted
    class of each row is the index of its largest score along dim 1.

    Args:
        model: Module called as ``model(x, edge_index)``; must return one row
            of class scores per node.
        x: Node features passed to the model.
        y: Ground-truth class labels, indexable by ``mask``.
        mask: Selector applied to both predictions and labels (a boolean mask
            in this notebook).
        edge_index: Graph connectivity passed to the model.

    Returns:
        ``correct / total`` as a Python float, where ``total`` is the number
        of selected nodes.

    Raises:
        ZeroDivisionError: If ``mask`` selects no nodes.
    """
    model.eval()

    # Evaluation never backpropagates, so skip autograd bookkeeping for the
    # forward pass instead of building a graph that is thrown away.
    with torch.no_grad():
        pred = model(x, edge_index).max(dim=1)[1]

    selected_labels = y[mask]
    correct = pred[mask].eq(selected_labels).sum().item()
    # Count the selected labels rather than summing the mask, so the
    # denominator is also right when the mask holds node indices.
    total = selected_labels.numel()
    return correct / total

def use_trained_gnn(run, hidden_dim=64, num_gnn_layers=2, lr=0.01,
                    weight_decay=5e-4, num_epochs=500, log_every=0):
    """Train a fresh ``GNN_Refined`` on the training nodes and score it on the test nodes.

    Reads the module-level ``dataset`` and ``data`` objects. The resulting
    test accuracy is appended to the module-level ``accuracies`` list (which
    must exist before this function is called) and is also returned.

    Args:
        run: Zero-based index of the current run; only used in the optional
            validation log line.
        hidden_dim: Hidden size passed to ``GNN_Refined``.
        num_gnn_layers: Number of convolution layers passed to ``GNN_Refined``.
        lr: Learning rate for the Adam optimizer.
        weight_decay: Weight decay for the Adam optimizer.
        num_epochs: Number of full-graph training steps.
        log_every: If positive, print the validation accuracy every
            ``log_every`` epochs. The default of 0 disables logging.

    Returns:
        Test accuracy of the trained model as a float.
    """
    # model and optimizer
    gnn_train = GNN_Refined(dataset.num_features, hidden_dim, dataset.num_classes, num_gnn_layers)
    optimizer = torch.optim.Adam(gnn_train.parameters(), lr=lr, weight_decay=weight_decay)

    # These lookups do not change between epochs, so do them once.
    feature, edge_index, label = data.x, data.edge_index, data.y
    train_mask = data.train_mask

    # train
    for epoch in range(num_epochs):
        # Re-enter training mode every epoch: the optional validation pass
        # below leaves the model in evaluation mode.
        gnn_train.train()
        pred = gnn_train(feature, edge_index)
        loss = F.nll_loss(pred[train_mask], label[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # valid (opt-in)
        if log_every and (epoch + 1) % log_every == 0:
            val_accuracy = calculate_trained_gnn_accuracy(gnn_train, feature, label, data.val_mask, edge_index)
            print("Run: {}\tEpoch: {}\tValidation accuracy: {}".format(run + 1, epoch + 1, val_accuracy))

    # test
    accuracy = calculate_trained_gnn_accuracy(gnn_train, feature, label, data.test_mask, edge_index)
    # Kept for existing callers, which collect results through this global list.
    accuracies.append(accuracy)
    return accuracy

# Repeat the train/test experiment with a different torch seed per run and
# summarise the collected test accuracies.
num_runs = 3
accuracies = []  # use_trained_gnn appends one test accuracy per call

for run in range(num_runs):
    # Seed torch with the run index before each run.
    torch.manual_seed(run)
    use_trained_gnn(run)

summary_lines = [
    "Model: Trained GNN",
    "Accuracies: {}".format(accuracies),
    "Mean: {}".format(np.mean(accuracies)),
    "Standard deviation: {}".format(np.std(accuracies)),
]
for summary_line in summary_lines:
    print(summary_line)

Model: Trained GNN
Accuracies: [0.803, 0.8, 0.785]
Mean: 0.7960000000000002
Standard deviation: 0.007874007874011818
