# Benchmark for GCN

## Read data

In [2]:
from torch_geometric.datasets import PPI
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from torch_geometric.loader import DataLoader
import torch

# Split identifiers; they double as the keys of the `sets` dictionary below.
TRAIN, VAL, TEST = "train", "val", "test"
set_names = [TRAIN, TEST, VAL]

# One PPI dataset object per split, all stored under the same root directory.
train_dataset, val_dataset, test_dataset = (
    PPI(root='/tmp/PPI', split=split_name)
    for split_name in ("train", "val", "test")
)

# Each loader uses batch_size == len(dataset), so its first batch already
# contains every graph of that split.
train_loader, val_loader, test_loader = (
    iter(DataLoader(dataset, batch_size=len(dataset)))
    for dataset in (train_dataset, val_dataset, test_dataset)
)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Pull the single full-split batch out of every loader.
train_set = next(train_loader)
test_set = next(test_loader)
val_set = next(val_loader)

# NOTE(review): `sets` maps each split name to the *dataset* object, not to the
# collated batches (`train_set` / `test_set` / `val_set`) built just above, and
# those batches are not referenced by any later visible cell -- confirm which
# of the two GNNTraining / GNNEvaluate actually expect.
sets = {
    TRAIN: train_dataset,
    TEST: test_dataset,
    VAL: val_dataset,
}

## Define GNN architecture

In [3]:
import torch
from torch import nn
from torch_geometric.nn import GCNConv
from torch.nn import Linear
import torch.nn.functional as F
class GNN(nn.Module):
    """Three-stage GCN in which every graph convolution has a parallel linear path.

    At each stage the output of a ``GCNConv`` and the output of a ``Linear``
    layer, both applied to the same input, are summed. Dropout is applied to
    the input of every stage and ELU follows the first two stages; the third
    stage's sum is returned without an activation.

    Args:
        in_dim: Size of the input node features.
        hidden_dim: Output size of the first two stages.
        out_dim: Output size of the final stage.
        dropout: Probability handed to the shared ``nn.Dropout`` module.
        normalize: Forwarded unchanged to every ``GCNConv``.
        add_self_loops: Forwarded unchanged to every ``GCNConv``.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout = .2, normalize = False, add_self_loops = True):
        super().__init__()

        # Options shared by all three graph convolutions.
        conv_options = dict(normalize=normalize, add_self_loops=add_self_loops)

        # Layers are created in the same conv/linear interleaved order as
        # before, so seeded parameter initialisation is unaffected.
        self.conv1 = GCNConv(in_dim, hidden_dim, **conv_options)
        self.lin1 = Linear(in_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim, **conv_options)
        self.lin2 = Linear(hidden_dim, hidden_dim)
        self.conv3 = GCNConv(hidden_dim, out_dim, **conv_options)
        self.lin3 = Linear(hidden_dim, out_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """Run the three stages on ``x`` and return the last stage's output.

        Args:
            x: Node features passed to the first stage.
            edge_index: Graph connectivity forwarded to every ``GCNConv``.

        Returns:
            The sum of ``conv3`` and ``lin3`` applied to the second stage's
            (dropped-out) activations.
        """
        # Stages 1 and 2: dropout -> (conv + linear) -> ELU.
        for conv, skip in ((self.conv1, self.lin1), (self.conv2, self.lin2)):
            x = self.dropout(x)
            x = F.elu(conv(x, edge_index) + skip(x))

        # Stage 3: dropout -> (conv + linear), no activation.
        x = self.dropout(x)
        return self.conv3(x, edge_index) + self.lin3(x)

## Hyperparameter tuning for GNN

In [None]:
from sklearn.model_selection import ParameterGrid
from tqdm.notebook import tqdm

# Imported in this cell so the search also runs after Restart Kernel -> Run All;
# previously GNNTraining was only imported by a later cell.
from GNNTraining import GNNTraining

# Search space of the exhaustive grid search: every combination of the values
# below is trained once.
# NOTE(review): recent torch_geometric releases appear to reject
# GCNConv(normalize=False, add_self_loops=True) -- confirm against the
# installed version before running the NORMALIZE=False / SELF_LOOPS=True cells.
space = {
    "WEIGHT_DECAYS": [0],
    "DROPOUT": [0.0, 0.2, 0.4],
    "HIDDEN_DIMS": [128, 256, 512],
    "LEARNING_RATES": [1e-4, 5e-3, 1e-3, 5e-4],
    "SELF_LOOPS": [True, False],
    "NORMALIZE": [True, False],
}

param_grid = ParameterGrid(space)

# Best configuration seen so far and its validation loss (lower is better).
best_params_overall = None
best_val_overall = float("inf")

# Iterate the grid object itself rather than its raw iterator: it exposes a
# length, so the progress bar can display the total number of configurations.
for params in tqdm(param_grid):
    gnnTraining = GNNTraining(device = device,
            GNN = GNN,
            sets = sets,
            hidden_dim = params["HIDDEN_DIMS"],
            lr = params["LEARNING_RATES"],
            dropout = params["DROPOUT"],
            weight_decay=params["WEIGHT_DECAYS"],
            epochs = 1000,
            kwargs = {"normalize": params["NORMALIZE"], 'add_self_loops': params["SELF_LOOPS"]})
    gnnTraining.train()

    # `<=` means that on a tie the later configuration replaces the earlier one.
    if gnnTraining.best_val_loss <= best_val_overall:
        print("Updated params")
        best_val_overall = gnnTraining.best_val_loss
        best_params_overall = params

## Training & Evaluation

In [4]:
from GNNTraining import GNNTraining
from GNNEvaluate import GNNEvaluate 

# Fixed hyperparameters for the single training run of this cell.
final_hparams = dict(
    hidden_dim=512,
    lr=5e-3,
    dropout=0.2,
    weight_decay=0.0,
    epochs=1000,
    kwargs={"normalize": True, 'add_self_loops': True},
)

# Train once; train() hands back the model that is evaluated below.
gnnTraining = GNNTraining(device=device, GNN=GNN, sets=sets, **final_hparams)
best_model = gnnTraining.train()

# The evaluation call is the cell's last expression, so its return value is
# what the notebook displays as the cell output.
gnnEvaluate = GNNEvaluate(device=device, sets=sets)
gnnEvaluate.evaluate(best_model)

  0%|          | 0/1000 [00:00<?, ?it/s]

0.9600572791884782

## Standard deviation over 10 runs

In [6]:
from GNNTraining import GNNTraining
from GNNEvaluate import GNNEvaluate 
from tqdm.notebook import tqdm

# Number of independent train/evaluate repetitions.
N_RUNS = 10


def _new_trainer():
    """Build a fresh GNNTraining with the fixed hyperparameters of this benchmark.

    A new ``kwargs`` dict is created on every call, so no state is shared
    between repetitions through that argument.
    """
    return GNNTraining(
        device=device,
        GNN=GNN,
        sets=sets,
        hidden_dim=512,
        lr=5e-3,
        dropout=0.2,
        weight_decay=0.0,
        epochs=1000,
        kwargs={"normalize": True, 'add_self_loops': True},
    )


# Per-run training time and evaluation score, in run order.
times, scores = [], []

for run_idx in tqdm(range(N_RUNS)):
    gnnTraining = _new_trainer()
    best_model = gnnTraining.train()
    times.append(gnnTraining.training_time)

    # A new evaluator is built for every run, mirroring the trainer.
    gnnEvaluate = GNNEvaluate(device=device, sets=sets)
    score = gnnEvaluate.evaluate(best_model)
    scores.append(score)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

In [7]:
import numpy as np
# Aggregate the repeated runs: mean and standard deviation of the scores,
# followed by the mean of the recorded training times.
score_mean, score_std = np.mean(scores), np.std(scores)
mean_training_time = np.mean(times)
print(f"F1-score: {score_mean} +- {score_std}; {mean_training_time}")

F1-score: 0.960577126981376 +- 0.000838829249817186; 343.01980459690094
