# Benchmark for ChebyNets

## Read data

In [2]:
from torch_geometric.datasets import PPI
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from torch_geometric.loader import DataLoader
import torch

# Names of the three dataset splits; they are also the keys of `sets`.
TRAIN = "train"
VAL = "val"
TEST = "test"
set_names = [TRAIN, TEST, VAL]

# PPI splits, all stored under the same root directory.
train_dataset = PPI(root='/tmp/PPI', split="train")
val_dataset = PPI(root='/tmp/PPI', split="val")
test_dataset = PPI(root='/tmp/PPI', split="test")

# One loader per split with batch_size == number of graphs in the split, so a
# single `next(...)` call returns the whole split collated into one batch.
train_loader = iter(DataLoader(train_dataset, batch_size=len(train_dataset)))
val_loader = iter(DataLoader(val_dataset, batch_size=len(val_dataset)))
test_loader = iter(DataLoader(test_dataset, batch_size=len(test_dataset)))

# Prefer the second GPU (index 1) as before, but fall back to the first one on
# single-GPU machines, where "cuda:1" is not a valid device, and to the CPU
# when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

# Full-split batches (one batch per split).
train_set = next(train_loader)
test_set = next(test_loader)
val_set = next(val_loader)

# Split name -> data handed to the training / evaluation helpers.
# NOTE(review): this stores the dataset objects, not the collated batches
# (`train_set`, `test_set`, `val_set`) built just above, which are otherwise
# unused in this cell -- confirm which of the two GNNTraining/GNNEvaluate expect.
sets = dict()
sets[TRAIN] = train_dataset
sets[TEST] = test_dataset
sets[VAL] = val_dataset

## Define GNN architecture

In [3]:
import torch
from torch import nn
from torch_geometric.nn import ChebConv
from torch.nn import Linear
import torch.nn.functional as F
class GNN(nn.Module):
    """Three-layer ChebConv network with a parallel linear branch per layer.

    Every layer computes ``ChebConv(x, edge_index) + Linear(x)``. Dropout is
    applied to the input of each layer and ELU follows the first two layers;
    the third layer's sum is returned without an activation.

    Args:
        in_dim: Size of the input node features.
        hidden_dim: Output size of the first two layers.
        out_dim: Output size of the last layer.
        dropout: Probability passed to ``nn.Dropout``.
        normalization: Forwarded to every ``ChebConv`` as ``normalization``.
        K: Forwarded to every ``ChebConv`` as ``K``.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout=.2, normalization="sym", K=1):
        super().__init__()

        # (input size, output size) of layers 1..3. Sub-modules are registered
        # as conv1, lin1, conv2, lin2, conv3, lin3 -- in exactly that order.
        layer_dims = (
            (in_dim, hidden_dim),
            (hidden_dim, hidden_dim),
            (hidden_dim, out_dim),
        )
        for idx, (fan_in, fan_out) in enumerate(layer_dims, start=1):
            setattr(self, f"conv{idx}", ChebConv(fan_in, fan_out, K=K, normalization=normalization))
            setattr(self, f"lin{idx}", nn.Linear(fan_in, fan_out))
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, edge_index):
        """Run the three layers on features ``x`` and connectivity ``edge_index``."""
        h = x
        # Hidden layers: dropout -> (graph conv + linear branch) -> ELU.
        for conv, lin in ((self.conv1, self.lin1), (self.conv2, self.lin2)):
            h = self.dropout(h)
            h = F.elu(conv(h, edge_index) + lin(h))
        # Output layer: same sum, but no activation.
        h = self.dropout(h)
        return self.conv3(h, edge_index) + self.lin3(h)

## Hyperparameter tuning for GNN

In [None]:
from sklearn.model_selection import ParameterGrid
from tqdm.notebook import tqdm

# Imported in this cell so the search runs on its own; elsewhere in the
# notebook GNNTraining is only imported in the later training cells.
from GNNTraining import GNNTraining

# Grid of hyperparameter values; every combination is trained once.
space = {
    "WEIGHT_DECAYS": [0],#[0, 1e-3]
    "DROPOUT": [0.0, 0.2],
    "HIDDEN_DIMS": [128, 256, 512],
    "LEARNING_RATES": [1e-4, 5e-3, 1e-3],
    "K": [1,2,3],
    "NORMALIZATION": [None, "sym", "rw"],
}

param_grid = ParameterGrid(space)
best_params_overall = None
best_val_overall = float("inf")

# Iterate the grid object itself rather than calling `__iter__()` by hand:
# ParameterGrid has a length, so tqdm can show the total and an ETA.
for params in tqdm(param_grid):
    gnnTraining = GNNTraining(device = device,
            GNN = GNN,
            sets = sets,
            hidden_dim = params["HIDDEN_DIMS"],
            lr = params["LEARNING_RATES"],
            dropout = params["DROPOUT"],
            weight_decay=params["WEIGHT_DECAYS"],
            epochs = 2_000,
            kwargs = {"K":params["K"], "normalization":params["NORMALIZATION"]})
    gnnTraining.train()

    # Keep the configuration with the lowest validation loss; `<=` means a
    # later configuration wins a tie.
    if gnnTraining.best_val_loss <= best_val_overall:
        print("Updated params")
        best_val_overall = gnnTraining.best_val_loss
        best_params_overall = params

## Best hyperparameters

In [18]:
# Show the parameter combination retained by the grid-search loop
# (the one with the lowest validation loss).
best_params_overall

{'DROPOUT': 0.2,
 'HIDDEN_DIMS': 512,
 'K': 2,
 'LEARNING_RATES': 0.005,
 'NORMALIZATION': 'sym',
 'WEIGHT_DECAYS': 0}

## Training & Evaluation

In [None]:
from tqdm.notebook import tqdm
from GNNTraining import GNNTraining
from GNNEvaluate import GNNEvaluate 

# Train once with the hyperparameters reported in the
# "Best hyperparameters" section of this notebook.
gnnTraining = GNNTraining(
    device=device,
    GNN=GNN,
    sets=sets,
    hidden_dim=512,
    lr=5e-3,
    dropout=0.2,
    weight_decay=0.0,
    epochs=2000,
    kwargs=dict(K=2, normalization="sym"),
)
best_model = gnnTraining.train()

# Evaluate the model returned by training; as the last expression of the
# cell, the result is displayed.
gnnEvaluate = GNNEvaluate(device=device, sets=sets)
gnnEvaluate.evaluate(best_model)

  0%|          | 0/2000 [00:00<?, ?it/s]

## Standard deviation over 10 runs

In [None]:
from GNNTraining import GNNTraining
from GNNEvaluate import GNNEvaluate 

# Per-run results: `times` collects each trainer's `training_time`,
# `scores` collects the value returned by `evaluate`.
times = []
scores = []
for i in tqdm(range(10)):
    # A fresh trainer per run, always with the same hyperparameters.
    gnnTraining = GNNTraining(
        device=device,
        GNN=GNN,
        sets=sets,
        hidden_dim=512,
        lr=5e-3,
        dropout=0.2,
        weight_decay=0.0,
        epochs=2000,
        kwargs=dict(K=2, normalization="sym"),
    )
    best_model = gnnTraining.train()
    times.append(gnnTraining.training_time)

    # Score the model returned by this run.
    gnnEvaluate = GNNEvaluate(device=device, sets=sets)
    score = gnnEvaluate.evaluate(best_model)
    scores.append(score)

In [None]:
import numpy as np
# Report mean +- standard deviation of the collected scores, followed by the
# mean of the collected training times.
f1_mean, f1_std = np.mean(scores), np.std(scores)
print(f"F1-score: {f1_mean} +- {f1_std}; {np.mean(times)}")