<a href="https://colab.research.google.com/github/gbdl/ECDSep/blob/main/ECDSep_graphs_proteins.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Code to reproduce the experiments performed on the dataset `ogbn-proteins`

In [None]:
import torch

def format_pytorch_version(version):
    """Return the release part of a PyTorch version string.

    Everything from the first ``+`` onwards (the local build tag, e.g.
    ``+cu118``) is dropped; a string without ``+`` is returned unchanged.
    """
    release, _, _ = version.partition('+')
    return release

def format_cuda_version(version):
    """Return the wheel-index tag for a CUDA version string.

    ``'11.8'`` becomes ``'cu118'``.  ``torch.version.cuda`` is ``None`` on
    CPU-only builds of PyTorch; in that case ``'cpu'`` is returned, which is
    the tag the PyG wheel index uses for CPU wheels, instead of failing with
    an ``AttributeError``.
    """
    if version is None:
        return 'cpu'
    return 'cu' + version.replace('.', '')

# Version string of the PyTorch build that is currently installed.
TORCH_version = torch.__version__
# The wheel index is pinned to the 2.0.0 builds; the commented-out call would
# derive the tag from the installed version instead.
TORCH = '2.0.0'#format_pytorch_version(TORCH_version)
# CUDA version the installed PyTorch was built against, turned into a wheel tag.
CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

# IPython shell escapes: install the PyG packages from the wheel index selected
# by {TORCH} and {CUDA}, then the OGB package.
!pip install pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-{TORCH}+{CUDA}.html;
!pip install ogb;

In [None]:
import sys
sys.path.append("..")

import argparse

import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

from inflation import ECDSep

import numpy as np
import random

## Dataset, useful functions and model, all from ogbn-proteins. We also set some parameters as in the OGB paper.

In [None]:
class GCN(torch.nn.Module):
    """Stack of ``GCNConv`` layers with ReLU and dropout between them.

    The stack is an input layer (``in_channels -> hidden_channels``),
    ``num_layers - 2`` hidden layers and an output layer
    (``hidden_channels -> out_channels``).  Every layer is created with
    ``normalize=False``; the notebook pre-computes the normalisation on the
    adjacency before training.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # Consecutive entries of `widths` are the (input, output) sizes of
        # the layers, in construction order.
        widths = ([in_channels]
                  + [hidden_channels] * (1 + max(num_layers - 2, 0))
                  + [out_channels])
        self.convs = torch.nn.ModuleList([
            GCNConv(fan_in, fan_out, normalize=False)
            for fan_in, fan_out in zip(widths[:-1], widths[1:])
        ])

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise the parameters of every convolution layer."""
        for layer in self.convs:
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Apply the stack to ``x`` using ``adj_t``; return the last layer's raw output."""
        *hidden_convs, output_conv = self.convs
        for layer in hidden_convs:
            activated = F.relu(layer(x, adj_t))
            x = F.dropout(activated, p=self.dropout, training=self.training)
        return output_conv(x, adj_t)


class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers with ReLU and dropout between them.

    The stack is an input layer (``in_channels -> hidden_channels``),
    ``num_layers - 2`` hidden layers and an output layer
    (``hidden_channels -> out_channels``).
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers,
                 dropout):
        super().__init__()

        # (input, output) sizes of the layers, in construction order.
        n_hidden = max(num_layers - 2, 0)
        layer_sizes = [(in_channels, hidden_channels)]
        layer_sizes += [(hidden_channels, hidden_channels)] * n_hidden
        layer_sizes.append((hidden_channels, out_channels))

        self.convs = torch.nn.ModuleList(
            [SAGEConv(fan_in, fan_out) for fan_in, fan_out in layer_sizes])

        self.dropout = dropout

    def reset_parameters(self):
        """Re-initialise the parameters of every convolution layer."""
        for layer in self.convs:
            layer.reset_parameters()

    def forward(self, x, adj_t):
        """Apply the stack to ``x`` using ``adj_t``; return the last layer's raw output."""
        *hidden_convs, output_conv = self.convs
        for layer in hidden_convs:
            activated = F.relu(layer(x, adj_t))
            x = F.dropout(activated, p=self.dropout, training=self.training)
        return output_conv(x, adj_t)


def train(model, data, train_idx, optimizer, optimizer_string):
    """Run one full-batch optimisation step and return the training loss.

    The model is evaluated on ``data.x`` and ``data.adj_t``; the binary
    cross-entropy-with-logits loss is taken over the rows selected by
    ``train_idx`` against ``data.y`` cast to float.

    When ``optimizer_string`` is not ``None`` the optimizer is stepped with a
    closure; otherwise ``step()`` is called without arguments.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(data.x, data.adj_t)[train_idx]
    targets = data.y[train_idx].to(torch.float)
    loss = torch.nn.BCEWithLogitsLoss()(logits, targets)
    loss.backward()

    if optimizer_string is None:
        optimizer.step()
    else:
        # The closure does not re-run the model: it only hands the loss
        # computed above to the optimizer.
        optimizer.step(lambda: loss)
    return loss.item()


@torch.no_grad()
def test(model, data, split_idx, evaluator):
    """Evaluate the model on the train and test splits.

    The model is put in eval mode and run once on ``data.x`` and
    ``data.adj_t``; ``evaluator.eval`` is then called for each split with the
    matching rows of ``data.y`` and of the predictions.  The validation split
    is not evaluated.

    Returns:
        ``(train_rocauc, test_rocauc)``, the ``'rocauc'`` entries returned by
        the evaluator for the two splits.
    """
    model.eval()
    y_pred = model(data.x, data.adj_t)

    def rocauc(split):
        # Score only the rows that belong to the requested split.
        rows = split_idx[split]
        scores = evaluator.eval({
            'y_true': data.y[rows],
            'y_pred': y_pred[rows],
        })
        return scores['rocauc']

    train_rocauc = rocauc('train')
    test_rocauc = rocauc('test')
    return train_rocauc, test_rocauc

In [None]:
# Use GPU number `dev` when CUDA is available, otherwise fall back to the CPU.
dev = 0
device = torch.device(f'cuda:{dev}' if torch.cuda.is_available() else 'cpu')

# Load ogbn-proteins; the transform stores the graph as a sparse adjacency
# `adj_t` built with attr='edge_attr'.
dataset = PygNodePropPredDataset(
    name='ogbn-proteins', transform=T.ToSparseTensor(attr='edge_attr'))
data = dataset[0]

# Move edge features to node features: each node's features are the mean of
# its adjacency row, after which the adjacency values are dropped.
data.x = data.adj_t.mean(dim=1)
data.adj_t.set_value_(None)

# Only the training indices are moved to the device here.
split_idx = dataset.get_idx_split()
train_idx = split_idx['train'].to(device)

In [None]:
# Architecture switch: True builds the SAGE model, False the GCN model.
use_sage = True
# Width of the hidden layers and number of convolution layers of the model.
hidden_channels = 256
num_layers = 3
# Dropout probability applied after every hidden layer.
dropout = 0
# Number of training epochs per run.
epochs = 1000
# The model is evaluated every `eval_steps` epochs.
eval_steps = 5 
# NOTE(review): not referenced by any code visible in this notebook.
log_steps = 1

In [None]:
# Both architectures take the same constructor arguments; 112 is passed as
# `out_channels` (presumably the number of ogbn-proteins tasks - confirm).
model = (SAGE if use_sage else GCN)(
    data.num_features, hidden_channels, 112, num_layers, dropout).to(device)

if not use_sage:
    # Pre-compute GCN normalization: add self-loops, then scale the
    # adjacency by deg^-1/2 on both sides.
    adj_t = data.adj_t.set_diag()
    deg = adj_t.sum(dim=1).to(torch.float)
    deg_inv_sqrt = deg.pow(-0.5)
    # Rows with zero degree give inf; zero them out instead.
    deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
    adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)
    data.adj_t = adj_t

data = data.to(device)

evaluator = Evaluator(name='ogbn-proteins')

## Experiments
`opt` is the optimizer chosen for the experiments (among "ECDSep", "sgd", "adam", "adamw"). Remember to change the name of the optimizer and the hyperparameters in the first few lines of the next cell.

In [None]:
# Optimizer to benchmark: one of "ECDSep", "sgd", "adam", "adamw".
opt = "ECDSep"
runs = 5

# Hyperparameters.  `lr` and `wd` are passed to every optimizer, `momentum`
# only to SGD, `eta` and `nu` only to ECDSep.
lr = 1.8
eta = 5.
nu = 1e-5
wd = 0.
momentum = 0.999

best_losses, best_accuracies = [], []
for run in range(runs):
    # Draw a fresh seed for every run and apply it to torch and `random`.
    seed = np.random.randint(100000000)
    torch.manual_seed(seed)
    random.seed(seed)
    torch.cuda.manual_seed(seed)

    model.reset_parameters()

    if opt == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=wd)

    elif opt == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

    elif opt == "adamw":
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)

    elif opt == "ECDSep":
        s = 1
        deltaEn = 0
        consEn = True
        F0 = 0
        optimizer = ECDSep(model.parameters(), lr=lr, eta=eta, nu=nu, s=s, deltaEn=deltaEn, consEn=consEn, F0=F0, weight_decay=wd)

    else:
        # Without this branch a misspelt name would silently reuse the
        # optimizer of an earlier run/cell, or fail later with a NameError.
        raise ValueError(
            f"Unknown optimizer {opt!r}; expected one of "
            "'ECDSep', 'sgd', 'adam', 'adamw'")

    # Per-run trackers, initialised before the loop so they are always
    # defined and never carry a value over from a previous run (e.g. when
    # `epochs` is smaller than `eval_steps`).
    best_loss = float('inf')
    best_test_accuracy = float('-inf')

    for epoch in range(1, 1 + epochs):
        # `opt` is forwarded as `optimizer_string`; since it is never None,
        # `train` always steps the optimizer with a closure.
        loss = train(model, data, train_idx, optimizer, opt)
        if loss < best_loss:
            best_loss = loss

        if epoch % eval_steps == 0:
            result = test(model, data, split_idx, evaluator)
            train_rocauc, test_rocauc = result
            print(f'Run: {run + 1:02d}, '
                  f'Epoch: {epoch:02d}, '
                  f'Loss: {loss:.4f}, '
                  f'Train: {100 * train_rocauc:.2f}%, '
                  f'Test: {100 * test_rocauc:.2f}%')
            if test_rocauc > best_test_accuracy:
                best_test_accuracy = test_rocauc

    best_losses.append(best_loss)
    best_accuracies.append(best_test_accuracy)

In [None]:
# Summary over all runs: mean of the per-run best test score and of the
# per-run minimum training loss.
print(f"Average best accuracy for {opt} over {runs} runs is ", np.mean(best_accuracies))
print(f"Average minimum loss for {opt} over {runs} runs is ", np.mean(best_losses))