In [None]:
!pip install -U ogb --quiet
!pip install urllib3 --quiet
!pip install dgl --quiet
!pip install rdkit --quiet
!pip install plotnine --quiet
!pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
!pip install torch-geometric
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

In [6]:
import os
import torch
from ogb.linkproppred import PygLinkPropPredDataset
from ogb.nodeproppred import PygNodePropPredDataset
from ogb.graphproppred import PygGraphPropPredDataset
from torch_geometric.data import InMemoryDataset
from ogb.nodeproppred import NodePropPredDataset
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
import numpy as np
import pandas as pd
import urllib3
import outdated
import dgl
import sklearn
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch.autograd import Variable
from torch_geometric.nn import GraphConv
import matplotlib.pyplot as plt
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
from torch_geometric.data import DataLoader
from params import gnn_params
from sklearn.metrics import roc_auc_score
from gcn import GCN

In [7]:
# Load ogbn-proteins. The ToSparseTensor transform is applied when a graph is
# accessed (dataset[0]) and exposes the adjacency as `adj_t`, with `edge_attr`
# as its values.
dataset = PygNodePropPredDataset(name='ogbn-proteins', transform=T.ToSparseTensor(attr='edge_attr'))

# Use the first GPU when one is available, otherwise the CPU. The device is
# chosen here directly: no `args` object exists in this notebook, so reading
# `args.device` would raise NameError on any machine with CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Node-index tensors of the predefined split, moved to the compute device.
split_idx = dataset.get_idx_split()
train_idx = split_idx['train'].to(device)
test_idx = split_idx['test'].to(device)
val_idx = split_idx['valid'].to(device)

In [8]:
# Display the dataset object's repr as a quick check that loading succeeded.
dataset

PygNodePropPredDataset()

In [9]:
# Ask PyG whether the stored graph contains isolated nodes
# (presumably: nodes without any incident edge — see the PyG Data API).
dataset.data.has_isolated_nodes()

False

In [10]:
# Collect the headline numbers of the graph and of each split, then print them
# as one report.
num_nodes = dataset.data.num_nodes
# The stored edge count is halved — presumably because every undirected edge
# appears once per direction in edge_index; confirm against the dataset docs.
num_edges = dataset.data.num_edges // 2
train_len, val_len, test_len = len(train_idx), len(val_idx), len(test_idx)
# Nodes that belong to none of the three splits.
other_len = num_nodes - train_len - val_len - test_len

summary_lines = [
    f"Dataset: {dataset.name}",
    f"Num. nodes: {num_nodes} (train={train_len}, val={val_len}, test={test_len}, other={other_len})",
    f"Num. edges: {num_edges}",
    f"Num.  features per edge: {(dataset.data.edge_attr).size(1)}",
    f"Num. classes: {dataset.num_classes}",
    f"Dataset len.: {dataset.len()}",
    " ",
]
print("\n".join(summary_lines))

Dataset: ogbn-proteins
Num. nodes: 132534 (train=86619, val=21236, test=24679, other=0)
Num. edges: 39561252
Num.  features per edge: 8
Num. classes: 2
Dataset len.: 1
 


In [14]:
# Inspect the stored graph object: attribute names and tensor shapes.
dataset.data

Data(num_nodes=132534, edge_index=[2, 79122504], edge_attr=[79122504, 8], node_species=[132534, 1], y=[132534, 112])

# Training and Testing loops

In [27]:
# Loss on raw logits (the sigmoid is applied inside the loss), computed
# element-wise over every label column of the target matrix.
loss_fn = torch.nn.BCEWithLogitsLoss()


def train(model, data, train_idx, optimizer):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: module called as ``model(data.x, data.adj_t)``; its output is
            indexed by node, so it must return one row of logits per node.
        data: graph object exposing ``x``, ``adj_t``, ``y`` and ``to(device)``.
        train_idx: index selecting the training nodes.
        optimizer: optimiser holding ``model``'s parameters.

    Returns:
        float: loss over the training nodes for this step.
    """
    model.train()
    optimizer.zero_grad()
    # Place the graph on the device the model lives on. Reading the model's own
    # device keeps the function independent of notebook-level globals.
    data = data.to(next(model.parameters()).device)
    out = model(data.x, data.adj_t)[train_idx]
    # BCEWithLogitsLoss needs floating-point targets.
    loss = loss_fn(out, data.y[train_idx].to(torch.float))
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def test(model, data, split_idx, evaluator):
    model.eval()
    y_pred = model(data.x, data.adj_t)
    train_rocauc = evaluator.eval({
        'y_true': data.y[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['rocauc']
    valid_rocauc = evaluator.eval({
        'y_true': data.y[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['rocauc']
    test_rocauc = evaluator.eval({
        'y_true': data.y[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['rocauc']
    return train_rocauc, valid_rocauc, test_rocauc

# Main

Move edge features to node features.


In [12]:
# Take the single graph of the dataset (the transform given at load time is applied here).
data = dataset[0]
# Derive node features from the edge features: reduce the values held in adj_t
# along dim=1 with a mean. The model's first layer takes 8 inputs, matching the
# 8 features per edge — assumes the result is (num_nodes, 8); TODO confirm.
data.x = data.adj_t.mean(dim=1)
# Drop the values stored in adj_t in place (trailing underscore), leaving what
# is presumably a structure-only adjacency. Being the last expression of the
# cell, the returned SparseTensor is also displayed.
data.adj_t.set_value_(None)

SparseTensor(row=tensor([     0,      0,      0,  ..., 132533, 132533, 132533]),
             col=tensor([     1,      2,      3,  ...,  98734, 102639, 132517]),
             size=(132534, 132534), nnz=79122504, density=0.45%)

In [23]:
# Hyper-parameters come from the local `params` module, the model class from the local `gcn` module.
parameters = gnn_params 
# NOTE(review): the values are passed positionally, so this relies on the
# insertion order of `gnn_params` matching the argument order of GCN's
# constructor — confirm against gcn.py, or pass keyword arguments if the keys
# match the parameter names.
model = GCN(*parameters.values())

In [15]:
# Display the layer structure of the instantiated model.
model

GCN(
  (convs): ModuleList(
    (0): GCNConv(8, 256)
    (1): GCNConv(256, 256)
    (2): GCNConv(256, 112)
  )
)

In [22]:
# Pre-compute GCN normalization.
# The adjacency is scaled once here on both sides by deg^-1/2, so the same
# normalised matrix is handed to the model on every call.
# NOTE(review): this cell overwrites data.adj_t, so executing it a second time
# would normalise an already-normalised matrix — run it exactly once per kernel.
adj_t = data.adj_t.set_diag()  # fill the diagonal (presumably adds self-loops — confirm in torch_sparse)
deg = adj_t.sum(dim=1).to(torch.float)  # per-row sum, used as the node degree
deg_inv_sqrt = deg.pow(-0.5)
deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0  # a zero degree yields inf; replace it with 0
adj_t = deg_inv_sqrt.view(-1, 1) * adj_t * deg_inv_sqrt.view(1, -1)  # scale rows, then columns
data.adj_t = adj_t
# Move the whole graph to the compute device.
data = data.to(device)

In [21]:
# OGB evaluator for ogbn-proteins; test() reads the 'rocauc' entry of its result.
evaluator = Evaluator(name='ogbn-proteins')

In [20]:
# Number of training epochs per run, read by the training loop.
EPOCHS = 1


In [28]:
# Train the model from scratch several times and report the test ROC-AUC at the
# epoch with the best validation ROC-AUC, averaged over the runs.
model.to(device)

NUM_RUNS = 10  # independent restarts behind the mean ± std printed at the end

test_perfs = []
for run in range(1, NUM_RUNS + 1):
    print(f'Run {run}:')

    # Seed each restart with its run number so the initialisation is
    # repeatable across kernel restarts while still differing between runs.
    torch.manual_seed(run)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # 'final_*' hold the scores at the epoch with the best validation score.
    results = {'highest_valid': 0,
               'final_train': 0,
               'final_test': 0,
               'highest_train': 0}

    for epoch in range(EPOCHS):
        loss = train(model, data, train_idx, optimizer)
        result = test(model, data, split_idx, evaluator)
        train_roc, valid_roc, test_roc = result

        if train_roc > results['highest_train']:
            results['highest_train'] = train_roc

        # Model selection on validation: keep the scores from the best epoch.
        if valid_roc > results['highest_valid']:
            results['highest_valid'] = valid_roc
            results['final_train'] = train_roc
            results['final_test'] = test_roc

        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, '
              f'Train: {train_roc:.4f}, Val: {valid_roc:.4f}, '
              f'Test: {test_roc:.4f}')

    test_perfs.append(results['final_test'])

test_perf = torch.tensor(test_perfs)
# The sample standard deviation is undefined (NaN) for a single run; report 0 then.
test_spread = test_perf.std() if test_perf.numel() > 1 else torch.tensor(0.0)
print('===========================')
print("model parameters : {}".format(parameters))
print(f'Final Test: {test_perf.mean():.4f} ± {test_spread:.4f}')


Run 1:
Epoch: 000, Loss: 0.6924, Train: 0.4834, Val: 0.4915, Test: 0.4894
Run 2:
Epoch: 000, Loss: 0.6937, Train: 0.4951, Val: 0.5005, Test: 0.5051
Run 3:
Epoch: 000, Loss: 0.6934, Train: 0.4808, Val: 0.4936, Test: 0.4981
Run 4:
Epoch: 000, Loss: 0.6937, Train: 0.5017, Val: 0.5111, Test: 0.5120
Run 5:
Epoch: 000, Loss: 0.6930, Train: 0.5003, Val: 0.4989, Test: 0.4991
Run 6:
Epoch: 000, Loss: 0.6936, Train: 0.4955, Val: 0.4876, Test: 0.4855
Run 7:
Epoch: 000, Loss: 0.6936, Train: 0.4887, Val: 0.4866, Test: 0.4904
Run 8:


KeyboardInterrupt: 