In [1]:
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.12.0+cu113.html
!pip install ogb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.12.0+cu113.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 32.5 MB/s 
[?25hCollecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_sparse-0.6.14-cp37-cp37m-linux_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 61.8 MB/s 
[?25hCollecting torch-cluster
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_cluster-1.6.0-cp37-cp37m-linux_x86_64.whl (2.4 MB)
[K     |████████████████████████████████| 2.4 MB 63.7 MB/s 
[?25hCollecting torch-spline-conv
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_spline_conv-1.2.1-cp37-cp37m-linux_x86_64.whl (709 kB)
[K     |████████████████████████████████| 709 kB 53.7 MB/

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
print(torch.__version__)

from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, global_add_pool, global_mean_pool
from torch_geometric.loader import DataLoader
from torch_geometric.datasets import TUDataset
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from tqdm.notebook import tqdm
from ogb.graphproppred.mol_encoder import AtomEncoder

1.12.0+cu113


In [3]:
#Dataset
batch_size = 32
dataset = PygGraphPropPredDataset(name='ogbg-molhiv')
data_feature = dataset.num_features
data_class = dataset.num_classes
data_tasks = dataset.num_tasks

split_idx = dataset.get_idx_split()
train_loader = DataLoader(dataset[split_idx['train']], batch_size = batch_size, shuffle = True)
valid_loader = DataLoader(dataset[split_idx['valid']], batch_size = batch_size, shuffle = False)
test_loader = DataLoader(dataset[split_idx['test']], batch_size = batch_size, shuffle = False)

iter = int (len(train_loader))

"""
for data in (train_loader):
  print(data, iter)
  break
"""

Downloading http://snap.stanford.edu/ogb/data/graphproppred/csv_mol_download/hiv.zip


Downloaded 0.00 GB: 100%|██████████| 3/3 [00:02<00:00,  1.16it/s]
Processing...


Extracting dataset/hiv.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 41127/41127 [00:01<00:00, 39730.65it/s]


Converting graphs into PyG objects...


100%|██████████| 41127/41127 [00:01<00:00, 39143.50it/s]


Saving...
DataBatch(edge_index=[2, 1636], edge_attr=[1636, 3], x=[766, 9], y=[32, 1], num_nodes=766, batch=[766], ptr=[33]) 1029


Done!


In [4]:
args = {
      'hidden_dim': 256,
      'lr': 0.001,
      'epochs': 10,
  }
args

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [35]:
def eval_model(data_loader, model, evaluator):
  model.eval()
  y_true = []
  y_pred = []
  correct = 0
  for step, data in enumerate(data_loader):
    data = data.to(device)
    with torch.no_grad():
      pred = model(data)
    
    y_true.append(data.y.detach().cpu())
    y_pred.append(pred.argmax(dim=-1, keepdim=True).detach().cpu())
    
    pred = pred.argmax(dim=1)
    data_true = data.y.reshape(-1)
    correct += (pred == data_true).sum().item()
    
  y_true = torch.cat(y_true, dim = 0).numpy()
  y_pred = torch.cat(y_pred, dim = 0).numpy()

  
  input_dict = {"y_true": y_true, "y_pred": y_pred}
  rocauc = evaluator.eval(input_dict)
  acc = correct / len(data_loader.dataset)

  return rocauc, acc

In [36]:
class GCN_Graph(torch.nn.Module):
    def __init__(self, hidden_dim, output_dim):
        super(GCN_Graph, self).__init__()

        self.node_encoder = AtomEncoder(hidden_dim)

        self.conv1 = GCNConv(hidden_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim , hidden_dim)
        self.bn1 = torch.nn.BatchNorm1d(hidden_dim)
        self.softmax = nn.LogSoftmax(dim=1)
        self.pool = global_mean_pool

    def forward(self, input):
        x, edge_index, batch = input.x, input.edge_index, input.batch
        embed = self.node_encoder(x)
        net = F.relu(self.bn1(self.conv1(embed , edge_index)))
        net = F.dropout(net , training=self.training)
        net = self.conv2(net , edge_index)
        
        net = self.pool(net, batch)
        net = self.softmax(net)

        return net

In [37]:
model = GCN_Graph(args['hidden_dim'],dataset.num_tasks).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = args['lr'])
lossfunc = F.nll_loss
evaluator = Evaluator(name='ogbg-molhiv')

for epoch in range(args['epochs']):
  train_loss = 0
  #training
  model.train()
  for step, data in enumerate((train_loader)):
    data.to(device) 
    out = model(data)
    loss = lossfunc(out, data.y.squeeze(1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  #valid eval
  val_rocauc, val_acc = eval_model(valid_loader, model, evaluator)

  #output
  print(f'Epoch: {epoch+1:02d}, '
        f'Loss: {train_loss/iter:.4f}, '
        ,f'val_rocauc: {val_rocauc},'
        ,f'val_acc: {val_acc:.4f}'
        )

Epoch: 01, Loss: 0.2141,  val_rocauc: {'rocauc': 0.49987599206349204}, val_acc: 0.9801
Epoch: 02, Loss: 0.1512,  val_rocauc: {'rocauc': 0.5}, val_acc: 0.9803
Epoch: 03, Loss: 0.1490,  val_rocauc: {'rocauc': 0.5}, val_acc: 0.9803
Epoch: 04, Loss: 0.1466,  val_rocauc: {'rocauc': 0.5}, val_acc: 0.9803
Epoch: 05, Loss: 0.1459,  val_rocauc: {'rocauc': 0.49987599206349204}, val_acc: 0.9801
Epoch: 06, Loss: 0.1443,  val_rocauc: {'rocauc': 0.49975198412698413}, val_acc: 0.9798
Epoch: 07, Loss: 0.1447,  val_rocauc: {'rocauc': 0.5}, val_acc: 0.9803
Epoch: 08, Loss: 0.1435,  val_rocauc: {'rocauc': 0.5}, val_acc: 0.9803
Epoch: 09, Loss: 0.1429,  val_rocauc: {'rocauc': 0.5178984788359788}, val_acc: 0.9798
Epoch: 10, Loss: 0.1427,  val_rocauc: {'rocauc': 0.5}, val_acc: 0.9803


In [39]:
#testing eval 
val_rocauc, test_acc = eval_model(test_loader, model, evaluator)
print("val_rocauc",val_rocauc,"test_acc",test_acc)

val_rocauc {'rocauc': 0.5} test_acc 0.9683929005592026
