In [2]:
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.12.0+cu113.html
!pip install ogb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-1.12.0+cu113.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 42.0 MB/s 
[?25hCollecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_sparse-0.6.14-cp37-cp37m-linux_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 52.5 MB/s 
[?25hCollecting torch-cluster
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_cluster-1.6.0-cp37-cp37m-linux_x86_64.whl (2.4 MB)
[K     |████████████████████████████████| 2.4 MB 15.2 MB/s 
[?25hCollecting torch-spline-conv
  Downloading https://data.pyg.org/whl/torch-1.12.0%2Bcu113/torch_spline_conv-1.2.1-cp37-cp37m-linux_x86_64.whl (709 kB)
[K     |████████████████████████████████| 709 kB 64.5 MB/

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
print(torch.__version__)

from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, global_add_pool, global_mean_pool
from torch_geometric.loader import DataLoader
from torch_geometric.datasets import TUDataset
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from tqdm.notebook import tqdm
from ogb.graphproppred.mol_encoder import AtomEncoder

1.12.0+cu113


In [4]:
#Dataset
batch_size = 32
dataset = PygGraphPropPredDataset(name='ogbg-molhiv')
data_feature = dataset.num_features
data_class = dataset.num_classes
data_tasks = dataset.num_tasks

split_idx = dataset.get_idx_split()
train_loader = DataLoader(dataset[split_idx['train']], batch_size = batch_size, shuffle = True)
valid_loader = DataLoader(dataset[split_idx['valid']], batch_size = batch_size, shuffle = False)
test_loader = DataLoader(dataset[split_idx['test']], batch_size = batch_size, shuffle = False)

iter = int (len(train_loader))

"""
for data in (train_loader):
  print(data, iter)
  break
"""

Downloading http://snap.stanford.edu/ogb/data/graphproppred/csv_mol_download/hiv.zip


Downloaded 0.00 GB: 100%|██████████| 3/3 [00:00<00:00,  3.17it/s]
Processing...


Extracting dataset/hiv.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 41127/41127 [00:00<00:00, 100409.11it/s]


Converting graphs into PyG objects...


100%|██████████| 41127/41127 [00:00<00:00, 43520.08it/s]


Saving...
DataBatch(edge_index=[2, 1660], edge_attr=[1660, 3], x=[781, 9], y=[32, 1], num_nodes=781, batch=[781], ptr=[33]) 1029


Done!


In [37]:
args = {
      'hidden_dim': 256,
      'lr': 0.001,
      'epochs': 10,
  }
args

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [38]:
def eval_model(data_loader, model, evaluator):
  model.eval()
  y_true = []
  y_pred = []
  
  for step, data in enumerate(data_loader):
    data = data.to(device)
    with torch.no_grad():
      pred = model(data)
    y_true.append(data.y.view(pred.shape).detach().cpu())
    y_pred.append(pred.detach().cpu())
    
  y_true = torch.cat(y_true, dim = 0).numpy()
  y_pred = torch.cat(y_pred, dim = 0).numpy()

  
  input_dict = {"y_true": y_true, "y_pred": y_pred}
  acc = evaluator.eval(input_dict)

  return acc

In [39]:
class GCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
      super(GCN, self).__init__()

      self.conv1 = GCNConv(input_dim, hidden_dim)
      self.conv2 = GCNConv(hidden_dim , output_dim)
      self.bn1 = torch.nn.BatchNorm1d(hidden_dim)
      self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x, edge_index):
      x = F.relu(self.bn1(self.conv1(x , edge_index)))
      x = F.dropout(x , training=self.training)
      x = self.conv2(x , edge_index)
      x = self.softmax(x)

      return x

In [40]:
class GCN_Graph(torch.nn.Module):
    def __init__(self, hidden_dim, output_dim):
        super(GCN_Graph, self).__init__()

        self.node_encoder = AtomEncoder(hidden_dim) #節點嵌入模塊的節點屬性為類別型向量，嵌入得到節點特徵

        self.gnn_node = GCN(hidden_dim, hidden_dim, hidden_dim)
        self.pool = global_mean_pool
        self.linear = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, input):
        x, edge_index, batch = input.x, input.edge_index, input.batch
        embed = self.node_encoder(x)

        x = self.gnn_node(embed, edge_index)
        x = self.pool(x, batch)
        x = self.linear(x)

        return x

In [42]:
model = GCN_Graph(args['hidden_dim'],dataset.num_tasks).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = args['lr'])
lossfunc = torch.nn.BCEWithLogitsLoss()
evaluator = Evaluator(name='ogbg-molhiv')

for epoch in range(args['epochs']):
  train_loss = 0
  #training
  model.train()
  for step, data in enumerate((train_loader)):
    data.to(device) 
    out = model(data)
    loss = lossfunc(out[data.y], data.y[data.y].float())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

  #valid eval
  val_acc = eval_model(valid_loader, model, evaluator)

  #output
  print(f'Epoch: {epoch+1:02d}, '
        f'Loss: {train_loss/iter:.4f}, '
        ,f'val_acc: {val_acc}'
        )

Epoch: 01, Loss: 0.2870,  val_acc: {'rocauc': 0.5751090045071526}
Epoch: 02, Loss: 0.2648,  val_acc: {'rocauc': 0.4511806780325299}
Epoch: 03, Loss: 0.2363,  val_acc: {'rocauc': 0.6193415637860082}
Epoch: 04, Loss: 0.2264,  val_acc: {'rocauc': 0.6989026063100137}
Epoch: 05, Loss: 0.2849,  val_acc: {'rocauc': 0.6491647070350773}
Epoch: 06, Loss: 0.2493,  val_acc: {'rocauc': 0.704081851361944}
Epoch: 07, Loss: 0.2542,  val_acc: {'rocauc': 0.6325568293160886}
Epoch: 08, Loss: 0.2250,  val_acc: {'rocauc': 0.6645416911620615}
Epoch: 09, Loss: 0.2621,  val_acc: {'rocauc': 0.6820589604154419}
Epoch: 10, Loss: 0.2353,  val_acc: {'rocauc': 0.6692846119929453}


In [43]:
#testing eval 
test_acc = eval_model(test_loader, model, evaluator)
print(test_acc[dataset.eval_metric])

0.6576102280847448
