In [None]:
import torch
import os
# Show which PyTorch build this kernel is running; the install cell below
# derives the wheel index URL from the same version string.
version_banner = "Pytorch has version {}".format(torch.__version__)
print(version_banner)

In [None]:
torch_version = str(torch.__version__)
scatter_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
sparse_src = f"https://pytorch-geometric.com/whl/torch-{torch_version}.html"
!pip install torch-scatter -f $scatter_src
!pip install torch-sparse -f $sparse_src
!pip install torch-geometric
!pip install ogb

In [None]:
from torch_geometric.datasets import TUDataset

# Storage directory and dataset name handed to TUDataset.
root, name = './enzymes', 'ENZYMES'

pyg_dataset = TUDataset(root=root, name=name)
print(pyg_dataset)

In [None]:
def get_num_classes(pyg_dataset):
  """Return the number of classes reported by the dataset.

  Args:
    pyg_dataset: Dataset object exposing a `num_classes` attribute.

  Returns:
    The value of `pyg_dataset.num_classes`.
  """
  num_classes = pyg_dataset.num_classes

  return num_classes

def get_num_features(pyg_dataset):
  """Return the number of node features reported by the dataset.

  Args:
    pyg_dataset: Dataset object exposing a `num_features` attribute.

  Returns:
    The value of `pyg_dataset.num_features`.
  """
  num_features = pyg_dataset.num_features

  return num_features

# Query both dataset-level statistics, then report them.
num_classes, num_features = (get_num_classes(pyg_dataset),
                             get_num_features(pyg_dataset))
classes_line = "{} dataset has {} classes".format(name, num_classes)
features_line = "{} dataset has {} features".format(name, num_features)
print(classes_line)
print(features_line)

In [None]:
def get_graph_class(pyg_dataset, idx):
  """Return the label of the graph stored at position `idx`.

  Args:
    pyg_dataset: Indexable collection of graphs, each exposing a `y` attribute.
    idx: Position of the graph inside `pyg_dataset`.

  Returns:
    The graph's label converted to a plain Python int.
  """
  # int() unwraps a single-element tensor; it expects `y` to hold exactly
  # one value.
  label = int(pyg_dataset[idx].y)

  return label

# Keep the first graph around: a later cell reads its edge_index.
graph_0 = pyg_dataset[0]
print(graph_0)

# Look up the label of one arbitrary graph.
idx = 100
label = get_graph_class(pyg_dataset, idx)
label_report = 'Graph with index {} has label {}'.format(idx, label)
print(label_report)

In [None]:
# Print the transposed edge_index of the first graph
# (presumably one edge per row after the transpose - confirm shape is [2, E]).
edge_index = graph_0.edge_index
edge_rows = edge_index.t()
print(edge_rows)

In [None]:
def get_graph_num_edges(pyg_dataset, idx):
  """Return the number of distinct undirected edges of graph `idx`.

  The count is taken from the graph's `edge_index`, read as one column per
  edge. An edge listed in both directions, (u, v) and (v, u), is counted
  once, as are exact duplicates.

  Args:
    pyg_dataset: Indexable collection of graphs, each exposing `edge_index`.
    idx: Position of the graph inside `pyg_dataset`.

  Returns:
    Number of distinct unordered node pairs as a Python int (0 when the graph
    has no edges).
  """
  # assumes edge_index has shape [2, num_entries] - TODO confirm for other datasets
  edge_pairs = pyg_dataset[idx].edge_index.t().tolist()

  # Canonicalise each pair as (smaller, larger) so both directions collapse.
  unique_edges = {(min(u, v), max(u, v)) for u, v in edge_pairs}
  num_edges = len(unique_edges)

  return num_edges

# Count the edges of one arbitrary graph and report the result.
idx = 200
num_edges = get_graph_num_edges(pyg_dataset, idx)
edge_report = 'Graph with index {} has {} edges'.format(idx, num_edges)
print(edge_report)

In [None]:
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset

dataset_name = 'ogbn-arxiv'

# ToSparseTensor makes each graph carry its adjacency as `adj_t`, which is
# read a few lines below.
dataset = PygNodePropPredDataset(name=dataset_name,
                                 transform=T.ToSparseTensor())
print('The {} dataset has {} graph'.format(dataset_name, len(dataset)))

data = dataset[0]
# Rebuild a dense [2, num_edges] edge_index from the sparse adjacency so the
# model can be called with (x, edge_index). `adj_t` is transposed back first.
# (The first unpacked name was previously misspelled, leaving `row` undefined.)
row, col, edge_attr = data.adj_t.t().coo()
data.edge_index = torch.stack([row, col], dim=0)
print(data)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

print('Device : {}'.format(device))
data = data.to(device)

# Only the training indices are moved to the device here; the full split
# dictionary is kept as returned for evaluation.
split_idx = dataset.get_idx_split()
train_idx = split_idx['train'].to(device)

In [None]:
import torch
import torch.nn as nn
# `F` is used further down for the loss function (F.cross_entropy).
import torch.nn.functional as F
print(torch.__version__)

from torch_geometric.nn import GCNConv

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

import pandas as pd
import copy

In [None]:
class GNN(torch.nn.Module):
  """Stack of GCNConv layers producing one output vector per node.

  Every layer except the last is followed by batch normalisation, ReLU and
  dropout. The output of the last layer is returned as-is (raw scores, no
  softmax), which is the input form expected by a cross-entropy loss.

  Args:
    input_dim: Size of each input node feature vector.
    hidden_dim: Width of every intermediate layer.
    output_dim: Size of each output node vector.
    num_layers: Number of GCNConv layers; must be at least 1.
    dropout: Dropout probability applied after each hidden layer.

  Raises:
    ValueError: If `num_layers` is smaller than 1.
  """

  def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout):
    super().__init__()

    if num_layers < 1:
      raise ValueError("num_layers must be at least 1, got {}".format(num_layers))

    # Layer widths: input -> hidden x (num_layers - 1) -> output.
    dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]

    self.convs = torch.nn.ModuleList(
        [GCNConv(dims[i], dims[i + 1]) for i in range(num_layers)])

    # One batch-norm per hidden layer; the output layer is not normalised.
    self.bns = torch.nn.ModuleList(
        [torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)])

    self.dropout = dropout

  def reset_parameters(self):
    """Re-initialise the parameters of every conv and batch-norm layer."""
    for conv in self.convs:
      conv.reset_parameters()
    for bn in self.bns:
      bn.reset_parameters()

  def forward(self, x, edge_index):
    """Run the layer stack.

    Args:
      x: Node feature matrix (assumed [num_nodes, input_dim] - TODO confirm).
      edge_index: Graph connectivity passed unchanged to every GCNConv.

    Returns:
      The output of the last GCNConv layer, one row per node.
    """
    # self.bns has one entry fewer than self.convs, so zip stops before the
    # final conv layer.
    for conv, bn in zip(self.convs[:-1], self.bns):
      x = conv(x, edge_index)
      x = bn(x)
      x = torch.nn.functional.relu(x)
      # Dropout is only active while the module is in training mode.
      x = torch.nn.functional.dropout(x, p=self.dropout, training=self.training)

    out = self.convs[-1](x, edge_index)

    return out

In [None]:
def train(model, data, train_idx, optimizer, loss_fn):
  """Run one full-batch optimisation step on the training nodes.

  Args:
    model: Module called as `model(data.x, data.edge_index)`.
    data: Object exposing `x`, `edge_index` and `y`.
    train_idx: Index tensor selecting the rows used for the loss.
    optimizer: Optimizer holding the model's parameters.
    loss_fn: Callable `(predictions, targets) -> scalar tensor`.

  Returns:
    The loss of this step as a Python float.
  """
  model.train()

  # Clear gradients left over from the previous step.
  optimizer.zero_grad()

  out = model(data.x, data.edge_index)

  # Labels are flattened to 1-D for the loss
  # (assumes data.y holds one label per node - TODO confirm).
  target = data.y[train_idx].reshape(-1)
  loss = loss_fn(out[train_idx], target)

  loss.backward()
  optimizer.step()

  return loss.item()

In [None]:
@torch.no_grad()
def test(model, data, split_idx, evaluator):

  # ...

  model.eval()

  out = model(data.X, data.edge_index)

  y_pred = out.argmax(dim=-1, keepdim=True)

  train_acc = evaluator.eval({
      'y_true': data.y[split_idx['train']],
      'y_pred': y_pred[split_idx['train']],
      })['acc']
      valid_acc = evaluator.eval({
          'y_true': data.y[split_idx['valid']],
      })['acc']

      return train_acc, valid_acc

In [None]:
# Hyper-parameters for the node-level model and its training run.
args = dict(
    device=device,
    num_layers=3,
    hidden_dim=256,
    dropout=0.5,
    lr=0.01,
    epochs=100,
)

In [None]:
# Build the node-level model from the hyper-parameters and move it to the
# selected device; the evaluator is the one matching ogbn-arxiv.
model = GNN(
    input_dim=data.num_features,
    hidden_dim=args['hidden_dim'],
    output_dim=dataset.num_classes,
    num_layers=args['num_layers'],
    dropout=args['dropout'],
)
model = model.to(device)
evaluator = Evaluator(name='ogbn-arxiv')

In [None]:
# Re-initialise the weights so that re-running this cell starts from scratch.
model.reset_parameters()

loss_fn = F.cross_entropy
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])

# Best validation accuracy seen so far and a snapshot of the model that reached it.
best_valid_acc = 0
best_model = None

print('Evaluating a randomly initialized model')
result = test(model, data, split_idx, evaluator)
train_acc, valid_acc = result
print(f'Train: {100 * train_acc:.2f}%, '
      f'Valid: {100 * valid_acc:.2f}%')

num_epochs = args["epochs"]
for epoch in range(1, num_epochs + 1):
  loss = train(model, data, train_idx, optimizer, loss_fn)
  result = test(model, data, split_idx, evaluator)
  train_acc, valid_acc = result
  improved = valid_acc > best_valid_acc
  if improved:
    # Deep-copy so later epochs do not modify the saved snapshot.
    best_valid_acc, best_model = valid_acc, copy.deepcopy(model)
  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * train_acc:.2f}%, '
        f'Valid: {100 * valid_acc:.2f}%')

In [None]:
# Re-evaluate the best snapshot kept by the training loop.
best_result = test(best_model, data, split_idx, evaluator)
train_acc, valid_acc = best_result

print(f'Best model: '
      f'Train: {100 * train_acc:.2f}%, '
      f'Valid: {100 * valid_acc:.2f}%')

best_model.eval()
# Inference only: no_grad avoids recording the autograd graph for the
# full-graph forward pass.
with torch.no_grad():
  out = best_model(data.x, data.edge_index)
y_pred = out.argmax(dim=-1, keepdim=True)

print("Saving Model Predictions")

# Only the test-split predictions are written, flattened to one column.
preds = {}
preds['y_pred'] = y_pred[split_idx['test']].view(-1).cpu().detach().numpy()

df = pd.DataFrame(data=preds)
df.to_csv('ogbn-arxiv_node.csv', sep=',', index=False)

In [None]:
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.data import DataLoader
from tqdm.notebook import tqdm

# From here on `dataset`, `device` and `split_idx` refer to the graph-level task.
dataset = PygGraphPropPredDataset(name='ogbg-molhiv')

if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
print('Device: {}'.format(device))

split_idx = dataset.get_idx_split()

task_line = 'Task type: {}'.format(dataset.task_type)
print(task_line)

In [None]:
# Settings shared by both loaders; only the training loader shuffles.
loader_options = dict(batch_size=32, num_workers=0)
train_loader = DataLoader(dataset[split_idx["train"]], shuffle=True, **loader_options)
valid_loader = DataLoader(dataset[split_idx["valid"]], shuffle=False, **loader_options)

In [None]:
from ogb.graphproppred.mol_encoder import AtomEncoder
from torch_geometric.nn import global_mean_pool

class GNN_Graph(torch.nn.Module):
  """Graph-level predictor: encode atoms, run a GNN, pool, then project.

  Args:
    hidden_dim: Width of the atom embeddings and of every GNN layer.
    output_dim: Number of values predicted per graph.
    num_layers: Number of layers of the inner node-level GNN.
    dropout: Dropout probability used by the inner GNN.
  """

  def __init__(self, hidden_dim, output_dim, num_layers, dropout):
    super().__init__()

    self.node_encoder = AtomEncoder(hidden_dim)

    # The inner GNN keeps width hidden_dim end to end, so its per-node output
    # can be pooled and fed to the linear head below.
    self.gnn_node = GNN(hidden_dim, hidden_dim, hidden_dim, num_layers, dropout)

    # Pooling function called as pool(node_embeddings, batch) in forward.
    self.pool = global_mean_pool

    self.linear = torch.nn.Linear(hidden_dim, output_dim)

  def reset_parameters(self):
    """Re-initialise the inner GNN and the linear head.

    The atom encoder is not reset here.
    """
    self.gnn_node.reset_parameters()
    self.linear.reset_parameters()

  def forward(self, batched_data):
    """Predict one output vector per graph in the mini-batch.

    Args:
      batched_data: Batch object exposing `x`, `edge_index` and `batch`.

    Returns:
      Output of the linear head applied to the pooled node embeddings
      (presumably one row per graph - confirm against global_mean_pool).
    """
    x, edge_index, batch = batched_data.x, batched_data.edge_index, batched_data.batch
    embed = self.node_encoder(x)

    node_embed = self.gnn_node(embed, edge_index)
    graph_embed = self.pool(node_embed, batch)

    out = self.linear(graph_embed)

    return out

In [None]:
def train(model, device, data_loader, optimizer, loss_fn):
  """Train the model for one pass over `data_loader`.

  Args:
    model: Module called as `model(batch)`.
    device: Device every batch is moved to before the forward pass.
    data_loader: Iterable of mini-batches exposing `x`, `batch`, `y` and `to`.
    optimizer: Optimizer holding the model's parameters.
    loss_fn: Callable `(predictions, targets) -> scalar tensor`.

  Returns:
    Loss of the last batch that was trained on, as a Python float; 0.0 when
    every batch was skipped.
  """
  model.train()
  # Kept as a float so the return value is valid even if no batch is trained on.
  last_loss = 0.0

  for step, batch in enumerate(tqdm(data_loader, desc="Iteration")):
    batch = batch.to(device)

    # Skip batches that hold a single node or whose last node belongs to graph 0.
    if batch.x.shape[0] == 1 or batch.batch[-1] == 0:
      continue

    # NaN != NaN, so this mask is False exactly where the label is NaN.
    is_labeled = batch.y == batch.y

    optimizer.zero_grad()
    out = model(batch)
    # Only labelled entries contribute; targets are cast to float for the loss.
    loss = loss_fn(out[is_labeled], batch.y[is_labeled].float())

    loss.backward()
    optimizer.step()
    last_loss = loss.item()

  return last_loss

In [None]:
def eval(model, device, loader, evaluator):
  """Score the model on every batch of `loader`.

  Args:
    model: Module called as `model(batch)`.
    device: Device every batch is moved to before the forward pass.
    loader: Iterable of mini-batches exposing `x`, `y` and `to`.
    evaluator: Object whose `eval` takes a dict with 'y_true' and 'y_pred'.

  Returns:
    Whatever `evaluator.eval` returns for the concatenated labels and
    predictions.
  """
  model.eval()
  y_true = []
  y_pred = []

  for step, batch in enumerate(tqdm(loader, desc="Iteration")):
    batch = batch.to(device)

    # Batches holding a single node are skipped.
    if batch.x.shape[0] == 1:
      continue

    with torch.no_grad():
      pred = model(batch)

    # Labels are reshaped to match the predictions before being collected.
    y_true.append(batch.y.view(pred.shape).detach().cpu())
    y_pred.append(pred.detach().cpu())

  y_true = torch.cat(y_true, dim = 0).numpy()
  y_pred = torch.cat(y_pred, dim = 0).numpy()

  input_dict = {"y_true": y_true, "y_pred": y_pred}

  return evaluator.eval(input_dict)

In [None]:
# Hyper-parameters for the graph-level model and its training run.
args = dict(
    device=device,
    num_layers=6,
    hidden_dim=256,
    dropout=0.2,
    lr=0.0001,
    epochs=30,
)

In [None]:
# Build the graph-level model with one output per dataset task and move it to
# the selected device; the evaluator is the one matching ogbg-molhiv.
model = GNN_Graph(
    hidden_dim=args['hidden_dim'],
    output_dim=dataset.num_tasks,
    num_layers=args['num_layers'],
    dropout=args['dropout'],
)
model = model.to(device)
evaluator = Evaluator(name='ogbg-molhiv')

In [None]:
# Re-initialise the weights so that re-running this cell starts from scratch.
model.reset_parameters()

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
# The model emits one logit per task (dataset.num_tasks outputs). A softmax
# cross-entropy normalises across those outputs, which is degenerate when
# there is only one, so an independent per-logit binary loss is used instead.
# NOTE(review): presumes the printed task type is binary classification.
loss_fn = torch.nn.BCEWithLogitsLoss()

# Best validation score seen so far and a snapshot of the model that reached it.
best_model = None
best_valid_acc = 0

print('Evaluating a randomly initialized model')
train_result = eval(model, device, train_loader, evaluator)
val_result = eval(model, device, valid_loader, evaluator)
# Read the scores from the results just computed; without this the print
# below would show stale values left in the namespace by an earlier cell.
train_acc, valid_acc = train_result[dataset.eval_metric], val_result[dataset.eval_metric]
print(f'Train: {100 * train_acc:.2f}%, '
      f'Valid: {100 * valid_acc:.2f}%')

for epoch in range(1, 1 + args["epochs"]):
  print('Training...')
  loss = train(model, device, train_loader, optimizer, loss_fn)

  print('Evaluating...')
  train_result = eval(model, device, train_loader, evaluator)
  val_result = eval(model, device, valid_loader, evaluator)

  train_acc, valid_acc = train_result[dataset.eval_metric], val_result[dataset.eval_metric]
  if valid_acc > best_valid_acc:
    best_valid_acc = valid_acc
    # Deep-copy so later epochs do not modify the saved snapshot.
    best_model = copy.deepcopy(model)
  # ':02d' zero-pads the epoch; a precision ('0.2d') is rejected for integers.
  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * train_acc:.2f}%, '
        f'Valid: {100 * valid_acc:.2f}%')

In [None]:
# Re-evaluate the best snapshot kept by the training loop.
train_auroc = eval(best_model, device, train_loader, evaluator)[dataset.eval_metric]
valid_auroc = eval(best_model, device, valid_loader, evaluator)[dataset.eval_metric]

print(f'Best model: '
      f'Train: {100 * train_auroc:.2f}%, '
      f'Valid: {100 * valid_auroc:.2f}%')

test_loader = DataLoader(dataset[split_idx["test"]], batch_size=32, shuffle=False, num_workers=0)
best_model.eval()
with torch.no_grad():
  y_pred = []
  for step, batch in enumerate(tqdm(test_loader, desc="Iteration")):
    batch = batch.to(device)

    # Batches holding a single node are skipped, as in eval().
    if batch.x.shape[0] == 1:
      continue

    # Predict with the best snapshot, not with the last-epoch `model`.
    pred = best_model(batch)
    y_pred.append(pred.detach().cpu())

y_pred = torch.cat(y_pred, dim=0).numpy()

print("Saving Model Predictions")

# Start from a fresh dict so nothing from an earlier cell ends up in the CSV.
preds = {}
preds['y_pred'] = y_pred.reshape(-1)

df = pd.DataFrame(data=preds)

df.to_csv('ogbg-molhiv_graph.csv', sep=',', index=False)