<a href="https://colab.research.google.com/github/chefPony/cs224w-ml-with-graphs/blob/main/Graph_Neural_Networks_Design_Space.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

import torch

# Show the installed PyTorch build; the wheel URLs in the install cell are
# pinned to a specific torch/CUDA combination.
print(f"PyTorch has version {torch.__version__}")

PyTorch has version 1.10.0+cu111


In [None]:
# Install PyTorch Geometric, its compiled companion packages, and OGB.
# The whole step is skipped when IS_GRADESCOPE_ENV is set in the environment.
if 'IS_GRADESCOPE_ENV' not in os.environ:
  # The -f index below points at wheels built for torch 1.10.0+cu111.
  # NOTE(review): confirm this matches the torch version printed above.
  !pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
  !pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
  # NOTE(review): torch-geometric and ogb are installed without a version pin.
  !pip install torch-geometric
  !pip install ogb

Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html


In [None]:
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset

if 'IS_GRADESCOPE_ENV' not in os.environ:
  dataset_name = 'ogbn-arxiv'
  # Load the dataset; the transform stores each graph's edges as a sparse
  # adjacency tensor (the `adj_t` attribute shown in the printed summary).
  dataset = PygNodePropPredDataset(
      name=dataset_name,
      transform=T.ToSparseTensor(),
  )
  print(f'The {dataset_name} dataset has {len(dataset)} graph')

  # Take the first graph of the dataset and print its summary.
  data = dataset[0]
  print(data)

The ogbn-arxiv dataset has 1 graph
Data(x=[169343, 128], node_year=[169343, 1], y=[169343, 1], adj_t=[169343, 169343, nnz=1166243])


In [None]:
import copy
import torch
import pandas as pd
import torch.nn.functional as F
print(torch.__version__)

# PyG built-in graph convolution layers used by the GCN, SAGE and GAT models
from torch_geometric.nn import GCNConv, SAGEConv, GATConv

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

1.10.0+cu111


In [None]:
if 'IS_GRADESCOPE_ENV' not in os.environ:
  dataset_name = 'ogbn-arxiv'
  dataset = PygNodePropPredDataset(
      name=dataset_name,
      transform=T.ToSparseTensor(),
  )

  # Prefer the GPU when one is available; the printed value should be
  # "cuda" on a GPU runtime.
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  print(f'Device: {device}')

  # Extract the graph and make its adjacency matrix symmetric before moving
  # everything onto the selected device.
  data = dataset[0]
  data.adj_t = data.adj_t.to_symmetric()
  data = data.to(device)

  # Node index splits provided by the dataset ('train' is used here; the
  # evaluation helper also reads 'valid' and 'test').
  split_idx = dataset.get_idx_split()
  train_idx = split_idx['train'].to(device)

Device: cuda


In [None]:
def train(model, data, loss_fn, optimizer, train_idx):
  """Run one full-batch optimisation step and return the training loss.

  Args:
    model: module called as ``model(data.x, data.adj_t)``; its output is
      indexed by node along the first dimension.
    data: object exposing ``x``, ``adj_t`` and ``y`` attributes.
    loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar
      tensor that supports ``backward()``.
    optimizer: optimizer holding the model's parameters.
    train_idx: indices of the nodes that contribute to the loss.

  Returns:
    The loss value of this step as a Python float.
  """
  model.train()

  optimizer.zero_grad()
  y_pred = model(data.x, data.adj_t)

  # Flatten the labels to 1-D explicitly. A bare ``squeeze()`` would also
  # drop the node dimension when ``train_idx`` selects a single node,
  # producing a 0-d target that no longer lines up with the predictions.
  target = data.y[train_idx].reshape(-1)

  loss = loss_fn(y_pred[train_idx], target)
  loss.backward()
  optimizer.step()
  return loss.item()

In [None]:
@torch.no_grad()
def test(model, data, split_idx, evaluator):

  model.eval()

  y_pred = model(data.x, data.adj_t)
  y_pred = y_pred.argmax(dim=-1, keepdim=True)

  train_acc = evaluator.eval({
      "y_pred": y_pred[split_idx["train"]],
      "y_true": data.y[split_idx["train"]]
  })["acc"]
  val_acc = evaluator.eval({
    "y_pred": y_pred[split_idx["valid"]],
    "y_true": data.y[split_idx["valid"]]
  })["acc"]
  test_acc = evaluator.eval({
    "y_pred": y_pred[split_idx["test"]],
    "y_true": data.y[split_idx["test"]]
  })["acc"]

  return train_acc, val_acc, test_acc

In [None]:
def train_loop(model, data, optimizer, loss_fn, epochs, split_idx, evaluator):
  """Train ``model`` for ``epochs`` epochs and keep the best validation snapshot.

  The model's parameters are reset first. Each epoch runs one ``train`` step
  on ``split_idx["train"]`` followed by a ``test`` evaluation, prints a
  progress line, and deep-copies the model whenever the validation accuracy
  improves on the best value seen so far.

  Args:
    model: module exposing ``reset_parameters()``, accepted by ``train``/``test``.
    data: graph data object forwarded to ``train`` and ``test``.
    optimizer: optimizer over ``model``'s parameters (its state is not reset here).
    loss_fn: loss callable forwarded to ``train``.
    epochs: number of epochs to run.
    split_idx: mapping with ``"train"``, ``"valid"`` and ``"test"`` node indices.
    evaluator: evaluator forwarded to ``test``.

  Returns:
    ``(best_model, best_val_acc)``: a deep copy of the model at its best
    validation accuracy, and that accuracy.
  """
  best_val_acc = 0.
  best_model = None

  model.reset_parameters()

  for e in range(1, epochs+1):
    loss = train(model, data, loss_fn, optimizer, split_idx["train"])
    train_acc, val_acc, test_acc = test(model, data, split_idx, evaluator)

    if val_acc > best_val_acc:
      # Snapshot the model being trained. Copying a notebook-level global
      # instead would return the wrong network for every other architecture.
      best_model = copy.deepcopy(model)
      best_val_acc = val_acc

    print(f"Epoch: {e:02d}, Loss: {loss:.3f},  "
        f"Train: {100 * train_acc:.3f}%,  "
        f"Valid: {100 * val_acc:.3f}%,  "
        f"Test: {100 * test_acc:.3f}%,  "
        )

  if best_model is None:
    # No epoch improved on the initial 0.0 (or epochs == 0): return the model
    # in its current state rather than failing on an unbound name.
    best_model = copy.deepcopy(model)

  return best_model, best_val_acc


In [None]:
def print_model_accuracy(best_model, data, split_idx, evaluator):
  """Evaluate ``best_model`` on all three splits and print the accuracies as percentages."""
  accuracies = test(best_model, data, split_idx, evaluator)
  # Scale the three fractions to percentages in one go.
  train_pct, valid_pct, test_pct = (100 * acc for acc in accuracies)
  summary = (f'Best model: '
             f'Train: {train_pct:.2f}%, '
             f'Valid: {valid_pct:.2f}% '
             f'Test: {test_pct:.2f}%')
  print(summary)

In [None]:
# OGB evaluator for the selected dataset; the evaluation helper reads the
# "acc" entry of its eval() result for every split.
dataset_evaluator = Evaluator(name=dataset_name)

In [None]:
# Hyper-parameters shared by the experiments in this notebook.
args = dict(
    input_dim=data.num_features,     # width of the input node features
    hidden_dim=256,                  # width of every hidden layer
    output_dim=dataset.num_classes,  # one output per class
    num_layers=3,                    # number of graph convolution layers
    heads=2,                         # attention heads, intended for the GAT model
    dropout=0.5,                     # dropout probability after each hidden layer
    epochs=100,                      # epochs per training run
    lr=0.01,                         # Adam learning rate
)



## Vanilla GCN

In [None]:
class GCN(torch.nn.Module):
    """Stack of ``GCNConv`` layers that outputs per-node log-probabilities.

    Every layer except the last is followed by batch normalisation, ReLU and
    dropout; the last layer is followed by a log-softmax over the class
    dimension.

    Args:
        input_dim: width of the input node features.
        hidden_dim: width of every hidden layer.
        output_dim: number of output classes.
        num_layers: total number of ``GCNConv`` layers (at least 2).
        dropout: dropout probability applied after each hidden layer.

    Raises:
        ValueError: if ``num_layers`` is smaller than 2.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout):
        super(GCN, self).__init__()

        # An input and an output layer are always created, so fewer than two
        # layers cannot be represented and would only fail later in forward().
        if num_layers < 2:
            raise ValueError(
                "num_layers must be at least 2, got {}".format(num_layers))

        self.dropout = dropout

        self.convs = torch.nn.ModuleList(
            [GCNConv(input_dim, hidden_dim)] +
            [GCNConv(hidden_dim, hidden_dim) for _ in range(num_layers - 2)] +
            [GCNConv(hidden_dim, output_dim)]
        )

        # One batch-norm per hidden layer; the output layer is not normalised.
        self.bns = torch.nn.ModuleList(
            [torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)]
        )

        # Explicit class dimension: the implicit-dim form is deprecated and
        # emits a warning on every call.
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def reset_parameters(self):
        """Re-initialise all convolution and batch-norm parameters."""
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adj_t):
        """Return log-probabilities for every node.

        Args:
            x: node feature matrix.
            adj_t: adjacency structure passed unchanged to every conv layer.
        """
        out = x
        for i in range(len(self.convs) - 1):
            out = self.convs[i](out, adj_t)
            out = self.bns[i](out)
            out = F.relu(out)
            # F.dropout defaults to training=True, so the module's mode must be
            # passed explicitly or dropout stays active during evaluation.
            out = F.dropout(out, p=self.dropout, training=self.training)

        out = self.convs[-1](out, adj_t)
        return self.softmax(out)

In [None]:
# Build the GCN from the shared hyper-parameters.
gcn = GCN(
    input_dim=args["input_dim"],
    hidden_dim=args["hidden_dim"],
    output_dim=args["output_dim"],
    num_layers=args["num_layers"],
    dropout=args["dropout"],
)

# The model emits log-probabilities, hence the negative log-likelihood loss.
optimizer = torch.optim.Adam(gcn.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

# Train on the selected device and keep the best snapshot and its accuracy.
best_gcn, gcn_best_acc = train_loop(
    model=gcn.to(device),
    data=data,
    optimizer=optimizer,
    loss_fn=nll_loss,
    epochs=args["epochs"],
    split_idx=split_idx,
    evaluator=dataset_evaluator,
)



Epoch: 01, Loss: 4.053,  Train: 27.935%,  Valid: 30.189%,  Test: 27.239%,  
Epoch: 02, Loss: 2.356,  Train: 22.725%,  Valid: 17.692%,  Test: 19.785%,  
Epoch: 03, Loss: 1.952,  Train: 23.919%,  Valid: 18.212%,  Test: 19.793%,  
Epoch: 04, Loss: 1.806,  Train: 35.512%,  Valid: 36.971%,  Test: 37.115%,  
Epoch: 05, Loss: 1.681,  Train: 41.395%,  Valid: 39.897%,  Test: 37.959%,  
Epoch: 06, Loss: 1.599,  Train: 41.083%,  Valid: 37.823%,  Test: 37.825%,  
Epoch: 07, Loss: 1.525,  Train: 40.967%,  Valid: 38.991%,  Test: 42.467%,  
Epoch: 08, Loss: 1.464,  Train: 40.069%,  Valid: 39.545%,  Test: 43.532%,  
Epoch: 09, Loss: 1.420,  Train: 39.554%,  Valid: 39.223%,  Test: 42.995%,  
Epoch: 10, Loss: 1.383,  Train: 39.566%,  Valid: 39.448%,  Test: 43.211%,  
Epoch: 11, Loss: 1.346,  Train: 41.034%,  Valid: 42.941%,  Test: 46.252%,  
Epoch: 12, Loss: 1.313,  Train: 40.034%,  Valid: 41.934%,  Test: 45.606%,  
Epoch: 13, Loss: 1.295,  Train: 39.517%,  Valid: 42.599%,  Test: 45.516%,  
Epoch: 14, L

In [None]:
# Report train/valid/test accuracy of the snapshot returned for the GCN run.
print_model_accuracy(
    best_model=best_gcn,
    data=data,
    split_idx=split_idx,
    evaluator=dataset_evaluator,
)

Best model: Train: 71.67%, Valid: 70.27% Test: 69.09%




## GraphSAGE

In [None]:
class SAGE(torch.nn.Module):
    """Stack of ``SAGEConv`` layers that outputs per-node log-probabilities.

    Every layer except the last is followed by batch normalisation, ReLU and
    dropout; the last layer is followed by a log-softmax over the class
    dimension.

    Args:
        input_dim: width of the input node features.
        hidden_dim: width of every hidden layer.
        output_dim: number of output classes.
        num_layers: total number of ``SAGEConv`` layers (at least 2).
        dropout: dropout probability applied after each hidden layer.
        normalize: forwarded unchanged to the ``normalize`` option of every
            ``SAGEConv`` layer, including the output layer.

    Raises:
        ValueError: if ``num_layers`` is smaller than 2.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout,
                 normalize=False):
        super(SAGE, self).__init__()

        # An input and an output layer are always created, so fewer than two
        # layers cannot be represented and would only fail later in forward().
        if num_layers < 2:
            raise ValueError(
                "num_layers must be at least 2, got {}".format(num_layers))

        self.dropout = dropout

        self.convs = torch.nn.ModuleList(
            [SAGEConv(input_dim, hidden_dim, normalize=normalize)] +
            [SAGEConv(hidden_dim, hidden_dim, normalize=normalize)
             for _ in range(num_layers - 2)] +
            [SAGEConv(hidden_dim, output_dim, normalize=normalize)]
        )

        # One batch-norm per hidden layer; the output layer is not normalised.
        self.bns = torch.nn.ModuleList(
            [torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)]
        )

        # Explicit class dimension: the implicit-dim form is deprecated and
        # emits a warning on every call.
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def reset_parameters(self):
        """Re-initialise all convolution and batch-norm parameters."""
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adj_t):
        """Return log-probabilities for every node.

        Args:
            x: node feature matrix.
            adj_t: adjacency structure passed unchanged to every conv layer.
        """
        out = x
        for i in range(len(self.convs) - 1):
            out = self.convs[i](out, adj_t)
            out = self.bns[i](out)
            out = F.relu(out)
            # F.dropout defaults to training=True, so the module's mode must be
            # passed explicitly or dropout stays active during evaluation.
            out = F.dropout(out, p=self.dropout, training=self.training)

        out = self.convs[-1](out, adj_t)
        return self.softmax(out)

### No normalization

In [None]:
# GraphSAGE with the default setting (normalize left at False).
sage = SAGE(
    input_dim=args["input_dim"],
    hidden_dim=args["hidden_dim"],
    output_dim=args["output_dim"],
    num_layers=args["num_layers"],
    dropout=args["dropout"],
)

# A fresh optimizer and loss for this run.
optimizer = torch.optim.Adam(sage.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

# Train on the selected device and keep the best snapshot and its accuracy.
best_sage, sage_best_acc = train_loop(
    model=sage.to(device),
    data=data,
    optimizer=optimizer,
    loss_fn=nll_loss,
    epochs=args["epochs"],
    split_idx=split_idx,
    evaluator=dataset_evaluator,
)



Epoch: 01, Loss: 3.943,  Train: 31.739%,  Valid: 33.303%,  Test: 32.663%,  
Epoch: 02, Loss: 2.582,  Train: 29.385%,  Valid: 33.649%,  Test: 32.541%,  
Epoch: 03, Loss: 2.220,  Train: 35.980%,  Valid: 41.589%,  Test: 44.392%,  
Epoch: 04, Loss: 2.047,  Train: 40.289%,  Valid: 44.793%,  Test: 46.409%,  
Epoch: 05, Loss: 1.825,  Train: 44.133%,  Valid: 46.693%,  Test: 46.773%,  
Epoch: 06, Loss: 1.688,  Train: 47.782%,  Valid: 50.203%,  Test: 50.022%,  
Epoch: 07, Loss: 1.617,  Train: 49.836%,  Valid: 53.646%,  Test: 54.021%,  
Epoch: 08, Loss: 1.539,  Train: 50.304%,  Valid: 54.066%,  Test: 54.256%,  
Epoch: 09, Loss: 1.485,  Train: 51.090%,  Valid: 54.562%,  Test: 54.404%,  
Epoch: 10, Loss: 1.448,  Train: 52.172%,  Valid: 54.938%,  Test: 54.799%,  
Epoch: 11, Loss: 1.416,  Train: 53.626%,  Valid: 56.767%,  Test: 56.745%,  
Epoch: 12, Loss: 1.384,  Train: 55.081%,  Valid: 58.190%,  Test: 58.361%,  
Epoch: 13, Loss: 1.361,  Train: 56.581%,  Valid: 59.713%,  Test: 59.982%,  
Epoch: 14, L

In [None]:
# Report train/valid/test accuracy of the snapshot returned for the GraphSAGE run.
print_model_accuracy(
    best_model=best_sage,
    data=data,
    split_idx=split_idx,
    evaluator=dataset_evaluator,
)

Best model: Train: 71.93%, Valid: 70.39% Test: 69.37%




### With normalization

In [None]:
# Same GraphSAGE configuration, but with normalize=True passed to every layer.
sagenorm = SAGE(
    input_dim=args["input_dim"],
    hidden_dim=args["hidden_dim"],
    output_dim=args["output_dim"],
    num_layers=args["num_layers"],
    dropout=args["dropout"],
    normalize=True,
)

# A fresh optimizer and loss for this run.
optimizer = torch.optim.Adam(sagenorm.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

# Train on the selected device and keep the best snapshot and its accuracy.
best_sagenorm, sagenorm_best_acc = train_loop(
    model=sagenorm.to(device),
    data=data,
    optimizer=optimizer,
    loss_fn=nll_loss,
    epochs=args["epochs"],
    split_idx=split_idx,
    evaluator=dataset_evaluator,
)



Epoch: 01, Loss: 3.688,  Train: 24.851%,  Valid: 22.222%,  Test: 25.470%,  
Epoch: 02, Loss: 3.432,  Train: 29.644%,  Valid: 32.340%,  Test: 36.185%,  
Epoch: 03, Loss: 3.390,  Train: 30.794%,  Valid: 36.109%,  Test: 39.921%,  
Epoch: 04, Loss: 3.353,  Train: 30.813%,  Valid: 37.807%,  Test: 41.164%,  
Epoch: 05, Loss: 3.315,  Train: 31.129%,  Valid: 38.867%,  Test: 41.777%,  
Epoch: 06, Loss: 3.282,  Train: 31.043%,  Valid: 38.521%,  Test: 40.856%,  
Epoch: 07, Loss: 3.259,  Train: 30.323%,  Valid: 37.273%,  Test: 38.382%,  
Epoch: 08, Loss: 3.243,  Train: 29.462%,  Valid: 35.719%,  Test: 36.385%,  
Epoch: 09, Loss: 3.230,  Train: 28.435%,  Valid: 34.800%,  Test: 35.018%,  
Epoch: 10, Loss: 3.216,  Train: 28.181%,  Valid: 35.233%,  Test: 35.586%,  
Epoch: 11, Loss: 3.201,  Train: 28.875%,  Valid: 36.273%,  Test: 37.269%,  
Epoch: 12, Loss: 3.187,  Train: 30.389%,  Valid: 39.001%,  Test: 40.759%,  
Epoch: 13, Loss: 3.174,  Train: 32.539%,  Valid: 41.303%,  Test: 43.954%,  
Epoch: 14, L

In [None]:
# Report train/valid/test accuracy of the snapshot returned for the normalised GraphSAGE run.
print_model_accuracy(
    best_model=best_sagenorm,
    data=data,
    split_idx=split_idx,
    evaluator=dataset_evaluator,
)

Best model: Train: 71.93%, Valid: 70.13% Test: 68.66%




# GAT
GAT scales badly in memory: `hidden_dim` has to be decreased, otherwise CUDA runs out of memory.
See this PyG issue for details: https://github.com/pyg-team/pytorch_geometric/issues/527

In [None]:
class GAT(torch.nn.Module):
    """Stack of ``GATConv`` layers that outputs per-node log-probabilities.

    Every layer except the last is followed by batch normalisation, ReLU and
    dropout; the last layer is followed by a log-softmax over the class
    dimension. All layers are built with ``concat=False``; per the PyG
    documentation the attention heads are then averaged rather than
    concatenated, which is why the batch-norm width is ``hidden_dim``.

    Args:
        input_dim: width of the input node features.
        hidden_dim: width of every hidden layer.
        output_dim: number of output classes.
        heads: number of attention heads in every ``GATConv`` layer.
        num_layers: total number of ``GATConv`` layers (at least 2).
        dropout: dropout probability applied after each hidden layer.

    Raises:
        ValueError: if ``num_layers`` is smaller than 2.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, heads,
                 num_layers, dropout):
        super(GAT, self).__init__()

        # An input and an output layer are always created, so fewer than two
        # layers cannot be represented and would only fail later in forward().
        if num_layers < 2:
            raise ValueError(
                "num_layers must be at least 2, got {}".format(num_layers))

        self.dropout = dropout

        self.convs = torch.nn.ModuleList(
            [GATConv(input_dim, hidden_dim, heads, concat=False)] +
            [GATConv(hidden_dim, hidden_dim, heads, concat=False)
             for _ in range(num_layers - 2)] +
            [GATConv(hidden_dim, output_dim, heads, concat=False)]
        )

        # One batch-norm per hidden layer; the output layer is not normalised.
        self.bns = torch.nn.ModuleList(
            [torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)]
        )

        # Explicit class dimension: the implicit-dim form is deprecated and
        # emits a warning on every call.
        self.softmax = torch.nn.LogSoftmax(dim=-1)

    def reset_parameters(self):
        """Re-initialise all convolution and batch-norm parameters."""
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adj_t):
        """Return log-probabilities for every node.

        Args:
            x: node feature matrix.
            adj_t: adjacency structure passed unchanged to every conv layer.
        """
        out = x
        for i in range(len(self.convs) - 1):
            out = self.convs[i](out, adj_t)
            out = self.bns[i](out)
            out = F.relu(out)
            # F.dropout defaults to training=True, so the module's mode must be
            # passed explicitly or dropout stays active during evaluation.
            out = F.dropout(out, p=self.dropout, training=self.training)

        out = self.convs[-1](out, adj_t)
        return self.softmax(out)

In [None]:
# hidden_dim and heads are set explicitly here instead of being read from
# args["hidden_dim"] / args["heads"]: the hidden width is cut to 16 because
# GAT otherwise runs out of CUDA memory on this graph.
gat = GAT(
    input_dim=args["input_dim"],
    hidden_dim=16,
    output_dim=args["output_dim"],
    heads=2,
    num_layers=args["num_layers"],
    dropout=args["dropout"],
)

# A fresh optimizer and loss for this run.
optimizer = torch.optim.Adam(gat.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

# Train on the selected device and keep the best snapshot and its accuracy.
best_gat, gat_best_acc = train_loop(
    model=gat.to(device),
    data=data,
    optimizer=optimizer,
    loss_fn=nll_loss,
    epochs=args["epochs"],
    split_idx=split_idx,
    evaluator=dataset_evaluator,
)

RuntimeError: ignored