<a href="https://colab.research.google.com/github/chefPony/cs224w-ml-with-graphs/blob/main/Graph_Neural_Networks_Design_Space.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import os
print("PyTorch has version {}".format(torch.__version__))

PyTorch has version 1.10.0+cu111


In [2]:
# Install torch geometric
if 'IS_GRADESCOPE_ENV' not in os.environ:
  !pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
  !pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
  !pip install torch-geometric
  !pip install ogb

Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-1.10.0%2Bcu113/torch_scatter-2.0.9-cp37-cp37m-linux_x86_64.whl (7.9 MB)
[K     |████████████████████████████████| 7.9 MB 4.1 MB/s 
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.0.9
Looking in links: https://pytorch-geometric.com/whl/torch-1.10.0+cu111.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-1.10.0%2Bcu113/torch_sparse-0.6.12-cp37-cp37m-linux_x86_64.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 4.3 MB/s 
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.12
Collecting torch-geometric
  Downloading torch_geometric-2.0.2.tar.gz (325 kB)
[K     |████████████████████████████████| 325 kB 4.1 MB/s 
Collecting rdflib
  Downloading rdflib-6.0.2-py3-none-any.whl (407 kB)
[K     |████████████████████████████████| 407 kB 40.0

In [3]:
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset

if 'IS_GRADESCOPE_ENV' not in os.environ:
  dataset_name = 'ogbn-arxiv'
  # Load the dataset and transform it to sparse tensor
  dataset = PygNodePropPredDataset(name=dataset_name,
                                  transform=T.ToSparseTensor())
  print('The {} dataset has {} graph'.format(dataset_name, len(dataset)))

  # Extract the graph
  data = dataset[0]
  print(data)

Downloading http://snap.stanford.edu/ogb/data/nodeproppred/arxiv.zip


Downloaded 0.08 GB: 100%|██████████| 81/81 [00:26<00:00,  3.09it/s]


Extracting dataset/arxiv.zip


Processing...


Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:00<00:00, 5645.09it/s]


Converting graphs into PyG objects...


100%|██████████| 1/1 [00:00<00:00, 562.01it/s]

Saving...



Done!


The ogbn-arxiv dataset has 1 graph
Data(x=[169343, 128], node_year=[169343, 1], y=[169343, 1], adj_t=[169343, 169343, nnz=1166243])


In [4]:
import copy
import torch
import pandas as pd
import torch.nn.functional as F
print(torch.__version__)

# The PyG built-in GCNConv
from torch_geometric.nn import GCNConv, SAGEConv, GATConv

import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

1.10.0+cu111


In [5]:
if 'IS_GRADESCOPE_ENV' not in os.environ:
  dataset_name = 'ogbn-arxiv'
  dataset = PygNodePropPredDataset(name=dataset_name,
                                  transform=T.ToSparseTensor())
  data = dataset[0]

  # Make the adjacency matrix to symmetric
  data.adj_t = data.adj_t.to_symmetric()

  device = 'cuda' if torch.cuda.is_available() else 'cpu'

  # If you use GPU, the device should be cuda
  print('Device: {}'.format(device))

  data = data.to(device)
  split_idx = dataset.get_idx_split()
  train_idx = split_idx['train'].to(device)

Device: cuda


In [6]:
def train(model, data, loss_fn, optimizer, train_idx):
    
  model.train()
  loss = 0

  optimizer.zero_grad()
  y_pred = model(data.x, data.adj_t)
  loss = loss_fn(y_pred[train_idx], data.y[train_idx].squeeze())
  loss.backward()
  optimizer.step()
  return loss.item()

In [7]:
@torch.no_grad()
def test(model, data, split_idx, evaluator):

  model.eval()

  y_pred = model(data.x, data.adj_t)
  y_pred = y_pred.argmax(dim=-1, keepdim=True)

  train_acc = evaluator.eval({
      "y_pred": y_pred[split_idx["train"]],
      "y_true": data.y[split_idx["train"]]
  })["acc"]
  val_acc = evaluator.eval({
    "y_pred": y_pred[split_idx["valid"]],
    "y_true": data.y[split_idx["valid"]]
  })["acc"]
  test_acc = evaluator.eval({
    "y_pred": y_pred[split_idx["test"]],
    "y_true": data.y[split_idx["test"]]
  })["acc"]

  return train_acc, val_acc, test_acc

In [37]:
def train_loop(model, data, optimizer, loss_fn, epochs, split_idx, evaluator):

  best_val_acc = 0.

  model.reset_parameters()

  for e in range(1, epochs+1):
    loss = train(model, data, loss_fn, optimizer, split_idx["train"])
    train_acc, val_acc, test_acc = test(model, data, split_idx, evaluator)

    if val_acc>best_val_acc:
      best_model = copy.deepcopy(model)
      best_val_acc = val_acc

    print(f"Epoch: {e:02d}, Loss: {loss:.3f},  "
        f"Train: {100 * train_acc:.3f}%,  "   
        f"Valid: {100 * val_acc:.3f}%,  " 
        f"Test: {100 * test_acc:.3f}%,  "
        )
  return best_model, best_val_acc


In [9]:
def print_model_accuracy(best_model, data, split_idx, evaluator):
  best_result = test(best_model, data, split_idx, evaluator)
  train_acc, valid_acc, test_acc = best_result
  print(f'Best model: '
        f'Train: {100 * train_acc:.2f}%, '
        f'Valid: {100 * valid_acc:.2f}% '
        f'Test: {100 * test_acc:.2f}%')

In [10]:
dataset_evaluator = Evaluator(name=dataset_name)

In [11]:
args = {
    "input_dim": data.num_features,
    "hidden_dim": 256,
    "output_dim": dataset.num_classes,
    "num_layers": 3,
    "heads": 2,
    "dropout": 0.5,
    "epochs": 100,
    "lr": 0.01
}



## Vanilla GCN

In [None]:
class GCN(torch.nn.Module):

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout):
      
      super(GCN, self).__init__()

      self.dropout = dropout
      
      self.convs = torch.nn.ModuleList(
          [GCNConv(input_dim, hidden_dim)] +
          [GCNConv(hidden_dim, hidden_dim) for _ in range(num_layers - 2)]+
          [GCNConv(hidden_dim, output_dim)]
      )

      self.bns = torch.nn.ModuleList(
          [torch.nn.BatchNorm1d(hidden_dim) for i in range(num_layers - 1)]
      )

      self.softmax = torch.nn.LogSoftmax()

    def reset_parameters(self):
      for conv in self.convs:
          conv.reset_parameters()
      for bn in self.bns:
          bn.reset_parameters()

    def forward(self, x, adj_t):

      out = x
      for i in range(len(self.convs) - 1):
        out = self.convs[i](out, adj_t)
        out = self.bns[i](out)
        out = torch.nn.functional.relu(out)
        out = torch.nn.functional.dropout(out, p=self.dropout)
      
      out = self.convs[-1](out, adj_t)
      out = self.softmax(out)
      return out

In [None]:
gcn = GCN(input_dim=args["input_dim"], hidden_dim=args["hidden_dim"], 
          output_dim=args["output_dim"], num_layers=args["num_layers"], 
          dropout=args["dropout"])


optimizer = torch.optim.Adam(gcn.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

best_gcn, gcn_best_acc = train_loop(gcn.to(device), 
                                    data, optimizer, nll_loss, 
                                    args["epochs"], split_idx, 
                                    dataset_evaluator)



Epoch: 01, Loss: 4.186,  Train: 20.815%,  Valid: 26.602%,  Test: 24.025%,  
Epoch: 02, Loss: 2.463,  Train: 28.852%,  Valid: 30.353%,  Test: 34.627%,  
Epoch: 03, Loss: 2.006,  Train: 25.818%,  Valid: 24.299%,  Test: 29.329%,  
Epoch: 04, Loss: 1.804,  Train: 37.233%,  Valid: 39.548%,  Test: 44.767%,  
Epoch: 05, Loss: 1.688,  Train: 37.875%,  Valid: 39.840%,  Test: 38.985%,  
Epoch: 06, Loss: 1.603,  Train: 27.291%,  Valid: 28.279%,  Test: 28.025%,  
Epoch: 07, Loss: 1.539,  Train: 20.201%,  Valid: 16.001%,  Test: 15.162%,  
Epoch: 08, Loss: 1.484,  Train: 20.213%,  Valid: 14.923%,  Test: 13.164%,  
Epoch: 09, Loss: 1.428,  Train: 23.038%,  Valid: 19.957%,  Test: 21.795%,  
Epoch: 10, Loss: 1.379,  Train: 27.408%,  Valid: 25.390%,  Test: 28.914%,  
Epoch: 11, Loss: 1.353,  Train: 30.664%,  Valid: 28.722%,  Test: 33.148%,  
Epoch: 12, Loss: 1.325,  Train: 33.128%,  Valid: 32.387%,  Test: 36.850%,  
Epoch: 13, Loss: 1.297,  Train: 36.166%,  Valid: 36.263%,  Test: 41.364%,  
Epoch: 14, L

In [None]:
print_model_accuracy(best_gcn, data, split_idx, dataset_evaluator)

Best model: Train: 71.67%, Valid: 70.27% Test: 69.09%




## GraphSAGE

In [None]:
class SAGE(torch.nn.Module):

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout,
                 normalize=False):
      
      super(SAGE, self).__init__()

      self.dropout = dropout
      
      self.convs = torch.nn.ModuleList(
          [SAGEConv(input_dim, hidden_dim, normalize=normalize)] +
          [SAGEConv(hidden_dim, hidden_dim, normalize=normalize) 
           for _ in range(num_layers - 2)]+
          [SAGEConv(hidden_dim, output_dim, normalize=normalize)]
      )

      self.bns = torch.nn.ModuleList(
          [torch.nn.BatchNorm1d(hidden_dim) for i in range(num_layers - 1)]
      )

      self.softmax = torch.nn.LogSoftmax()

    def reset_parameters(self):
      for conv in self.convs:
          conv.reset_parameters()
      for bn in self.bns:
          bn.reset_parameters()

    def forward(self, x, adj_t):

      out = x
      for i in range(len(self.convs) - 1):
        out = self.convs[i](out, adj_t)
        out = self.bns[i](out)
        out = torch.nn.functional.relu(out)
        out = torch.nn.functional.dropout(out, p=self.dropout)
      
      out = self.convs[-1](out, adj_t)
      out = self.softmax(out)
      return out

### No normalization

In [None]:
sage = SAGE(input_dim=args["input_dim"], hidden_dim=args["hidden_dim"], 
            output_dim=args["output_dim"], num_layers=args["num_layers"], 
            dropout=args["dropout"])

optimizer = torch.optim.Adam(sage.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

best_sage, sage_best_acc = train_loop(sage.to(device), 
                                      data, optimizer, nll_loss, 
                                      args["epochs"], split_idx, 
                                      dataset_evaluator)



Epoch: 01, Loss: 3.943,  Train: 31.739%,  Valid: 33.303%,  Test: 32.663%,  
Epoch: 02, Loss: 2.582,  Train: 29.385%,  Valid: 33.649%,  Test: 32.541%,  
Epoch: 03, Loss: 2.220,  Train: 35.980%,  Valid: 41.589%,  Test: 44.392%,  
Epoch: 04, Loss: 2.047,  Train: 40.289%,  Valid: 44.793%,  Test: 46.409%,  
Epoch: 05, Loss: 1.825,  Train: 44.133%,  Valid: 46.693%,  Test: 46.773%,  
Epoch: 06, Loss: 1.688,  Train: 47.782%,  Valid: 50.203%,  Test: 50.022%,  
Epoch: 07, Loss: 1.617,  Train: 49.836%,  Valid: 53.646%,  Test: 54.021%,  
Epoch: 08, Loss: 1.539,  Train: 50.304%,  Valid: 54.066%,  Test: 54.256%,  
Epoch: 09, Loss: 1.485,  Train: 51.090%,  Valid: 54.562%,  Test: 54.404%,  
Epoch: 10, Loss: 1.448,  Train: 52.172%,  Valid: 54.938%,  Test: 54.799%,  
Epoch: 11, Loss: 1.416,  Train: 53.626%,  Valid: 56.767%,  Test: 56.745%,  
Epoch: 12, Loss: 1.384,  Train: 55.081%,  Valid: 58.190%,  Test: 58.361%,  
Epoch: 13, Loss: 1.361,  Train: 56.581%,  Valid: 59.713%,  Test: 59.982%,  
Epoch: 14, L

In [None]:
print_model_accuracy(best_sage, data, split_idx, dataset_evaluator)

Best model: Train: 71.93%, Valid: 70.39% Test: 69.37%




### With normalization

In [None]:
sagenorm = SAGE(input_dim=args["input_dim"], hidden_dim=args["hidden_dim"], 
            output_dim=args["output_dim"], num_layers=args["num_layers"], 
            dropout=args["dropout"], normalize=True)

optimizer = torch.optim.Adam(sagenorm.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

best_sagenorm, sagenorm_best_acc = train_loop(sagenorm.to(device), 
                                              data, optimizer, nll_loss, 
                                              args["epochs"], split_idx, 
                                              dataset_evaluator)



Epoch: 01, Loss: 3.688,  Train: 24.851%,  Valid: 22.222%,  Test: 25.470%,  
Epoch: 02, Loss: 3.432,  Train: 29.644%,  Valid: 32.340%,  Test: 36.185%,  
Epoch: 03, Loss: 3.390,  Train: 30.794%,  Valid: 36.109%,  Test: 39.921%,  
Epoch: 04, Loss: 3.353,  Train: 30.813%,  Valid: 37.807%,  Test: 41.164%,  
Epoch: 05, Loss: 3.315,  Train: 31.129%,  Valid: 38.867%,  Test: 41.777%,  
Epoch: 06, Loss: 3.282,  Train: 31.043%,  Valid: 38.521%,  Test: 40.856%,  
Epoch: 07, Loss: 3.259,  Train: 30.323%,  Valid: 37.273%,  Test: 38.382%,  
Epoch: 08, Loss: 3.243,  Train: 29.462%,  Valid: 35.719%,  Test: 36.385%,  
Epoch: 09, Loss: 3.230,  Train: 28.435%,  Valid: 34.800%,  Test: 35.018%,  
Epoch: 10, Loss: 3.216,  Train: 28.181%,  Valid: 35.233%,  Test: 35.586%,  
Epoch: 11, Loss: 3.201,  Train: 28.875%,  Valid: 36.273%,  Test: 37.269%,  
Epoch: 12, Loss: 3.187,  Train: 30.389%,  Valid: 39.001%,  Test: 40.759%,  
Epoch: 13, Loss: 3.174,  Train: 32.539%,  Valid: 41.303%,  Test: 43.954%,  
Epoch: 14, L

In [None]:
print_model_accuracy(best_sagenorm, data, split_idx, dataset_evaluator)

Best model: Train: 71.93%, Valid: 70.13% Test: 68.66%




# GAT
Scales badly need to decrease hidden_dim, otherwise cuda goes out of memory.
See this: https://github.com/pyg-team/pytorch_geometric/issues/527

In [None]:
class GAT(torch.nn.Module):

    def __init__(self, input_dim, hidden_dim, output_dim, heads,
                 num_layers, dropout):
      
      super(GAT, self).__init__()

      self.dropout = dropout
      
      self.convs = torch.nn.ModuleList(
          [GATConv(input_dim, hidden_dim, heads, concat=False)] +
          [GATConv(hidden_dim, hidden_dim, heads, concat=False) for _ in range(num_layers - 2)]+
          [GATConv(hidden_dim, output_dim, heads, concat=False)]
      )

      self.bns = torch.nn.ModuleList(
          [torch.nn.BatchNorm1d(hidden_dim) for i in range(num_layers - 1)]
      )

      self.softmax = torch.nn.LogSoftmax()

    def reset_parameters(self):
      for conv in self.convs:
          conv.reset_parameters()
      for bn in self.bns:
          bn.reset_parameters()

    def forward(self, x, adj_t):

      out = x
      for i in range(len(self.convs) - 1):
        out = self.convs[i](out, adj_t)
        out = self.bns[i](out)
        out = torch.nn.functional.relu(out)
        out = torch.nn.functional.dropout(out, p=self.dropout)
      
      out = self.convs[-1](out, adj_t)
      out = self.softmax(out)
      return out

In [None]:
gat = GAT(input_dim=args["input_dim"], hidden_dim=16,#args["hidden_dim"], 
          output_dim=args["output_dim"], heads=2,#args["heads"],
          num_layers=args["num_layers"], dropout=args["dropout"])

optimizer = torch.optim.Adam(gat.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

best_gat, gat_best_acc = train_loop(gat.to(device), 
                                      data, optimizer, nll_loss, 
                                      args["epochs"], split_idx, 
                                      dataset_evaluator)

RuntimeError: ignored

## Stacking multiple layers together


In [None]:
from torch_geometric.utils import to_networkx, add_self_loops
import networkx as nx

In [None]:
# Too slow
#net = nx.from_scipy_sparse_matrix(data.adj_t.to_scipy())
#d = nx.algorithms.distance_measures.diameter(net)
#print(f"Graph diameter is {d}")



In [None]:
todata.adj_t

SparseTensor(row=tensor([     0,      0,      0,  ..., 169341, 169342, 169342], device='cuda:0'),
             col=tensor([   411,    640,   1162,  ..., 163274,  27824, 158981], device='cuda:0'),
             size=(169343, 169343), nnz=2315598, density=0.01%)

## Stacking multiple linear layers
* Message passing composed of n linear layers instead of one
* Aggregation can be a mlp

**Question** in aggregation how do we mantain the order invariant property?

In [34]:
import itertools
import torch
from torch_geometric.nn import MessagePassing
from torch import Tensor
from torch_sparse import SparseTensor, matmul
from torch_geometric.utils import add_self_loops, degree, contains_self_loops
from torch_geometric.nn.inits import zeros


class DeepGCNConv(GCNConv):
    
    def __init__(self, in_channels: int, out_channels: int,
                 improved: bool = False, cached: bool = False,
                 add_self_loops: bool = True, normalize: bool = True,
                 bias: bool = True, num_msg_layers: int = 1, 
                 num_agg_layers: int = 0,
                 **kwargs):
      
        super().__init__(in_channels, out_channels, improved, cached, 
                         add_self_loops, normalize, bias, **kwargs)  

        self.num_msg_layers = num_msg_layers
        self.num_agg_layers = num_agg_layers

        if self.num_msg_layers > 1:
          mlp_start = [torch.nn.Linear(in_channels, out_channels), torch.nn.ReLU()]
          mlp = list(
              itertools.chain.from_iterable(
              [[torch.nn.Linear(out_channels, out_channels), torch.nn.ReLU()]
              for i in range(self.num_msg_layers - 2)])
              )
          mlp_end = [torch.nn.Linear(out_channels, out_channels)]
          self.lin = torch.nn.Sequential(*(mlp_start + mlp + mlp_end))

        if self.num_agg_layers == 1:
          self.lin_agg = torch.nn.Linear(out_channels, out_channels)
        elif self.num_agg_layers > 1:
          mlp_start = [torch.nn.Linear(out_channels, out_channels), torch.nn.ReLU()]
          mlp = list(
              itertools.chain.from_iterable(
              [[torch.nn.Linear(out_channels, out_channels), torch.nn.ReLU()]
              for i in range(self.num_agg_layers - 2)])
              )
          mlp_end = [torch.nn.Linear(out_channels, out_channels)]
          self.lin_agg = torch.nn.Sequential(*(mlp_start + mlp + mlp_end))

    def message_and_aggregate(self, adj_t: SparseTensor, x: Tensor) -> Tensor:
        if self.num_agg_layers > 0:
          out = matmul(adj_t, x, reduce=self.aggr)
          return self.lin_agg(out)
        else:
          return matmul(adj_t, x, reduce=self.aggr)

    def reset_parameters(self):
        #self.lin.reset_parameters()
        zeros(self.bias)
        self._cached_edge_index = None
        self._cached_adj_t = None

In [35]:
class DeepGCN(torch.nn.Module):

    def __init__(self, input_dim, hidden_dim, output_dim, 
                 num_msg_layers, num_agg_layers, num_layers, dropout):
      
      super(DeepGCN, self).__init__()

      self.dropout = dropout
      
      self.convs = torch.nn.ModuleList(
          [DeepGCNConv(input_dim, hidden_dim, num_msg_layers=num_msg_layers,
                       num_agg_layers=num_agg_layers)] +
          [DeepGCNConv(hidden_dim, hidden_dim, num_msg_layers=num_msg_layers, 
                       num_agg_layers=num_agg_layers) 
           for _ in range(num_layers - 2)]+
          [DeepGCNConv(hidden_dim, output_dim, num_msg_layers=num_msg_layers, 
                       num_agg_layers=num_agg_layers)]
      )

      self.bns = torch.nn.ModuleList(
          [torch.nn.BatchNorm1d(hidden_dim) for i in range(num_layers - 1)]
      )

      self.softmax = torch.nn.LogSoftmax()

    def reset_parameters(self):
      for conv in self.convs:
          conv.reset_parameters()
      for bn in self.bns:
          bn.reset_parameters()

    def forward(self, x, adj_t):

      out = x
      for i in range(len(self.convs) - 1):
        out = self.convs[i](out, adj_t)
        out = self.bns[i](out)
        out = torch.nn.functional.relu(out)
        out = torch.nn.functional.dropout(out, p=self.dropout)
      
      out = self.convs[-1](out, adj_t)
      out = self.softmax(out)
      return out

3 layer mlp in message passing

In [38]:
dgcn = DeepGCN(input_dim=args["input_dim"], hidden_dim=args["hidden_dim"],
               output_dim=args["output_dim"], num_layers=args["num_layers"],
               num_msg_layers=3, num_agg_layers=0, dropout=args["dropout"])

optimizer = torch.optim.Adam(dgcn.parameters(), lr=args["lr"])
nll_loss = torch.nn.NLLLoss()

best_dgcn, dgcn_best_acc = train_loop(dgcn.to(device), 
                                      data, optimizer, nll_loss, 
                                      args["epochs"], split_idx, 
                                      dataset_evaluator)



Epoch: 01, Loss: 3.650,  Train: 4.766%,  Valid: 9.561%,  Test: 9.528%,  
Epoch: 02, Loss: 3.378,  Train: 18.950%,  Valid: 25.746%,  Test: 23.449%,  
Epoch: 03, Loss: 2.975,  Train: 17.906%,  Valid: 7.628%,  Test: 5.862%,  
Epoch: 04, Loss: 3.050,  Train: 11.190%,  Valid: 23.021%,  Test: 21.581%,  
Epoch: 05, Loss: 2.736,  Train: 13.210%,  Valid: 23.779%,  Test: 22.025%,  
Epoch: 06, Loss: 2.637,  Train: 28.025%,  Valid: 31.451%,  Test: 34.304%,  
Epoch: 07, Loss: 2.607,  Train: 23.661%,  Valid: 15.232%,  Test: 14.219%,  
Epoch: 08, Loss: 2.517,  Train: 26.104%,  Valid: 25.789%,  Test: 25.252%,  
Epoch: 09, Loss: 2.478,  Train: 24.761%,  Valid: 23.511%,  Test: 22.624%,  
Epoch: 10, Loss: 2.426,  Train: 23.417%,  Valid: 17.464%,  Test: 16.824%,  
Epoch: 11, Loss: 2.352,  Train: 28.838%,  Valid: 21.561%,  Test: 20.497%,  
Epoch: 12, Loss: 2.285,  Train: 27.838%,  Valid: 19.021%,  Test: 17.701%,  
Epoch: 13, Loss: 2.228,  Train: 39.093%,  Valid: 42.528%,  Test: 39.631%,  
Epoch: 14, Loss: 

KeyboardInterrupt: ignored

In [None]:
print_model_accuracy(best_sage, data, split_idx, dataset_evaluator)