In [None]:
# Environment setup: install the PyTorch Geometric stack and DeepSNAP via pip.
# The torch-scatter / torch-sparse commands point pip at the wheel index for
# torch 1.13.1 + CUDA 11.6.
# NOTE(review): the recorded output below shows torch-scatter being downloaded
# as a .tar.gz and built from source, which suggests no matching prebuilt wheel
# was found for this runtime - confirm the index URL matches the installed
# torch/CUDA version.
# NOTE(review): `os` does not appear to be used in the visible cells - confirm
# before removing.
import os
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
!pip install torch-geometric
!pip install -q git+https://github.com/snap-stanford/deepsnap.git

Looking in links: https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
Collecting torch-scatter
  Downloading torch_scatter-2.1.2.tar.gz (108 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.0/108.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torch-scatter
  Building wheel for torch-scatter (setup.py) ... [?25l[?25hdone
  Created wheel for torch-scatter: filename=torch_scatter-2.1.2-cp310-cp310-linux_x86_64.whl size=507268 sha256=c97fe41fd54240c4e29c99b4125286b0d83a1c8a77848ae2e8a11ceccbf227f1
  Stored in directory: /root/.cache/pip/wheels/92/f1/2b/3b46d54b134259f58c8363568569053248040859b1a145b3ce
Successfully built torch-scatter
Installing collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2
Looking in links: https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
Collecting torch-sparse
  Downloading torch_sparse-0.6.18.tar.gz (

In [None]:
# Sanity check: import torch_geometric and display the installed version
# (the bare last expression is rendered as the cell output).
import torch_geometric
torch_geometric.__version__

In [None]:
from typing import Union, Tuple, Optional

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_scatter
from torch import Tensor
from torch.nn import Linear, Parameter
from torch_scatter import scatter
from torch_sparse import SparseTensor, set_diag

import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.typing import (Adj, Size, NoneType,
                                    OptPairTensor, OptTensor)
from torch_geometric.utils import remove_self_loops, add_self_loops, softmax

In [None]:
#defining the GNN stack
class GNNStack(nn.Module):
  """A stack of graph layers followed by a small MLP head.

  Args:
    input_dim: number of input features per node.
    hidden_dim: per-head output width of every graph layer.
    output_dim: width of the final linear layer (number of classes).
    args: object exposing ``model_type``, ``num_layers``, ``heads`` and
      ``dropout`` as attributes.
    emb: when True, ``forward`` returns the head's raw output instead of
      log-probabilities.

  ``args.model_type`` is matched case-insensitively: "gat", "gatd" and
  "graphsaged" select the corresponding layer classes; any other value falls
  back to ``GraphSage``.

  Raises:
    ValueError: if ``args.num_layers`` is smaller than 1.
  """

  def __init__(self, input_dim, hidden_dim, output_dim, args, emb=False):
    super().__init__()
    if args.num_layers < 1:
      raise ValueError(f"num_layers must be >= 1, got {args.num_layers}")

    # Resolve the layer class with an if/elif chain so that only the class
    # actually requested has to be defined in the notebook namespace.
    model_type = args.model_type.lower()
    if model_type == "gat":
      gnn_model = GAT
    elif model_type == "gatd":
      gnn_model = GATD
    elif model_type == "graphsaged":
      gnn_model = GraphSageD
    else:
      gnn_model = GraphSage

    # Attention layers emit hidden_dim * heads features; the GraphSage variants
    # emit hidden_dim and do not take a `heads` argument.
    uses_heads = model_type in ("gat", "gatd")
    layer_kwargs = {"heads": args.heads} if uses_heads else {}
    layer_width = hidden_dim * args.heads if uses_heads else hidden_dim

    self.model = nn.ModuleList([gnn_model(input_dim, hidden_dim, **layer_kwargs)])
    for _ in range(args.num_layers - 1):
      self.model.append(gnn_model(layer_width, hidden_dim, **layer_kwargs))

    # post-message-passing head
    self.post_mp = nn.Sequential(
        nn.Linear(layer_width, hidden_dim),
        nn.Dropout(args.dropout),
        nn.Linear(hidden_dim, output_dim)
    )

    self.num_layers = args.num_layers
    self.dropout = args.dropout
    self.emb = emb

  def forward(self, data):
    """Run every graph layer (ReLU + dropout after each) and then the head.

    ``data`` must expose ``x`` and ``edge_index``. Returns the head output
    when ``self.emb`` is set, otherwise ``log_softmax`` over dim 1.
    """
    x, edge_index = data.x, data.edge_index

    for i in range(self.num_layers):
      x = self.model[i](x, edge_index)
      x = F.relu(x)
      x = F.dropout(x, p=self.dropout, training=self.training)

    x = self.post_mp(x)

    if self.emb:
      return x

    return F.log_softmax(x, dim=1)

  def loss(self, pred, label):
    """Negative log-likelihood loss; ``pred`` is expected to be log-probabilities."""
    return F.nll_loss(pred, label)



In [None]:
# Implementing the GraphSage layer. It inherits from MessagePassing.
class GraphSage(MessagePassing):
  """GraphSAGE-style layer built on ``MessagePassing``.

  The output is ``lin_l(x) + lin_r(mean of neighbour features)``, optionally
  passed through ``F.normalize``.

  Args:
    in_channels: width of the incoming node features.
    out_channels: width of the produced node features.
    normalize: when True, ``F.normalize`` is applied to the output.
    bias: whether the two linear maps carry a bias term.
    **kwargs: accepted but not forwarded to ``MessagePassing``.
  """

  def __init__(self, in_channels, out_channels, normalize=True, bias=False, **kwargs):
    super().__init__()

    self.in_channels, self.out_channels = in_channels, out_channels
    self.normalize = normalize

    # lin_l transforms the node's own features, lin_r the aggregated message.
    self.lin_l = Linear(in_channels, out_channels, bias=bias)
    self.lin_r = Linear(in_channels, out_channels, bias=bias)

    self.reset_params()

  def reset_params(self):
    """Re-initialise both linear maps (lin_l first, then lin_r)."""
    for projection in (self.lin_l, self.lin_r):
      projection.reset_parameters()

  def forward(self, x, edge_index, size=None):
    """Combine each node's features with the mean of its neighbours' features."""
    neighbourhood = self.propagate(edge_index, x=x, size=size)
    combined = self.lin_l(x) + self.lin_r(neighbourhood)
    return F.normalize(combined) if self.normalize else combined

  def message(self, x_j):
    # x_j holds, for every edge, the features of the neighbour sending the
    # message; they are passed on unchanged.
    return x_j

  def aggregate(self, inputs, index, dim_size=None):
    # Average all incoming messages per receiving node.
    return scatter(inputs, index, dim=self.node_dim, reduce="mean", dim_size=dim_size)


In [None]:
# Implementing the GraphSage layer from scratch (without the MessagePassing base class)
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch_scatter import scatter_add, scatter_softmax
from torch_sparse import SparseTensor
#considering the use of dense tensors for edge index
class GraphSageD(nn.Module):
  """GraphSAGE-style layer written without the MessagePassing base class.

  For every node the features of its neighbours (plus its own, through an
  added self-loop) are summed, concatenated with the node's own features and
  passed through a single linear layer.

  Args:
    in_channels: number of input features per node.
    out_channels: size of the produced node embedding.
    normalize: when True, ``F.normalize`` is applied to the output.
  """

  def __init__(self, in_channels, out_channels, normalize=False):
    super().__init__()
    self.normalize = normalize
    # in_channels * 2 because forward() concatenates the node features with
    # the aggregated neighbourhood along the feature dimension.
    self.lin = nn.Linear(in_channels * 2, out_channels)

  def forward(self, x, edge_index):
    """Compute new node embeddings.

    Args:
      x: node feature matrix; its first dimension is the number of nodes.
      edge_index: either a (2, num_edges) index tensor or a ``SparseTensor``.

    Returns:
      Tensor with one row per node and ``out_channels`` columns.
    """
    num_nodes = x.size(0)

    # Extract the two index vectors first: a SparseTensor cannot be
    # concatenated with the self-loop indices, and to_dense() would give an
    # N x N matrix rather than index vectors.
    if isinstance(edge_index, SparseTensor):
      source, target, _ = edge_index.coo()
    else:
      source, target = edge_index

    # Append one self-loop per node so a node's own features enter the sum.
    loop_index = torch.arange(num_nodes, dtype=torch.long, device=x.device)
    source = torch.cat([source, loop_index])
    target = torch.cat([target, loop_index])

    # Gather one message per edge, then sum them onto the `source` node.
    x_j = x[target]                                              # (num_edges + num_nodes, in_channels)
    out = scatter_add(x_j, source, dim=0, dim_size=num_nodes)    # (num_nodes, in_channels)

    # Update: concatenate own features with the aggregate, then project.
    out = torch.cat([x, out], dim=1)                             # (num_nodes, in_channels * 2)
    out = self.lin(out)                                          # (num_nodes, out_channels)
    if self.normalize:
      out = F.normalize(out)

    return out


In [None]:
#implementing the GAT layer
class GAT(MessagePassing):
  """Multi-head graph attention layer built on ``MessagePassing``.

  Args:
    in_channels: number of input features per node.
    out_channels: number of output features per head.
    heads: number of attention heads; the layer outputs ``heads * out_channels``
      features per node (heads are concatenated).
    negative_slope: slope of the LeakyReLU applied to the attention logits.
    dropout: dropout probability applied to the normalised attention weights.
    **kwargs: forwarded to ``MessagePassing``.
  """

  def __init__(self, in_channels, out_channels, heads=2, negative_slope=0.2,
               dropout=0., **kwargs):
    # node_dim=0: messages are (num_edges, heads, out_channels) tensors, so the
    # node/edge axis to scatter over is the first one.
    super().__init__(node_dim=0, **kwargs)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.heads = heads
    self.negative_slope = negative_slope
    self.dropout = dropout

    # Sending and receiving nodes share one linear projection.
    self.lin_l = self.lin_r = nn.Linear(in_channels, out_channels * heads)
    # One attention vector per head for each side of an edge.
    self.att_l = nn.Parameter(torch.empty(heads, out_channels))
    self.att_r = nn.Parameter(torch.empty(heads, out_channels))

    self.reset_params()

  def reset_params(self):
    """Xavier-initialise the shared projection weight and both attention vectors."""
    nn.init.xavier_uniform_(self.lin_l.weight)
    nn.init.xavier_uniform_(self.att_l)
    nn.init.xavier_uniform_(self.att_r)

  def forward(self, x, edge_index, size=None):
    """Return attention-aggregated node features of shape (num_nodes, heads * out_channels)."""
    C, H = self.out_channels, self.heads

    lt_x_l = self.lin_l(x).view(-1, H, C)   # (N, H, C)
    lt_x_r = self.lin_r(x).view(-1, H, C)   # (N, H, C)

    # One scalar attention logit per node and head: reduce over the channel
    # axis (the last one), not over the head axis.
    alpha_l = (lt_x_l * self.att_l).sum(dim=-1)   # (N, H)
    alpha_r = (lt_x_r * self.att_r).sum(dim=-1)   # (N, H)
    out = self.propagate(edge_index, x=(lt_x_l, lt_x_r), alpha=(alpha_l, alpha_r), size=size)
    return out.view(-1, H * C)

  def message(self, x_j, alpha_j, alpha_i, index, ptr, size_i):
    """Weight each neighbour message by its per-head attention coefficient."""
    alpha = F.leaky_relu(alpha_i + alpha_j, self.negative_slope)
    # Normalise the logits over all edges that share the same receiving node.
    alpha = softmax(alpha, index, ptr, size_i)
    alpha = F.dropout(alpha, p=self.dropout, training=self.training)
    # (E, H) -> (E, H, 1) so each head's weight scales that head's C channels.
    return x_j * alpha.unsqueeze(-1)

  def aggregate(self, inputs, index, dim_size=None):
    # Sum the weighted messages per receiving node along the node axis.
    return scatter(inputs, index, dim=self.node_dim, dim_size=dim_size, reduce="sum")


In [None]:
# Implementing the GAT layer from scratch as a plain nn.Module (without the MessagePassing base class)
class GATD(nn.Module):
  """Multi-head graph attention layer written without the MessagePassing base class.

  Args:
    in_channels: number of input features per node.
    out_channels: number of output features per head.
    heads: number of attention heads (outputs are concatenated).
    negative_slope: slope of the LeakyReLU applied to the attention logits.
    dropout: dropout probability applied to the normalised attention weights.
  """

  def __init__(self, in_channels, out_channels, heads=2, negative_slope=0.2, dropout=0.):
    super().__init__()
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.heads = heads
    self.negative_slope = negative_slope
    self.dropout = dropout

    self.lin = nn.Linear(in_channels, out_channels * heads, bias=False)
    # torch.empty allocates by shape; the values are set in reset_parameters().
    self.att_l = nn.Parameter(torch.empty(1, heads, out_channels))
    self.att_r = nn.Parameter(torch.empty(1, heads, out_channels))
    self.reset_parameters()

  def reset_parameters(self):
    """Xavier-initialise the projection weight and both attention vectors."""
    nn.init.xavier_uniform_(self.lin.weight)
    nn.init.xavier_uniform_(self.att_l)
    nn.init.xavier_uniform_(self.att_r)

  def forward(self, x, edge_index, size=None):
    """Return attention-aggregated features of shape (num_nodes, heads * out_channels).

    Args:
      x: node feature matrix; its first dimension is the number of nodes.
      edge_index: either a (2, num_edges) index tensor or a ``SparseTensor``.
      size: unused; kept so the signature matches the MessagePassing-based layers.
    """
    H, C = self.heads, self.out_channels
    num_nodes = x.size(0)

    # Extract index vectors; to_dense() on a SparseTensor would give an N x N
    # matrix rather than the two index vectors needed below.
    if isinstance(edge_index, SparseTensor):
      source, target, _ = edge_index.coo()
    else:
      source, target = edge_index

    # Give every node exactly one self-loop: drop any the graph already has,
    # then append one per node. The index vectors used below must be the ones
    # that include the loops, otherwise the loops have no effect.
    keep = source != target
    loop_index = torch.arange(num_nodes, dtype=torch.long, device=x.device)
    source = torch.cat([source[keep], loop_index])
    target = torch.cat([target[keep], loop_index])

    # Linearly transform the feature matrix and split it into heads.
    x = self.lin(x).view(-1, H, C)               # (N, H, C)

    # One attention logit per node and head for each side of an edge.
    alpha_l = (x * self.att_l).sum(dim=-1)       # (N, H)
    alpha_r = (x * self.att_r).sum(dim=-1)       # (N, H)
    alpha = F.leaky_relu(alpha_l[source] + alpha_r[target], self.negative_slope)   # (E, H)

    # Normalise per aggregating node (and per head) over its edges.
    alpha = scatter_softmax(alpha, source, dim=0)

    alpha = F.dropout(alpha, p=self.dropout, training=self.training)

    # Attention-weighted sum of neighbour features.
    out = scatter_add(alpha.unsqueeze(-1) * x[target], source, dim=0, dim_size=num_nodes)

    return out.view(-1, H * C)



In [None]:
import torch.optim as optim

def build_optimizer(args, params):
  """Create the optimizer and optional learning-rate scheduler described by ``args``.

  Args:
    args: object exposing ``opt`` ("adam", "sgd", "rmsprop" or "adagrad"),
      ``lr``, ``weight_decay`` and ``opt_scheduler``. A falsy scheduler value
      or the string "none" means no scheduler; "step" additionally reads
      ``opt_decay_step`` and ``opt_decay_rate``; "cos" reads ``opt_restart``.
    params: iterable of parameters; only those with ``requires_grad`` are optimised.

  Returns:
    ``(scheduler, optimizer)`` where ``scheduler`` is None when none was requested.

  Raises:
    ValueError: for an unknown optimizer or scheduler name.
  """
  weight_decay = args.weight_decay
  filter_fn = filter(lambda p: p.requires_grad, params)

  if args.opt == "adam":
    optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay)
  elif args.opt == "sgd":
    optimizer = optim.SGD(filter_fn, lr=args.lr, weight_decay=weight_decay)
  elif args.opt == "rmsprop":
    optimizer = optim.RMSprop(filter_fn, lr=args.lr, weight_decay=weight_decay)
  elif args.opt == "adagrad":
    optimizer = optim.Adagrad(filter_fn, lr=args.lr, weight_decay=weight_decay)
  else:
    raise ValueError(f"Unknown optimizer: {args.opt!r}")

  # The experiment config spells "no scheduler" as the string 'none', which is
  # truthy, so it has to be recognised explicitly.
  scheduler_name = args.opt_scheduler
  if not scheduler_name or str(scheduler_name).lower() == "none":
    return None, optimizer

  if scheduler_name == "step":
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate)
  elif scheduler_name == "cos":
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)
  else:
    raise ValueError(f"Unknown scheduler: {scheduler_name!r}")
  return scheduler, optimizer

In [None]:
import time
import networkx as nx
import numpy as np
import torch.optim as optim
from tqdm import trange
import pandas as pd
import copy

from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader
import torch_geometric.nn as pyg_nn
import matplotlib.pyplot as plt

def train(dataset, args):
  """Train a GNNStack on the masked nodes of ``dataset`` and track test accuracy.

  Args:
    dataset: dataset whose items expose ``train_mask`` / ``test_mask`` / ``y``
      and which provides ``num_node_features`` and ``num_classes``.
    args: object exposing ``batch_size``, ``hidden_dim``, ``epochs`` and the
      attributes read by ``GNNStack`` and ``build_optimizer``.

  Returns:
    ``(test_accs, losses, best_model, best_acc, test_loader)``. ``losses`` has
    one entry per epoch; ``test_accs`` has one entry per evaluated epoch (every
    10th epoch, starting at epoch 0). ``best_model`` is a deep copy of the model
    at the best evaluated epoch.

  NOTE(review): the best model is selected on test accuracy, so ``best_acc``
  is an optimistic estimate - consider selecting on the validation mask.
  """
  print("Node task test size:", dataset[0]['test_mask'].sum().item())
  # The same loader is used for training and evaluation; the masks decide
  # which nodes contribute to the loss / accuracy.
  test_loader = loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)

  # model
  model = GNNStack(dataset.num_node_features, args.hidden_dim, dataset.num_classes, args)
  scheduler, opt = build_optimizer(args, model.parameters())

  # training
  losses = []
  best_acc = 0
  test_accs = []
  best_model = None
  for epoch in trange(args.epochs, desc="Training", unit="Epoch"):
    total_loss = 0
    model.train()
    for batch in loader:
      opt.zero_grad()
      pred = model(batch)
      label = batch.y
      pred = pred[batch.train_mask]    # semi-supervised: not all nodes have a label
      label = label[batch.train_mask]
      loss = model.loss(pred, label)
      loss.backward()
      opt.step()
      total_loss += loss.item() * batch.num_graphs
    total_loss /= len(loader.dataset)
    losses.append(total_loss)

    # Advance the learning-rate schedule once per epoch; without this a
    # configured scheduler would never change the learning rate.
    if scheduler is not None:
      scheduler.step()

    if epoch % 10 == 0:
      test_acc = test(test_loader, model)
      test_accs.append(test_acc)
      # Always keep the first evaluated model so best_model is never None,
      # even when the accuracy never rises above the initial best_acc of 0.
      if best_model is None or test_acc > best_acc:
        best_acc = test_acc
        best_model = copy.deepcopy(model)
  return test_accs, losses, best_model, best_acc, test_loader



def test(loader, test_model, is_validation=False, save_model_preds=False,
         model_type=None):
  test_model.eval()
  correct = 0
  for data in loader:
    with torch.no_grad():
      #pred = test_model(data).max(dim=1)[1]  max returns the indicies that contains the max
      pred = test_model(data)
      pred = pred.argmax(dim=1) # Batch, !

    mask = data.val_mask if is_validation else data.test_mask

    pred = pred[mask]
    label = data.y[mask]

    if save_model_preds:
      print("Saving Model Predictions", model_type)

      data = {}
      data['pred'] = pred.view(-1).cpu().detach().numpy()
      data['label'] = label.view(-1).cpu().detach().numpy()

      df = pd.DataFrame(data=data)
      df.to_csv(f"Node-{model_type}.csv", sep=',', index=False)

    correct += pred.eq(label).sum().item()   #eq is element-wise comparison. returns same shape of 1 if T and 0 if F

  total = 0
  for data in loader.dataset:
    total += torch.sum(data.test_mask).item()
  return correct / total



class objectview:
  """Expose the keys of a dict as attributes.

  The instance uses ``d`` itself as its attribute dictionary (no copy), so
  attribute assignments on the view are written into ``d`` and vice versa.
  """

  def __init__(self, d):
    self.__dict__ = d







In [None]:
# Experiment driver: train every layer implementation on every Planetoid
# dataset, save the best model's test predictions and plot the curves.

# train() evaluates on every 10th epoch (starting at epoch 0); this stride is
# used to place the accuracy curve on the same epoch axis as the loss curve.
EVAL_EVERY = 10

for args in [
    {
        'model_type': 'GAT', 'dataset': 'cora', 'num_layers': 2, 'heads': 1,
        'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 500,
        'opt': 'adam',
        # None (rather than the truthy string 'none') means "no scheduler".
        'opt_scheduler': None, 'opt_restart': 0,
        'weight_decay': 5e-3, 'lr': 0.01,
    },
]:
  args = objectview(args)

  for model in ["GAT", "GraphSage", "GATD", "GraphSageD"]:
    args.model_type = model
    # The attention layers run with two heads, the GraphSage variants with one.
    args.heads = 2 if model in ["GAT", "GATD"] else 1

    # A separate name for the loop variable keeps the dataset *name* and the
    # dataset *object* apart.
    for dataset_name in ["cora", "citeseer", "pubmed"]:
      args.dataset = dataset_name

      if args.dataset == "cora":
        dataset = Planetoid(root="/tmp/cora", name="Cora")
      elif args.dataset == "citeseer":
        dataset = Planetoid(root="/tmp/citeseer", name="Citeseer")
      elif args.dataset == "pubmed":
        dataset = Planetoid(root="/tmp/pubmed", name="PubMed")
      else:
        raise NotImplementedError(f"Unknown dataset: {args.dataset}")

      test_accs, losses, best_model, best_acc, test_loader = train(dataset, args)

      print("Maximum test set accuracy: {0}".format(max(test_accs)))
      print("Minimum loss: {0}".format(min(losses)))

      # Run test for our best model to save the predictions. The dataset name
      # is part of the label so each run writes its own CSV file instead of
      # overwriting the previous dataset's predictions.
      test(test_loader, best_model, is_validation=False, save_model_preds=True,
           model_type=f"{model}-{args.dataset}")
      print()

      # One figure per (model, dataset) run.
      fig, ax = plt.subplots()
      ax.set_title(dataset.name)
      ax.set_xlabel("epoch")
      ax.plot(losses, label="training loss" + " - " + args.model_type)
      ax.plot(range(0, len(losses), EVAL_EVERY), test_accs,
              label="test accuracy" + " - " + args.model_type)
      ax.legend()
      plt.show()

