In [33]:
!pip install torchvision
!pip install torch-scatter torch-sparse torch-geometric



In [34]:
import torch
from torch_geometric.data import Data

# Example:
edge_index = torch.tensor([[1, 2, 3], [0, 0, 0]], dtype = torch.long)
x = torch.tensor([[1], [1], [1]], dtype = torch.float)

data = Data(edge_index = edge_index, x = x)
data

Data(x=[3, 1], edge_index=[2, 3])

GCN node classifier implementation

In [35]:
import numpy as np
x = torch.tensor([[1], [1], [1]], dtype=torch.float)
print(x.shape)
i = x.shape[0]
print(x.size(0))

torch.Size([3, 1])
3


In [46]:
import torch
from torch_geometric.nn import MessagePassing
import math
import os
import os.path as osp
import torch.nn.functional as fct
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
import time
from torch import tensor
from torch.optim import Adam

path = osp.join(os.getcwd(), 'data', 'Cora')
dataset = Planetoid(path, 'Cora')

''' GCN layer '''
def tensor_initialization(tensor):
  # Using Glorot method for setting the weights of the tensor to values chosen from
  # a uniform distribution within a specific range
  if tensor == None:
    return
  std = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1)))
  tensor.data.uniform_(-std, std)

def zeros(tensor):
  tensor.data.fill_(0)

def self_loops(edge_index, num_nodes=None):
  loop_idx = torch.arange(0, num_nodes, dtype=torch.long, device=edge_index.device)
  loop_idx = loop_idx.unsqueeze(0).repeat(2, 1)
  edge_index = torch.cat([edge_index, loop_idx], dim=1)
  return edge_index

def degree(index, num_nodes, dtype=None):
  out = torch.zeros((num_nodes), dtype=dtype, device=index.device)
  return out.scatter_add_(0, index, out.new_ones((index.size(0))))

class GCN(MessagePassing):
  def __init__(self, in_channels, out_channels):
    super(GCN, self).__init__(aggr='add')
    self.linear = torch.nn.Linear(in_channels, out_channels)

    self.reset_parameters()

  def reset_parameters(self):
    tensor_initialization(self.linear.weight)
    zeros(self.linear.bias)

  def forward(self, x, edge_index):
    # x has shape [N, in_channels]
    # edge_index has shape [2, E]
    self.edge_index = self_loops(edge_index, x.size(0))
    x = self.linear(x)
    return self.propagate(edge_index, x = x)

  def message(self, x_j, edge_index, size):
    row, col = edge_index
    D = degree(row, size[0], dtype = x_j.dtype)
    D_inv_sqrt = D.pow(-0.5)
    D_inv_sqrt[D_inv_sqrt == float('inf')] = 0
    norm = D_inv_sqrt[row] * D_inv_sqrt[col]

    return norm.view(-1, 1) * x_j

  def update(self, aggr_out):
    return aggr_out


''' Model for vertex classification '''
def train(model, optimizer, data):
  model.train()
  optimizer.zero_grad()
  out = model(data)
  loss = fct.nll_loss(out[data.train_mask], data.y[data.train_mask])
  loss.backward()
  optimizer.step()

def evaluate(model, data):
  model.eval()
  with torch.no_grad():
    logits = model(data)

  outs = dict()
  for key in ['train', 'val', 'test']:
    mask = data['{}_mask'.format(key)]
    loss = fct.nll_loss(logits[mask], data.y[mask]).item()
    pred = logits[mask].max(1)[1]
    acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()

    outs['{}_loss'.format(key)] = loss
    outs['{}_acc'.format(key)] = acc

  return outs

def run(dataset, model, iterations, epochs, lr, weight_decay, early_stop):
  val_losses, accs, durations = [], [], []
  for _ in range(iterations):
    data = dataset[0]
    data = data.to(device)
    model.to(device).reset_parameters()
    optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    time_start = time.perf_counter()

    best_val_loss = float('inf')
    test_accuracy = 0
    val_loss_history = []

    for epoch in range(1, epochs + 1):
      train(model, optimizer, data)
      eval_info = evaluate(model, data)
      eval_info['epoch'] = epoch

      if eval_info['val_loss'] < best_val_loss:
          best_val_loss = eval_info['val_loss']
          test_acc = eval_info['test_acc']

      val_loss_history.append(eval_info['val_loss'])
      if early_stop > 0 and epoch > epochs // 2:
        tmp = tensor(val_loss_history[-(early_stop + 1):-1])
        if eval_info['val_loss'] > tmp.mean().item():
          break

    time_end = time.perf_counter()
    val_losses.append(best_val_loss)
    accs.append(test_acc)
    durations.append(time_end - time_start)

    loss, acc, duration = tensor(val_losses), tensor(accs), tensor(durations)

    print('Test Accuracy: {:.3f} ± {:.3f}, Value Loss: {:.4f}, Duration: {:.3f}'.
          format(loss.mean().item(),
                 acc.mean().item(),
                 acc.std().item(),
                 duration.mean().item()))


iterations = 10
epochs = 200
lr = 0.01
weight_decay = 0.0005
early_stop = 10
hidden = 16
dropout = 0.5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class GCNNet(torch.nn.Module):
  def __init__(self, dataset):
    super(GCNNet, self).__init__()

    self.conv_1 = GCN(dataset.num_features, hidden)
    self.conv_2 = GCN(hidden, dataset.num_classes)

  def reset_parameters(self):
    self.conv_1.reset_parameters()
    self.conv_2.reset_parameters()

  def forward(self, data):
    x, edge_index = data.x, data.edge_index
    x = fct.relu(self.conv_1(x, edge_index))
    x = fct.dropout(x, p=dropout, training=self.training)
    x = self.conv_2(x, edge_index)
    return fct.log_softmax(x, dim=1)

run(dataset, GCNNet(dataset), iterations, epochs, lr, weight_decay, early_stop)

Test Accuracy: 0.779 ± 0.786, Value Loss: nan, Duration: 2.822
Test Accuracy: 0.775 ± 0.784, Value Loss: 0.0028, Duration: 2.793
Test Accuracy: 0.784 ± 0.784, Value Loss: 0.0020, Duration: 3.019
Test Accuracy: 0.773 ± 0.782, Value Loss: 0.0039, Duration: 2.905
Test Accuracy: 0.763 ± 0.782, Value Loss: 0.0033, Duration: 2.833
Test Accuracy: 0.757 ± 0.784, Value Loss: 0.0047, Duration: 2.770
Test Accuracy: 0.752 ± 0.783, Value Loss: 0.0043, Duration: 2.786
Test Accuracy: 0.744 ± 0.786, Value Loss: 0.0071, Duration: 2.852
Test Accuracy: 0.743 ± 0.786, Value Loss: 0.0068, Duration: 2.835
Test Accuracy: 0.742 ± 0.787, Value Loss: 0.0067, Duration: 2.817


## Graph Classification

Using GINs since they are expressive and powerful in what concerns the Weisfeiler-Lehman graph isomorphism test.  

Graph Isomorphism Network

Models for graph classification


In [72]:
from torch_geometric.datasets import TUDataset
from torch_geometric.utils import degree
import torch_geometric.transforms as T
import time
import torch
import torch.nn.functional as F
from torch import tensor
from torch.optim import Adam
from sklearn.model_selection import StratifiedKFold
from torch_geometric.data import DataLoader, DenseDataLoader as DenseLoader


class NormalizedDegree(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, data):
        D = degree(data.edge_index[0], dtype=torch.float)
        D = (D - self.mean) / self.std
        data.x = D.view(-1, 1)
        return data


def access_dataset(name, cleaned=False):
    path = osp.join(os.getcwd(), 'data', name)
    dataset = TUDataset(path, name, cleaned=cleaned)
    dataset.data.edge_attr = None

    if dataset.data.x is None:
        max_degree = 0
        degs = []
        for data in dataset:
            degs += [degree(data.edge_index[0], dtype=torch.long)]
            max_degree = max(max_degree, degs[-1].max().item())

        if max_degree < 1000:
            dataset.transform = T.OneHotDegree(max_degree)
        else:
            deg = torch.cat(degs, dim=0).to(torch.float)
            mean, std = deg.mean().item(), deg.std().item()
            dataset.transform = NormalizedDegree(mean, std)

    return dataset

def print_dataset(dataset):
    num_nodes = num_edges = 0
    for data in dataset:
        num_nodes += data.num_nodes
        num_edges += data.num_edges

    print('Name', dataset)
    print('Graphs', len(dataset))
    print('Nodes', num_nodes / len(dataset))
    print('Edges', (num_edges // 2) / len(dataset))
    print('Features', dataset.num_features)
    print('Classes', dataset.num_classes)
    print()


for name in ['IMDB-BINARY']:
    print_dataset(access_dataset(name))

def cross_validation_with_val_set(dataset, model, folds, epochs, batch_size,
                                  lr, lr_decay_factor, lr_decay_step_size,
                                  weight_decay, logger=None):

    val_losses, accuracies, durations = [], [], []
    for fold, (train_idx, test_idx,
               val_idx) in enumerate(zip(*k_fold(dataset, folds))):

        train_dataset = dataset[train_idx]
        test_dataset = dataset[test_idx]
        val_dataset = dataset[val_idx]

        train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

        model.to(device).reset_parameters()
        optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_start = time.perf_counter()

        for epoch in range(1, epochs + 1):
            train_loss = train(model, optimizer, train_loader)
            val_losses.append(eval_loss(model, val_loader))
            accuracies.append(eval_acc(model, test_loader))
            eval_info = {
                'fold': fold,
                'epoch': epoch,
                'train_loss': train_loss,
                'val_loss': val_losses[-1],
                'test_acc': accuracies[-1],
            }

            if logger is not None:
                logger(eval_info)

            if epoch % lr_decay_step_size == 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_decay_factor * param_group['lr']

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_end = time.perf_counter()
        durations.append(t_end - t_start)

    loss, acc, duration = tensor(val_losses), tensor(accuracies), tensor(durations)
    loss, acc = loss.view(folds, epochs), acc.view(folds, epochs)
    loss, argmin = loss.min(dim=1)
    acc = acc[torch.arange(folds, dtype=torch.long), argmin]

    loss_mean = loss.mean().item()
    acc_mean = acc.mean().item()
    acc_std = acc.std().item()
    duration_mean = duration.mean().item()
    print('Val Loss: {:.4f}, Test Accuracy: {:.3f} ± {:.3f}, Duration: {:.3f}'.
          format(loss_mean, acc_mean, acc_std, duration_mean))

    return loss_mean, acc_mean, acc_std


def k_fold(dataset, folds):
    skf = StratifiedKFold(folds, shuffle=True, random_state=12345)

    test_indices, train_indices = [], []
    for _, idx in skf.split(torch.zeros(len(dataset)), dataset.data.y):
        test_indices.append(torch.from_numpy(idx))

    val_indices = [test_indices[i - 1] for i in range(folds)]

    for i in range(folds):
        train_mask = torch.ones(len(dataset), dtype=torch.bool)
        train_mask[test_indices[i]] = 0
        train_mask[val_indices[i]] = 0
        train_indices.append(train_mask.nonzero().view(-1))

    return train_indices, test_indices, val_indices


def num_graphs(data):
    if data.batch is not None:
        return data.num_graphs
    else:
        return data.x.size(0)


def train(model, optimizer, loader):
    model.train()

    total_loss = 0
    for data in loader:
        optimizer.zero_grad()
        data = data.to(device)
        out = model(data)
        loss = F.nll_loss(out, data.y.view(-1))
        loss.backward()
        total_loss += loss.item() * num_graphs(data)
        optimizer.step()
    return total_loss / len(loader.dataset)


def eval_acc(model, loader):
    model.eval()

    correct = 0
    for data in loader:
        data = data.to(device)
        with torch.no_grad():
            pred = model(data).max(1)[1]
        correct += pred.eq(data.y.view(-1)).sum().item()
    return correct / len(loader.dataset)


def eval_loss(model, loader):
    model.eval()

    loss = 0
    for data in loader:
        data = data.to(device)
        with torch.no_grad():
            out = model(data)
        loss += F.nll_loss(out, data.y.view(-1), reduction='sum').item()
    return loss / len(loader.dataset)

import torch
import torch.nn.functional as F
from torch.nn import Linear, Sequential, ReLU, BatchNorm1d as BN
from torch_geometric.nn import global_mean_pool, MessagePassing
from torch_geometric.utils import remove_self_loops

def reset(nn):
    def _reset(item):
        if hasattr(item, 'reset_parameters'):
            item.reset_parameters()

    if nn is not None:
        if hasattr(nn, 'children') and len(list(nn.children())) > 0:
            for item in nn.children():
                _reset(item)
        else:
            _reset(nn)


class GINConv(MessagePassing):
    def __init__(self, nn, eps=0, train_eps=False, **kwargs):
        super(GINConv, self).__init__(aggr='add', **kwargs)
        self.nn = nn
        self.initial_eps = eps
        if train_eps:
            self.eps = torch.nn.Parameter(torch.Tensor([eps]))
        else:
            self.register_buffer('eps', torch.Tensor([eps]))
        self.reset_parameters()

    def reset_parameters(self):
        reset(self.nn)
        self.eps.data.fill_(self.initial_eps)

    def forward(self, x, edge_index):
        edge_index, _ = remove_self_loops(edge_index)
        out = self.nn((1 + self.eps) * x + self.propagate(edge_index, x=x))
        return out

    def message(self, x_j):
        return x_j

class GIN0(torch.nn.Module):
    def __init__(self, dataset, num_layers, hidden):
        super(GIN0, self).__init__()
        self.conv_1 = GINConv(Sequential(
            Linear(dataset.num_features, hidden),
            ReLU(),
            Linear(hidden, hidden),
            ReLU(),
            BN(hidden),
        ),
                              train_eps=False)
        self.convs = torch.nn.ModuleList()
        for i in range(num_layers - 1):
            self.convs.append(
                GINConv(Sequential(
                    Linear(hidden, hidden),
                    ReLU(),
                    Linear(hidden, hidden),
                    ReLU(),
                    BN(hidden),
                ),
                        train_eps=False))
        self.lin1 = Linear(hidden, hidden)
        self.lin2 = Linear(hidden, dataset.num_classes)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        for conv in self.convs:
            conv.reset_parameters()
        self.lin1.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, data):
        x, edge_index, current_batch = data.x, data.edge_index, data.batch
        x = self.conv1(x, edge_index)
        for conv in self.convs:
            x = conv(x, edge_index)
        x = global_mean_pool(x, current_batch)
        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)

class GIN(torch.nn.Module):
    def __init__(self, dataset, num_layers, hidden):
        super(GIN, self).__init__()
        self.conv1 = GINConv(Sequential(
            Linear(dataset.num_features, hidden),
            ReLU(),
            Linear(hidden, hidden),
            ReLU(),
            BN(hidden),
        ),
                             train_eps=True)
        self.convs = torch.nn.ModuleList()
        for i in range(num_layers - 1):
            self.convs.append(
                GINConv(Sequential(
                    Linear(hidden, hidden),
                    ReLU(),
                    Linear(hidden, hidden),
                    ReLU(),
                    BN(hidden),
                ),
                        train_eps=True))
        self.lin1 = Linear(hidden, hidden)
        self.lin2 = Linear(hidden, dataset.num_classes)

    def reset_parameters(self):
        self.conv1.reset_parameters()
        for conv in self.convs:
            conv.reset_parameters()
        self.lin1.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = self.conv1(x, edge_index)
        for conv in self.convs:
            x = conv(x, edge_index)
        x = global_mean_pool(x, batch)
        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)

from itertools import product

epochs = 10
batch_size = 12
lr = 0.01
lr_decay_factor = 0.5
lr_decay_step_size = 50

layers = [5]
hiddens = [64]
datasets = ['IMDB-BINARY']
nets = [
    GIN0,
    GIN,
]

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def logger(info):
    fold, epoch = info['fold'] + 1, info['epoch']
    val_loss, test_acc = info['val_loss'], info['test_acc']
    print('{:02d}/{:03d}: Val Loss: {:.4f}, Test Accuracy: {:.3f}'.format(
        fold, epoch, val_loss, test_acc))


results = []
for dataset_name, Net in product(datasets, nets):
    best_result = (float('inf'), 0, 0)  # (loss, acc, std)
    print('-----\n{} - {}'.format(dataset_name, Net.__name__))
    for num_layers, hidden in product(layers, hiddens):
        dataset = access_dataset(dataset_name)
        model = Net(dataset, num_layers, hidden)
        loss, acc, std = cross_validation_with_val_set(
            dataset,
            model,
            folds=10,
            epochs=epochs,
            batch_size=batch_size,
            lr=lr,
            lr_decay_factor=lr_decay_factor,
            lr_decay_step_size=lr_decay_step_size,
            weight_decay=0,
            logger=None,
        )
        if loss < best_result[0]:
            best_result = (loss, acc, std)

    desc = '{:.5f} +- {:.5f}'.format(best_result[1], best_result[2])
    print('The best ans = {}'.format(desc))
    results += ['Name = {}, Model = {}, Result = {}'.format(dataset_name, model, desc)]
print('-----\n{}'.format('\n'.join(results)))



Name IMDB-BINARY(1000)
Graphs 1000
Nodes 19.773
Edges 96.531
Features 136
Classes 2

-----
IMDB-BINARY - GIN0




Val Loss: 0.5639, Test Accuracy: 0.670 ± 0.067, Duration: 12.266
The best ans = 0.67000 +- 0.06700
-----
IMDB-BINARY - GIN




Val Loss: 0.5685, Test Accuracy: 0.672 ± 0.050, Duration: 14.453
The best ans = 0.67200 +- 0.05029
-----
Name = IMDB-BINARY - Mode; = GIN0(
  (conv1): GINConv()
  (convs): ModuleList(
    (0-3): 4 x GINConv()
  )
  (lin1): Linear(in_features=64, out_features=64, bias=True)
  (lin2): Linear(in_features=64, out_features=2, bias=True)
): Desc = 0.67000 +- 0.06700
Name = IMDB-BINARY - Mode; = GIN(
  (conv1): GINConv()
  (convs): ModuleList(
    (0-3): 4 x GINConv()
  )
  (lin1): Linear(in_features=64, out_features=64, bias=True)
  (lin2): Linear(in_features=64, out_features=2, bias=True)
): Desc = 0.67200 +- 0.05029
