In [1]:
import torch
from tqdm import trange
from torch_geometric.datasets import TUDataset
%load_ext autoreload

# Load the MUTAG graph-classification benchmark (fetched into data/TUDataset).
dataset = TUDataset(root='data/TUDataset', name='MUTAG')

# Dataset-level overview, emitted as one newline-separated group.
print(
    '',
    f'Dataset: {dataset}:',
    '====================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Inspect the first graph of the dataset as a representative sample.
data = dataset[0]

# Per-graph statistics for that sample, again as one group.
print(
    '',
    data,
    '=============================================================',
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Contains isolated nodes: {data.contains_isolated_nodes()}',
    f'Contains self-loops: {data.contains_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)


Dataset: MUTAG(188):
Number of graphs: 188
Number of features: 7
Number of classes: 2

Data(edge_attr=[38, 4], edge_index=[2, 38], x=[17, 7], y=[1])
Number of nodes: 17
Number of edges: 38
Average node degree: 2.24
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True


In [8]:
from torch.nn import Linear
from CayleyNet import CayleyConv
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGPooling, TopKPooling
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    """Graph-level classifier: CayleyConv -> ReLU -> TopKPooling -> mean readout -> Linear.

    The input feature width and the number of output classes are read from the
    notebook-level ``dataset`` object, so that global must exist before the
    model is instantiated.

    Args:
        hidden_channels: Width of the convolution output, which is also the
            input width of the pooling layer and the linear head.
    """

    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        # Seeds the *global* torch RNG so parameter initialisation is repeatable
        # every time the model is constructed.
        torch.manual_seed(12345)
        # NOTE(review): the third positional argument (64) is defined by the
        # local CayleyNet module; its meaning is not visible here -- confirm.
        self.conv1 = CayleyConv(dataset.num_node_features, hidden_channels, 64)
        self.pool = TopKPooling(hidden_channels, ratio=0.9)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch=None):
        """Compute class scores for one graph or a mini-batch of graphs.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity, passed unchanged to the
                convolution and pooling layers.
            batch: Optional vector assigning each node to a graph, as produced
                by a PyG ``DataLoader``. When ``None`` (the default, matching
                the previous two-argument call), the pooling layer treats all
                nodes as belonging to a single graph.

        Returns:
            Tensor of unnormalised class scores, one row per graph.
        """
        # 1. Node embeddings. Only x and edge_index are given to the convolution.
        x = self.conv1(x, edge_index)
        x = x.relu()

        # 2. Node pooling. The returned `batch` is the assignment vector for the
        #    nodes that survived pooling (created by the layer if none was given).
        x, edge_index, _, batch, _, _ = self.pool(x, edge_index, batch=batch)

        # 3. Readout: average the remaining node embeddings per graph.
        x = global_mean_pool(x, batch)

        # 4. Classifier head; dropout is only active in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x


In [5]:
# Fixed split: the first 150 graphs train the model, the remainder is held out.
train_dataset, test_dataset = dataset[:150], dataset[150:]

model = GCN(hidden_channels=64)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Ten passes over the training split, one optimiser update per graph.
model.train()
for epoch in trange(10):
    for graph in train_dataset:
        loss = criterion(model(graph.x, graph.edge_index), graph.y)
        loss.backward()
        optimizer.step()
        # Clear gradients right after the update so the next graph starts clean.
        optimizer.zero_grad()


100%|██████████| 10/10 [00:41<00:00,  4.17s/it]


In [6]:
# Switch to inference mode (disables dropout in the model's forward pass).
model.eval()

correct = 0
# Evaluation needs no gradients: disabling autograd avoids building a
# computation graph for every test sample.
with torch.no_grad():
    for data in test_dataset:  # One graph at a time over the held-out split.
        out = model(data.x, data.edge_index)
        pred = out.argmax(dim=1)  # Predicted class = index of the largest score.
        correct += int((pred == data.y).sum())  # Check against ground-truth labels.
print("{} test accuracy".format(correct / len(test_dataset)))  # Derive ratio of correct predictions.


0.7631578947368421 test accuracy
