In [1]:
import torch
import torch.nn.functional as F
from torch.nn import Linear
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GraphConv, TopKPooling, global_mean_pool, GCNConv

In [2]:
# MUTAG from the TU graph-classification collection, rooted at data/TUDataset.
dataset = TUDataset(name='MUTAG', root='data/TUDataset')

In [3]:
# Define your model here
class GCN(torch.nn.Module):
    """Graph classifier: three ``GCNConv`` layers, mean-pool readout, linear head.

    Args:
        n_ftrs: Input size handed to the first ``GCNConv`` layer.
        n_cls: Output size of the final ``Linear`` layer.
        hidden_channels: Width shared by all three convolution layers and
            used as the input size of the final ``Linear`` layer.
    """

    def __init__(self, n_ftrs, n_cls, hidden_channels):
        super().__init__()
        # Seed the global torch RNG before any layer is built. Note that this
        # resets the process-wide RNG state each time a GCN is constructed.
        torch.manual_seed(12345)

        # Message-passing stack: n_ftrs -> hidden -> hidden -> hidden.
        self.conv1 = GCNConv(n_ftrs, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)

        # Classification head applied to the pooled representation.
        self.lin = Linear(hidden_channels, n_cls)

    def forward(self, x, edge_index, batch):
        """Compute one row of class scores per pooled group.

        Args:
            x: Node feature input to the first convolution.
            edge_index: Connectivity passed unchanged to every convolution.
            batch: Passed to ``global_mean_pool``; presumably maps each node
                to the graph it belongs to (confirm against the loader).

        Returns:
            Output of the final linear layer (raw scores, no softmax applied).
        """
        # Stage 1: node embeddings. ReLU follows the first two convolutions
        # only; the third is left linear.
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.relu(self.conv2(hidden, edge_index))
        hidden = self.conv3(hidden, edge_index)

        # Stage 2: readout -> [batch_size, hidden_channels].
        pooled = global_mean_pool(hidden, batch)

        # Stage 3: dropout (active only while self.training is True), then classify.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)

In [4]:
# Shuffling loader over the full (unsplit) dataset.
loader = DataLoader(dataset, shuffle=True, batch_size=64)

# Model input/output sizes are read from the dataset itself.
n_ftr, n_cls = dataset.num_node_features, dataset.num_classes

# 64 hidden channels; Adam at lr=0.01; cross-entropy on the model's raw outputs.
model = GCN(n_ftr, n_cls, hidden_channels=64)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [5]:
# split dataset into train and test
# Seed the global torch RNG, then shuffle and cut the dataset 80/20.
# NOTE: `dataset` is rebound to the shuffled copy, so re-running this cell
# shuffles an already-shuffled dataset.
torch.manual_seed(12345)
dataset = dataset.shuffle()

# The first 80% (rounded down) is used for training, the remainder for testing.
split_at = int(len(dataset) * 0.8)
train_dataset, test_dataset = dataset[:split_at], dataset[split_at:]

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [6]:
def train(train_loader):
    """Run one optimisation pass over every batch in ``train_loader``.

    Operates on the module-level ``model``, ``criterion`` and ``optimizer``.

    Args:
        train_loader: Iterable of batches exposing ``x``, ``edge_index``,
            ``batch`` and ``y`` attributes.

    Returns:
        None. The model parameters are updated in place by the optimizer.
    """
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
        # Clear gradients *before* computing new ones. Any gradient left on the
        # parameters (e.g. from a cell interrupted between backward() and the
        # clean-up) would otherwise be summed into this batch's update.
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)  # Forward pass.
        loss = criterion(out, data.y)  # Loss against the ground-truth labels.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.

In [7]:
def evl(test_loader):
    """Return the classification accuracy of the module-level ``model``.

    Args:
        test_loader: Iterable of batches exposing ``x``, ``edge_index``,
            ``batch`` and ``y`` attributes, and itself exposing a sized
            ``dataset`` attribute used as the accuracy denominator.

    Returns:
        Fraction of correct predictions, i.e. number of matching labels
        divided by ``len(test_loader.dataset)``.

    Raises:
        ZeroDivisionError: If ``test_loader.dataset`` is empty.
    """
    model.eval()

    correct = 0
    # Inference only: disable autograd so no computation graph is recorded
    # for each batch. The predictions themselves are unaffected.
    with torch.no_grad():
        for data in test_loader:  # Iterate in batches over the given dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Index of the highest score per row.
            correct += int((pred == data.y).sum())  # Count matches with the labels.
    return correct / len(test_loader.dataset)  # Ratio of correct predictions.

In [8]:
# Train for a fixed number of epochs, reporting accuracy on both splits each time.
epochs = 200
for epoch in range(epochs):
    train(train_loader)
    # Score the training split first, then the held-out split.
    train_acc, test_acc = evl(train_loader), evl(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 000, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 001, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 002, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 003, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 004, Train Acc: 0.6467, Test Acc: 0.7368
Epoch: 005, Train Acc: 0.7067, Test Acc: 0.7632
Epoch: 006, Train Acc: 0.7133, Test Acc: 0.7632
Epoch: 007, Train Acc: 0.7133, Test Acc: 0.7632
Epoch: 008, Train Acc: 0.7200, Test Acc: 0.8158
Epoch: 009, Train Acc: 0.7200, Test Acc: 0.8158
Epoch: 010, Train Acc: 0.7467, Test Acc: 0.7632
Epoch: 011, Train Acc: 0.7333, Test Acc: 0.7632
Epoch: 012, Train Acc: 0.7067, Test Acc: 0.7895
Epoch: 013, Train Acc: 0.7200, Test Acc: 0.7895
Epoch: 014, Train Acc: 0.7333, Test Acc: 0.7632
Epoch: 015, Train Acc: 0.7400, Test Acc: 0.7632
Epoch: 016, Train Acc: 0.7133, Test Acc: 0.7895
Epoch: 017, Train Acc: 0.7267, Test Acc: 0.7895
Epoch: 018, Train Acc: 0.7400, Test Acc: 0.7895
Epoch: 019, Train Acc: 0.7200, Test Acc: 0.8158
Epoch: 020, Train Acc: 0.7200, Test Acc: