In [21]:
import os.path as osp
import torch_geometric.transforms as T
import torch
from torch_geometric.datasets import TUDataset

# Which TU benchmark to load, and where its files are kept on disk.
dataset_name = 'PROTEINS'
# "__file__" is a plain string here (not the module attribute), so realpath
# resolves it against the current working directory.
working_dir = osp.dirname(osp.realpath("__file__"))
path = osp.join(working_dir, '..', 'data', 'TUDataset')

# Transforms applied to each graph, in the listed order.
# NOTE(review): the model's forward only hands edge_index to GCNConv; confirm
# whether anything GCNNorm attaches to the graph is actually meant to be used.
transform = T.Compose([
    T.GCNNorm(),
    T.NormalizeFeatures(),
])

dataset = TUDataset(root=path, name=dataset_name, transform=transform)

# Keep the first item of the dataset around for inspection.
data = dataset[0]

In [22]:
# Fix torch's global seed right before shuffling.
torch.manual_seed(12345)
# NOTE: this rebinds `dataset`, so running the cell again shuffles the
# already-shuffled dataset rather than the one that was loaded.
dataset = dataset.shuffle()

# The first 800 graphs are used for training, everything after for testing.
train_dataset, test_dataset = dataset[:800], dataset[800:]

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')

Number of training graphs: 800
Number of test graphs: 313


In [23]:
from torch_geometric.loader import DataLoader

# Both loaders use the same mini-batch size; only the training loader
# reshuffles its graphs.
BATCH_SIZE = 64

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [24]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    """Graph classifier: two GCNConv layers, mean-pool readout, linear head.

    Args:
        hidden_channels: Output size of both GCNConv layers.
        in_channels: Size of the input node features. When ``None`` (default)
            it is read from the module-level ``dataset.num_node_features``.
        num_classes: Number of outputs of the final linear layer. When
            ``None`` (default) it is read from the module-level
            ``dataset.num_classes``.
        dropout: Dropout probability applied to the pooled graph embedding
            while the module is in training mode.
    """

    def __init__(self, hidden_channels, in_channels=None, num_classes=None,
                 dropout=0.5):
        super().__init__()
        # Reseeds torch's *global* RNG on every construction, before any
        # layer is created.
        torch.manual_seed(12345)

        # Fall back to the notebook's dataset only when sizes are not given,
        # so the class can also be built without that global.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if num_classes is None:
            num_classes = dataset.num_classes

        self.dropout = dropout
        # Layers are created in the same order as before (conv1, conv2, lin).
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Return the final linear layer's output for each graph in the batch.

        Args:
            x: Node features.
            edge_index: Graph connectivity handed to both GCNConv layers.
            batch: Node-to-graph assignment handed to ``global_mean_pool``.
        """
        # 1. Obtain node embeddings (no activation after the last conv).
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier; dropout is a no-op outside training.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lin(x)

        return x

# Hidden size passed to the GCN as `hidden_channels`.
HIDDEN_CHANNELS = 64
model = GCN(hidden_channels=HIDDEN_CHANNELS)

In [25]:
# Loss and optimiser shared by train() below.
LEARNING_RATE = 0.01

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

def train():
    """Run one pass over ``train_loader``, updating ``model`` after each batch.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``.
    Returns nothing.
    """
    model.train()

    for batch in train_loader:
        # Forward pass on the mini-batch and loss against its labels.
        logits = model(batch.x, batch.edge_index, batch.batch)
        batch_loss = criterion(logits, batch.y)

        # Backpropagate, apply the update, then clear the gradients so they
        # do not carry over into the next batch.
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
     model.eval()

     correct = 0
     for data in loader:  # Iterate in batches over the training/test dataset.
         out = model(data.x, data.edge_index, data.batch)  
         pred = out.argmax(dim=1)  # Use the class with highest probability.
         correct += int((pred == data.y).sum())  # Check against ground-truth labels.
     return correct / len(loader.dataset)  # Derive ratio of correct predictions.


# Train for epochs 1..19 and measure accuracy on both splits after each one.
for epoch in range(1, 20):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    # Report inside the loop so every epoch gets its own line, not just the last.
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.6188, Test Acc: 0.6038
Epoch: 002, Train Acc: 0.6587, Test Acc: 0.6102
Epoch: 003, Train Acc: 0.6150, Test Acc: 0.5974
Epoch: 004, Train Acc: 0.6600, Test Acc: 0.6198
Epoch: 005, Train Acc: 0.6525, Test Acc: 0.6390
Epoch: 006, Train Acc: 0.6963, Test Acc: 0.6805
Epoch: 007, Train Acc: 0.6913, Test Acc: 0.6837
Epoch: 008, Train Acc: 0.6700, Test Acc: 0.6518
Epoch: 009, Train Acc: 0.6937, Test Acc: 0.6645
Epoch: 010, Train Acc: 0.6400, Test Acc: 0.6358
Epoch: 011, Train Acc: 0.6925, Test Acc: 0.6869
Epoch: 012, Train Acc: 0.6963, Test Acc: 0.6805
Epoch: 013, Train Acc: 0.6987, Test Acc: 0.6709
Epoch: 014, Train Acc: 0.6925, Test Acc: 0.6837
Epoch: 015, Train Acc: 0.6975, Test Acc: 0.6773
Epoch: 016, Train Acc: 0.7000, Test Acc: 0.6901
Epoch: 017, Train Acc: 0.6863, Test Acc: 0.6581
Epoch: 018, Train Acc: 0.6925, Test Acc: 0.6709
Epoch: 019, Train Acc: 0.7013, Test Acc: 0.6933
