In [28]:
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

In [30]:
path = osp.join('data', 'PROTEINS')
dataset = TUDataset(path, name='PROTEINS')
dataset = dataset.shuffle()
n = len(dataset) // 10
test_dataset = dataset[:n]
train_dataset = dataset[n:]
test_loader = DataLoader(test_dataset, batch_size=60)
train_loader = DataLoader(train_dataset, batch_size=60)

Downloading https://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/PROTEINS.zip
Extracting data/PROTEINS/PROTEINS/PROTEINS.zip
Processing...
Done!


In [72]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = GraphConv(dataset.num_features, 128)
        self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.8)

        self.lin1 = torch.nn.Linear(256, 128)
        self.lin2 = torch.nn.Linear(128, 64)
        self.lin3 = torch.nn.Linear(64, dataset.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        print(x, edge_index, batch)
        print(self.conv1(x, edge_index))

        x = F.relu(self.conv1(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool3(x, edge_index, None, batch)
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = x1 + x2 + x3

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.log_softmax(self.lin3(x), dim=-1)

        return x

In [73]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)

In [93]:
next(iter(train_loader)).num_nodes

2264

In [91]:
len(next(iter(train_loader)).batch.numpy())

2264

In [81]:
np.unique(next(iter(train_loader)).batch.numpy())

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59])

In [75]:
model.train
for data in train_loader:
    data = data.to(device)
    optimizer.zero_grad()
    output = model(data)
    loss = F.nll_loss(output, data.y)
    break

tensor([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        ...,
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.]], device='cuda:0') tensor([[   0,    0,    1,  ..., 2263, 2263, 2263],
        [   1,   61,    0,  ..., 2258, 2259, 2262]], device='cuda:0') tensor([ 0,  0,  0,  ..., 59, 59, 59], device='cuda:0')
tensor([[-0.9557, -0.6853,  0.0559,  ...,  0.1344, -0.5559, -0.8847],
        [-1.0450, -1.0113, -0.1212,  ...,  0.1851, -0.5931, -1.3067],
        [-0.7818, -0.8083, -0.1648,  ...,  0.4937, -0.2351, -0.2058],
        ...,
        [ 0.3100, -1.7444, -1.3571,  ...,  2.1118,  0.9566,  0.1684],
        [ 0.3100, -1.7444, -1.3571,  ...,  2.1118,  0.9566,  0.1684],
        [ 0.3148, -0.9694, -0.7822,  ...,  1.6512,  0.7101,  0.3336]],
       device='cuda:0', grad_fn=<AddBackward0>)


In [74]:
def train(epoch):
    model.train()

    loss_all = 0
    for data in train_loader:
        data = data.to(device)
        output = model(data)
        loss = F.nll_loss(output, data.y)
        optimizer.zero_grad()
        loss.backward()
        loss_all += data.num_graphs * loss.item()
        optimizer.step()
    return loss_all / len(train_dataset)


def test(loader):
    model.eval()

    correct = 0
    for data in loader:
        data = data.to(device)
        pred = model(data).max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)

In [71]:
for epoch in range(1, 201):
    loss = train(epoch)
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'.
          format(epoch, loss, train_acc, test_acc))

Epoch: 001, Loss: 0.64992, Train Acc: 0.71956, Test Acc: 0.71171
Epoch: 002, Loss: 0.58320, Train Acc: 0.72156, Test Acc: 0.72973
Epoch: 003, Loss: 0.59073, Train Acc: 0.73553, Test Acc: 0.76577
Epoch: 004, Loss: 0.56426, Train Acc: 0.74052, Test Acc: 0.73874
Epoch: 005, Loss: 0.55536, Train Acc: 0.74750, Test Acc: 0.78378
Epoch: 006, Loss: 0.54242, Train Acc: 0.75948, Test Acc: 0.78378
Epoch: 007, Loss: 0.53594, Train Acc: 0.75649, Test Acc: 0.78378
Epoch: 008, Loss: 0.53390, Train Acc: 0.75749, Test Acc: 0.78378
Epoch: 009, Loss: 0.53351, Train Acc: 0.75649, Test Acc: 0.77477
Epoch: 010, Loss: 0.53321, Train Acc: 0.75749, Test Acc: 0.74775
Epoch: 011, Loss: 0.53031, Train Acc: 0.76248, Test Acc: 0.77477
Epoch: 012, Loss: 0.53097, Train Acc: 0.75848, Test Acc: 0.79279
Epoch: 013, Loss: 0.52178, Train Acc: 0.76347, Test Acc: 0.79279
Epoch: 014, Loss: 0.51979, Train Acc: 0.76747, Test Acc: 0.75676
Epoch: 015, Loss: 0.51881, Train Acc: 0.76647, Test Acc: 0.74775
Epoch: 016, Loss: 0.50889

KeyboardInterrupt: 