<a href="https://colab.research.google.com/github/isebenius/Projects/blob/master/Toy_Graph_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Install required packages.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-geometric

In [11]:
import torch
from torch_geometric.datasets import TUDataset

# Load the MUTAG collection through TUDataset, storing files under data/TUDataset.
dataset = TUDataset(name='MUTAG', root='data/TUDataset')

# Keep a handle on the first graph of the collection for inspection.
data = dataset[0]


In [23]:
# Seed torch's RNG immediately before shuffling the dataset.
torch.manual_seed(12345)
dataset = dataset.shuffle()

# The first 150 shuffled graphs are used for training, the remainder for testing.
train_dataset, test_dataset = dataset[:150], dataset[150:]

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')

Number of training graphs: 150
Number of test graphs: 38


In [24]:
from torch_geometric.data import DataLoader

# Mini-batch loaders with 64 graphs per batch; only the training split is
# reshuffled on each pass.
train_loader, test_loader = (
    DataLoader(split, batch_size=64, shuffle=reshuffle)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)


In [25]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool



In [26]:
from IPython.display import Javascript
def train():
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Operates on the notebook-level ``model``, ``criterion`` and ``optimizer``
    objects and returns nothing.
    """
    model.train()

    for batch in train_loader:
        # Forward pass on the current mini-batch.
        logits = model(batch.x, batch.edge_index, batch.batch)
        batch_loss = criterion(logits, batch.y)
        # Backward pass, parameter update, then reset the gradients so they do
        # not accumulate into the next batch.
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
    """Return the fraction of graphs in ``loader`` that ``model`` labels correctly.

    Uses the notebook-level ``model``, which is switched to eval mode first.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; it must also provide ``loader.dataset`` with a length.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()

    correct = 0
    # Evaluation never calls backward(), so disable autograd bookkeeping to
    # avoid building a computation graph for every batch.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Index of the largest output per graph.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.


In [27]:
from torch_geometric.nn import GraphConv

class GNN(torch.nn.Module):
    """Graph classifier: three GraphConv layers, mean-pool readout, linear head.

    Input and output sizes are taken from the notebook-level ``dataset``
    (``num_node_features`` and ``num_classes``).
    """

    def __init__(self, hidden_channels):
        """Create the layers, each hidden layer having ``hidden_channels`` units."""
        super().__init__()
        # Re-seed torch's RNG before any layer is created.
        torch.manual_seed(12345)
        self.conv1 = GraphConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return the output of the linear head for every graph in the batch."""
        # Message passing; no ReLU is applied after the last convolution.
        h = F.relu(self.conv1(x, edge_index))
        h = F.relu(self.conv2(h, edge_index))
        h = self.conv3(h, edge_index)

        # Readout: pool node features per graph according to ``batch``.
        pooled = global_mean_pool(h, batch)

        # Dropout is only active while the module is in training mode.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)

# Build the GraphConv classifier with 64 hidden units and show its layers.
model = GNN(64)
print(model)

GNN(
  (conv1): GraphConv(7, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GraphConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [28]:
from IPython.display import Javascript
# Ask Colab to cap this cell's output frame at 300px so the epoch log scrolls.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

# Fresh model plus the loss and optimizer that train() reads as globals.
model = GNN(hidden_channels=64)
print(model)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# 200 epochs; accuracy on both splits is reported after each one.
for epoch in range(1, 201):
    train()
    train_acc, test_acc = test(train_loader), test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

<IPython.core.display.Javascript object>

GNN(
  (conv1): GraphConv(7, 64)
  (conv2): GraphConv(64, 64)
  (conv3): GraphConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)
Epoch: 001, Train Acc: 0.3133, Test Acc: 0.4211
Epoch: 002, Train Acc: 0.6867, Test Acc: 0.5789
Epoch: 003, Train Acc: 0.6867, Test Acc: 0.5789
Epoch: 004, Train Acc: 0.6867, Test Acc: 0.5789
Epoch: 005, Train Acc: 0.6867, Test Acc: 0.5789
Epoch: 006, Train Acc: 0.6867, Test Acc: 0.5789
Epoch: 007, Train Acc: 0.6867, Test Acc: 0.5789
Epoch: 008, Train Acc: 0.7067, Test Acc: 0.6579
Epoch: 009, Train Acc: 0.7333, Test Acc: 0.7368
Epoch: 010, Train Acc: 0.7333, Test Acc: 0.7368
Epoch: 011, Train Acc: 0.7400, Test Acc: 0.7632
Epoch: 012, Train Acc: 0.7933, Test Acc: 0.8158
Epoch: 013, Train Acc: 0.7533, Test Acc: 0.7632
Epoch: 014, Train Acc: 0.7800, Test Acc: 0.7895
Epoch: 015, Train Acc: 0.7667, Test Acc: 0.8684
Epoch: 016, Train Acc: 0.7733, Test Acc: 0.7895
Epoch: 017, Train Acc: 0.7800, Test Acc: 0.7895
Epoch: 018, Train Acc: 0.7800, T

In [83]:
from torch_geometric.nn import GraphConv
from torch_geometric.nn import TopKPooling
from torch_geometric.nn import SAGPooling
import math

class GNN_topk(torch.nn.Module):
    """GraphConv classifier with one pooling layer between the second and third convolution.

    The pooling layer currently used is ``SAGPooling``. Sizes of the first and
    last layer come from the notebook-level ``dataset``.
    """

    def __init__(self, hidden_channels, ratio):
        """Create the layers; ``ratio`` is handed to the pooling layer unchanged."""
        super().__init__()
        # Re-seed torch's RNG before any layer is created.
        torch.manual_seed(12345)
        self.conv1 = GraphConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        # Alternative pooling layer kept for experimentation:
        # self.pool1 = TopKPooling(hidden_channels, ratio=ratio)
        self.pool1 = SAGPooling(hidden_channels, ratio=ratio)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        """Return the output of the linear head for every graph in the batch.

        Side effect: ``self.score`` is overwritten with the tuple
        ``(score, edge_index, batch)`` produced by the pooling layer.
        """
        h = F.relu(self.conv1(x, edge_index))
        h = F.relu(self.conv2(h, edge_index))

        # The pooling layer returns six values; the third and fifth are unused here.
        h, edge_index, _unused_attr, batch, _unused_perm, score = self.pool1(
            h, edge_index, None, batch
        )
        # Keep the pooling result around so it can be inspected after a forward pass.
        self.score = (score, edge_index, batch)

        h = F.relu(self.conv3(h, edge_index))

        # Readout, dropout (training mode only), then the classifier head.
        pooled = global_mean_pool(h, batch)
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)

# Build the pooled variant with 64 hidden units and a pooling ratio of 0.5,
# then show its layers.
model = GNN_topk(64, 0.5)
print(model)

GNN_topk(
  (conv1): GraphConv(7, 64)
  (conv2): GraphConv(64, 64)
  (pool1): SAGPooling(GraphConv, 64, ratio=0.5, multiplier=1)
  (conv3): GraphConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [84]:
from IPython.display import Javascript
# Ask Colab to cap this cell's output frame at 300px so the epoch log scrolls.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

# Fresh pooled model with 12 hidden units and a pooling ratio of 0.5.
model = GNN_topk(hidden_channels=12, ratio = 0.5)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# train() reads `criterion` as a global. Define it here so this cell does not
# depend on the earlier GNN training cell having been run first.
criterion = torch.nn.CrossEntropyLoss()

# Short two-epoch run; accuracy on both splits is reported after each epoch.
for epoch in range(1, 3):
    train()
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

<IPython.core.display.Javascript object>

GNN_topk(
  (conv1): GraphConv(7, 12)
  (conv2): GraphConv(12, 12)
  (pool1): SAGPooling(GraphConv, 12, ratio=0.5, multiplier=1)
  (conv3): GraphConv(12, 12)
  (lin): Linear(in_features=12, out_features=2, bias=True)
)
Epoch: 001, Train Acc: 0.6867, Test Acc: 0.5789
Epoch: 002, Train Acc: 0.6867, Test Acc: 0.5789


In [85]:
# Display the (score, edge_index, batch) tuple that GNN_topk.forward stored
# on the model during its most recent call.
model.score

(tensor([0.9538, 0.9399, 0.9146, 0.9055, 0.8977, 0.9851, 0.9851, 0.9384, 0.9384,
         0.9384, 0.9384, 0.9347, 0.9347, 0.9145, 0.9680, 0.9680, 0.9680, 0.9622,
         0.9507, 0.9497, 0.9497, 0.9358, 0.9348, 0.9348, 0.9753, 0.9753, 0.9419,
         0.9419, 0.9355, 0.9355, 0.9347, 0.9316, 0.9316, 0.9571, 0.9545, 0.9307,
         0.9229, 0.9096, 0.9057, 0.9057, 0.9857, 0.9857, 0.9641, 0.9619, 0.9384,
         0.9346, 0.9307, 0.9227, 0.9145, 0.9145, 0.9054, 0.9919, 0.9919, 0.9872,
         0.9871, 0.9681, 0.9572, 0.9476, 0.9445, 0.9441, 0.9428, 0.9538, 0.9460,
         0.9457, 0.9213, 0.9130, 0.9078, 0.9866, 0.9832, 0.9641, 0.9531, 0.9419,
         0.9384, 0.9365, 0.9149, 0.9956, 0.9956, 0.9872, 0.9872, 0.9872, 0.9872,
         0.9477, 0.9477, 0.9477, 0.9477, 0.9476, 0.9961, 0.9953, 0.9935, 0.9919,
         0.9892, 0.9891, 0.9879, 0.9879, 0.9681, 0.9641, 0.9529, 0.9477, 0.9866,
         0.9832, 0.9619, 0.9476, 0.9419, 0.9412, 0.9346, 0.9307, 0.9227, 0.9145,
         0.9052, 0.9880, 0.9

In [None]:
def get_scores(loader):
    """Return the fraction of graphs in ``loader`` that ``model`` labels correctly.

    NOTE(review): despite its name, this function currently computes accuracy
    exactly like ``test`` and does not collect any pooling scores. Confirm the
    intended behaviour before relying on it.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; it must also provide ``loader.dataset`` with a length.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()

    correct = 0
    # Evaluation never calls backward(), so disable autograd bookkeeping to
    # avoid building a computation graph for every batch.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Index of the largest output per graph.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.

## Conclusion

In this chapter, you have learned how to apply GNNs to the task of graph classification.
You have learned how graphs can be batched together for better GPU utilization, and how to apply readout layers for obtaining graph embeddings rather than node embeddings.

In the next session, you will learn how you can utilize PyTorch Geometric to let Graph Neural Networks scale to single large graphs.

[Next: Scaling Graph Neural Networks](https://colab.research.google.com/drive/1XAjcjRHrSR_ypCk_feIWFbcBKyT4Lirs)