In [None]:
# Install torch geometric
#This cell will take almost 10-15 mins
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-geometric

In [1]:
import torch
from torch_geometric.datasets import TUDataset

dataset = TUDataset(root='data/TUDataset', name='MUTAG')

print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip



Dataset: MUTAG(188):
Number of graphs: 188
Number of features: 7
Number of classes: 2

Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])
Number of nodes: 17
Number of edges: 38
Average node degree: 2.24
Has isolated nodes: False
Has self-loops: False
Is undirected: True


Extracting data\TUDataset\MUTAG\MUTAG.zip
Processing...
Done!


In [2]:
data = dataset[1]  # Get the first graph object.

print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')


Data(edge_index=[2, 28], x=[13, 7], edge_attr=[28, 4], y=[1])
Number of nodes: 13
Number of edges: 28
Average node degree: 2.15
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [3]:
torch.manual_seed(12345)
dataset = dataset.shuffle()

train_dataset = dataset[:150]
test_dataset = dataset[150:]

print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')

Number of training graphs: 150
Number of test graphs: 38


In [4]:
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

for step, data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()

Step 1:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 2636], x=[1188, 7], edge_attr=[2636, 4], y=[64], batch=[1188], ptr=[65])

Step 2:
Number of graphs in the current batch: 64
DataBatch(edge_index=[2, 2506], x=[1139, 7], edge_attr=[2506, 4], y=[64], batch=[1139], ptr=[65])

Step 3:
Number of graphs in the current batch: 22
DataBatch(edge_index=[2, 852], x=[387, 7], edge_attr=[852, 4], y=[22], batch=[387], ptr=[23])



In [13]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index, batch):
        # Obtain node embeddings 
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        
        # Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        
        return x

model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(7, 64)
  (conv2): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [14]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
model.train()
for epoch in range(1, 50):
  correct = 0  
  for data in train_loader:  # Iterate in batches over the training dataset.
    print(data.edge_index)
    out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
    loss = criterion(out, data.y)  # Compute the loss.
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    optimizer.zero_grad()  # Clear gradients.
    pred = out.argmax(dim=1)  # Use the class with highest probability.
    correct += int((pred == data.y).sum())  # Check against ground-truth labels.
  
  train_acc = correct / len(train_loader.dataset)  
  
  print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}')

tensor([[   0,    0,    1,  ..., 1151, 1152, 1153],
        [   1,    5,    0,  ..., 1153, 1151, 1151]])
tensor([[   0,    1,    1,  ..., 1177, 1178, 1179],
        [   1,    0,    2,  ..., 1179, 1177, 1177]])
tensor([[  0,   0,   1,  ..., 377, 378, 379],
        [  1,   5,   0,  ..., 379, 377, 377]])
Epoch: 001, Train Acc: 0.6467
tensor([[   0,    0,    1,  ..., 1169, 1169, 1170],
        [   1,    5,    0,  ..., 1164, 1168, 1168]])
tensor([[   0,    0,    1,  ..., 1156, 1157, 1158],
        [   1,    5,    0,  ..., 1158, 1156, 1156]])
tensor([[  0,   0,   1,  ..., 382, 383, 383],
        [  1,   5,   0,  ..., 383, 378, 382]])
Epoch: 002, Train Acc: 0.6467
tensor([[   0,    0,    1,  ..., 1144, 1145, 1146],
        [   1,   13,    0,  ..., 1146, 1144, 1144]])
tensor([[   0,    0,    1,  ..., 1147, 1148, 1149],
        [   1,    9,    0,  ..., 1149, 1147, 1147]])
tensor([[  0,   0,   1,  ..., 414, 415, 416],
        [  1,   5,   0,  ..., 416, 414, 414]])
Epoch: 003, Train Acc: 0.6467
t

tensor([[  0,   0,   1,  ..., 380, 381, 382],
        [  1,   9,   0,  ..., 382, 380, 380]])
Epoch: 026, Train Acc: 0.7200
tensor([[   0,    0,    1,  ..., 1187, 1188, 1189],
        [   1,    5,    0,  ..., 1189, 1187, 1187]])
tensor([[   0,    0,    1,  ..., 1151, 1152, 1153],
        [   1,   13,    0,  ..., 1153, 1151, 1151]])
tensor([[  0,   1,   1,  ..., 367, 368, 369],
        [  1,   0,   2,  ..., 369, 367, 367]])
Epoch: 027, Train Acc: 0.7467
tensor([[   0,    0,    1,  ..., 1159, 1160, 1161],
        [   1,    9,    0,  ..., 1161, 1159, 1159]])
tensor([[   0,    0,    1,  ..., 1167, 1168, 1169],
        [   1,    9,    0,  ..., 1169, 1167, 1167]])
tensor([[  0,   0,   1,  ..., 379, 380, 381],
        [  1,   5,   0,  ..., 381, 379, 379]])
Epoch: 028, Train Acc: 0.7600
tensor([[   0,    0,    1,  ..., 1171, 1172, 1173],
        [   1,    9,    0,  ..., 1173, 1171, 1171]])
tensor([[   0,    0,    1,  ..., 1113, 1114, 1115],
        [   1,    8,    0,  ..., 1115, 1113, 1113]])
t

In [10]:
model.eval()
correct = 0
for data in test_loader:  # Iterate in batches over the training dataset.
  out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
  loss = criterion(out, data.y)  # Compute the loss.
  pred = out.argmax(dim=1)  # Use the class with highest probability.
  correct += int((pred == data.y).sum())  # Check against ground-truth labels.

test_acc = correct / len(test_loader.dataset)  
  
print(f'Test Acc: {test_acc:.4f}')

Test Acc: 0.7632
