In [1]:
import torch
from torch_geometric.datasets import TUDataset

In [2]:
# Load the MUTAG graph-classification benchmark, using data/TUDataset as the
# dataset root directory.
dataset = TUDataset(
    name='MUTAG',
    root='data/TUDataset',
)

In [3]:
# Keep the first graph as `data`; the following cells inspect its edge features
# and print summary statistics for it.
data = dataset[0]  # Get the first graph object.

In [4]:
# Display the edge-feature matrix of the first graph (one row per edge entry).
data.edge_attr

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.]])

In [5]:
# Report size, mean degree and basic structural properties of the first graph.
# NOTE(review): newer PyTorch Geometric releases appear to prefer
# has_isolated_nodes() / has_self_loops() over the contains_* spellings used
# here -- confirm against the installed version before switching.
print(
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Contains isolated nodes: {data.contains_isolated_nodes()}',
    f'Contains self-loops: {data.contains_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
    sep='\n',
)

Number of nodes: 17
Number of edges: 38
Average node degree: 2.24
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True


In [13]:
# Seed the global RNG first so the permutation drawn by shuffle() is repeatable
# on a fresh kernel.
torch.manual_seed(12345)
dataset = dataset.shuffle()

# The first 100 shuffled graphs form the training split and the next 50 the
# test split; graphs at shuffled positions 150 and beyond are used by neither.
train_dataset, test_dataset = dataset[:100], dataset[100:150]

print(
    f'Number of training graphs: {len(train_dataset)}',
    f'Number of test graphs: {len(test_dataset)}',
    sep='\n',
)

Number of training graphs: 100
Number of test graphs: 50


In [15]:
# Peek at the edge features of the second graph in the training split.
dasd = train_dataset[1]
dasd.edge_attr

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1.,

In [16]:
from torch_geometric.data import DataLoader

# Wrap both splits in mini-batch loaders of up to 64 graphs each: training
# batches are drawn in shuffled order, test batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Walk through one pass over the training loader and describe every mini-batch.
# The loop variable is called `batch` rather than `data` so the single-graph
# `data` object bound earlier in the notebook is not overwritten, which keeps
# the earlier inspection cells re-runnable with the same result.
for step, batch in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {batch.num_graphs}')
    print(batch)
    # Debug aid: show the Python type of the label container.
    print("Dasd", type(batch.y))
    

Step 1:
Number of graphs in the current batch: 64
Batch(batch=[1137], edge_attr=[2520, 4], edge_index=[2, 2520], x=[1137, 7], y=[64])
Dasd <class 'torch.Tensor'>
Step 2:
Number of graphs in the current batch: 36
Batch(batch=[674], edge_attr=[1484, 4], edge_index=[2, 1484], x=[674, 7], y=[36])
Dasd <class 'torch.Tensor'>


In [17]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    """Three-layer graph convolutional network for graph-level classification.

    Node features pass through three ``GCNConv`` layers (ReLU after the first
    two), are mean-pooled per graph, regularised with dropout and mapped to
    class scores by a linear layer.

    Args:
        hidden_channels: Output width of every ``GCNConv`` layer.
        in_channels: Number of input node features. When ``None`` (default),
            ``dataset.num_node_features`` of the notebook-level ``dataset``
            is used, matching the original behaviour.
        num_classes: Number of output classes. When ``None`` (default),
            ``dataset.num_classes`` of the notebook-level ``dataset`` is used.
    """

    def __init__(self, hidden_channels, in_channels=None, num_classes=None):
        super().__init__()
        # Fall back to the notebook-level dataset only when sizes are not given,
        # so the class can also be built for other feature/label dimensions.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if num_classes is None:
            num_classes = dataset.num_classes

        # Re-seed the global RNG so every new instance starts from the same
        # weights; the layer creation order below must stay fixed for that.
        torch.manual_seed(12345)
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Compute per-graph class scores.

        Args:
            x: Node feature matrix passed to the first convolution.
            edge_index: Graph connectivity passed to every convolution.
            batch: Node-to-graph assignment used by the mean-pool readout.

        Returns:
            The output of the final linear layer, one row per graph.
        """
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier (dropout is active only in training mode)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x

# Build the network with 64 hidden channels and print its layer layout.
model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(7, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [18]:
# Start from a freshly constructed network so training does not continue from
# whatever state an earlier `model` binding may hold.
model = GCN(hidden_channels=64)

# Cross-entropy loss on the class scores, minimised with Adam (learning rate 0.01).
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``criterion`` and ``optimizer``; the
    parameters are updated once per mini-batch. Returns nothing.
    """
    model.train()

    for batch in train_loader:
        # Forward pass on the whole mini-batch.
        logits = model(batch.x, batch.edge_index, batch.batch)
        batch_loss = criterion(logits, batch.y)

        # Backpropagate, apply the update, then reset gradients for the next batch.
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

def test(loader):
    """Return the classification accuracy of the notebook-level ``model``.

    Args:
        loader: Iterable of mini-batches exposing ``x``, ``edge_index``,
            ``batch`` and ``y``. It must also provide a ``dataset`` attribute
            whose length is the total number of graphs evaluated.

    Returns:
        The fraction of graphs whose predicted class equals the label.
    """
    model.eval()  # Switch off dropout so predictions are deterministic.

    correct = 0
    # Evaluation needs no gradients: disabling autograd avoids building a
    # computation graph for every batch, saving memory and time without
    # changing the predictions.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with the highest score.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.


# Train for 200 epochs; after each one, measure accuracy on the training split
# first and the test split second, then log both.
for epoch in range(1, 201):
    train()
    train_acc, test_acc = test(train_loader), test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.6700, Test Acc: 0.7200
Epoch: 002, Train Acc: 0.6700, Test Acc: 0.7200
Epoch: 003, Train Acc: 0.6700, Test Acc: 0.7200
Epoch: 004, Train Acc: 0.6700, Test Acc: 0.7200
Epoch: 005, Train Acc: 0.6700, Test Acc: 0.7200
Epoch: 006, Train Acc: 0.6700, Test Acc: 0.7200
Epoch: 007, Train Acc: 0.6700, Test Acc: 0.7200
Epoch: 008, Train Acc: 0.6700, Test Acc: 0.7200
Epoch: 009, Train Acc: 0.6800, Test Acc: 0.7200
Epoch: 010, Train Acc: 0.7300, Test Acc: 0.7400
Epoch: 011, Train Acc: 0.7600, Test Acc: 0.7400
Epoch: 012, Train Acc: 0.7400, Test Acc: 0.7200
Epoch: 013, Train Acc: 0.7400, Test Acc: 0.7400
Epoch: 014, Train Acc: 0.7400, Test Acc: 0.7600
Epoch: 015, Train Acc: 0.7400, Test Acc: 0.7400
Epoch: 016, Train Acc: 0.7100, Test Acc: 0.7600
Epoch: 017, Train Acc: 0.7000, Test Acc: 0.7600
Epoch: 018, Train Acc: 0.7100, Test Acc: 0.7600
Epoch: 019, Train Acc: 0.7400, Test Acc: 0.7200
Epoch: 020, Train Acc: 0.7600, Test Acc: 0.7200
Epoch: 021, Train Acc: 0.7600, Test Acc:

Epoch: 174, Train Acc: 0.8300, Test Acc: 0.7200
Epoch: 175, Train Acc: 0.8400, Test Acc: 0.7800
Epoch: 176, Train Acc: 0.8300, Test Acc: 0.7600
Epoch: 177, Train Acc: 0.8400, Test Acc: 0.7800
Epoch: 178, Train Acc: 0.8300, Test Acc: 0.8000
Epoch: 179, Train Acc: 0.8300, Test Acc: 0.7400
Epoch: 180, Train Acc: 0.8200, Test Acc: 0.7600
Epoch: 181, Train Acc: 0.8500, Test Acc: 0.7600
Epoch: 182, Train Acc: 0.8200, Test Acc: 0.7600
Epoch: 183, Train Acc: 0.8500, Test Acc: 0.7600
Epoch: 184, Train Acc: 0.8200, Test Acc: 0.7800
Epoch: 185, Train Acc: 0.8200, Test Acc: 0.7400
Epoch: 186, Train Acc: 0.8400, Test Acc: 0.7400
Epoch: 187, Train Acc: 0.8500, Test Acc: 0.7600
Epoch: 188, Train Acc: 0.8400, Test Acc: 0.7400
Epoch: 189, Train Acc: 0.8100, Test Acc: 0.7400
Epoch: 190, Train Acc: 0.8300, Test Acc: 0.7800
Epoch: 191, Train Acc: 0.8400, Test Acc: 0.7600
Epoch: 192, Train Acc: 0.8400, Test Acc: 0.7400
Epoch: 193, Train Acc: 0.8200, Test Acc: 0.7400
Epoch: 194, Train Acc: 0.8100, Test Acc:

In [11]:
# Draw a single shuffled mini-batch of 10 training graphs.
sample1 = next(iter(DataLoader(train_dataset, batch_size=10, shuffle=True)))

# Transpose the [2, num_edges] edge index and convert it into a plain Python
# list of [source, target] pairs.
edgelist = sample1.edge_index.t().contiguous().tolist()
print(sample1.y)

tensor([1, 0, 1, 1, 0, 0, 1, 1, 1, 0])


In [12]:
import networkx as nx
from pyvis.network import Network

# Build an undirected NetworkX graph from the sampled batch's edge pairs; since
# nx.Graph is undirected, reciprocal pairs (u, v) / (v, u) become one edge.
G = nx.Graph(edgelist)

# View the result in pyvis.
nt = Network(width="1000", height="500")
# Turn on pyvis' option buttons for the rendered page.
nt.show_buttons()
# Copy the nodes and edges of the NetworkX graph into the pyvis network.
nt.from_nx(G)
# Render the network to the file "nx.html".
nt.show("nx.html")