In [1]:
import torch, os

In [2]:
from torch_geometric.datasets import TUDataset

In [8]:
# Load the MUTAG collection through TUDataset, rooted in the project data folder.
dataset_root = os.path.join('data_processing', 'neural_networks', 'data', 'TUDataset')
dataset = TUDataset(root=dataset_root, name='MUTAG')
print(f'{len(dataset) = }, {dataset.num_features = }, {dataset.num_classes = }')

# Peek at the first graph before the dataset is shuffled.
data = dataset[0]
print(data)

# Seed torch's global RNG right before shuffling (presumably so the split is repeatable).
torch.manual_seed(69)
dataset = dataset.shuffle()

# The first 150 shuffled graphs are used for training, the remainder for testing.
train_dataset, test_dataset = dataset[:150], dataset[150:]
print(f'{len(train_dataset) = }, {len(test_dataset) = }')

len(dataset) = 188, dataset.num_features = 7, dataset.num_classes = 2
Data(edge_index=[2, 38], x=[17, 7], edge_attr=[38, 4], y=[1])
len(train_dataset) = 150, len(test_dataset) = 38


### Batching combines the adjacency matrices of multiple graphs into a single block-diagonal 2D matrix

In [27]:
from torch_geometric.loader import DataLoader
BATCH_SIZE = 64
print(dataset[0])

# Training batches are drawn in shuffled order; test batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# One pass over the training loader: show every batch and tally the number of
# columns of its edge_index (one column per stored edge).
edges_num = 0
for step, data in enumerate(train_loader):
    print(f'{step = }, {data = }')
    edges_num = edges_num + data.edge_index.shape[1]
edges_num

Data(edge_index=[2, 44], x=[19, 7], edge_attr=[44, 4], y=[1])
step = 0, data = DataBatch(edge_index=[2, 2424], x=[1111, 7], edge_attr=[2424, 4], y=[64], batch=[1111], ptr=[65])
step = 1, data = DataBatch(edge_index=[2, 2674], x=[1199, 7], edge_attr=[2674, 4], y=[64], batch=[1199], ptr=[65])
step = 2, data = DataBatch(edge_index=[2, 900], x=[405, 7], edge_attr=[900, 4], y=[22], batch=[405], ptr=[23])


5998

In [24]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

In [28]:
class GCN(torch.nn.Module):
    """Graph-level classifier: three GCNConv layers, mean pooling, linear head.

    Args:
        hidden_channels: Output width of each of the three GCNConv layers.
        in_channels: Size of the per-node input features. When ``None``
            (default) it is read from the notebook-level ``dataset`` as
            ``dataset.num_node_features``, which is the original behaviour.
        num_classes: Number of output logits. When ``None`` (default) it is
            read from ``dataset.num_classes``, which is the original behaviour.
        dropout: Dropout probability applied to the pooled graph embedding
            while the module is in training mode. Defaults to 0.5.
    """

    def __init__(self, hidden_channels, in_channels=None, num_classes=None, dropout=0.5) -> None:
        super().__init__()
        # Fall back to the global dataset only when sizes are not given
        # explicitly, so the class can also be built without that global.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if num_classes is None:
            num_classes = dataset.num_classes

        # Re-seed before creating any layer so weight initialisation is the
        # same for every instance; layer creation order must stay fixed.
        torch.manual_seed(69)
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin   = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Return one row of class logits per graph in the batch.

        Args:
            x: Node feature matrix passed to the first convolution.
            edge_index: Edge index tensor passed to every convolution.
            batch: Per-node graph assignment used by ``global_mean_pool``.
        """
        # 1. Node embeddings: three convolutions, each followed by ReLU.
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        x = x.relu()

        # 2. Readout: average the node embeddings of each graph.
        x = global_mean_pool(x, batch)

        # 3. Classification head; dropout is a no-op in eval mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lin(x)

        return x

# Width passed to the GCN constructor for its hidden layers.
HIDDEN_CHANNELS = 64 
# Build a throwaway instance as the cell's last expression so its layer layout is displayed.
GCN(hidden_channels=HIDDEN_CHANNELS)

GCN(
  (conv1): GCNConv(7, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)

In [30]:
# Fresh network, plus the loss function and optimiser that train() and test() read as globals.
model = GCN(hidden_channels=HIDDEN_CHANNELS)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

def train():
    """Run one optimisation pass over the global ``train_loader``.

    Switches the global ``model`` to training mode, then for every mini-batch
    clears the gradients, evaluates ``criterion`` on the model output against
    the batch labels, backpropagates, and applies one ``optimizer`` step.
    Returns nothing.
    """
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        logits = model(batch.x, batch.edge_index, batch.batch)
        criterion(logits, batch.y).backward()
        optimizer.step()

def test(loader):
    """Return the accuracy of the global ``model`` on ``loader``.

    Args:
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``
            and ``y``; it must also expose ``.dataset`` so the total number
            of graphs can be taken from ``len(loader.dataset)``.

    Returns:
        Fraction of graphs whose arg-max prediction equals the label.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients: disabling autograd avoids building a
    # computation graph for every batch (less memory, faster forward pass).
    with torch.no_grad():
        for data in loader:
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # predicted class = index of the largest logit
            correct += int((pred == data.y).sum())

    return correct / len(loader.dataset)

# Train for 170 epochs (epoch numbers 1..170) and report accuracy on both splits after each one.
for epoch in range(1,171):
    train()
    # Both accuracies are measured after this epoch's weight updates; test() puts the model in eval mode.
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, {train_acc = }, {test_acc = }')

Epoch: 001, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 002, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 003, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 004, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 005, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 006, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 007, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 008, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 009, train_acc = 0.6733333333333333, test_acc = 0.631578947368421
Epoch: 010, train_acc = 0.6866666666666666, test_acc = 0.631578947368421
Epoch: 011, train_acc = 0.72, test_acc = 0.6578947368421053
Epoch: 012, train_acc = 0.7466666666666667, test_acc = 0.7105263157894737
Epoch: 013, train_acc = 0.74, test_acc = 0.7105263157894737
Epoch: 014, train_acc = 0.7333333333333333, test_acc = 0.6842105263157895
Ep

In [35]:
# Scratch check: the f-string "=" specifier combined with a format spec.
a = 0.0312312
print(f'{a = :0.4f}')  # prints "a = 0.0312"

a = 0.0312
