In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv
import lightning.pytorch as pl


In [2]:
dataset = Planetoid(root='./data/Cora', name='Cora')
loader = DataLoader(dataset, batch_size=1, shuffle=False)
print(f'# of graph:    {len(dataset)}')
print(f'# of nodes:    {dataset[0].num_nodes}')
print(f'# of edges:    {dataset[0].num_edges}')
print(f'# of features: {dataset.num_node_features}')
print(f'# of classes:  {dataset.num_classes}')

print(f'Train:      {dataset[0].train_mask.sum().item()}')
print(f'Validation: {dataset[0].val_mask.sum().item()}')
print(f'Test:       {dataset[0].test_mask.sum().item()}')

# of graph:    1
# of nodes:    2708
# of edges:    10556
# of features: 1433
# of classes:  7
Train:      140
Validation: 500
Test:       1000


In [3]:
class GCN(torch.nn.Module):
    def __init__(self, num_features, num_classes, dropout_rate=0.6):
        super().__init__()
        self.num_features = num_features
        self.num_classes = num_classes
        self.dropout_rate = dropout_rate

        self.conv1 = GCNConv(self.num_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, x, edge_index):
        x = F.dropout(x, p=self.dropout_rate, training=self.training)
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout_rate, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.log_softmax(x, dim=1)
        return x

In [4]:
class GCNModule(pl.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.model.parameters(),
                                     lr=0.01,
                                     weight_decay=5e-4)
        return optimizer

    def training_step(self, batch, batch_idx):
        loss, acc = self.forward(batch)
        self.log('train_acc', acc)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        _, acc = self.forward(batch, mode="val")
        self.log("val_acc", acc)

    def test_step(self, batch, batch_idx):
        _, acc = self.forward(batch, mode="test")
        self.log("test_acc", acc)

    def forward(self, data, mode='train'):
        x = data.x
        y = data.y
        edge_index = data.edge_index

        if mode == "train":
            mask = data.train_mask
            self.model.training = True
        elif mode == "val":
            mask = data.val_mask
            self.model.training = False
        elif mode == "test":
            mask = data.test_mask
            self.model.training = False
        else:
            assert False, "Unknown forward mode: %s" % mode

        out = self.model(x, edge_index)
        loss = F.nll_loss(out[mask], y[mask])
        acc = (out[mask].argmax(dim=-1) == y[mask]).sum().float() / mask.sum()
        return loss, acc

In [5]:
model = GCN(dataset.num_features, dataset.num_classes)
gcn = GCNModule(model)
trainer = pl.Trainer(max_epochs=200, accelerator='gpu', enable_progress_bar=False)
trainer.fit(gcn, loader, loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3070') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | GCN  | 23.1 K
-------------------------------
23.1 K    Trainable params
0         Non-trainable params
23.1 K    Total params
0.092     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
`Trainer.fit` stopped: `max_epochs=200` reached.


In [24]:
batch = next(iter(loader))
batch = batch.to(gcn.device)
_, train_acc = gcn.forward(batch, mode="train")
_, test_acc = gcn.forward(batch, mode="test")
print(f'train_acc:{train_acc:.4f}')
print(f'test_acc: {test_acc:.4f}')

train_acc:1.0000
test_acc: 0.7970
