In [1]:
import os 
import torch

In [2]:
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import ClusterData, ClusterLoader
from torch_geometric.transforms import NormalizeFeatures

# Load PubMed through the Planetoid dataset wrapper; NormalizeFeatures is
# applied as the dataset transform.
dataset = Planetoid(
    root='../datasets/Planetoid',
    name='PubMed',
    transform=NormalizeFeatures(),
)

# Short summary of the dataset (one line per item, same text as before).
print()
print(
    f'Dataset: {dataset}:',
    '==================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
    sep='\n',
)

# Take the first graph object of the dataset.
data = dataset[0]

print()
print(data)

# Seed torch's RNG before building the shuffled loader.
seed = 42
torch.manual_seed(seed)

# 1. Create subgraphs: split the graph into 128 partitions.
cluster_data = ClusterData(data, num_parts=128)

# 2. Stochastic partitioning scheme: shuffled mini-batches with batch_size=32.
train_loader = ClusterLoader(cluster_data, batch_size=32, shuffle=True)


Dataset: PubMed():
Number of graphs: 1
Number of features: 500
Number of classes: 3

Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])


Computing METIS partitioning...
Done!


In [3]:
criterion = torch.nn.CrossEntropyLoss()

def train(model):
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01,
                                 weight_decay=5e-4)
    for sub_data in train_loader:  # Iterate over each mini-batch.
        optimizer.zero_grad()  # Clear gradients.
        out = model(sub_data.x,
                    sub_data.edge_index)  # Perform a single forward pass.
        loss = criterion(
            out[sub_data.train_mask], sub_data.y[sub_data.train_mask]
        )  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.


def test(model):
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with highest probability.

    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        correct = pred[mask] == data.y[
            mask]  # Check against ground-truth labels.
        accs.append(int(correct.sum()) /
                    int(mask.sum()))  # Derive ratio of correct predictions.
    return accs


def run(model, epochs=5):
    for epoch in range(epochs):
        loss = train(model)
        train_acc, val_acc, test_acc = test(model)
        print(
            f'Epoch: {epoch:03d}, Train: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}'
        )

In [None]:
import copy

import torch.nn.functional as F
from torch_geometric.nn import (
    Aggregation,
    MaxAggregation,
    MeanAggregation,
    MultiAggregation,
    SAGEConv,
    SoftmaxAggregation,
    StdAggregation,
    SumAggregation,
    VarAggregation,
)

class GNN(torch.nn.Module):
    """Two stacked ``SAGEConv`` layers with a configurable aggregation.

    The input width is ``dataset.num_node_features`` and the output width is
    ``dataset.num_classes``; both are read from the notebook-level ``dataset``.
    """

    def __init__(self, hidden_channels, aggr='mean', aggr_kwargs=None):
        """Build both convolution layers.

        Args:
            hidden_channels: Output width of the first layer and input width
                of the second.
            aggr: Aggregation specification handed to ``SAGEConv`` as ``aggr``.
            aggr_kwargs: Handed unchanged to both layers as ``aggr_kwargs``.
        """
        super().__init__()
        in_channels = dataset.num_node_features
        self.conv1 = SAGEConv(in_channels, hidden_channels, aggr=aggr,
                              aggr_kwargs=aggr_kwargs)

        out_channels = dataset.num_classes
        # The second layer receives a deep copy, so the two layers never hold
        # the very same aggregation object.
        second_aggr = copy.deepcopy(aggr)
        self.conv2 = SAGEConv(hidden_channels, out_channels, aggr=second_aggr,
                              aggr_kwargs=aggr_kwargs)

    def forward(self, x, edge_index):
        """Return the second layer's raw output for every node.

        Pipeline: conv1 -> ReLU -> dropout (p=0.5, applied only while the
        module is in training mode) -> conv2.
        """
        hidden = self.conv1(x, edge_index).relu()
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        return self.conv2(hidden, edge_index)

In [5]:
# Experiment: mean aggregation selected by its string name.
torch.manual_seed(seed)  # Re-seed so every experiment starts from the same RNG state.
model = GNN(16, aggr='mean')
print(model)
# NOTE(review): this optimizer is never passed to run(model), so it has no
# effect on the training below.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=mean)
  (conv2): SAGEConv(16, 3, aggr=mean)
)
Epoch: 000, Train: 0.3500, Val Acc: 0.2060, Test Acc: 0.4000
Epoch: 001, Train: 0.3333, Val Acc: 0.4180, Test Acc: 0.4080
Epoch: 002, Train: 0.6500, Val Acc: 0.5660, Test Acc: 0.4500
Epoch: 003, Train: 0.3500, Val Acc: 0.4220, Test Acc: 0.4370
Epoch: 004, Train: 0.7000, Val Acc: 0.4580, Test Acc: 0.7180


In [6]:
# Experiment: mean aggregation again, this time passed as a MeanAggregation
# module instance instead of the string 'mean'.
torch.manual_seed(seed)
model = GNN(16, aggr=MeanAggregation())
print(model)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=MeanAggregation())
  (conv2): SAGEConv(16, 3, aggr=MeanAggregation())
)
Epoch: 000, Train: 0.3500, Val Acc: 0.1960, Test Acc: 0.3010
Epoch: 001, Train: 0.3833, Val Acc: 0.4280, Test Acc: 0.4080
Epoch: 002, Train: 0.5500, Val Acc: 0.5240, Test Acc: 0.4320
Epoch: 003, Train: 0.4000, Val Acc: 0.4300, Test Acc: 0.4990
Epoch: 004, Train: 0.7500, Val Acc: 0.4860, Test Acc: 0.7180


#### REMARK: The model is not learning when max aggregation is used

In [7]:
# Experiment: max aggregation selected by its string name.
torch.manual_seed(seed)
model = GNN(16, aggr='max')
print(model)
# NOTE(review): this optimizer is never passed to run(model), so it has no
# effect on the training below.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=max)
  (conv2): SAGEConv(16, 3, aggr=max)
)
Epoch: 000, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 001, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 002, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 003, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 004, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800


In [8]:
# Experiment: max aggregation passed as a MaxAggregation module instance.
torch.manual_seed(seed)
model = GNN(16, aggr=MaxAggregation())
print(model)
# NOTE(review): this optimizer is never passed to run(model), so it has no
# effect on the training below.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=MaxAggregation())
  (conv2): SAGEConv(16, 3, aggr=MaxAggregation())
)
Epoch: 000, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 001, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 002, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 003, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 004, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800


#### List of aggregations: the same problem occurs when max is included

In [9]:
# Experiment: a list of five aggregations given by name, including 'max'.
torch.manual_seed(seed)
model = GNN(16, aggr=['mean', 'max', 'sum', 'std', 'var'])
print(model)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=['mean', 'max', 'sum', 'std', 'var'])
  (conv2): SAGEConv(16, 3, aggr=['mean', 'max', 'sum', 'std', 'var'])
)
Epoch: 000, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 001, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 002, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 003, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 004, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800


In [10]:
# Experiment: the same list of named aggregations with 'max' left out.
torch.manual_seed(seed)
model = GNN(16, aggr=['mean', 'sum', 'std', 'var'])
print(model)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=['mean', 'sum', 'std', 'var'])
  (conv2): SAGEConv(16, 3, aggr=['mean', 'sum', 'std', 'var'])
)
Epoch: 000, Train: 0.3500, Val Acc: 0.2480, Test Acc: 0.3830
Epoch: 001, Train: 0.3333, Val Acc: 0.2080, Test Acc: 0.4470
Epoch: 002, Train: 0.6333, Val Acc: 0.5880, Test Acc: 0.6240
Epoch: 003, Train: 0.4167, Val Acc: 0.4640, Test Acc: 0.6580
Epoch: 004, Train: 0.5167, Val Acc: 0.5740, Test Acc: 0.6850


In [11]:
# Experiment: a list of aggregation module instances; MaxAggregation is
# disabled (commented out in the list below).
torch.manual_seed(seed)
model = GNN(
    16, aggr=[
        MeanAggregation(),
        # MaxAggregation(),
        SumAggregation(),
        StdAggregation(),
        VarAggregation(),
    ])
print(model)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=['MeanAggregation()', 'SumAggregation()', 'StdAggregation()', 'VarAggregation()'])
  (conv2): SAGEConv(16, 3, aggr=['MeanAggregation()', 'SumAggregation()', 'StdAggregation()', 'VarAggregation()'])
)
Epoch: 000, Train: 0.3667, Val Acc: 0.2940, Test Acc: 0.4840
Epoch: 001, Train: 0.3333, Val Acc: 0.2740, Test Acc: 0.5750
Epoch: 002, Train: 0.5500, Val Acc: 0.4460, Test Acc: 0.6050
Epoch: 003, Train: 0.4167, Val Acc: 0.3620, Test Acc: 0.6230
Epoch: 004, Train: 0.5333, Val Acc: 0.5320, Test Acc: 0.6410


In [12]:
# Experiment: a list of aggregation module instances, MaxAggregation included.
torch.manual_seed(seed)
model = GNN(
    16, aggr=[
        MeanAggregation(),
        MaxAggregation(),
        SumAggregation(),
        StdAggregation(),
        VarAggregation(),
    ])
print(model)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=['MeanAggregation()', 'MaxAggregation()', 'SumAggregation()', 'StdAggregation()', 'VarAggregation()'])
  (conv2): SAGEConv(16, 3, aggr=['MeanAggregation()', 'MaxAggregation()', 'SumAggregation()', 'StdAggregation()', 'VarAggregation()'])
)
Epoch: 000, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 001, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 002, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 003, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 004, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800


#### Use a list of mixed modules and strings for aggregations

In [13]:
# Experiment: one list mixing string names and module instances, with
# MaxAggregation included.
torch.manual_seed(seed)
model = GNN(16, aggr=[
    'mean',
    MaxAggregation(),
    'sum',
    StdAggregation(),
    'var',
])
print(model)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=['mean', 'MaxAggregation()', 'sum', 'StdAggregation()', 'var'])
  (conv2): SAGEConv(16, 3, aggr=['mean', 'MaxAggregation()', 'sum', 'StdAggregation()', 'var'])
)
Epoch: 000, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 001, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 002, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 003, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 004, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800


In [14]:
# Experiment: the mixed string/module list with MaxAggregation disabled
# (commented out in the list below).
torch.manual_seed(seed)
model = GNN(16, aggr=[
    'mean',
    # MaxAggregation(),
    'sum',
    StdAggregation(),
    'var',
])
print(model)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=['mean', 'sum', 'StdAggregation()', 'var'])
  (conv2): SAGEConv(16, 3, aggr=['mean', 'sum', 'StdAggregation()', 'var'])
)
Epoch: 000, Train: 0.3333, Val Acc: 0.2080, Test Acc: 0.3340
Epoch: 001, Train: 0.3333, Val Acc: 0.2200, Test Acc: 0.5010
Epoch: 002, Train: 0.5000, Val Acc: 0.3580, Test Acc: 0.6720
Epoch: 003, Train: 0.4333, Val Acc: 0.4500, Test Acc: 0.6340
Epoch: 004, Train: 0.3167, Val Acc: 0.4840, Test Acc: 0.6210


In [15]:
# Experiment: an explicit MultiAggregation wrapping three SoftmaxAggregation
# modules created with t=0.01, t=1 and t=100, each with learn=True.
torch.manual_seed(seed)
aggr = MultiAggregation([
    SoftmaxAggregation(t=0.01, learn=True),
    SoftmaxAggregation(t=1, learn=True),
    SoftmaxAggregation(t=100, learn=True),
])
model = GNN(16, aggr=aggr)
print(model)
run(model)

GNN(
  (conv1): SAGEConv(500, 16, aggr=MultiAggregation([
    SoftmaxAggregation(learn=True),
    SoftmaxAggregation(learn=True),
    SoftmaxAggregation(learn=True),
  ], mode=cat))
  (conv2): SAGEConv(16, 3, aggr=MultiAggregation([
    SoftmaxAggregation(learn=True),
    SoftmaxAggregation(learn=True),
    SoftmaxAggregation(learn=True),
  ], mode=cat))
)
Epoch: 000, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 001, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 002, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 003, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
Epoch: 004, Train: 0.3333, Val Acc: 0.1960, Test Acc: 0.1800
