In [13]:
# !pip install torch-scatter
# !pip install torch_geometric

In [None]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool, ASAPooling as ASAP_Pooling
from torch_geometric.utils import train_test_split_edges
from torch_geometric.data import Data, Dataset
from torch_geometric.loader import DataLoader
from torch_scatter import scatter_mean, scatter_max

In [None]:
def readout(x, batch):
    x_mean = scatter_mean(x, batch, dim=0)
    x_max, _ = scatter_max(x, batch, dim=0)
    return torch.cat((x_mean, x_max), dim=-1)

class ASAP_Pool(torch.nn.Module):
    def __init__(self, dataset, num_layers, hidden, ratio=0.8, **kwargs):
        super(ASAP_Pool, self).__init__()
        if type(ratio)!=list:
            ratio = [ratio for i in range(num_layers)]
        self.conv1 = GCNConv(dataset.num_features, hidden)
        self.pool1 = ASAP_Pooling(in_channels=hidden, ratio=ratio[0], **kwargs)
        self.convs = torch.nn.ModuleList()
        self.pools = torch.nn.ModuleList()
        for i in range(num_layers - 1):
            self.convs.append(GCNConv(hidden, hidden))
            self.pools.append(ASAP_Pooling(in_channels=hidden, ratio=ratio[i], **kwargs))
        self.lin1 = Linear(2*hidden, hidden) # 2*hidden due to readout layer
        self.lin2 = Linear(hidden, dataset.num_classes)
        self.reset_parameters()

    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.pool1.reset_parameters()
        for conv, pool in zip(self.convs, self.pools):
            conv.reset_parameters()
            pool.reset_parameters()
        self.lin1.reset_parameters()
        self.lin2.reset_parameters()

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x, edge_index, edge_weight, batch, perm = self.pool1(x=x, edge_index=edge_index, edge_weight=None, batch=batch)
        xs = readout(x, batch)
        for conv, pool in zip(self.convs, self.pools):
            x = F.relu(conv(x=x, edge_index=edge_index, edge_weight=edge_weight))
            x, edge_index, edge_weight, batch, perm = pool(x=x, edge_index=edge_index, edge_weight=edge_weight, batch=batch)
            xs += readout(x, batch)
        x = F.relu(self.lin1(xs))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)
        out = F.log_softmax(x, dim=-1)
        return out

    def __repr__(self):
        return self.__class__.__name__

In [None]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
class DenseSparseDataset(Dataset):

    @staticmethod
    def nx2geometric(nx_graph: nx.DiGraph, label: int) -> Data:
        """
        Converts a networkx graph to a PyTorch Geometric Data object.
        """

        return Data(
            x=torch.ones(len(nx_graph.nodes), 1),
            edge_index=torch.tensor(list(nx_graph.edges)).t().contiguous(),
            edge_attr=torch.tensor(list(nx.get_edge_attributes(nx_graph, 'weight').values()), dtype=torch.float),
            y=torch.tensor([label], dtype=torch.long)
        ).to(device)

    def __init__(
            self,
            n_samples: int = 2000,
        ):

        half_samples = n_samples // 2
        graphs_sparse = [nx.newman_watts_strogatz_graph(50, 5, 0.25) for _ in range(half_samples)]
        graphs_dense = [nx.newman_watts_strogatz_graph(50, 5, 0.9) for _ in range(half_samples)]
        self.dataset = graphs_sparse + graphs_dense
        self.labels = [0 for _ in range(half_samples)] + [1 for _ in range(half_samples)]

        # Convert to networkx graphs
        self.nx_dataset = [self.nx2geometric(g, l) for g, l in zip(self.dataset, self.labels)]

    def len(self):
        return len(self.nx_dataset)

    def get(self, idx):
        return self.nx_dataset[idx]

    def __getitem__(self, idx) -> Data:
        return self.get(idx)

    def __len__(self) -> int:
        return self.len()

In [None]:
dataset = DenseSparseDataset(n_samples=1000)
dataset[0]

Data(x=[50, 1], edge_index=[2, 121], edge_attr=[0], y=[1])

In [None]:
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
for batch in dataloader:
    print(batch)
    print(batch.y)
    break

DataBatch(x=[1600, 1], edge_index=[2, 4800], edge_attr=[0], y=[32], batch=[1600], ptr=[33])
tensor([0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
        0, 0, 1, 0, 1, 0, 1, 0])


In [None]:
model = ASAP_Pool(dataset, 2, 16).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=5e-4)

In [None]:
model.train()
for epoch in range(1, 20 + 1):
    loss = 0
    for batch in dataloader:
        optimizer.zero_grad()
        out = model(batch)
        loss = F.nll_loss(out, batch.y)
        loss.backward()
        optimizer.step()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

Epoch: 001, Loss: 0.0051
Epoch: 002, Loss: 0.1366
Epoch: 003, Loss: 0.0002
Epoch: 004, Loss: 0.0002
Epoch: 005, Loss: 0.0048
Epoch: 006, Loss: 0.1179
Epoch: 007, Loss: 0.0003
Epoch: 008, Loss: 0.0004
Epoch: 009, Loss: 0.0013
Epoch: 010, Loss: 0.0156
Epoch: 011, Loss: 0.0028
Epoch: 012, Loss: 0.0000
Epoch: 013, Loss: 0.0596
Epoch: 014, Loss: 0.0597
Epoch: 015, Loss: 0.0001
Epoch: 016, Loss: 0.0027
Epoch: 017, Loss: 0.0012
Epoch: 018, Loss: 0.0039
Epoch: 019, Loss: 0.0026
Epoch: 020, Loss: 0.0549


In [None]:
ys = []
ys_hat = []
for batch in dataloader:
    ys.append(batch.y)
    ys_hat.append(model(batch))
ys = torch.cat(ys, dim=0)
ys_hat = torch.cat(ys_hat, dim=0)
_, ys_hat = ys_hat.max(dim=1)

In [None]:
accs = ys_hat.eq(ys).to(torch.float)
acc = accs.mean()
print(f"Train Accuracy: {acc:.4f}")

Train Accuracy: 0.9910
