# Graph Classification

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import sys
import torch
from transformers.optimization import get_cosine_schedule_with_warmup
import torch.nn.functional as F
import torch_geometric.transforms as T
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.loader import DataLoader

import os
import random
import pandas as pd
import torch
import torch_geometric.transforms as T
from typing import Optional
import torch
from torch import Tensor
from torch_geometric.data import Data
from torch_geometric.data.datapipes import functional_transform
from torch_geometric.transforms import BaseTransform

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.datasets import WebKB
from torch_geometric.datasets import Actor
from torch_geometric.datasets import GNNBenchmarkDataset
from torch_geometric.datasets import TUDataset
from sklearn.metrics import r2_score
from torch_geometric.data import DataLoader
from torch_geometric.datasets import MoleculeNet
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.utils import to_networkx
from torch.nn import Linear
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import random
import pandas as pd

import time
import psutil
import torch
import torch.nn.functional as F
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from torch_geometric.utils.num_nodes import maybe_num_nodes
from torch_sparse import spspmm
from torch_sparse import coalesce
from torch_sparse import eye
from torch.nn import Parameter
from torch_scatter import scatter_add
from torch_scatter import scatter_max

from torch_scatter import scatter_add, scatter
from torch_geometric.nn.inits import uniform
from torch_geometric.nn.resolver import activation_resolver
from torch_geometric.nn import GCNConv, GATConv, LEConv, SAGEConv, GraphConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp

from dataclasses import dataclass
from typing import Optional

import torch
from torch import Tensor
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_geometric.utils import add_remaining_self_loops, to_dense_adj, add_self_loops
from typing import Callable, Optional, Union
from torch_sparse import coalesce, transpose
from torch_scatter import scatter
from torch import Tensor

from typing import List, Optional, Tuple, Union
import math
import torch
from torch import Tensor
from torch_geometric.nn.models.mlp import Linear
from torch_geometric.nn.resolver import activation_resolver
from torch_geometric.nn import BatchNorm

class AsymCheegerCutPool(torch.nn.Module):
    r"""
    The asymmetric cheeger cut pooling layer from the `"Total Variation Graph Neural Networks"
    <https://arxiv.org/abs/2211.06218>`_ paper.

    Args:
        k (int):
            Number of clusters or output nodes
        mlp_channels (int, list of int):
            Number of hidden units for each hidden layer in the MLP used to
            compute cluster assignments. First integer must match the number
            of input channels.
        mlp_activation (any):
            Activation function between hidden layers of the MLP.
            Must be compatible with `torch_geometric.nn.resolver`.
        return_selection (bool):
            Whether to return selection matrix. Cannot be False
            if `return_pooled_graph` is False. (default: :obj:`False`)
        return_pooled_graph (bool):
            Whether to return pooled node features and adjacency.
            Cannot be False if `return_selection` is False. (default: :obj:`True`)
        bias (bool):
            Whether to add a bias term to the hidden MLP layers. The final
            projection to `k` clusters is created without forwarding this
            flag. (default: :obj:`True`)
        totvar_coeff (float):
            Coefficient for graph total variation loss component. (default: :obj:`1.0`)
        balance_coeff (float):
            Coefficient for asymmetric norm loss component. (default: :obj:`1.0`)

    Raises:
        ValueError: If both `return_selection` and `return_pooled_graph` are False.
    """

    def __init__(self,
                 k: int,
                 mlp_channels: Union[int, List[int]],
                 mlp_activation="relu",
                 return_selection: bool = False,
                 return_pooled_graph: bool = True,
                 bias: bool = True,
                 totvar_coeff: float = 1.0,
                 balance_coeff: float = 1.0,
                 ):
        super().__init__()

        if not return_selection and not return_pooled_graph:
            raise ValueError("return_selection and return_pooled_graph can not both be False")

        if isinstance(mlp_channels, int):
            mlp_channels = [mlp_channels]

        act = activation_resolver(mlp_activation)
        in_channels = mlp_channels[0]
        self.mlp = torch.nn.Sequential()
        # Hidden layers: mlp_channels[0] is the input width, the rest are hidden widths.
        for channels in mlp_channels[1:]:
            self.mlp.append(Linear(in_channels, channels, bias=bias))
            in_channels = channels
            self.mlp.append(act)

        # Final projection producing one logit per cluster.
        self.mlp.append(Linear(in_channels, k))
        self.k = k
        self.return_selection = return_selection
        self.return_pooled_graph = return_pooled_graph
        self.totvar_coeff = totvar_coeff
        self.balance_coeff = balance_coeff

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every linear layer of the assignment MLP."""
        for layer in self.mlp:
            if isinstance(layer, Linear):
                # In-place initialiser; the un-suffixed `xavier_uniform` is deprecated.
                torch.nn.init.xavier_uniform_(layer.weight)
                # Layers built with `bias=False` have no bias tensor to reset.
                if layer.bias is not None:
                    torch.nn.init.zeros_(layer.bias)

    def forward(
        self,
        x: Tensor,
        adj: Tensor,
        mask: Optional[Tensor] = None,
    ) -> Tuple[Tensor, ...]:
        r"""
        Args:
            x (Tensor):
                Node feature tensor :math:`\mathbf{X} \in \mathbb{R}^{B \times N \times F}`
                with batch-size :math:`B`, (maximum) number of nodes :math:`N` for each graph,
                and feature dimension :math:`F`. Note that the cluster assignment matrix
                :math:`\mathbf{S} \in \mathbb{R}^{B \times N \times C}` is
                being created within this method.
            adj (Tensor):
                Adjacency tensor :math:`\mathbf{A} \in \mathbb{R}^{B \times N \times N}`.
            mask (BoolTensor, optional):
                Mask matrix :math:`\mathbf{M} \in {\{ 0, 1 \}}^{B \times N}`
                indicating the valid nodes for each graph. (default: :obj:`None`)

        Returns:
            A tuple whose layout depends on the constructor flags:

            * ``return_selection`` and ``return_pooled_graph``:
              ``(s, x_pool, adj_pool, tv_loss, bal_loss)``
            * ``return_selection`` only: ``(s, tv_loss, bal_loss)``
            * ``return_pooled_graph`` only: ``(x_pool, adj_pool, tv_loss, bal_loss)``

            ``tv_loss`` and ``bal_loss`` are batch means already scaled by
            ``totvar_coeff`` and ``balance_coeff``.
        """
        # Promote single (un-batched) graphs to a batch of size one.
        x = x.unsqueeze(0) if x.dim() == 2 else x
        adj = adj.unsqueeze(0) if adj.dim() == 2 else adj

        s = self.mlp(x)
        s = torch.softmax(s, dim=-1)

        batch_size, n_nodes, _ = x.size()

        if mask is not None:
            # Zero out features and assignments of masked-out nodes.
            mask = mask.view(batch_size, n_nodes, 1).to(x.dtype)
            x, s = x * mask, s * mask

        # Pooled features and adjacency
        if self.return_pooled_graph:
            x_pool = torch.matmul(s.transpose(1, 2), x)
            adj_pool = torch.matmul(torch.matmul(s.transpose(1, 2), adj), s)

        # Total variation loss
        tv_loss = self.totvar_coeff*torch.mean(self.totvar_loss(adj, s))

        # Balance loss
        bal_loss = self.balance_coeff*torch.mean(self.balance_loss(s))

        if self.return_selection and self.return_pooled_graph:
            return s, x_pool, adj_pool, tv_loss, bal_loss
        elif self.return_selection and not self.return_pooled_graph:
            return s, tv_loss, bal_loss
        else:
            return x_pool, adj_pool, tv_loss, bal_loss

    def totvar_loss(self, adj, s):
        """Return the per-graph total variation of the assignments ``s`` over ``adj``.

        The result has one entry per graph in the batch and is normalised by
        twice the number of non-zero adjacency entries.
        """
        # Pairwise L1 distance between the assignment rows of every node pair.
        l1_norm = torch.sum(torch.abs(s[..., None, :] - s[:, None, ...]), dim=-1)

        loss = torch.sum(adj * l1_norm, dim=(-1, -2))

        # Normalize loss. Clamp so that a graph without any edge yields 0
        # instead of 0 * inf = NaN.
        n_edges = torch.count_nonzero(adj, dim=(-1, -2)).clamp(min=1)
        loss = loss * (1 / (2 * n_edges))

        return loss

    def balance_loss(self, s):
        """Return the per-graph asymmetric-norm balance loss of the assignments ``s``.

        NOTE(review): the normalisation divides by ``k - 1`` and the quantile
        index is ``floor(n_nodes / k)``, so this presumably requires ``k >= 2``
        -- confirm against callers.
        """
        n_nodes = s.size()[-2]

        # k-quantile
        idx = int(math.floor(n_nodes / self.k))
        quant = torch.sort(s, dim=-2, descending=True)[0][:, idx, :] # shape [B, K]

        # Asymmetric l1-norm
        loss = s - torch.unsqueeze(quant, dim=1)
        loss = (loss >= 0) * (self.k - 1) * loss + (loss < 0) * loss * -1
        loss = torch.sum(loss, dim=(-1, -2)) # shape [B]
        loss = 1 / (n_nodes * (self.k - 1)) * (n_nodes * (self.k - 1) - loss)

        return loss

### PROTEINS

In [8]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
import random
from torch_geometric.nn import GCNConv

import os.path as osp
import time
from math import ceil

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DenseDataLoader
from torch_geometric.nn import DenseGCNConv, dense_diff_pool
# Upper bound on graph size: graphs with more nodes are dropped by `pre_filter`
# and the same value is handed to `T.ToDense`.
max_nodes = 800
# Root directory handed to TUDataset.
data_path = "/data/XXX/Pooling"

# PROTEINS graphs in dense form; the training code below reads `data.x`,
# `data.adj` and `data.mask` from the resulting batches.
dataset_dense = TUDataset(
    data_path,
    name="PROTEINS",
    transform=T.Compose([T.ToDense(max_nodes)]),
    use_node_attr=True,
    pre_filter=lambda data: data.num_nodes <= max_nodes,
)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

# `dataset` is what Net_AsymCheegerCut reads `num_features` / `num_classes` from.
# NOTE(review): this shuffle runs before any `set_seed` call in this cell.
dataset = dataset_dense
dataset = dataset.shuffle()
# Net_AsymCheegerCut sizes both pooling layers with k = N // 2.
N = 150
mp_layers = 1
# Input width of the pooling MLPs (first entry of `mlp_channels`).
mp_channels = 64
mp_activation = "relu"
delta_coeff = 2.0

# Shape and loss weights of the AsymCheegerCutPool assignment MLP.
mlp_hidden_layers = 2
mlp_hidden_channels = 128
mlp_activation = "relu"
totvar_coeff = 0.5
balance_coeff = 0.5

# NOTE(review): within the code visible in this cell, `epochs`, `batch_size`,
# `learning_rate`, `l2_reg_val` and `patience` (as well as `mp_layers`,
# `mp_activation` and `delta_coeff` above) are never referenced; the training
# loop hard-codes lr=0.001, 200 epochs and its own loader batch size.
epochs = 100
batch_size = 16
learning_rate = 5e-4
l2_reg_val = 0
patience = 10

class GNN(torch.nn.Module):
    """Stack of three dense GCN layers with ReLU + batch norm and an optional linear head.

    Args:
        in_channels: Feature width of the input nodes.
        hidden_channels: Output width of the first two convolutions.
        out_channels: Output width of the third convolution and of the head.
        normalize: Forwarded as the third positional argument of every
            ``DenseGCNConv``. NOTE(review): confirm that this position really
            is the option the name suggests.
        lin: When truthy, append ``Linear(out_channels, out_channels)`` + ReLU.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, normalize=False, lin=True):
        super().__init__()

        widths = (in_channels, hidden_channels, hidden_channels, out_channels)
        # Registers conv1/bn1, conv2/bn2, conv3/bn3 in exactly that order.
        for idx, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f'conv{idx}', DenseGCNConv(fan_in, fan_out, normalize))
            setattr(self, f'bn{idx}', torch.nn.BatchNorm1d(fan_out))

        self.lin = torch.nn.Linear(out_channels, out_channels) if lin else None

    def bn(self, i, x):
        """Run the ``i``-th batch norm on a 3-D tensor by flattening its first two axes."""
        n_graphs, n_nodes, n_feats = x.size()
        norm = getattr(self, f'bn{i}')
        return norm(x.view(-1, n_feats)).view(n_graphs, n_nodes, n_feats)

    def forward(self, x, adj, mask=None):
        """Apply conv -> ReLU -> batch norm three times, then the optional head."""
        for idx in (1, 2, 3):
            conv = getattr(self, f'conv{idx}')
            x = self.bn(idx, conv(x, adj, mask).relu())

        if self.lin is None:
            return x
        return self.lin(x).relu()


class Net_AsymCheegerCut(torch.nn.Module):
    """Graph classifier: dense GCN embedding -> pool -> GCN -> pool -> GCN -> mean readout -> MLP.

    Reads the module-level ``dataset`` (feature / class counts), ``N`` and the
    ``mp_*`` / ``mlp_*`` / ``*_coeff`` hyper-parameters at construction time.

    The total-variation and balance losses returned by the pooling layers are
    discarded in ``forward``; only the class log-probabilities are returned.
    """

    def __init__(self):
        super().__init__()

        # NOTE: `gnn1_pool` / `gnn2_pool` are not used in `forward`. They are
        # still constructed, in the original order, so that parameter
        # initialisation under a fixed seed and the state_dict layout stay
        # exactly as before.
        num_nodes = 64
        self.gnn1_pool = GNN(dataset.num_features, 64, num_nodes)
        self.gnn1_embed = DenseGCNConv(dataset.num_features, 64)

        self.gnn2_pool = GNN(64, 64, num_nodes)
        self.gnn2_embed = DenseGCNConv(64, 64)

        self.gnn3_embed = DenseGCNConv(64, 64)

        self.lin1 = torch.nn.Linear(64, 32)
        self.lin2 = torch.nn.Linear(32, dataset.num_classes)

        def build_pool():
            # Both pooling layers share the same configuration.
            return AsymCheegerCutPool(
                int(N // 2),
                mlp_channels=[mp_channels] + [mlp_hidden_channels] * mlp_hidden_layers,
                mlp_activation=mlp_activation,
                totvar_coeff=totvar_coeff,
                balance_coeff=balance_coeff,
                return_selection=False,
                return_pooled_graph=True,
            )

        self.pool1 = build_pool()
        self.pool2 = build_pool()

    def forward(self, x, adj, mask=None):
        """Return per-graph class log-probabilities.

        Args:
            x: Dense node features of the batch.
            adj: Dense adjacency of the batch.
            mask: Optional valid-node mask; only forwarded to the first
                embedding layer (the pooling layers are called with
                ``mask=None``, as before).
        """
        # The former `s = self.gnnX_pool(...)` calls were removed: their result
        # was never read, so they only cost two extra 3-layer GNN passes.
        x = F.relu(self.gnn1_embed(x, adj, mask))
        x, adj, _, _ = self.pool1(x, adj, mask=None)

        x = F.relu(self.gnn2_embed(x, adj))
        x, adj, _, _ = self.pool2(x, adj, mask=None)

        x = F.relu(self.gnn3_embed(x, adj))

        # Mean readout over the pooled nodes, then the classification head.
        x = x.mean(dim=1)
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

# Preferred backend order: CUDA, then Apple MPS (when this torch build exposes it), then CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

# Initial model/optimizer pair. The per-seed loop later in this cell rebuilds
# both before training, so these instances only serve as placeholders for the
# globals that `train()` and `test()` read.
model = Net_AsymCheegerCut().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train():
    """Run one optimisation epoch over the global ``train_loader``.

    Uses the global ``model``, ``optimizer`` and ``device``. Returns the
    NLL loss averaged over all training graphs.
    """
    model.train()
    running = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.adj, batch.mask)
        targets = batch.y.view(-1)
        batch_loss = F.nll_loss(log_probs, targets)
        batch_loss.backward()
        # Weight by batch size so the final division yields a per-graph mean.
        running += batch.y.size(0) * batch_loss.item()
        optimizer.step()
    n_graphs = len(train_loader.dataset)
    return running / n_graphs

def test(loader):
    """Return the classification accuracy of the global ``model`` on ``loader``.

    Args:
        loader: Iterable of dense batches exposing ``x``, ``adj``, ``mask``
            and ``y``; must also provide ``loader.dataset`` for the total count.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; skipping autograd bookkeeping saves
    # memory and time without changing the predictions.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data.x, data.adj, data.mask)
            pred = output.max(dim=1)[1]
            correct += int(pred.eq(data.y.view(-1)).sum())
    return correct / len(loader.dataset)


def set_seed(seed):
    """Seed Python, NumPy and torch (CPU and, when available, CUDA) RNGs and
    switch cuDNN to deterministic, non-benchmarking mode."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Per-seed results collected for the summary printed after the loop.
seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)
    dataset_dense = dataset_dense.shuffle()

    # 70% train / 15% validation / remainder test.
    train_ratio = 0.7
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_dense)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_dense[:num_train]
    val_dataset = dataset_dense[num_train:num_train + num_val]
    test_dataset = dataset_dense[num_train + num_val:]
    train_loader = DenseDataLoader(train_dataset, batch_size=128, shuffle=True)
    valid_loader = DenseDataLoader(val_dataset, batch_size=128, shuffle=False)
    test_loader = DenseDataLoader(test_dataset, batch_size=128, shuffle=False)

    # Fresh model/optimizer per seed; `train()` / `test()` pick them up as globals.
    model = Net_AsymCheegerCut().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    # Reset per seed so a run that never improves cannot raise NameError or
    # silently report the previous seed's test accuracy.
    best_test_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        # Model selection on validation accuracy; the test accuracy of the
        # selected epoch is what gets reported.
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    # CUDA memory statistics only make sense on a CUDA device; record 0 MB on
    # CPU / MPS instead of querying the CUDA allocator.
    if device.type == 'cuda':
        memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB
    else:
        memory_allocated = 0.0

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    if device.type == 'cuda':
        torch.cuda.empty_cache()

# Aggregate statistics over all seeds, one line per metric.
print(
    f'Average Time: {np.mean(times):.2f} seconds',
    f'Var Time: {np.var(times):.2f} seconds',
    f'Average Memory: {np.mean(memories):.2f} MB',
    f'Average Best Val Acc: {np.mean(best_val_accs):.4f}',
    f'Std Best Test Acc: {np.std(best_test_accs):.4f}',
    f'Average Test Acc: {np.mean(best_test_accs):.4f}',
    sep='\n',
)

Seed: 42, Epoch: 001, Loss: 0.6848, Val Acc: 0.6988, Test Acc: 0.7202
Seed: 42, Epoch: 002, Loss: 0.6427, Val Acc: 0.7108, Test Acc: 0.7321
Seed: 42, Epoch: 003, Loss: 0.6251, Val Acc: 0.6747, Test Acc: 0.7381
Seed: 42, Epoch: 004, Loss: 0.6062, Val Acc: 0.7229, Test Acc: 0.7440
Seed: 42, Epoch: 005, Loss: 0.6061, Val Acc: 0.7229, Test Acc: 0.7500
Seed: 42, Epoch: 006, Loss: 0.6064, Val Acc: 0.7108, Test Acc: 0.7500
Seed: 42, Epoch: 007, Loss: 0.5969, Val Acc: 0.7470, Test Acc: 0.7619
Seed: 42, Epoch: 008, Loss: 0.6008, Val Acc: 0.7289, Test Acc: 0.7500
Seed: 42, Epoch: 009, Loss: 0.5957, Val Acc: 0.7229, Test Acc: 0.7560
Seed: 42, Epoch: 010, Loss: 0.5881, Val Acc: 0.7289, Test Acc: 0.7679
Seed: 42, Epoch: 011, Loss: 0.5860, Val Acc: 0.7470, Test Acc: 0.7679
Seed: 42, Epoch: 012, Loss: 0.5817, Val Acc: 0.7289, Test Acc: 0.7679
Seed: 42, Epoch: 013, Loss: 0.5781, Val Acc: 0.7530, Test Acc: 0.7500
Seed: 42, Epoch: 014, Loss: 0.5802, Val Acc: 0.7470, Test Acc: 0.7679
Seed: 42, Epoch: 015

### NCI1

In [9]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
import random
from torch_geometric.nn import GCNConv

import os.path as osp
import time
from math import ceil

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DenseDataLoader
from torch_geometric.nn import DenseGCNConv, dense_diff_pool
# Upper bound on graph size: graphs with more nodes are dropped by `pre_filter`
# and the same value is handed to `T.ToDense`.
max_nodes = 150
# Root directory handed to TUDataset.
data_path = "/data/XXX/Pooling"

# NCI1 graphs in dense form; the training code below reads `data.x`,
# `data.adj` and `data.mask` from the resulting batches.
dataset_dense = TUDataset(
    data_path,
    name="NCI1",
    transform=T.Compose([T.ToDense(max_nodes)]),
    use_node_attr=True,
    pre_filter=lambda data: data.num_nodes <= max_nodes,
)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

# `dataset` is what Net_AsymCheegerCut reads `num_features` / `num_classes` from.
# NOTE(review): this shuffle runs before any `set_seed` call in this cell.
dataset = dataset_dense
dataset = dataset.shuffle()
# Net_AsymCheegerCut sizes both pooling layers with k = N // 2.
N = 150
mp_layers = 1
# Input width of the pooling MLPs (first entry of `mlp_channels`).
mp_channels = 64
mp_activation = "relu"
delta_coeff = 2.0

# Shape and loss weights of the AsymCheegerCutPool assignment MLP.
mlp_hidden_layers = 2
mlp_hidden_channels = 128
mlp_activation = "relu"
totvar_coeff = 0.5
balance_coeff = 0.5

# NOTE(review): within the code visible in this cell, `epochs`, `batch_size`,
# `learning_rate`, `l2_reg_val` and `patience` (as well as `mp_layers`,
# `mp_activation` and `delta_coeff` above) are never referenced; the training
# loop hard-codes lr=0.001, 200 epochs and its own loader batch size.
epochs = 100
batch_size = 16
learning_rate = 5e-4
l2_reg_val = 0
patience = 10

class GNN(torch.nn.Module):
    """Stack of three dense GCN layers with ReLU + batch norm and an optional linear head.

    Args:
        in_channels: Feature width of the input nodes.
        hidden_channels: Output width of the first two convolutions.
        out_channels: Output width of the third convolution and of the head.
        normalize: Forwarded as the third positional argument of every
            ``DenseGCNConv``. NOTE(review): confirm that this position really
            is the option the name suggests.
        lin: When truthy, append ``Linear(out_channels, out_channels)`` + ReLU.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, normalize=False, lin=True):
        super().__init__()

        widths = (in_channels, hidden_channels, hidden_channels, out_channels)
        # Registers conv1/bn1, conv2/bn2, conv3/bn3 in exactly that order.
        for idx, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f'conv{idx}', DenseGCNConv(fan_in, fan_out, normalize))
            setattr(self, f'bn{idx}', torch.nn.BatchNorm1d(fan_out))

        self.lin = torch.nn.Linear(out_channels, out_channels) if lin else None

    def bn(self, i, x):
        """Run the ``i``-th batch norm on a 3-D tensor by flattening its first two axes."""
        n_graphs, n_nodes, n_feats = x.size()
        norm = getattr(self, f'bn{i}')
        return norm(x.view(-1, n_feats)).view(n_graphs, n_nodes, n_feats)

    def forward(self, x, adj, mask=None):
        """Apply conv -> ReLU -> batch norm three times, then the optional head."""
        for idx in (1, 2, 3):
            conv = getattr(self, f'conv{idx}')
            x = self.bn(idx, conv(x, adj, mask).relu())

        if self.lin is None:
            return x
        return self.lin(x).relu()


class Net_AsymCheegerCut(torch.nn.Module):
    """Graph classifier: dense GCN embedding -> pool -> GCN -> pool -> GCN -> mean readout -> MLP.

    Reads the module-level ``dataset`` (feature / class counts), ``N`` and the
    ``mp_*`` / ``mlp_*`` / ``*_coeff`` hyper-parameters at construction time.

    The total-variation and balance losses returned by the pooling layers are
    discarded in ``forward``; only the class log-probabilities are returned.
    """

    def __init__(self):
        super().__init__()

        # NOTE: `gnn1_pool` / `gnn2_pool` are not used in `forward`. They are
        # still constructed, in the original order, so that parameter
        # initialisation under a fixed seed and the state_dict layout stay
        # exactly as before.
        num_nodes = 64
        self.gnn1_pool = GNN(dataset.num_features, 64, num_nodes)
        self.gnn1_embed = DenseGCNConv(dataset.num_features, 64)

        self.gnn2_pool = GNN(64, 64, num_nodes)
        self.gnn2_embed = DenseGCNConv(64, 64)

        self.gnn3_embed = DenseGCNConv(64, 64)

        self.lin1 = torch.nn.Linear(64, 32)
        self.lin2 = torch.nn.Linear(32, dataset.num_classes)

        def build_pool():
            # Both pooling layers share the same configuration.
            return AsymCheegerCutPool(
                int(N // 2),
                mlp_channels=[mp_channels] + [mlp_hidden_channels] * mlp_hidden_layers,
                mlp_activation=mlp_activation,
                totvar_coeff=totvar_coeff,
                balance_coeff=balance_coeff,
                return_selection=False,
                return_pooled_graph=True,
            )

        self.pool1 = build_pool()
        self.pool2 = build_pool()

    def forward(self, x, adj, mask=None):
        """Return per-graph class log-probabilities.

        Args:
            x: Dense node features of the batch.
            adj: Dense adjacency of the batch.
            mask: Optional valid-node mask; only forwarded to the first
                embedding layer (the pooling layers are called with
                ``mask=None``, as before).
        """
        # The former `s = self.gnnX_pool(...)` calls were removed: their result
        # was never read, so they only cost two extra 3-layer GNN passes.
        x = F.relu(self.gnn1_embed(x, adj, mask))
        x, adj, _, _ = self.pool1(x, adj, mask=None)

        x = F.relu(self.gnn2_embed(x, adj))
        x, adj, _, _ = self.pool2(x, adj, mask=None)

        x = F.relu(self.gnn3_embed(x, adj))

        # Mean readout over the pooled nodes, then the classification head.
        x = x.mean(dim=1)
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

# Preferred backend order: CUDA, then Apple MPS (when this torch build exposes it), then CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

# Initial model/optimizer pair. The per-seed loop later in this cell rebuilds
# both before training, so these instances only serve as placeholders for the
# globals that `train()` and `test()` read.
model = Net_AsymCheegerCut().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train():
    """Run one optimisation epoch over the global ``train_loader``.

    Uses the global ``model``, ``optimizer`` and ``device``. Returns the
    NLL loss averaged over all training graphs.
    """
    model.train()
    running = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.adj, batch.mask)
        targets = batch.y.view(-1)
        batch_loss = F.nll_loss(log_probs, targets)
        batch_loss.backward()
        # Weight by batch size so the final division yields a per-graph mean.
        running += batch.y.size(0) * batch_loss.item()
        optimizer.step()
    n_graphs = len(train_loader.dataset)
    return running / n_graphs

def test(loader):
    """Return the classification accuracy of the global ``model`` on ``loader``.

    Args:
        loader: Iterable of dense batches exposing ``x``, ``adj``, ``mask``
            and ``y``; must also provide ``loader.dataset`` for the total count.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; skipping autograd bookkeeping saves
    # memory and time without changing the predictions.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data.x, data.adj, data.mask)
            pred = output.max(dim=1)[1]
            correct += int(pred.eq(data.y.view(-1)).sum())
    return correct / len(loader.dataset)


def set_seed(seed):
    """Seed Python, NumPy and torch (CPU and, when available, CUDA) RNGs and
    switch cuDNN to deterministic, non-benchmarking mode."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Per-seed results collected for the summary printed after the loop.
seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)
    dataset_dense = dataset_dense.shuffle()

    # 70% train / 15% validation / remainder test.
    train_ratio = 0.7
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_dense)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_dense[:num_train]
    val_dataset = dataset_dense[num_train:num_train + num_val]
    test_dataset = dataset_dense[num_train + num_val:]
    train_loader = DenseDataLoader(train_dataset, batch_size=256, shuffle=True)
    valid_loader = DenseDataLoader(val_dataset, batch_size=256, shuffle=False)
    test_loader = DenseDataLoader(test_dataset, batch_size=256, shuffle=False)

    # Fresh model/optimizer per seed; `train()` / `test()` pick them up as globals.
    model = Net_AsymCheegerCut().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    # Reset per seed so a run that never improves cannot raise NameError or
    # silently report the previous seed's test accuracy.
    best_test_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        # Model selection on validation accuracy; the test accuracy of the
        # selected epoch is what gets reported.
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    # CUDA memory statistics only make sense on a CUDA device; record 0 MB on
    # CPU / MPS instead of querying the CUDA allocator.
    if device.type == 'cuda':
        memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB
    else:
        memory_allocated = 0.0

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    if device.type == 'cuda':
        torch.cuda.empty_cache()

# Aggregate statistics over all seeds, one line per metric.
print(
    f'Average Time: {np.mean(times):.2f} seconds',
    f'Var Time: {np.var(times):.2f} seconds',
    f'Average Memory: {np.mean(memories):.2f} MB',
    f'Average Best Val Acc: {np.mean(best_val_accs):.4f}',
    f'Std Best Test Acc: {np.std(best_test_accs):.4f}',
    f'Average Test Acc: {np.mean(best_test_accs):.4f}',
    sep='\n',
)

Seed: 42, Epoch: 001, Loss: 0.6918, Val Acc: 0.4838, Test Acc: 0.4619
Seed: 42, Epoch: 002, Loss: 0.6874, Val Acc: 0.4838, Test Acc: 0.4619
Seed: 42, Epoch: 003, Loss: 0.6775, Val Acc: 0.5747, Test Acc: 0.5332
Seed: 42, Epoch: 004, Loss: 0.6568, Val Acc: 0.5925, Test Acc: 0.6159
Seed: 42, Epoch: 005, Loss: 0.6428, Val Acc: 0.6347, Test Acc: 0.5997
Seed: 42, Epoch: 006, Loss: 0.6378, Val Acc: 0.6412, Test Acc: 0.5997
Seed: 42, Epoch: 007, Loss: 0.6316, Val Acc: 0.6412, Test Acc: 0.6353
Seed: 42, Epoch: 008, Loss: 0.6295, Val Acc: 0.6364, Test Acc: 0.6240
Seed: 42, Epoch: 009, Loss: 0.6248, Val Acc: 0.6542, Test Acc: 0.6143
Seed: 42, Epoch: 010, Loss: 0.6192, Val Acc: 0.6477, Test Acc: 0.6288
Seed: 42, Epoch: 011, Loss: 0.6167, Val Acc: 0.6494, Test Acc: 0.6451
Seed: 42, Epoch: 012, Loss: 0.6082, Val Acc: 0.6575, Test Acc: 0.6499
Seed: 42, Epoch: 013, Loss: 0.6055, Val Acc: 0.6656, Test Acc: 0.6629
Seed: 42, Epoch: 014, Loss: 0.5996, Val Acc: 0.6737, Test Acc: 0.6629
Seed: 42, Epoch: 015

### NCI109

In [10]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
import random
from torch_geometric.nn import GCNConv

import os.path as osp
import time
from math import ceil

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DenseDataLoader
from torch_geometric.nn import DenseGCNConv, dense_diff_pool
# Upper bound on graph size: graphs with more nodes are dropped by `pre_filter`
# and the same value is handed to `T.ToDense`.
max_nodes = 150
# Root directory handed to TUDataset.
data_path = "/data/XXX/Pooling"

# NCI109 graphs in dense form, filtered to at most `max_nodes` nodes per graph.
dataset_dense = TUDataset(
    data_path,
    name="NCI109",
    transform=T.Compose([T.ToDense(max_nodes)]),
    use_node_attr=True,
    pre_filter=lambda data: data.num_nodes <= max_nodes,
)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

# `dataset` is what Net_AsymCheegerCut reads `num_features` / `num_classes` from.
# NOTE(review): this shuffle runs before any seeding visible in this cell.
dataset = dataset_dense
dataset = dataset.shuffle()
# Net_AsymCheegerCut sizes both pooling layers with k = N // 2.
N = 150
mp_layers = 1
# Input width of the pooling MLPs (first entry of `mlp_channels`).
mp_channels = 64
mp_activation = "relu"
delta_coeff = 2.0

# Shape and loss weights of the AsymCheegerCutPool assignment MLP
# (a single hidden layer in this cell).
mlp_hidden_layers = 1
mlp_hidden_channels = 128
mlp_activation = "relu"
totvar_coeff = 0.5
balance_coeff = 0.5

# NOTE(review): `epochs`, `batch_size`, `learning_rate`, `l2_reg_val` and
# `patience` are not referenced by the part of this cell that is visible
# here -- confirm whether they are still needed.
epochs = 100
batch_size = 16
learning_rate = 5e-4
l2_reg_val = 0
patience = 10

class GNN(torch.nn.Module):
    """Stack of three dense GCN layers with ReLU + batch norm and an optional linear head.

    Args:
        in_channels: Feature width of the input nodes.
        hidden_channels: Output width of the first two convolutions.
        out_channels: Output width of the third convolution and of the head.
        normalize: Forwarded as the third positional argument of every
            ``DenseGCNConv``. NOTE(review): confirm that this position really
            is the option the name suggests.
        lin: When truthy, append ``Linear(out_channels, out_channels)`` + ReLU.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, normalize=False, lin=True):
        super().__init__()

        widths = (in_channels, hidden_channels, hidden_channels, out_channels)
        # Registers conv1/bn1, conv2/bn2, conv3/bn3 in exactly that order.
        for idx, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f'conv{idx}', DenseGCNConv(fan_in, fan_out, normalize))
            setattr(self, f'bn{idx}', torch.nn.BatchNorm1d(fan_out))

        self.lin = torch.nn.Linear(out_channels, out_channels) if lin else None

    def bn(self, i, x):
        """Run the ``i``-th batch norm on a 3-D tensor by flattening its first two axes."""
        n_graphs, n_nodes, n_feats = x.size()
        norm = getattr(self, f'bn{i}')
        return norm(x.view(-1, n_feats)).view(n_graphs, n_nodes, n_feats)

    def forward(self, x, adj, mask=None):
        """Apply conv -> ReLU -> batch norm three times, then the optional head."""
        for idx in (1, 2, 3):
            conv = getattr(self, f'conv{idx}')
            x = self.bn(idx, conv(x, adj, mask).relu())

        if self.lin is None:
            return x
        return self.lin(x).relu()


class Net_AsymCheegerCut(torch.nn.Module):
    """Dense graph classifier: GCN embed -> pool -> GCN embed -> pool -> GCN embed -> MLP.

    Construction reads the notebook globals ``dataset`` (feature and class
    counts), ``N`` (each pooling layer is built with ``N // 2``) and the
    ``mp_channels`` / ``mlp_*`` / ``totvar_coeff`` / ``balance_coeff``
    hyper-parameters.
    """

    def __init__(self):
        super().__init__()

        # gnn1_pool / gnn2_pool are no longer evaluated in forward(): their
        # output was computed and thrown away on every pass. They are still
        # built here, in the original order, so parameter initialisation
        # consumes the RNG exactly as before and state_dict keys are unchanged.
        num_nodes = 64
        self.gnn1_pool = GNN(dataset.num_features, 64, num_nodes)
        self.gnn1_embed = DenseGCNConv(dataset.num_features, 64)

        self.gnn2_pool = GNN(64, 64, num_nodes)
        self.gnn2_embed = DenseGCNConv(64, 64)

        self.gnn3_embed = DenseGCNConv(64, 64)

        self.lin1 = torch.nn.Linear(64, 32)
        self.lin2 = torch.nn.Linear(32, dataset.num_classes)

        self.pool1 = self._make_pool()
        self.pool2 = self._make_pool()

    @staticmethod
    def _make_pool():
        """Build one AsymCheegerCutPool from the notebook-level hyper-parameters."""
        return AsymCheegerCutPool(
            int(N // 2),
            mlp_channels=[mp_channels] + [mlp_hidden_channels] * mlp_hidden_layers,
            mlp_activation=mlp_activation,
            totvar_coeff=totvar_coeff,
            balance_coeff=balance_coeff,
            return_selection=False,
            return_pooled_graph=True,
        )

    def forward(self, x, adj, mask=None):
        """Classify a batch of dense graphs.

        Args:
            x: node features, passed to ``gnn1_embed`` together with ``adj``.
            adj: dense adjacency.
            mask: optional node mask; only ``gnn1_embed`` receives it.

        Returns:
            ``log_softmax`` over dim 1 of the ``lin2`` output.
        """
        x = F.relu(self.gnn1_embed(x, adj, mask))

        # NOTE(review): the caller's mask is not forwarded to the pooling
        # layers (mask=None, unchanged from the original). The third and fourth
        # pool outputs (presumably the total-variation and balance auxiliary
        # losses -- confirm against AsymCheegerCutPool) are discarded, so they
        # do not contribute to the training loss.
        x, adj, _tv1, _bal1 = self.pool1(x, adj, mask=None)

        x = F.relu(self.gnn2_embed(x, adj))
        x, adj, _tv2, _bal2 = self.pool2(x, adj, mask=None)

        x = F.relu(self.gnn3_embed(x, adj))

        # Mean over dim 1 gives one vector per graph for the MLP head.
        x = x.mean(dim=1)
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

# Device preference: CUDA, then Apple MPS (only if this torch build exposes
# torch.backends.mps), otherwise CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

# Initial model/optimizer pair; the per-seed loop further down rebuilds both.
model = Net_AsymCheegerCut()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

def train():
    """Run one optimisation epoch over the global ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``device``.

    Returns:
        The batch NLL losses weighted by batch size, summed and divided by the
        number of samples in ``train_loader.dataset``.
    """
    model.train()
    weighted_loss_sum = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.adj, batch.mask)
        batch_loss = F.nll_loss(log_probs, batch.y.view(-1))
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch is not over-counted.
        weighted_loss_sum += batch.y.size(0) * batch_loss.item()
    return weighted_loss_sum / len(train_loader.dataset)

def test(loader):
    """Return the accuracy of the global ``model`` on ``loader``.

    Args:
        loader: iterable of batches exposing ``x``, ``adj``, ``mask`` and ``y``;
            ``loader.dataset`` supplies the sample count.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; without no_grad every forward pass would
    # build and retain an autograd graph, wasting time and memory.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data.x, data.adj, data.mask)
            pred = output.max(dim=1)[1]
            correct += int(pred.eq(data.y.view(-1)).sum())
    return correct / len(loader.dataset)


def set_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: integer seed applied to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Deterministic kernels on, autotuner off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Experiment driver: for every seed, reshuffle the dataset, split it
# 70/15/15, train a fresh model and record the test accuracy measured at the
# epoch with the best validation accuracy.
seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)
    dataset_dense = dataset_dense.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15  # the test split is whatever remains after train + val
    # Calculate the sizes of each subset
    num_total = len(dataset_dense)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_dense[:num_train]
    val_dataset = dataset_dense[num_train:num_train + num_val]
    test_dataset = dataset_dense[num_train + num_val:]
    train_loader = DenseDataLoader(train_dataset, batch_size=256, shuffle=True)
    valid_loader = DenseDataLoader(val_dataset, batch_size=256, shuffle=False)
    test_loader = DenseDataLoader(test_dataset, batch_size=256, shuffle=False)

    # train() and test() pick these up as globals.
    model = Net_AsymCheegerCut().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    # Reset per seed: otherwise a run whose validation accuracy never exceeds
    # `tolerance` would raise NameError (first seed) or silently reuse the
    # previous seed's test accuracy.
    best_test_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    # CUDA allocator statistics only exist for CUDA devices, while the device
    # selection earlier in this cell can also yield 'mps' or 'cpu'.
    if device.type == 'cuda':
        memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB
    else:
        memory_allocated = 0.0

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 0.6915, Val Acc: 0.4895, Test Acc: 0.5129
Seed: 42, Epoch: 002, Loss: 0.6870, Val Acc: 0.4830, Test Acc: 0.5065
Seed: 42, Epoch: 003, Loss: 0.6806, Val Acc: 0.5186, Test Acc: 0.5323
Seed: 42, Epoch: 004, Loss: 0.6704, Val Acc: 0.6123, Test Acc: 0.5774
Seed: 42, Epoch: 005, Loss: 0.6586, Val Acc: 0.6478, Test Acc: 0.5984
Seed: 42, Epoch: 006, Loss: 0.6504, Val Acc: 0.6462, Test Acc: 0.6323
Seed: 42, Epoch: 007, Loss: 0.6469, Val Acc: 0.6527, Test Acc: 0.6194
Seed: 42, Epoch: 008, Loss: 0.6473, Val Acc: 0.6591, Test Acc: 0.6129
Seed: 42, Epoch: 009, Loss: 0.6420, Val Acc: 0.6575, Test Acc: 0.6274
Seed: 42, Epoch: 010, Loss: 0.6423, Val Acc: 0.6543, Test Acc: 0.6081
Seed: 42, Epoch: 011, Loss: 0.6384, Val Acc: 0.6494, Test Acc: 0.6435
Seed: 42, Epoch: 012, Loss: 0.6355, Val Acc: 0.6688, Test Acc: 0.6435
Seed: 42, Epoch: 013, Loss: 0.6343, Val Acc: 0.6753, Test Acc: 0.6419
Seed: 42, Epoch: 014, Loss: 0.6317, Val Acc: 0.6769, Test Acc: 0.6242
Seed: 42, Epoch: 015

### MUTAG

In [45]:
import torch
from torch_geometric.data import Data
from torch_geometric.transforms import BaseTransform

from torch_geometric.utils import to_dense_adj
data_path = "/data/XXX/Pooling"


class ConvertToDenseAdj(BaseTransform):
    """Collapse a 3-D ``data.adj`` to 2-D by summing over its last dimension.

    Samples without an ``adj`` attribute, or whose ``adj`` is not 3-D, are
    returned untouched. The sample is modified in place and returned.
    """

    def __call__(self, data):
        # Nothing to do unless data.adj exists and is three-dimensional.
        if not hasattr(data, 'adj') or data.adj.dim() != 3:
            return data

        # Merge the third dimension; summation is used here. Another
        # reduction, such as the maximum, would also be possible.
        data.adj = data.adj.sum(dim=-1)
        return data

# Apply the transform while loading the data: ToDense(max_nodes) runs first,
# then ConvertToDenseAdj. The pre-filter keeps only graphs with at most
# `max_nodes` nodes.
dataset_dense = TUDataset(
    root=data_path,
    name="MUTAG",
    use_node_attr=True,
    pre_filter=lambda data: data.num_nodes <= max_nodes,
    transform=T.Compose([
        T.ToDense(max_nodes),
        ConvertToDenseAdj(),
    ]),
)
# Display one sample to check the resulting tensor shapes.
dataset_dense[3]

Data(x=[150, 7], y=[1], adj=[150, 150], mask=[150])

In [48]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
import random
from torch_geometric.nn import GCNConv

import os.path as osp
import time
from math import ceil

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DenseDataLoader
from torch_geometric.nn import DenseGCNConv, dense_diff_pool

# Shuffled view of the dense dataset; the model class below reads
# `dataset.num_features` and `dataset.num_classes` from this global.
dataset = dataset_dense.shuffle()

# Each AsymCheegerCutPool below is constructed with N // 2.
N = 150

# Of these, only `mp_channels` is referenced by the code that follows.
mp_layers, mp_channels, mp_activation = 1, 64, "relu"
delta_coeff = 2.0

# Pooling-MLP layout and loss coefficients handed to AsymCheegerCutPool.
mlp_hidden_layers, mlp_hidden_channels, mlp_activation = 3, 64, "relu"
totvar_coeff = balance_coeff = 0.5

# NOTE(review): none of these is referenced by the training code that follows,
# which hard-codes 200 epochs, batch size 512 and lr 0.001.
epochs, batch_size, learning_rate = 100, 16, 5e-4
l2_reg_val, patience = 0, 10

class GNN(torch.nn.Module):
    """Stack of three DenseGCNConv layers, each followed by ReLU and BatchNorm1d.

    Args:
        in_channels: feature width fed to ``conv1``.
        hidden_channels: output width of ``conv1`` and ``conv2``.
        out_channels: output width of ``conv3`` (and of ``lin`` when present).
        normalize: forwarded as the third positional argument of every
            DenseGCNConv.
            NOTE(review): PyG documents that position as ``improved`` --
            confirm this is the intended argument.
        lin: when true, a final ``Linear(out_channels, out_channels)`` + ReLU
            is applied.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, normalize=False, lin=True):
        super().__init__()

        # (input width, output width) for conv1..conv3; bn{i} normalises the
        # output of conv{i}. Layers are created in the order conv1, bn1, ...
        layer_widths = (
            (in_channels, hidden_channels),
            (hidden_channels, hidden_channels),
            (hidden_channels, out_channels),
        )
        for idx, (width_in, width_out) in enumerate(layer_widths, start=1):
            setattr(self, f'conv{idx}', DenseGCNConv(width_in, width_out, normalize))
            setattr(self, f'bn{idx}', torch.nn.BatchNorm1d(width_out))

        self.lin = torch.nn.Linear(out_channels, out_channels) if lin else None

    def bn(self, i, x):
        """Apply ``self.bn{i}`` to a 3-D tensor by flattening its first two axes."""
        batch_size, num_nodes, num_channels = x.size()
        norm_layer = getattr(self, f'bn{i}')
        flat = norm_layer(x.view(-1, num_channels))
        return flat.view(batch_size, num_nodes, num_channels)

    def forward(self, x, adj, mask=None):
        """Run conv -> ReLU -> batch norm three times, then the optional linear head."""
        for idx in (1, 2, 3):
            conv = getattr(self, f'conv{idx}')
            x = self.bn(idx, conv(x, adj, mask).relu())

        if self.lin is None:
            return x
        return self.lin(x).relu()


class Net_AsymCheegerCut(torch.nn.Module):
    """Dense graph classifier: GCN embed -> pool -> GCN embed -> pool -> GCN embed -> MLP.

    Construction reads the notebook globals ``dataset`` (feature and class
    counts), ``N`` (each pooling layer is built with ``N // 2``) and the
    ``mp_channels`` / ``mlp_*`` / ``totvar_coeff`` / ``balance_coeff``
    hyper-parameters.
    """

    def __init__(self):
        super().__init__()

        # gnn1_pool / gnn2_pool are no longer evaluated in forward(): their
        # output was computed and thrown away on every pass. They are still
        # built here, in the original order, so parameter initialisation
        # consumes the RNG exactly as before and state_dict keys are unchanged.
        num_nodes = 64
        self.gnn1_pool = GNN(dataset.num_features, 64, num_nodes)
        self.gnn1_embed = DenseGCNConv(dataset.num_features, 64)

        self.gnn2_pool = GNN(64, 64, num_nodes)
        self.gnn2_embed = DenseGCNConv(64, 64)

        self.gnn3_embed = DenseGCNConv(64, 64)

        self.lin1 = torch.nn.Linear(64, 32)
        self.lin2 = torch.nn.Linear(32, dataset.num_classes)

        self.pool1 = self._make_pool()
        self.pool2 = self._make_pool()

    @staticmethod
    def _make_pool():
        """Build one AsymCheegerCutPool from the notebook-level hyper-parameters."""
        return AsymCheegerCutPool(
            int(N // 2),
            mlp_channels=[mp_channels] + [mlp_hidden_channels] * mlp_hidden_layers,
            mlp_activation=mlp_activation,
            totvar_coeff=totvar_coeff,
            balance_coeff=balance_coeff,
            return_selection=False,
            return_pooled_graph=True,
        )

    def forward(self, x, adj, mask=None):
        """Classify a batch of dense graphs.

        Args:
            x: node features, passed to ``gnn1_embed`` together with ``adj``.
            adj: dense adjacency.
            mask: optional node mask; only ``gnn1_embed`` receives it.

        Returns:
            ``log_softmax`` over dim 1 of the ``lin2`` output.
        """
        x = F.relu(self.gnn1_embed(x, adj, mask))

        # NOTE(review): the caller's mask is not forwarded to the pooling
        # layers (mask=None, unchanged from the original). The third and fourth
        # pool outputs (presumably the total-variation and balance auxiliary
        # losses -- confirm against AsymCheegerCutPool) are discarded, so they
        # do not contribute to the training loss.
        x, adj, _tv1, _bal1 = self.pool1(x, adj, mask=None)

        x = F.relu(self.gnn2_embed(x, adj))
        x, adj, _tv2, _bal2 = self.pool2(x, adj, mask=None)

        x = F.relu(self.gnn3_embed(x, adj))

        # Mean over dim 1 gives one vector per graph for the MLP head.
        x = x.mean(dim=1)
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

# Device preference: CUDA, then Apple MPS (only if this torch build exposes
# torch.backends.mps), otherwise CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

# Initial model/optimizer pair; the per-seed loop further down rebuilds both.
model = Net_AsymCheegerCut()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

def train():
    """Run one optimisation epoch over the global ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``device``.

    Returns:
        The batch NLL losses weighted by batch size, summed and divided by the
        number of samples in ``train_loader.dataset``.
    """
    model.train()
    weighted_loss_sum = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.adj, batch.mask)
        batch_loss = F.nll_loss(log_probs, batch.y.view(-1))
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch is not over-counted.
        weighted_loss_sum += batch.y.size(0) * batch_loss.item()
    return weighted_loss_sum / len(train_loader.dataset)

def test(loader):
    """Return the accuracy of the global ``model`` on ``loader``.

    Args:
        loader: iterable of batches exposing ``x``, ``adj``, ``mask`` and ``y``;
            ``loader.dataset`` supplies the sample count.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; without no_grad every forward pass would
    # build and retain an autograd graph, wasting time and memory.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data.x, data.adj, data.mask)
            pred = output.max(dim=1)[1]
            correct += int(pred.eq(data.y.view(-1)).sum())
    return correct / len(loader.dataset)


def set_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: integer seed applied to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Deterministic kernels on, autotuner off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Experiment driver: for every seed, reshuffle the dataset, split it
# 70/15/15, train a fresh model and record the test accuracy measured at the
# epoch with the best validation accuracy.
seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)
    dataset_dense = dataset_dense.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15  # the test split is whatever remains after train + val
    # Calculate the sizes of each subset
    num_total = len(dataset_dense)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_dense[:num_train]
    val_dataset = dataset_dense[num_train:num_train + num_val]
    test_dataset = dataset_dense[num_train + num_val:]
    train_loader = DenseDataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DenseDataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DenseDataLoader(test_dataset, batch_size=512, shuffle=False)

    # train() and test() pick these up as globals.
    model = Net_AsymCheegerCut().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    # Reset per seed: otherwise a run whose validation accuracy never exceeds
    # `tolerance` would raise NameError (first seed) or silently reuse the
    # previous seed's test accuracy.
    best_test_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    # CUDA allocator statistics only exist for CUDA devices, while the device
    # selection earlier in this cell can also yield 'mps' or 'cpu'.
    if device.type == 'cuda':
        memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB
    else:
        memory_allocated = 0.0

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 0.6895, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 002, Loss: 0.6865, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 003, Loss: 0.6835, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 004, Loss: 0.6807, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 005, Loss: 0.6778, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 006, Loss: 0.6749, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 007, Loss: 0.6721, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 008, Loss: 0.6695, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 009, Loss: 0.6667, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 010, Loss: 0.6639, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 011, Loss: 0.6610, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 012, Loss: 0.6580, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 013, Loss: 0.6549, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 014, Loss: 0.6517, Val Acc: 0.5000, Test Acc: 0.7241
Seed: 42, Epoch: 015

### DD

In [5]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
import random
from torch_geometric.nn import GCNConv

import os.path as osp
import time
from math import ceil

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DenseDataLoader
from torch_geometric.nn import DenseGCNConv, dense_diff_pool
# Node-count cap: used both as the ToDense size and as the pre-filter bound
# (graphs with more nodes are excluded).
max_nodes = 500
data_path = "/data/XXX/Pooling"

dataset_dense = TUDataset(
    root=data_path,
    name="DD",
    use_node_attr=True,
    pre_filter=lambda data: data.num_nodes <= max_nodes,
    transform=T.Compose([T.ToDense(max_nodes)]),
)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

# Shuffled view of the dense dataset; the model class below reads
# `dataset.num_features` and `dataset.num_classes` from this global.
dataset = dataset_dense.shuffle()

# Each AsymCheegerCutPool below is constructed with N // 2.
N = 150

# Of these, only `mp_channels` is referenced by the code visible in this cell.
mp_layers, mp_channels, mp_activation = 1, 64, "relu"
delta_coeff = 2.0

# Pooling-MLP layout and loss coefficients handed to AsymCheegerCutPool.
mlp_hidden_layers, mlp_hidden_channels, mlp_activation = 2, 32, "relu"
totvar_coeff = balance_coeff = 0.5

# NOTE(review): none of these is referenced by the code visible in this cell.
epochs, batch_size, learning_rate = 100, 16, 5e-4
l2_reg_val, patience = 0, 10

class GNN(torch.nn.Module):
    """Stack of three DenseGCNConv layers, each followed by ReLU and BatchNorm1d.

    Args:
        in_channels: feature width fed to ``conv1``.
        hidden_channels: output width of ``conv1`` and ``conv2``.
        out_channels: output width of ``conv3`` (and of ``lin`` when present).
        normalize: forwarded as the third positional argument of every
            DenseGCNConv.
            NOTE(review): PyG documents that position as ``improved`` --
            confirm this is the intended argument.
        lin: when true, a final ``Linear(out_channels, out_channels)`` + ReLU
            is applied.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, normalize=False, lin=True):
        super().__init__()

        # (input width, output width) for conv1..conv3; bn{i} normalises the
        # output of conv{i}. Layers are created in the order conv1, bn1, ...
        layer_widths = (
            (in_channels, hidden_channels),
            (hidden_channels, hidden_channels),
            (hidden_channels, out_channels),
        )
        for idx, (width_in, width_out) in enumerate(layer_widths, start=1):
            setattr(self, f'conv{idx}', DenseGCNConv(width_in, width_out, normalize))
            setattr(self, f'bn{idx}', torch.nn.BatchNorm1d(width_out))

        self.lin = torch.nn.Linear(out_channels, out_channels) if lin else None

    def bn(self, i, x):
        """Apply ``self.bn{i}`` to a 3-D tensor by flattening its first two axes."""
        batch_size, num_nodes, num_channels = x.size()
        norm_layer = getattr(self, f'bn{i}')
        flat = norm_layer(x.view(-1, num_channels))
        return flat.view(batch_size, num_nodes, num_channels)

    def forward(self, x, adj, mask=None):
        """Run conv -> ReLU -> batch norm three times, then the optional linear head."""
        for idx in (1, 2, 3):
            conv = getattr(self, f'conv{idx}')
            x = self.bn(idx, conv(x, adj, mask).relu())

        if self.lin is None:
            return x
        return self.lin(x).relu()


class Net_AsymCheegerCut(torch.nn.Module):
    """Dense graph classifier: GCN embed -> pool -> GCN embed -> pool -> GCN embed -> MLP.

    Construction reads the notebook globals ``dataset`` (feature and class
    counts), ``N`` (each pooling layer is built with ``N // 2``) and the
    ``mp_channels`` / ``mlp_*`` / ``totvar_coeff`` / ``balance_coeff``
    hyper-parameters.
    """

    def __init__(self):
        super().__init__()

        # gnn1_pool / gnn2_pool are no longer evaluated in forward(): their
        # output was computed and thrown away on every pass. They are still
        # built here, in the original order, so parameter initialisation
        # consumes the RNG exactly as before and state_dict keys are unchanged.
        num_nodes = 64
        self.gnn1_pool = GNN(dataset.num_features, 64, num_nodes)
        self.gnn1_embed = DenseGCNConv(dataset.num_features, 64)

        self.gnn2_pool = GNN(64, 64, num_nodes)
        self.gnn2_embed = DenseGCNConv(64, 64)

        self.gnn3_embed = DenseGCNConv(64, 64)

        self.lin1 = torch.nn.Linear(64, 32)
        self.lin2 = torch.nn.Linear(32, dataset.num_classes)

        self.pool1 = self._make_pool()
        self.pool2 = self._make_pool()

    @staticmethod
    def _make_pool():
        """Build one AsymCheegerCutPool from the notebook-level hyper-parameters."""
        return AsymCheegerCutPool(
            int(N // 2),
            mlp_channels=[mp_channels] + [mlp_hidden_channels] * mlp_hidden_layers,
            mlp_activation=mlp_activation,
            totvar_coeff=totvar_coeff,
            balance_coeff=balance_coeff,
            return_selection=False,
            return_pooled_graph=True,
        )

    def forward(self, x, adj, mask=None):
        """Classify a batch of dense graphs.

        Args:
            x: node features, passed to ``gnn1_embed`` together with ``adj``.
            adj: dense adjacency.
            mask: optional node mask; only ``gnn1_embed`` receives it.

        Returns:
            ``log_softmax`` over dim 1 of the ``lin2`` output.
        """
        x = F.relu(self.gnn1_embed(x, adj, mask))

        # NOTE(review): the caller's mask is not forwarded to the pooling
        # layers (mask=None, unchanged from the original). The third and fourth
        # pool outputs (presumably the total-variation and balance auxiliary
        # losses -- confirm against AsymCheegerCutPool) are discarded, so they
        # do not contribute to the training loss.
        x, adj, _tv1, _bal1 = self.pool1(x, adj, mask=None)

        x = F.relu(self.gnn2_embed(x, adj))
        x, adj, _tv2, _bal2 = self.pool2(x, adj, mask=None)

        x = F.relu(self.gnn3_embed(x, adj))

        # Mean over dim 1 gives one vector per graph for the MLP head.
        x = x.mean(dim=1)
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)



Seed: 42, Epoch: 001, Loss: 0.6902, Val Acc: 0.5495, Test Acc: 0.5586
Seed: 42, Epoch: 002, Loss: 0.6887, Val Acc: 0.5495, Test Acc: 0.5586
Seed: 42, Epoch: 003, Loss: 0.6882, Val Acc: 0.5495, Test Acc: 0.5586
Seed: 42, Epoch: 004, Loss: 0.6877, Val Acc: 0.5495, Test Acc: 0.5586
Seed: 42, Epoch: 005, Loss: 0.6872, Val Acc: 0.5495, Test Acc: 0.5586
Seed: 42, Epoch: 006, Loss: 0.6865, Val Acc: 0.5495, Test Acc: 0.5586
Seed: 42, Epoch: 007, Loss: 0.6856, Val Acc: 0.5495, Test Acc: 0.5586
Seed: 42, Epoch: 008, Loss: 0.6847, Val Acc: 0.5495, Test Acc: 0.5586
Seed: 42, Epoch: 009, Loss: 0.6836, Val Acc: 0.5676, Test Acc: 0.5856
Seed: 42, Epoch: 010, Loss: 0.6822, Val Acc: 0.7297, Test Acc: 0.7027
Seed: 42, Epoch: 011, Loss: 0.6810, Val Acc: 0.8018, Test Acc: 0.7027
Seed: 42, Epoch: 012, Loss: 0.6797, Val Acc: 0.7658, Test Acc: 0.7117
Seed: 42, Epoch: 013, Loss: 0.6776, Val Acc: 0.8018, Test Acc: 0.7387
Seed: 42, Epoch: 014, Loss: 0.6746, Val Acc: 0.7387, Test Acc: 0.7117
Seed: 42, Epoch: 015

### IMDB-BINARY

In [11]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
# Node-count cap: used both as the ToDense size and as the pre-filter bound
# (graphs with more nodes are excluded).
max_nodes = 500
data_path = "/data/XXX/Pooling"

# OneHotDegree(136) runs before ToDense -- presumably to supply node features
# that the raw dataset lacks; confirm against the dataset.
dataset_dense = TUDataset(
    root=data_path,
    name="IMDB-BINARY",
    use_node_attr=True,
    pre_filter=lambda data: data.num_nodes <= max_nodes,
    transform=T.Compose([
        T.OneHotDegree(136),
        T.ToDense(max_nodes),
    ]),
)
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
import random
from torch_geometric.nn import GCNConv

import os.path as osp
import time
from math import ceil

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DenseDataLoader
from torch_geometric.nn import DenseGCNConv, dense_diff_pool
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

# Shuffled view of the dense dataset; the model class below reads
# `dataset.num_features` and `dataset.num_classes` from this global.
dataset = dataset_dense.shuffle()

# Each AsymCheegerCutPool below is constructed with N // 2.
N = 150

# Of these, only `mp_channels` is referenced by the code that follows.
mp_layers, mp_channels, mp_activation = 1, 64, "relu"
delta_coeff = 2.0

# Pooling-MLP layout and loss coefficients handed to AsymCheegerCutPool.
mlp_hidden_layers, mlp_hidden_channels, mlp_activation = 1, 64, "relu"
totvar_coeff = balance_coeff = 0.5

# NOTE(review): none of these is referenced by the training code that follows,
# which hard-codes 200 epochs, batch size 256 and lr 0.001.
epochs, batch_size, learning_rate = 100, 16, 5e-4
l2_reg_val, patience = 0, 10

class GNN(torch.nn.Module):
    """Stack of three DenseGCNConv layers, each followed by ReLU and BatchNorm1d.

    Args:
        in_channels: feature width fed to ``conv1``.
        hidden_channels: output width of ``conv1`` and ``conv2``.
        out_channels: output width of ``conv3`` (and of ``lin`` when present).
        normalize: forwarded as the third positional argument of every
            DenseGCNConv.
            NOTE(review): PyG documents that position as ``improved`` --
            confirm this is the intended argument.
        lin: when true, a final ``Linear(out_channels, out_channels)`` + ReLU
            is applied.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, normalize=False, lin=True):
        super().__init__()

        # (input width, output width) for conv1..conv3; bn{i} normalises the
        # output of conv{i}. Layers are created in the order conv1, bn1, ...
        layer_widths = (
            (in_channels, hidden_channels),
            (hidden_channels, hidden_channels),
            (hidden_channels, out_channels),
        )
        for idx, (width_in, width_out) in enumerate(layer_widths, start=1):
            setattr(self, f'conv{idx}', DenseGCNConv(width_in, width_out, normalize))
            setattr(self, f'bn{idx}', torch.nn.BatchNorm1d(width_out))

        self.lin = torch.nn.Linear(out_channels, out_channels) if lin else None

    def bn(self, i, x):
        """Apply ``self.bn{i}`` to a 3-D tensor by flattening its first two axes."""
        batch_size, num_nodes, num_channels = x.size()
        norm_layer = getattr(self, f'bn{i}')
        flat = norm_layer(x.view(-1, num_channels))
        return flat.view(batch_size, num_nodes, num_channels)

    def forward(self, x, adj, mask=None):
        """Run conv -> ReLU -> batch norm three times, then the optional linear head."""
        for idx in (1, 2, 3):
            conv = getattr(self, f'conv{idx}')
            x = self.bn(idx, conv(x, adj, mask).relu())

        if self.lin is None:
            return x
        return self.lin(x).relu()


class Net_AsymCheegerCut(torch.nn.Module):
    """Dense graph classifier: GCN embed -> pool -> GCN embed -> pool -> GCN embed -> MLP.

    Construction reads the notebook globals ``dataset`` (feature and class
    counts), ``N`` (each pooling layer is built with ``N // 2``) and the
    ``mp_channels`` / ``mlp_*`` / ``totvar_coeff`` / ``balance_coeff``
    hyper-parameters.
    """

    def __init__(self):
        super().__init__()

        # gnn1_pool / gnn2_pool are no longer evaluated in forward(): their
        # output was computed and thrown away on every pass. They are still
        # built here, in the original order, so parameter initialisation
        # consumes the RNG exactly as before and state_dict keys are unchanged.
        num_nodes = 64
        self.gnn1_pool = GNN(dataset.num_features, 64, num_nodes)
        self.gnn1_embed = DenseGCNConv(dataset.num_features, 64)

        self.gnn2_pool = GNN(64, 64, num_nodes)
        self.gnn2_embed = DenseGCNConv(64, 64)

        self.gnn3_embed = DenseGCNConv(64, 64)

        self.lin1 = torch.nn.Linear(64, 32)
        self.lin2 = torch.nn.Linear(32, dataset.num_classes)

        self.pool1 = self._make_pool()
        self.pool2 = self._make_pool()

    @staticmethod
    def _make_pool():
        """Build one AsymCheegerCutPool from the notebook-level hyper-parameters."""
        return AsymCheegerCutPool(
            int(N // 2),
            mlp_channels=[mp_channels] + [mlp_hidden_channels] * mlp_hidden_layers,
            mlp_activation=mlp_activation,
            totvar_coeff=totvar_coeff,
            balance_coeff=balance_coeff,
            return_selection=False,
            return_pooled_graph=True,
        )

    def forward(self, x, adj, mask=None):
        """Classify a batch of dense graphs.

        Args:
            x: node features, passed to ``gnn1_embed`` together with ``adj``.
            adj: dense adjacency.
            mask: optional node mask; only ``gnn1_embed`` receives it.

        Returns:
            ``log_softmax`` over dim 1 of the ``lin2`` output.
        """
        x = F.relu(self.gnn1_embed(x, adj, mask))

        # NOTE(review): the caller's mask is not forwarded to the pooling
        # layers (mask=None, unchanged from the original). The third and fourth
        # pool outputs (presumably the total-variation and balance auxiliary
        # losses -- confirm against AsymCheegerCutPool) are discarded, so they
        # do not contribute to the training loss.
        x, adj, _tv1, _bal1 = self.pool1(x, adj, mask=None)

        x = F.relu(self.gnn2_embed(x, adj))
        x, adj, _tv2, _bal2 = self.pool2(x, adj, mask=None)

        x = F.relu(self.gnn3_embed(x, adj))

        # Mean over dim 1 gives one vector per graph for the MLP head.
        x = x.mean(dim=1)
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

# Device preference: CUDA, then Apple MPS (only if this torch build exposes
# torch.backends.mps), otherwise CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

# Initial model/optimizer pair; the per-seed loop further down rebuilds both.
model = Net_AsymCheegerCut()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

def train():
    """Run one optimisation epoch over the global ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``device``.

    Returns:
        The batch NLL losses weighted by batch size, summed and divided by the
        number of samples in ``train_loader.dataset``.
    """
    model.train()
    weighted_loss_sum = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.adj, batch.mask)
        batch_loss = F.nll_loss(log_probs, batch.y.view(-1))
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch is not over-counted.
        weighted_loss_sum += batch.y.size(0) * batch_loss.item()
    return weighted_loss_sum / len(train_loader.dataset)

def test(loader):
    """Return the accuracy of the global ``model`` on ``loader``.

    Args:
        loader: iterable of batches exposing ``x``, ``adj``, ``mask`` and ``y``;
            ``loader.dataset`` supplies the sample count.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; without no_grad every forward pass would
    # build and retain an autograd graph, wasting time and memory.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data.x, data.adj, data.mask)
            pred = output.max(dim=1)[1]
            correct += int(pred.eq(data.y.view(-1)).sum())
    return correct / len(loader.dataset)


def set_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: integer seed applied to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Deterministic kernels on, autotuner off.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Experiment driver: for every seed, reshuffle the dataset, split it
# 70/15/15, train a fresh model and record the test accuracy measured at the
# epoch with the best validation accuracy.
seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)
    dataset_dense = dataset_dense.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15  # the test split is whatever remains after train + val
    # Calculate the sizes of each subset
    num_total = len(dataset_dense)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_dense[:num_train]
    val_dataset = dataset_dense[num_train:num_train + num_val]
    test_dataset = dataset_dense[num_train + num_val:]
    train_loader = DenseDataLoader(train_dataset, batch_size=256, shuffle=True)
    valid_loader = DenseDataLoader(val_dataset, batch_size=256, shuffle=False)
    test_loader = DenseDataLoader(test_dataset, batch_size=256, shuffle=False)

    # train() and test() pick these up as globals.
    model = Net_AsymCheegerCut().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    # Reset per seed: otherwise a run whose validation accuracy never exceeds
    # `tolerance` would raise NameError (first seed) or silently reuse the
    # previous seed's test accuracy.
    best_test_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    # CUDA allocator statistics only exist for CUDA devices, while the device
    # selection earlier in this cell can also yield 'mps' or 'cpu'.
    if device.type == 'cuda':
        memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB
    else:
        memory_allocated = 0.0

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 0.6961, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 002, Loss: 0.6955, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 003, Loss: 0.6948, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 004, Loss: 0.6943, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 005, Loss: 0.6935, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 006, Loss: 0.6928, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 007, Loss: 0.6917, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 008, Loss: 0.6904, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 009, Loss: 0.6888, Val Acc: 0.5267, Test Acc: 0.5533
Seed: 42, Epoch: 010, Loss: 0.6865, Val Acc: 0.6200, Test Acc: 0.7067
Seed: 42, Epoch: 011, Loss: 0.6833, Val Acc: 0.7133, Test Acc: 0.7867
Seed: 42, Epoch: 012, Loss: 0.6789, Val Acc: 0.7467, Test Acc: 0.8133
Seed: 42, Epoch: 013, Loss: 0.6725, Val Acc: 0.7667, Test Acc: 0.8067
Seed: 42, Epoch: 014, Loss: 0.6652, Val Acc: 0.7800, Test Acc: 0.8067
Seed: 42, Epoch: 015

### IMDB-MULTI

In [4]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
# Node-count cap: used both by the pre_filter below (graphs with more nodes are
# dropped) and as the size argument handed to T.ToDense.
max_nodes = 500
# Root directory handed to TUDataset.
data_path = "/data/XXX/Pooling"

# IMDB-MULTI in dense form. Each sample is passed through OneHotDegree(88) and
# then ToDense(max_nodes) on access.
# NOTE(review): 88 is presumably the maximum node degree in IMDB-MULTI - confirm.
# NOTE(review): keep pre_filter as this exact lambda; PyG compares its repr
# against the processed cache and warns when it changes - verify before editing.
dataset_dense = TUDataset(
    data_path,
    name="IMDB-MULTI",
    transform=T.Compose([T.OneHotDegree(88), T.ToDense(max_nodes)]),
    use_node_attr=True,
    pre_filter=lambda data: data.num_nodes <= max_nodes,
)
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
import random
from torch_geometric.nn import GCNConv

import os.path as osp
import time
from math import ceil

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DenseDataLoader
from torch_geometric.nn import DenseGCNConv, dense_diff_pool
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

# Shuffled view of the dense dataset. The model classes defined later in this
# cell read `dataset.num_features` and `dataset.num_classes` from this global.
dataset = dataset_dense.shuffle()

# Both pooling layers are constructed with int(N // 2) as their first argument.
N = 150

# Message-passing settings. Only `mp_channels` is read by the code visible in
# this cell; the other names in this group are not referenced there.
mp_layers, mp_channels, mp_activation = 1, 64, "relu"
delta_coeff = 2.0

# Settings forwarded to AsymCheegerCutPool (MLP widths/activation, loss coeffs).
mlp_hidden_layers, mlp_hidden_channels, mlp_activation = 1, 64, "relu"
totvar_coeff, balance_coeff = 0.5, 0.5

# NOTE(review): the training code visible in this cell does not read the
# values below; it hard-codes 200 epochs, batch size 256, lr 0.001 and an
# early-stop patience of 150.
epochs, batch_size = 100, 16
learning_rate = 5e-4
l2_reg_val = 0
patience = 10

class GNN(torch.nn.Module):
    """Stack of three DenseGCNConv layers, each followed by ReLU and BatchNorm1d.

    An optional final Linear layer (out_channels -> out_channels) with ReLU is
    applied when ``lin`` is true.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, normalize=False, lin=True):
        super().__init__()

        # Layers are created in the order conv1, bn1, conv2, bn2, conv3, bn3 so
        # that attribute names and random-initialisation order are stable.
        # NOTE(review): `normalize` is passed as DenseGCNConv's third positional
        # argument, which the PyG docs name `improved` - confirm this is intended.
        layer_dims = (
            (in_channels, hidden_channels),
            (hidden_channels, hidden_channels),
            (hidden_channels, out_channels),
        )
        for idx, (width_in, width_out) in enumerate(layer_dims, start=1):
            setattr(self, f'conv{idx}', DenseGCNConv(width_in, width_out, normalize))
            setattr(self, f'bn{idx}', torch.nn.BatchNorm1d(width_out))

        self.lin = torch.nn.Linear(out_channels, out_channels) if lin else None

    def bn(self, i, x):
        """Apply the i-th BatchNorm1d to a 3-D tensor.

        The first two dimensions are flattened into one before normalisation
        and restored afterwards, so the output has the same shape as ``x``.
        """
        dim0, dim1, channels = x.size()
        norm_layer = getattr(self, f'bn{i}')
        return norm_layer(x.view(-1, channels)).view(dim0, dim1, channels)

    def forward(self, x, adj, mask=None):
        """Run conv -> ReLU -> batch norm three times, then the optional Linear + ReLU."""
        for idx in (1, 2, 3):
            conv = getattr(self, f'conv{idx}')
            x = self.bn(idx, conv(x, adj, mask).relu())

        if self.lin is None:
            return x
        return self.lin(x).relu()


class Net_AsymCheegerCut(torch.nn.Module):
    """Graph classifier: (DenseGCNConv -> AsymCheegerCutPool) twice, then GCN + MLP head.

    The constructor takes no arguments and reads these module-level globals:
    ``dataset`` (num_features / num_classes), ``N``, ``mp_channels``,
    ``mlp_hidden_channels``, ``mlp_hidden_layers``, ``mlp_activation``,
    ``totvar_coeff`` and ``balance_coeff``.
    """

    def __init__(self):
        super().__init__()

        # gnn1_pool / gnn2_pool are not used by forward(): the pooling layers
        # below are built with their own MLP settings. They stay registered so
        # the parameter names and the order of random initialisation of the
        # remaining layers are unchanged.
        num_nodes = 64
        self.gnn1_pool = GNN(dataset.num_features, 64, num_nodes)
        self.gnn1_embed = DenseGCNConv(dataset.num_features, 64)

        num_nodes = 64
        self.gnn2_pool = GNN(64, 64, num_nodes)
        self.gnn2_embed = DenseGCNConv(64, 64)

        self.gnn3_embed = DenseGCNConv(64, 64)

        self.lin1 = torch.nn.Linear(64, 32)
        self.lin2 = torch.nn.Linear(32, dataset.num_classes)

        def make_pool():
            # Each call builds a fresh channel list so the two pooling layers
            # never share a mutable argument.
            return AsymCheegerCutPool(
                int(N // 2),
                mlp_channels=[mp_channels] +
                    [mlp_hidden_channels for _ in range(mlp_hidden_layers)],
                mlp_activation=mlp_activation,
                totvar_coeff=totvar_coeff,
                balance_coeff=balance_coeff,
                return_selection=False,
                return_pooled_graph=True,
            )

        self.pool1 = make_pool()
        self.pool2 = make_pool()

    def forward(self, x, adj, mask=None):
        """Return per-graph log-probabilities (log_softmax over dim 1).

        Args:
            x, adj, mask: the ``data.x``, ``data.adj`` and ``data.mask`` fields
                of a dense batch, as passed by ``train()`` / ``test()``.

        The earlier ``s = self.gnn1_pool(...)`` / ``s = self.gnn2_pool(...)``
        calls were removed: their result was never used, so they only added
        time and memory to every forward pass.
        """
        x = self.gnn1_embed(x, adj, mask)
        x = F.relu(x)

        # The pooling layer returns four values; the last two are discarded and
        # so never reach the training loss.
        # NOTE(review): they are presumably auxiliary loss terms (total
        # variation / balance) - confirm whether they should be added to the loss.
        # NOTE(review): mask=None is passed although a mask is available -
        # confirm this is intended for padded dense batches.
        x, adj, _, _ = self.pool1(x, adj, mask=None)

        x = self.gnn2_embed(x, adj)
        x = F.relu(x)

        x, adj, _, _ = self.pool2(x, adj, mask=None)

        x = self.gnn3_embed(x, adj)
        x = F.relu(x)

        # Average over dim 1 to get one vector per graph, then classify.
        x = x.mean(dim=1)
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

# Backend preference: CUDA, then Apple MPS (only if this torch build exposes
# torch.backends.mps and reports it available), otherwise CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

# Initial model/optimizer pair; the per-seed loop further down rebinds both.
model = Net_AsymCheegerCut().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train():
    """Run one optimisation pass over the global ``train_loader``.

    Uses the global ``model``, ``optimizer`` and ``device``. Returns the
    per-batch NLL losses weighted by batch size and divided by the number of
    samples in ``train_loader.dataset``.
    """
    model.train()
    weighted_sum = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.adj, batch.mask)
        targets = batch.y.view(-1)
        batch_loss = F.nll_loss(log_probs, targets)
        batch_loss.backward()
        optimizer.step()
        # Weight each batch by its size so a smaller last batch is not over-counted.
        weighted_sum += batch.y.size(0) * float(batch_loss)
    return weighted_sum / len(train_loader.dataset)

def test(loader):
    """Return the classification accuracy of the global ``model`` on ``loader``.

    Args:
        loader: iterable of dense batches exposing ``x``, ``adj``, ``mask`` and
            ``y``; ``loader.dataset`` must support ``len``.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.

    Evaluation runs under ``torch.no_grad()`` so no autograd graph is built.
    Without it, every evaluation batch kept activations alive and inflated the
    time and GPU memory figures this notebook reports.
    """
    model.eval()
    correct = 0
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data.x, data.adj, data.mask)
            pred = output.max(dim=1)[1]
            correct += int(pred.eq(data.y.view(-1)).sum())
    return correct / len(loader.dataset)


def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, CUDA).

    Also sets cuDNN to deterministic mode and disables its benchmark mode.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Multi-seed experiment: for every seed, reshuffle and split the dataset, train
# a fresh model, and record wall-clock time, reserved GPU memory, the best
# validation accuracy and the test accuracy observed at that same epoch.
seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

# Stop a run after this many consecutive epochs without a validation gain
# larger than `tolerance`.
early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)
    dataset_dense = dataset_dense.shuffle()

    # 70% train, 15% validation, remainder test.
    train_ratio = 0.7
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_dense)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_dense[:num_train]
    val_dataset = dataset_dense[num_train:num_train + num_val]
    test_dataset = dataset_dense[num_train + num_val:]
    train_loader = DenseDataLoader(train_dataset, batch_size=256, shuffle=True)
    valid_loader = DenseDataLoader(val_dataset, batch_size=256, shuffle=False)
    test_loader = DenseDataLoader(test_dataset, batch_size=256, shuffle=False)

    # train() and test() pick these up as globals.
    model = Net_AsymCheegerCut().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    # Reset per seed. Otherwise a run whose validation accuracy never improves
    # would raise NameError (first seed) or silently reuse the previous seed's
    # test accuracy.
    best_test_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        # Model selection is driven by validation accuracy only; the test
        # accuracy is just recorded at the selected epoch.
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    # Release cached blocks before the next seed's run.
    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 1.0991, Val Acc: 0.2711, Test Acc: 0.3511
Seed: 42, Epoch: 002, Loss: 1.0980, Val Acc: 0.2711, Test Acc: 0.3511
Seed: 42, Epoch: 003, Loss: 1.0972, Val Acc: 0.2711, Test Acc: 0.3511
Seed: 42, Epoch: 004, Loss: 1.0962, Val Acc: 0.2711, Test Acc: 0.3511
Seed: 42, Epoch: 005, Loss: 1.0950, Val Acc: 0.2844, Test Acc: 0.3689
Seed: 42, Epoch: 006, Loss: 1.0934, Val Acc: 0.3111, Test Acc: 0.3822
Seed: 42, Epoch: 007, Loss: 1.0912, Val Acc: 0.3022, Test Acc: 0.3644
Seed: 42, Epoch: 008, Loss: 1.0885, Val Acc: 0.4400, Test Acc: 0.4444
Seed: 42, Epoch: 009, Loss: 1.0849, Val Acc: 0.4578, Test Acc: 0.4533
Seed: 42, Epoch: 010, Loss: 1.0798, Val Acc: 0.4578, Test Acc: 0.4533
Seed: 42, Epoch: 011, Loss: 1.0722, Val Acc: 0.4844, Test Acc: 0.4622
Seed: 42, Epoch: 012, Loss: 1.0628, Val Acc: 0.4933, Test Acc: 0.4667
Seed: 42, Epoch: 013, Loss: 1.0492, Val Acc: 0.4800, Test Acc: 0.4756
Seed: 42, Epoch: 014, Loss: 1.0323, Val Acc: 0.4889, Test Acc: 0.4622
Seed: 42, Epoch: 015

### COLLAB

In [5]:

# Node-count cap: used both by the pre_filter below (graphs with more nodes are
# dropped) and as the size argument handed to T.ToDense.
max_nodes = 500
# Root directory handed to TUDataset.
data_path = "/data/XXX/Pooling"

# COLLAB in dense form. Each sample is passed through OneHotDegree(491) and
# then ToDense(max_nodes) on access.
# NOTE(review): 491 is presumably the maximum node degree in COLLAB - confirm.
# NOTE(review): keep pre_filter as this exact lambda; PyG compares its repr
# against the processed cache and warns when it changes - verify before editing.
dataset_dense = TUDataset(
    data_path,
    name="COLLAB",
    transform=T.Compose([T.OneHotDegree(491), T.ToDense(max_nodes)]),
    use_node_attr=True,
    pre_filter=lambda data: data.num_nodes <= max_nodes,
)
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
import random
from torch_geometric.nn import GCNConv

import os.path as osp
import time
from math import ceil

import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DenseDataLoader
from torch_geometric.nn import DenseGCNConv, dense_diff_pool
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

# Shuffled view of the dense dataset. The model classes defined later in this
# cell read `dataset.num_features` and `dataset.num_classes` from this global.
dataset = dataset_dense.shuffle()

# Both pooling layers are constructed with int(N // 2) as their first argument.
N = 150

# Message-passing settings. Only `mp_channels` is read by the code visible in
# this cell; the other names in this group are not referenced there.
mp_layers, mp_channels, mp_activation = 1, 64, "relu"
delta_coeff = 2.0

# Settings forwarded to AsymCheegerCutPool (MLP widths/activation, loss coeffs).
mlp_hidden_layers, mlp_hidden_channels, mlp_activation = 2, 64, "relu"
totvar_coeff, balance_coeff = 0.5, 0.5

# NOTE(review): the training code visible in this cell does not read the
# values below; it hard-codes 200 epochs, batch size 256, lr 0.001 and an
# early-stop patience of 150.
epochs, batch_size = 100, 16
learning_rate = 5e-4
l2_reg_val = 0
patience = 10

class GNN(torch.nn.Module):
    """Stack of three DenseGCNConv layers, each followed by ReLU and BatchNorm1d.

    An optional final Linear layer (out_channels -> out_channels) with ReLU is
    applied when ``lin`` is true.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, normalize=False, lin=True):
        super().__init__()

        # Layers are created in the order conv1, bn1, conv2, bn2, conv3, bn3 so
        # that attribute names and random-initialisation order are stable.
        # NOTE(review): `normalize` is passed as DenseGCNConv's third positional
        # argument, which the PyG docs name `improved` - confirm this is intended.
        layer_dims = (
            (in_channels, hidden_channels),
            (hidden_channels, hidden_channels),
            (hidden_channels, out_channels),
        )
        for idx, (width_in, width_out) in enumerate(layer_dims, start=1):
            setattr(self, f'conv{idx}', DenseGCNConv(width_in, width_out, normalize))
            setattr(self, f'bn{idx}', torch.nn.BatchNorm1d(width_out))

        self.lin = torch.nn.Linear(out_channels, out_channels) if lin else None

    def bn(self, i, x):
        """Apply the i-th BatchNorm1d to a 3-D tensor.

        The first two dimensions are flattened into one before normalisation
        and restored afterwards, so the output has the same shape as ``x``.
        """
        dim0, dim1, channels = x.size()
        norm_layer = getattr(self, f'bn{i}')
        return norm_layer(x.view(-1, channels)).view(dim0, dim1, channels)

    def forward(self, x, adj, mask=None):
        """Run conv -> ReLU -> batch norm three times, then the optional Linear + ReLU."""
        for idx in (1, 2, 3):
            conv = getattr(self, f'conv{idx}')
            x = self.bn(idx, conv(x, adj, mask).relu())

        if self.lin is None:
            return x
        return self.lin(x).relu()


class Net_AsymCheegerCut(torch.nn.Module):
    """Graph classifier: (DenseGCNConv -> AsymCheegerCutPool) twice, then GCN + MLP head.

    The constructor takes no arguments and reads these module-level globals:
    ``dataset`` (num_features / num_classes), ``N``, ``mp_channels``,
    ``mlp_hidden_channels``, ``mlp_hidden_layers``, ``mlp_activation``,
    ``totvar_coeff`` and ``balance_coeff``.
    """

    def __init__(self):
        super().__init__()

        # gnn1_pool / gnn2_pool are not used by forward(): the pooling layers
        # below are built with their own MLP settings. They stay registered so
        # the parameter names and the order of random initialisation of the
        # remaining layers are unchanged.
        num_nodes = 64
        self.gnn1_pool = GNN(dataset.num_features, 64, num_nodes)
        self.gnn1_embed = DenseGCNConv(dataset.num_features, 64)

        num_nodes = 64
        self.gnn2_pool = GNN(64, 64, num_nodes)
        self.gnn2_embed = DenseGCNConv(64, 64)

        self.gnn3_embed = DenseGCNConv(64, 64)

        self.lin1 = torch.nn.Linear(64, 32)
        self.lin2 = torch.nn.Linear(32, dataset.num_classes)

        def make_pool():
            # Each call builds a fresh channel list so the two pooling layers
            # never share a mutable argument.
            return AsymCheegerCutPool(
                int(N // 2),
                mlp_channels=[mp_channels] +
                    [mlp_hidden_channels for _ in range(mlp_hidden_layers)],
                mlp_activation=mlp_activation,
                totvar_coeff=totvar_coeff,
                balance_coeff=balance_coeff,
                return_selection=False,
                return_pooled_graph=True,
            )

        self.pool1 = make_pool()
        self.pool2 = make_pool()

    def forward(self, x, adj, mask=None):
        """Return per-graph log-probabilities (log_softmax over dim 1).

        Args:
            x, adj, mask: the ``data.x``, ``data.adj`` and ``data.mask`` fields
                of a dense batch, as passed by ``train()`` / ``test()``.

        The earlier ``s = self.gnn1_pool(...)`` / ``s = self.gnn2_pool(...)``
        calls were removed: their result was never used, so they only added
        time and memory to every forward pass.
        """
        x = self.gnn1_embed(x, adj, mask)
        x = F.relu(x)

        # The pooling layer returns four values; the last two are discarded and
        # so never reach the training loss.
        # NOTE(review): they are presumably auxiliary loss terms (total
        # variation / balance) - confirm whether they should be added to the loss.
        # NOTE(review): mask=None is passed although a mask is available -
        # confirm this is intended for padded dense batches.
        x, adj, _, _ = self.pool1(x, adj, mask=None)

        x = self.gnn2_embed(x, adj)
        x = F.relu(x)

        x, adj, _, _ = self.pool2(x, adj, mask=None)

        x = self.gnn3_embed(x, adj)
        x = F.relu(x)

        # Average over dim 1 to get one vector per graph, then classify.
        x = x.mean(dim=1)
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

# Backend preference: CUDA, then Apple MPS (only if this torch build exposes
# torch.backends.mps and reports it available), otherwise CPU.
device = torch.device(
    'cuda' if torch.cuda.is_available()
    else 'mps' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    else 'cpu'
)

# Initial model/optimizer pair; the per-seed loop further down rebinds both.
model = Net_AsymCheegerCut().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train():
    """Run one optimisation pass over the global ``train_loader``.

    Uses the global ``model``, ``optimizer`` and ``device``. Returns the
    per-batch NLL losses weighted by batch size and divided by the number of
    samples in ``train_loader.dataset``.
    """
    model.train()
    weighted_sum = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.adj, batch.mask)
        targets = batch.y.view(-1)
        batch_loss = F.nll_loss(log_probs, targets)
        batch_loss.backward()
        optimizer.step()
        # Weight each batch by its size so a smaller last batch is not over-counted.
        weighted_sum += batch.y.size(0) * float(batch_loss)
    return weighted_sum / len(train_loader.dataset)

def test(loader):
    """Return the classification accuracy of the global ``model`` on ``loader``.

    Args:
        loader: iterable of dense batches exposing ``x``, ``adj``, ``mask`` and
            ``y``; ``loader.dataset`` must support ``len``.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.

    Evaluation runs under ``torch.no_grad()`` so no autograd graph is built.
    Without it, every evaluation batch kept activations alive and inflated the
    time and GPU memory figures this notebook reports.
    """
    model.eval()
    correct = 0
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            output = model(data.x, data.adj, data.mask)
            pred = output.max(dim=1)[1]
            correct += int(pred.eq(data.y.view(-1)).sum())
    return correct / len(loader.dataset)


def set_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and, if present, CUDA).

    Also sets cuDNN to deterministic mode and disables its benchmark mode.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Multi-seed experiment: for every seed, reshuffle and split the dataset, train
# a fresh model, and record wall-clock time, reserved GPU memory, the best
# validation accuracy and the test accuracy observed at that same epoch.
seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

# Stop a run after this many consecutive epochs without a validation gain
# larger than `tolerance`.
early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)
    dataset_dense = dataset_dense.shuffle()

    # 70% train, 15% validation, remainder test.
    train_ratio = 0.7
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_dense)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_dense[:num_train]
    val_dataset = dataset_dense[num_train:num_train + num_val]
    test_dataset = dataset_dense[num_train + num_val:]
    train_loader = DenseDataLoader(train_dataset, batch_size=256, shuffle=True)
    valid_loader = DenseDataLoader(val_dataset, batch_size=256, shuffle=False)
    test_loader = DenseDataLoader(test_dataset, batch_size=256, shuffle=False)

    # train() and test() pick these up as globals.
    model = Net_AsymCheegerCut().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    # Reset per seed. Otherwise a run whose validation accuracy never improves
    # would raise NameError (first seed) or silently reuse the previous seed's
    # test accuracy.
    best_test_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        # Model selection is driven by validation accuracy only; the test
        # accuracy is just recorded at the selected epoch.
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    # Release cached blocks before the next seed's run.
    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 1.1174, Val Acc: 0.3213, Test Acc: 0.3320
Seed: 42, Epoch: 002, Loss: 1.0731, Val Acc: 0.6147, Test Acc: 0.5920
Seed: 42, Epoch: 003, Loss: 0.9545, Val Acc: 0.6200, Test Acc: 0.6347
Seed: 42, Epoch: 004, Loss: 0.7622, Val Acc: 0.6547, Test Acc: 0.6587
Seed: 42, Epoch: 005, Loss: 0.6128, Val Acc: 0.7013, Test Acc: 0.6853
Seed: 42, Epoch: 006, Loss: 0.5629, Val Acc: 0.6800, Test Acc: 0.6787
Seed: 42, Epoch: 007, Loss: 0.5361, Val Acc: 0.6813, Test Acc: 0.6880
Seed: 42, Epoch: 008, Loss: 0.5134, Val Acc: 0.7147, Test Acc: 0.7000
Seed: 42, Epoch: 009, Loss: 0.4925, Val Acc: 0.7320, Test Acc: 0.7320
Seed: 42, Epoch: 010, Loss: 0.4791, Val Acc: 0.7627, Test Acc: 0.7227
Seed: 42, Epoch: 011, Loss: 0.4664, Val Acc: 0.7613, Test Acc: 0.7293
Seed: 42, Epoch: 012, Loss: 0.4536, Val Acc: 0.7867, Test Acc: 0.7400
Seed: 42, Epoch: 013, Loss: 0.4537, Val Acc: 0.7613, Test Acc: 0.7213
Seed: 42, Epoch: 014, Loss: 0.4442, Val Acc: 0.7760, Test Acc: 0.7533
Seed: 42, Epoch: 015

# Graph Regression

### QM7 (2 64, 1 128)

In [11]:
!python /data/XXX/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --cuda_num 0 --run_times=5 --patience=150 --epochs=500 --pooling='AsymCheegerCut'

Existed dataset loaded: datasets/processed/qm7.pt

Current dataset: qm7, include 6832 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/500MAE=1545.6508 MAE=1532.2898 MAE=1511.2916 MAE=1476.9377 MAE=1453.9104 MAE=1414.8418 MAE=1332.9738 MAE=1354.1702 MAE=1256.8965 Epoch: 10/500MAE=1249.6379 MAE=1143.9844 MAE=1042.0984 MAE=1040.6293 MAE=981.3605 MAE=859.9890 MAE=789.3861 MAE=598.2144 MAE=522.1515 MAE=376.8714 Epoch: 20/500MAE=496.9565 MAE=437.2871 MAE=353.8682 MAE=301.8752 MAE=78.5323 MAE=328.9137 MAE=144.1179 MAE=136.7178 MAE=143.6091 MAE=232.3712 Epoch: 30/500MAE=242.3170 MAE=104.6889 MAE=104.4016 MAE=161.7224 MAE=244.0971 MAE=183.4391 MAE=221.0983 MAE=194.1880 MAE=204.7583 MAE=185.3420 Epoch: 40/500MAE=197.0789 MAE=205.3470 MAE=198.0338 MAE=195.7416 MAE=201.6071 MAE=205.5452 MAE=204.5414 MAE=203.5570 MAE=198.0005 MAE=204.2218 Epoch: 50/500MAE=206.4478 MAE=202.4700 MAE=207.8313 MAE=203.9200 MAE=203.3765 MAE=204.7877 MAE=204.4341 MAE=204.5223 MAE=203.4426 MAE=203.9673 