# Graph Classification

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import sys
import torch
from transformers.optimization import get_cosine_schedule_with_warmup
import torch.nn.functional as F
import torch_geometric.transforms as T
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.loader import DataLoader

import os
import random
import pandas as pd
import torch
import torch_geometric.transforms as T
from typing import Optional
import torch
from torch import Tensor
from torch_geometric.data import Data
from torch_geometric.data.datapipes import functional_transform
from torch_geometric.transforms import BaseTransform

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.datasets import WebKB
from torch_geometric.datasets import Actor
from torch_geometric.datasets import GNNBenchmarkDataset
from torch_geometric.datasets import TUDataset
from sklearn.metrics import r2_score
from torch_geometric.data import DataLoader
from torch_geometric.datasets import MoleculeNet
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.utils import to_networkx
from torch.nn import Linear
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import random
import pandas as pd

import time
import psutil
import torch
import torch.nn.functional as F
import warnings
warnings.filterwarnings("ignore")

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_geometric.utils import add_remaining_self_loops, to_dense_adj, add_self_loops
from typing import Callable, Optional, Union
from torch_sparse import coalesce, transpose
from torch_scatter import scatter
from torch import Tensor
from torch_geometric.utils.num_nodes import maybe_num_nodes
from torch_sparse import spspmm
from torch_sparse import coalesce
from torch_sparse import eye
from torch.nn import Parameter
from torch_scatter import scatter_add
from torch_scatter import scatter_max
def cumsum(x: Tensor, dim: int = 0) -> Tensor:
    r"""Returns the cumulative sum of elements of :obj:`x`.
    In contrast to :meth:`torch.cumsum`, prepends the output with zero.

    Args:
        x (torch.Tensor): The input tensor.
        dim (int, optional): The dimension to do the operation over.
            (default: :obj:`0`)

    Example:
        >>> x = torch.tensor([2, 4, 1])
        >>> cumsum(x)
        tensor([0, 2, 6, 7])

    """
    size = x.size()[:dim] + (x.size(dim) + 1, ) + x.size()[dim + 1:]
    out = x.new_empty(size)

    out.narrow(dim, 0, 1).zero_()
    torch.cumsum(x, dim=dim, out=out.narrow(dim, 1, x.size(dim)))

    return out

def maybe_num_nodes(edge_index, num_nodes=None):
    if num_nodes is not None:
        return num_nodes
    elif isinstance(edge_index, Tensor):
        return int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0
    else:
        return max(edge_index.size(0), edge_index.size(1))

def maybe_num_nodes(edge_index, num_nodes=None):
    if num_nodes is not None:
        return num_nodes
    elif isinstance(edge_index, Tensor):
        return int(edge_index.max()) + 1 if edge_index.numel() > 0 else 0
    else:
        return max(edge_index.size(0), edge_index.size(1))

def filter_adj(edge_index, edge_attr, perm, num_nodes=None):
    num_nodes = maybe_num_nodes(edge_index, num_nodes)

    mask = perm.new_full((num_nodes, ), -1)
    i = torch.arange(perm.size(0), dtype=torch.long, device=perm.device)
    mask[perm] = i

    row, col = edge_index
    row, col = mask[row], mask[col]
    mask = (row >= 0) & (col >= 0)
    row, col = row[mask], col[mask]

    if edge_attr is not None:
        edge_attr = edge_attr[mask]

    return torch.stack([row, col], dim=0), edge_attr

def topk(
    x: Tensor,
    ratio: Optional[Union[float, int]],
    batch: Tensor,
    min_score: Optional[float] = None,
    tol: float = 1e-7,
) -> Tensor:
    if min_score is not None:
        # Make sure that we do not drop all nodes in a graph.
        scores_max = scatter(x, batch, reduce='max')[batch] - tol
        scores_min = scores_max.clamp(max=min_score)

        perm = (x > scores_min).nonzero().view(-1)
        return perm

    if ratio is not None:
        num_nodes = scatter(batch.new_ones(x.size(0)), batch, reduce='sum')

        if ratio >= 1:
            k = num_nodes.new_full((num_nodes.size(0), ), int(ratio))
        else:
            k = (float(ratio) * num_nodes.to(x.dtype)).ceil().to(torch.long)

        x, x_perm = torch.sort(x.view(-1), descending=True)
        batch = batch[x_perm]
        batch, batch_perm = torch.sort(batch, descending=False, stable=True)

        arange = torch.arange(x.size(0), dtype=torch.long, device=x.device)
        ptr = cumsum(num_nodes)
        batched_arange = arange - ptr[batch]
        mask = batched_arange < k[batch]

        return x_perm[batch_perm[mask]]

    raise ValueError("At least one of the 'ratio' and 'min_score' parameters "
                     "must be specified")

class GPR_prop(MessagePassing):
    '''
    propagation class for GPR_GNN
    '''

    def __init__(self, K, alpha, Init, Gamma=None, bias=True, **kwargs):
        super(GPR_prop, self).__init__(aggr='add', **kwargs)
        self.K = K
        self.Init = Init
        self.alpha = alpha

        assert Init in ['SGC', 'PPR', 'NPPR', 'Random', 'WS']
        if Init == 'SGC':
            # SGC-like
            TEMP = 0.0*np.ones(K+1)
            TEMP[alpha] = 1.0
        elif Init == 'PPR':
            # PPR-like
            TEMP = alpha*(1-alpha)**np.arange(K+1)
            TEMP[-1] = (1-alpha)**K
        elif Init == 'NPPR':
            # Negative PPR
            TEMP = (alpha)**np.arange(K+1)
            TEMP = TEMP/np.sum(np.abs(TEMP))
        elif Init == 'Random':
            # Random
            bound = np.sqrt(3/(K+1))
            TEMP = np.random.uniform(-bound, bound, K+1)
            TEMP = TEMP/np.sum(np.abs(TEMP))
        elif Init == 'WS':
            # Specify Gamma
            TEMP = Gamma

        self.temp = Parameter(torch.tensor(TEMP))

    def reset_parameters(self):
        torch.nn.init.zeros_(self.temp)
        for k in range(self.K+1):
            self.temp.data[k] = self.alpha*(1-self.alpha)**k
        self.temp.data[-1] = (1-self.alpha)**self.K

    def forward(self, x, edge_index, edge_weight=None):
        edge_index, norm = gcn_norm(
            edge_index, edge_weight, num_nodes=x.size(0), dtype=x.dtype)

        hidden = x*(self.temp[0])
        for k in range(self.K):
            x = self.propagate(edge_index, x=x, norm=norm)
            gamma = self.temp[k+1]
            hidden = hidden + gamma*x
        return hidden

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

    def __repr__(self):
        return '{}(K={}, temp={})'.format(self.__class__.__name__, self.K,
                                           self.temp)


class NodeInformationScore(MessagePassing):
    def __init__(self, improved=False, cached=False, **kwargs):
        super(NodeInformationScore, self).__init__(aggr='add', **kwargs)

        self.improved = improved
        self.cached = cached
        self.cached_result = None
        self.cached_num_edges = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, device=edge_index.device)

        edge_index, edge_weight = add_remaining_self_loops(edge_index, edge_weight, 0, num_nodes) # in case all the edges are removed

        edge_index = edge_index.type(torch.long)
        row, col = edge_index
        # print(row, col)
        # print(edge_weight.shape, row.shape, num_nodes)
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        # row, col = edge_index
        expand_deg = torch.zeros((edge_weight.size(0),), dtype=dtype, device=edge_index.device)
        expand_deg[-num_nodes:] = torch.ones((num_nodes,), dtype=dtype, device=edge_index.device)

        return edge_index, expand_deg - deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight):
        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}'.format(self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            edge_index, norm = self.norm(edge_index, x.size(0), edge_weight, x.dtype)
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result

        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        return aggr_out

class graph_attention(torch.nn.Module):
    # reference: https://github.com/gordicaleksa/pytorch-GAT/blob/39c8f0ee634477033e8b1a6e9a6da3c7ed71bbd1/models/definitions/GAT.py#L324
    src_nodes_dim = 0  # position of source nodes in edge index
    trg_nodes_dim = 1  # position of target nodes in edge index

    nodes_dim = 0      # node dimension/axis
    head_dim = 1       # attention head dimension/axis

    def __init__(self, num_in_features, num_out_features, num_of_heads, dropout_prob=0.6, log_attention_weights=False):
        super().__init__()

        # Saving these as we'll need them in forward propagation in children layers (imp1/2/3)
        self.num_of_heads = num_of_heads
        self.num_out_features = num_out_features
        #
        # Trainable weights: linear projection matrix (denoted as "W" in the paper), attention target/source
        # (denoted as "a" in the paper) and bias (not mentioned in the paper but present in the official GAT repo)
        #

        # You can treat this one matrix as num_of_heads independent W matrices
        self.linear_proj = nn.Linear(num_in_features, num_of_heads * num_out_features, bias=False)

        # After we concatenate target node (node i) and source node (node j) we apply the additive scoring function
        # which gives us un-normalized score "e". Here we split the "a" vector - but the semantics remain the same.

        # Basically instead of doing [x, y] (concatenation, x/y are node feature vectors) and dot product with "a"
        # we instead do a dot product between x and "a_left" and y and "a_right" and we sum them up
        self.scoring_fn_target = nn.Parameter(torch.Tensor(1, num_of_heads, num_out_features))
        self.scoring_fn_source = nn.Parameter(torch.Tensor(1, num_of_heads, num_out_features))

        self.init_params()

    def init_params(self):
        """
        The reason we're using Glorot (aka Xavier uniform) initialization is because it's a default TF initialization:
            https://stackoverflow.com/questions/37350131/what-is-the-default-variable-initializer-in-tensorflow
        The original repo was developed in TensorFlow (TF) and they used the default initialization.
        Feel free to experiment - there may be better initializations depending on your problem.
        """
        nn.init.xavier_uniform_(self.linear_proj.weight)
        nn.init.xavier_uniform_(self.scoring_fn_target)
        nn.init.xavier_uniform_(self.scoring_fn_source)

    def forward(self, x, edge_index):
        #
        # Step 1: Linear Projection + regularization
        #

        in_nodes_features = x  # unpack data
        num_of_nodes = in_nodes_features.shape[self.nodes_dim]

        # shape = (N, FIN) * (FIN, NH*FOUT) -> (N, NH, FOUT) where NH - number of heads, FOUT - num of output features
        # We project the input node features into NH independent output features (one for each attention head)
        nodes_features_proj = self.linear_proj(in_nodes_features).view(-1, self.num_of_heads, self.num_out_features)

        #
        # Step 2: Edge attention calculation
        #

        # Apply the scoring function (* represents element-wise (a.k.a. Hadamard) product)
        # shape = (N, NH, FOUT) * (1, NH, FOUT) -> (N, NH, 1) -> (N, NH) because sum squeezes the last dimension
        # Optimization note: torch.sum() is as performant as .sum() in my experiments
        scores_source = (nodes_features_proj * self.scoring_fn_source).sum(dim=-1)
        scores_target = (nodes_features_proj * self.scoring_fn_target).sum(dim=-1)

        # We simply copy (lift) the scores for source/target nodes based on the edge index. Instead of preparing all
        # the possible combinations of scores we just prepare those that will actually be used and those are defined
        # by the edge index.
        # scores shape = (E, NH), nodes_features_proj_lifted shape = (E, NH, FOUT), E - number of edges in the graph
        scores_source_lifted, scores_target_lifted, nodes_features_proj_lifted = self.lift(scores_source, scores_target, nodes_features_proj, edge_index)
        scores_per_edge = scores_source_lifted + scores_target_lifted

        return torch.sigmoid(scores_per_edge)

    def lift(self, scores_source, scores_target, nodes_features_matrix_proj, edge_index):
        """
        Lifts i.e. duplicates certain vectors depending on the edge index.
        One of the tensor dims goes from N -> E (that's where the "lift" comes from).
        """
        src_nodes_index = edge_index[self.src_nodes_dim]
        trg_nodes_index = edge_index[self.trg_nodes_dim]

        # Using index_select is faster than "normal" indexing (scores_source[src_nodes_index]) in PyTorch!
        scores_source = scores_source.index_select(self.nodes_dim, src_nodes_index)
        scores_target = scores_target.index_select(self.nodes_dim, trg_nodes_index)
        nodes_features_matrix_proj_lifted = nodes_features_matrix_proj.index_select(self.nodes_dim, src_nodes_index)

        return scores_source, scores_target, nodes_features_matrix_proj_lifted



class CoPooling(torch.nn.Module):
    # reference for GAT code: https://github.com/PetarV-/GAT
    # reference for generalized pagerank code: https://github.com/jianhao2016/GPRGNN
    def __init__(self, ratio=0.5, K=0.05, edge_ratio=0.6, nhid=64, alpha=0.1, Init='Random', Gamma=None):
        super(CoPooling, self).__init__()
        self.ratio = ratio
        self.calc_information_score = NodeInformationScore()
        self.edge_ratio = edge_ratio

        self.prop1 = GPR_prop(K, alpha, Init, Gamma)

        score_dim = 32
        self.G_att = graph_attention(num_in_features=nhid, num_out_features=score_dim, num_of_heads=1)

        self.weight = Parameter(torch.Tensor(2*nhid, nhid))
        nn.init.xavier_uniform_(self.weight.data)
        self.bias = Parameter(torch.Tensor(nhid))
        nn.init.zeros_(self.bias.data)

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weight.data)
        nn.init.zeros_(self.bias.data)
        self.prop1.reset_parameters()
        self.G_att.init_params()

    def forward(self, x, edge_index, edge_attr, batch=None, nodes_index=None, node_attr=None):
        if batch is None:
            batch = edge_index.new_zeros(x.size(0))
        ori_batch = batch.clone()
        device = x.device
        num_nodes = x.shape[0]

        # cut edges based on scores
        x_cut = self.prop1(x, edge_index) # run generalized pagerank to update features

        attention = self.G_att(x_cut, edge_index) # get the attention weights after sigmoid
        attention = attention.sum(dim=1) #sum the weights on head dim
        edge_index, attention = add_self_loops(edge_index, attention, 1.0, num_nodes) # add self loops in case no edges

        # to get a systemitic adj matrix
        edge_index_t, attention_t = transpose(edge_index, attention, num_nodes, num_nodes)
        edge_tmp = torch.cat((edge_index, edge_index_t), 1)
        att_tmp = torch.cat((attention, attention_t),0)
        edge_index, attention = coalesce(edge_tmp, att_tmp, num_nodes, num_nodes, 'mean')

        attention_np = attention.cpu().data.numpy()
        cut_val = np.percentile(attention_np, int(100*(1-self.edge_ratio))) # this is for keep the top edge_ratio edges
        attention = attention * (attention >= cut_val) # keep the edge_ratio higher weights of edges

        kep_idx = attention > 0.0
        cut_edge_index, cut_edge_attr = edge_index[:, kep_idx], attention[kep_idx]

        # Graph Pooling based on nodes
        x_information_score = self.calc_information_score(x, cut_edge_index, cut_edge_attr)
        score = torch.sum(torch.abs(x_information_score), dim=1)
        perm = topk(score, self.ratio, batch)
        x_topk = x[perm]
        batch = batch[perm]
        if nodes_index is not None:
            nodes_index = nodes_index[perm]

        if node_attr is not None:
            node_attr = node_attr[perm]
        if cut_edge_index is not None or cut_edge_index.nelement() != 0:
            induced_edge_index, induced_edge_attr = filter_adj(cut_edge_index, cut_edge_attr, perm, num_nodes=num_nodes)
        else:
            print('All edges are cut!')
            induced_edge_index, induced_edge_attr = cut_edge_index, cut_edge_attr

        # update node features
        attention_dense = (to_dense_adj(cut_edge_index, edge_attr=cut_edge_attr, max_num_nodes=num_nodes)).squeeze()
        x = F.relu(torch.matmul(torch.cat((x_topk, torch.matmul(attention_dense[perm],x)), 1), self.weight) + self.bias)

        return x, induced_edge_index, perm, induced_edge_attr, batch, nodes_index, node_attr, attention_dense

### MUTAG

In [7]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
max_nodes = 150
data_path = "/data/XXX/Pooling/1"

dataset_sparse = TUDataset(root=data_path, name="MUTAG", pre_filter=lambda data: data.num_nodes <= max_nodes, use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_CO(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_CO, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = CoPooling(ratio=0.9, K=1, edge_ratio=0.6, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = CoPooling(ratio=0.9, K=1, edge_ratio=0.6, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool1(x, edge_index, edge_attr=None, batch=batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool2(x, edge_index, edge_attr=None, batch=batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)


num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

    model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        #print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Early stopping at epoch 162 for seed 43
Average Time: 10.02 seconds
Var Time: 0.98 seconds
Average Memory: 122.00 MB
Average Best Val Acc: 0.8571
Std Best Test Acc: 0.0325
Average Test Acc: 0.8391


### DD

In [3]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
max_nodes = 500
data_path = "/data/XXX/Pooling/"

dataset_sparse = TUDataset(root=data_path, name="DD", pre_filter=lambda data: data.num_nodes <= max_nodes, use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_CO(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_CO, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = CoPooling(ratio=0.9, K=1, edge_ratio=0.6, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = CoPooling(ratio=0.9, K=1, edge_ratio=0.6, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool1(x, edge_index, edge_attr=None, batch=batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool2(x, edge_index, edge_attr=None, batch=batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)


num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=256, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

    model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 0.7131, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 002, Loss: 0.7110, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 003, Loss: 0.7093, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 004, Loss: 0.7073, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 005, Loss: 0.7052, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 006, Loss: 0.7036, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 007, Loss: 0.7020, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 008, Loss: 0.7004, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 009, Loss: 0.6986, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 010, Loss: 0.6968, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 011, Loss: 0.6949, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 012, Loss: 0.6928, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 013, Loss: 0.6914, Val Acc: 0.4505, Test Acc: 0.4414
Seed: 42, Epoch: 014, Loss: 0.6893, Val Acc: 0.4595, Test Acc: 0.4685
Seed: 42, Epoch: 015

In [6]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
max_nodes = 500
data_path = "/data/XXX/Pooling/"

dataset_sparse = TUDataset(root=data_path, name="IMDB-BINARY", transform=T.Compose([T.OneHotDegree(136)]), use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_CO(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_CO, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = CoPooling(ratio=0.7, K=3, edge_ratio=0.8, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = CoPooling(ratio=0.7, K=3, edge_ratio=0.8, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool1(x, edge_index, edge_attr=None, batch=batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool2(x, edge_index, edge_attr=None, batch=batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)


num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

    model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 0.7022, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 002, Loss: 0.6978, Val Acc: 0.5200, Test Acc: 0.5267
Seed: 42, Epoch: 003, Loss: 0.6925, Val Acc: 0.5200, Test Acc: 0.5333
Seed: 42, Epoch: 004, Loss: 0.6894, Val Acc: 0.5267, Test Acc: 0.5467
Seed: 42, Epoch: 005, Loss: 0.6826, Val Acc: 0.5333, Test Acc: 0.6200
Seed: 42, Epoch: 006, Loss: 0.6737, Val Acc: 0.5533, Test Acc: 0.6533
Seed: 42, Epoch: 007, Loss: 0.6642, Val Acc: 0.5800, Test Acc: 0.6933
Seed: 42, Epoch: 008, Loss: 0.6490, Val Acc: 0.5867, Test Acc: 0.6800
Seed: 42, Epoch: 009, Loss: 0.6323, Val Acc: 0.6400, Test Acc: 0.6867
Seed: 42, Epoch: 010, Loss: 0.6244, Val Acc: 0.6333, Test Acc: 0.7267
Seed: 42, Epoch: 011, Loss: 0.5967, Val Acc: 0.6867, Test Acc: 0.7200
Seed: 42, Epoch: 012, Loss: 0.5940, Val Acc: 0.6933, Test Acc: 0.7400
Seed: 42, Epoch: 013, Loss: 0.5772, Val Acc: 0.7600, Test Acc: 0.7600
Seed: 42, Epoch: 014, Loss: 0.5671, Val Acc: 0.7867, Test Acc: 0.7733
Seed: 42, Epoch: 015

### IMDB-MULTI

In [5]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
max_nodes = 500
data_path = "/data/XXX/Pooling/"

dataset_sparse = TUDataset(root=data_path, name="IMDB-MULTI", transform=T.Compose([T.OneHotDegree(88)]), use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_CO(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_CO, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = CoPooling(ratio=0.9, K=1, edge_ratio=0.8, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = CoPooling(ratio=0.9, K=1, edge_ratio=0.8, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool1(x, edge_index, edge_attr=None, batch=batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool2(x, edge_index, edge_attr=None, batch=batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)


num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

    model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 1.0919, Val Acc: 0.3022, Test Acc: 0.3333
Seed: 42, Epoch: 002, Loss: 1.0851, Val Acc: 0.3911, Test Acc: 0.3333
Seed: 42, Epoch: 003, Loss: 1.0776, Val Acc: 0.4000, Test Acc: 0.3378
Seed: 42, Epoch: 004, Loss: 1.0758, Val Acc: 0.3733, Test Acc: 0.3644
Seed: 42, Epoch: 005, Loss: 1.0727, Val Acc: 0.4178, Test Acc: 0.4356
Seed: 42, Epoch: 006, Loss: 1.0665, Val Acc: 0.4267, Test Acc: 0.4489
Seed: 42, Epoch: 007, Loss: 1.0806, Val Acc: 0.4222, Test Acc: 0.4444
Seed: 42, Epoch: 008, Loss: 1.0609, Val Acc: 0.4222, Test Acc: 0.4133
Seed: 42, Epoch: 009, Loss: 1.0579, Val Acc: 0.3822, Test Acc: 0.4267
Seed: 42, Epoch: 010, Loss: 1.0651, Val Acc: 0.4133, Test Acc: 0.3867
Seed: 42, Epoch: 011, Loss: 1.0544, Val Acc: 0.4089, Test Acc: 0.4089
Seed: 42, Epoch: 012, Loss: 1.0486, Val Acc: 0.4000, Test Acc: 0.4089
Seed: 42, Epoch: 013, Loss: 1.0480, Val Acc: 0.4000, Test Acc: 0.4133
Seed: 42, Epoch: 014, Loss: 1.0427, Val Acc: 0.4089, Test Acc: 0.4178
Seed: 42, Epoch: 015

### COLLAB

In [7]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
data_path = "/data1/Pooling/"

dataset_sparse = TUDataset(root=data_path, name="COLLAB", transform=T.Compose([T.OneHotDegree(491)]), use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, ASAPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_CO(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_CO, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = CoPooling(ratio=0.9, K=1, edge_ratio=0.9, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = CoPooling(ratio=0.9, K=1, edge_ratio=0.9, nhid=64, alpha=0.1, Init='Random', Gamma=1.0)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool1(x, edge_index, edge_attr=None, batch=batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, perm, _, batch, _, _, _ = self.pool2(x, edge_index, edge_attr=None, batch=batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)

num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

    model = HierarchicalGCN_CO(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Seed: 42, Epoch: 001, Loss: 0.9134, Val Acc: 0.6280, Test Acc: 0.5933
Seed: 42, Epoch: 002, Loss: 0.8109, Val Acc: 0.6773, Test Acc: 0.6693
Seed: 42, Epoch: 003, Loss: 0.7397, Val Acc: 0.6600, Test Acc: 0.6600
Seed: 42, Epoch: 004, Loss: 0.6781, Val Acc: 0.7027, Test Acc: 0.6960
Seed: 42, Epoch: 005, Loss: 0.6423, Val Acc: 0.6973, Test Acc: 0.6893
Seed: 42, Epoch: 006, Loss: 0.6250, Val Acc: 0.7080, Test Acc: 0.7000
Seed: 42, Epoch: 007, Loss: 0.5973, Val Acc: 0.7067, Test Acc: 0.6960
Seed: 42, Epoch: 008, Loss: 0.5910, Val Acc: 0.7080, Test Acc: 0.6973
Seed: 42, Epoch: 009, Loss: 0.5700, Val Acc: 0.7067, Test Acc: 0.7053
Seed: 42, Epoch: 010, Loss: 0.5570, Val Acc: 0.6773, Test Acc: 0.6933
Seed: 42, Epoch: 011, Loss: 0.5613, Val Acc: 0.7107, Test Acc: 0.7093
Seed: 42, Epoch: 012, Loss: 0.5453, Val Acc: 0.7147, Test Acc: 0.7080
Seed: 42, Epoch: 013, Loss: 0.5364, Val Acc: 0.7133, Test Acc: 0.7040
Seed: 42, Epoch: 014, Loss: 0.5270, Val Acc: 0.6800, Test Acc: 0.6947
Seed: 42, Epoch: 015

# Graph Regression

### QM7

In [8]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --co_ratio=0.1 --pooling='CO'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --co_ratio=0.3 --pooling='CO'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --co_ratio=0.5 --pooling='CO'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --co_ratio=0.7 --pooling='CO'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --co_ratio=0.9 --pooling='CO'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: qm7, include 6832 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/500MAE=1549.9253 MAE=1547.9928 MAE=1545.0317 MAE=1543.1550 MAE=1540.5305 MAE=1537.8622 MAE=1534.8763 MAE=1531.3993 MAE=1527.0344 Epoch: 10/500MAE=1524.3928 MAE=1518.8035 MAE=1513.8445 MAE=1508.5342 MAE=1503.7075 MAE=1497.8966 MAE=1491.4194 MAE=1485.8691 MAE=1478.4685 MAE=1471.7300 Epoch: 20/500MAE=1463.4873 MAE=1455.6171 MAE=1444.8009 MAE=1442.0656 MAE=1435.0598 MAE=1424.4014 MAE=1414.2493 MAE=1404.2482 MAE=1391.5200 MAE=1382.3337 Epoch: 30/500MAE=1375.0381 MAE=1362.1423 MAE=1352.3616 MAE=1346.1841 MAE=1324.6183 MAE=1317.3501 MAE=1303.8706 MAE=1287.2766 MAE=1272.1208 MAE=1264.2551 Epoch: 40/500MAE=1248.8540 MAE=1238.7097 MAE=1230.0125 MAE=1207.6420 MAE=1195.9270 MAE=1171.2872 MAE=1155.2688 MAE=1145.0004 MAE=1124.8623 MAE=1105.5095 Epoch: 50/500MAE=1086.6848 MAE=1073.1459 MAE=1054

### QM8

In [9]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --co_ratio=0.1 --pooling='CO'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --co_ratio=0.3 --pooling='CO'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --co_ratio=0.5 --pooling='CO'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --co_ratio=0.7 --pooling='CO'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --co_ratio=0.9 --pooling='CO'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: qm8, include 21783 molecules and 12 regression tasks

Splitting, finish 1/1  
Epoch: 1/150MAE=0.1438 MAE=0.1175 MAE=0.0771 MAE=0.0544 MAE=0.0463 MAE=0.0445 MAE=0.0432 MAE=0.0386 MAE=0.0355 Epoch: 10/150MAE=0.0351 MAE=0.0316 MAE=0.0323 MAE=0.0304 MAE=0.0304 MAE=0.0299 MAE=0.0305 MAE=0.0293 MAE=0.0286 MAE=0.0299 Epoch: 20/150MAE=0.0295 MAE=0.0291 MAE=0.0288 MAE=0.0276 MAE=0.0278 MAE=0.0277 MAE=0.0281 MAE=0.0277 MAE=0.0270 MAE=0.0266 Epoch: 30/150MAE=0.0271 MAE=0.0268 MAE=0.0271 MAE=0.0271 MAE=0.0267 MAE=0.0263 MAE=0.0264 MAE=0.0263 MAE=0.0263 MAE=0.0268 Epoch: 40/150MAE=0.0262 MAE=0.0262 MAE=0.0261 MAE=0.0261 MAE=0.0260 MAE=0.0263 MAE=0.0262 MAE=0.0261 MAE=0.0261 MAE=0.0259 Epoch: 50/150MAE=0.0259 MAE=0.0258 MAE=0.0258 MAE=0.0258 MAE=0.0258 MAE=0.0258 MAE=0.0257 MAE=0.0256 MAE=0.0258 MAE=0.0258 Epoch: 60/150MAE=0.0257 MAE=0.0254 MAE=0.0253 MAE=0.0255 MAE=0.0257 MAE

### BACE

In [1]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num -1 --run_times=5 --patience=20 --epochs=150 --co_ratio=0.1 --pooling='CO'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num -1 --run_times=5 --patience=20 --epochs=150 --co_ratio=0.3 --pooling='CO'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num -1 --run_times=5 --patience=20 --epochs=150 --co_ratio=0.5 --pooling='CO'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num -1 --run_times=5 --patience=20 --epochs=150 --co_ratio=0.7 --pooling='CO'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num -1 --run_times=5 --patience=20 --epochs=150 --co_ratio=0.9 --pooling='CO'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: bace, include 1513 molecules and 1 classification tasks

Splitting, finish 1/1  
Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150Epoch: 40/150Epoch: 50/150
********************1's fold 1's run over********************
AUROC: 0.839 +/- 0.000
AUPRC: 0.805 +/- 0.000

Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150Epoch: 40/150
********************1's fold 2's run over********************
AUROC: 0.827 +/- 0.012
AUPRC: 0.786 +/- 0.019

Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150Epoch: 40/150
********************1's fold 3's run over********************
AUROC: 0.824 +/- 0.010
AUPRC: 0.780 +/- 0.018

Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150Epoch: 40/150
********************1's fold 4's run over********************
AUROC: 0.823 +/- 0.009
AUPRC: 0.784 +/- 0.018

Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150
********************1's fold 5's 

### ESOL

In [2]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --co_ratio=0.1 --pooling='CO'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --co_ratio=0.3 --pooling='CO'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --co_ratio=0.5 --pooling='CO'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --co_ratio=0.7 --pooling='CO'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --co_ratio=0.9 --pooling='CO'

++++++++++++++++++++++0.1++++++++++++++++++++++++


Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: esol, include 1127 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/150RMSE=1.5650 RMSE=1.3964 RMSE=1.4950 RMSE=1.2556 RMSE=1.2993 RMSE=1.2439 RMSE=1.2286 RMSE=1.3725 RMSE=1.1922 Epoch: 10/150RMSE=1.2696 RMSE=1.5961 RMSE=1.3705 RMSE=1.3077 RMSE=1.3156 RMSE=1.3182 RMSE=1.2859 RMSE=1.3351 RMSE=1.2906 RMSE=1.2643 Epoch: 20/150RMSE=1.2661 RMSE=1.2632 RMSE=1.1968 RMSE=1.2187 RMSE=1.2631 RMSE=1.2214 RMSE=1.2307 RMSE=1.2434 RMSE=1.2361 RMSE=1.2740 RMSE=1.5812 
********************1's fold 1's run over********************
RMSE: 1.581 +/- 0.000

Epoch: 1/150RMSE=1.6883 RMSE=1.4866 RMSE=1.4878 RMSE=1.4642 RMSE=1.4679 RMSE=1.4483 RMSE=1.4553 RMSE=1.3822 RMSE=1.4342 Epoch: 10/150RMSE=1.4078 RMSE=1.4169 RMSE=1.3717 RMSE=1.3256 RMSE=1.3051 RMSE=1.3004 RMSE=1.3797 RMSE=1.3566 RMSE=1.2365 RMSE=1.2871 Epoch: 20/150RMSE=1.2410 RMSE=1.3291 RMSE=1.3049 RMSE=1.2286 RMSE=1.2349 RMSE=1.2469 RMSE=1.2233 RMSE=1.3533 RM

### Freesolv

In [3]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --co_ratio=0.1 --pooling='CO'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --co_ratio=0.3 --pooling='CO'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --co_ratio=0.5 --pooling='CO'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --co_ratio=0.7 --pooling='CO'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --co_ratio=0.9 --pooling='CO'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: freesolv, include 639 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/150RMSE=5.1588 RMSE=4.3907 RMSE=3.6676 RMSE=2.7378 RMSE=2.4986 RMSE=2.4437 RMSE=2.4213 RMSE=2.1056 RMSE=2.5684 Epoch: 10/150RMSE=2.4880 RMSE=2.4404 RMSE=2.6973 RMSE=2.2776 RMSE=2.1642 RMSE=2.2796 RMSE=2.0681 RMSE=2.1840 RMSE=2.1889 RMSE=2.1902 Epoch: 20/150RMSE=2.2006 RMSE=2.2405 RMSE=2.3109 RMSE=2.2345 RMSE=2.1170 RMSE=2.0191 RMSE=1.9778 RMSE=2.0128 RMSE=2.0126 RMSE=2.1002 Epoch: 30/150RMSE=2.0785 RMSE=2.1043 RMSE=2.0690 RMSE=2.0217 RMSE=2.2268 RMSE=2.0828 RMSE=2.1348 RMSE=2.1767 RMSE=2.0472 RMSE=2.2173 Epoch: 40/150RMSE=2.1714 RMSE=2.2160 RMSE=2.1820 RMSE=2.1831 RMSE=2.1295 RMSE=2.2319 RMSE=2.1703 RMSE=2.8630 
********************1's fold 1's run over********************
RMSE: 2.863 +/- 0.000

Epoch: 1/150RMSE=5.2355 RMSE=4.2974 RMSE=2.9535 RMSE=2.3861 RMSE=2.1795 RMSE=2.51

### Lipophilicity

In [4]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --co_ratio=0.1 --pooling='CO'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --co_ratio=0.3 --pooling='CO'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --co_ratio=0.5 --pooling='CO'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --co_ratio=0.7 --pooling='CO'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --co_ratio=0.9 --pooling='CO'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: lipo, include 4200 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/150RMSE=1.3593 RMSE=1.2427 RMSE=1.2660 RMSE=1.2301 RMSE=1.2611 RMSE=1.2509 RMSE=1.2021 RMSE=1.1906 RMSE=1.1400 Epoch: 10/150RMSE=1.1342 RMSE=1.1542 RMSE=1.1523 RMSE=1.1465 RMSE=1.0978 RMSE=1.1577 RMSE=1.1104 RMSE=1.1353 RMSE=1.1129 RMSE=1.0916 Epoch: 20/150RMSE=1.1026 RMSE=1.0944 RMSE=1.0712 RMSE=1.0703 RMSE=1.0657 RMSE=1.0636 RMSE=1.0781 RMSE=1.0864 RMSE=1.0710 RMSE=1.0875 Epoch: 30/150RMSE=1.0698 RMSE=1.0596 RMSE=1.0501 RMSE=1.0690 RMSE=1.0708 RMSE=1.0640 RMSE=1.0479 RMSE=1.0811 RMSE=1.0705 RMSE=1.0581 Epoch: 40/150RMSE=1.0524 RMSE=1.0406 RMSE=1.0556 RMSE=1.0578 RMSE=1.0616 RMSE=1.0426 RMSE=1.0540 RMSE=1.0499 RMSE=1.0555 RMSE=1.0471 Epoch: 50/150RMSE=1.0437 RMSE=1.0509 RMSE=1.0485 RMSE=1.0538 RMSE=1.0325 RMSE=1.0466 RMSE=1.0416 RMSE=1.0567 RMSE=1.0471 RMSE=1.0398 Epoch: 60/150