# Graph Classification

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import sys
import torch
from transformers.optimization import get_cosine_schedule_with_warmup
import torch.nn.functional as F
import torch_geometric.transforms as T
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.loader import DataLoader

import os
import random
import pandas as pd
import torch
import torch_geometric.transforms as T
from typing import Optional
import torch
from torch import Tensor
from torch_geometric.data import Data
from torch_geometric.data.datapipes import functional_transform
from torch_geometric.transforms import BaseTransform

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.datasets import WebKB
from torch_geometric.datasets import Actor
from torch_geometric.datasets import GNNBenchmarkDataset
from torch_geometric.datasets import TUDataset
from sklearn.metrics import r2_score
from torch_geometric.data import DataLoader
from torch_geometric.datasets import MoleculeNet
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.utils import to_networkx
from torch.nn import Linear
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import random
import pandas as pd

import time
import psutil
import torch
import torch.nn.functional as F
import warnings
warnings.filterwarnings("ignore")

### MUTAG

In [3]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
max_nodes = 150
data_path = "/data/XXX/Pooling/1"

dataset_sparse = TUDataset(root=data_path, name="MUTAG", pre_filter=lambda data: data.num_nodes <= max_nodes, use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, TopKPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_TOPK(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_TOPK, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = TopKPooling(hidden_channels, ratio=0.9)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = TopKPooling(hidden_channels, ratio=0.9)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)


num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_TOPK(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

    model = HierarchicalGCN_TOPK(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        #print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Early stopping at epoch 153 for seed 42
Early stopping at epoch 176 for seed 43
Early stopping at epoch 153 for seed 44
Average Time: 25.21 seconds
Var Time: 4.86 seconds
Average Memory: 28.00 MB
Average Best Val Acc: 0.8095
Std Best Test Acc: 0.0488
Average Test Acc: 0.8276


### DD

In [6]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
max_nodes = 500
data_path = "/data/XXX/Pooling/"

dataset_sparse = TUDataset(root=data_path, name="DD", pre_filter=lambda data: data.num_nodes <= max_nodes, use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_TOPK(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_TOPK, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = TopKPooling(hidden_channels, ratio=0.9)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = TopKPooling(hidden_channels, ratio=0.9)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)


num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_TOPK(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

    model = HierarchicalGCN_TOPK(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        #print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Early stopping at epoch 174 for seed 43
Average Time: 44.80 seconds
Var Time: 8.52 seconds
Average Memory: 663.33 MB
Average Best Val Acc: 0.7087
Std Best Test Acc: 0.0552
Average Test Acc: 0.6907


### IMDB-MULTI

In [11]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
max_nodes = 500
data_path = "/data/XXX/Pooling/"

dataset_sparse = TUDataset(root=data_path, name="IMDB-MULTI", transform=T.Compose([T.OneHotDegree(88)]), use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_TOPK(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_TOPK, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = TopKPooling(hidden_channels, ratio=0.9)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = TopKPooling(hidden_channels, ratio=0.9)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)


num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_TOPK(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

    model = HierarchicalGCN_TOPK(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        #print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Average Time: 70.93 seconds
Var Time: 0.90 seconds
Average Memory: 116.00 MB
Average Best Val Acc: 0.5156
Std Best Test Acc: 0.0346
Average Test Acc: 0.4844


### COLLAB

In [2]:
from torch_geometric.datasets import TUDataset
import torch_geometric.transforms as T
from torch_geometric.data import DenseDataLoader
data_path = "/data1/Pooling/"

dataset_sparse = TUDataset(root=data_path, name="COLLAB", transform=T.Compose([T.OneHotDegree(491)]), use_node_attr=True)
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, TopKPooling
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import ToUndirected
from torch.nn import Linear
import torch.optim as optim
from torch_geometric.nn import global_mean_pool
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import BatchNorm

class HierarchicalGCN_TOPK(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_classes):
        super(HierarchicalGCN_TOPK, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.bn1 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool1 = TopKPooling(hidden_channels, ratio=0.7)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.bn2 = torch.nn.BatchNorm1d(hidden_channels)
        self.pool2 = TopKPooling(hidden_channels, ratio=0.7)
        self.conv3 = GCNConv(hidden_channels, out_channels)
        self.bn3 = torch.nn.BatchNorm1d(out_channels)

        self.lin1 = torch.nn.Linear(out_channels, 32)
        self.lin2 = torch.nn.Linear(32, num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # First GCN and pooling layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        #x = self.bn1(x)
        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)

        # Second GCN and pooling layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        #x = self.bn2(x)
        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)

        # Third GCN layer
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        #x = self.bn3(x)

        # Mean pooling over the nodes
        x, mask = to_dense_batch(x, batch)
        x = x.mean(dim=1)

        # Fully connected layers
        x = self.lin1(x).relu()
        x = self.lin2(x)
        return F.log_softmax(x, dim=-1)


num_classes = dataset_sparse.num_classes
in_channels = dataset_sparse.num_features

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HierarchicalGCN_TOPK(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_loader.dataset)

def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seeds = [42, 43, 44]
times = []
memories = []
best_val_accs = []
best_test_accs = []

early_stop_patience = 150
tolerance = 0.0001

for seed in seeds:
    set_seed(seed)

    dataset_sparse = dataset_sparse.shuffle()

    train_ratio = 0.7
    val_ratio = 0.15
    val_ratio = 0.15
    # Calculate the sizes of each subset
    num_total = len(dataset_sparse)
    num_train = int(num_total * train_ratio)
    num_val = int(num_total * val_ratio)
    num_test = num_total - num_train - num_val
    train_dataset = dataset_sparse[:num_train]
    val_dataset = dataset_sparse[num_train:num_train + num_val]
    test_dataset = dataset_sparse[num_train + num_val:]
    train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
    valid_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

    model = HierarchicalGCN_TOPK(in_channels=dataset_sparse.num_features, hidden_channels=64,out_channels=64, num_classes=dataset_sparse.num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    start_time = time.time()

    best_val_acc = 0
    epochs_no_improve = 0

    for epoch in range(1, 201):
        loss = train()
        val_acc = test(valid_loader)
        test_acc = test(test_loader)
        if val_acc > best_val_acc + tolerance:
            best_val_acc = val_acc
            best_test_acc = test_acc
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        #print(f'Seed: {seed}, Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')

        if epochs_no_improve >= early_stop_patience:
            print(f'Early stopping at epoch {epoch} for seed {seed}')
            break

    end_time = time.time()
    total_time = end_time - start_time
    memory_allocated = torch.cuda.memory_reserved(device) / (1024 ** 2)  # Convert to MB

    times.append(total_time)
    memories.append(memory_allocated)
    best_val_accs.append(best_val_acc)
    best_test_accs.append(best_test_acc)

    torch.cuda.empty_cache()

print(f'Average Time: {np.mean(times):.2f} seconds')
print(f'Var Time: {np.var(times):.2f} seconds')
print(f'Average Memory: {np.mean(memories):.2f} MB')
print(f'Average Best Val Acc: {np.mean(best_val_accs):.4f}')
print(f'Std Best Test Acc: {np.std(best_test_accs):.4f}')
print(f'Average Test Acc: {np.mean(best_test_accs):.4f}')

Average Time: 257.23 seconds
Var Time: 2690.07 seconds
Average Memory: 9379.33 MB
Average Best Val Acc: 0.7751
Std Best Test Acc: 0.0113
Average Test Acc: 0.7538


# Graph Regression

### QM7

In [2]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --topk_ratio=0.1 --pooling='TopK'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --topk_ratio=0.3 --pooling='TopK'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --topk_ratio=0.5 --pooling='TopK'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --topk_ratio=0.7 --pooling='TopK'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm7 --run_times=5 --patience=150 --epochs=500 --topk_ratio=0.9 --pooling='TopK'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: qm7, include 6832 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/500MAE=1550.1357 MAE=1548.9111 MAE=1544.3176 MAE=1541.3911 MAE=1538.6223 MAE=1535.8312 MAE=1532.3088 MAE=1527.5659 MAE=1524.3118 Epoch: 10/500MAE=1520.0950 MAE=1516.6558 MAE=1510.3894 MAE=1505.8710 MAE=1498.8002 MAE=1493.2722 MAE=1486.2177 MAE=1479.6128 MAE=1471.7745 MAE=1464.8569 Epoch: 20/500MAE=1455.5422 MAE=1447.3185 MAE=1439.3657 MAE=1430.2181 MAE=1417.7734 MAE=1410.8145 MAE=1398.8132 MAE=1388.2728 MAE=1373.7764 MAE=1366.6654 Epoch: 30/500MAE=1352.9722 MAE=1326.2085 MAE=1309.7583 MAE=1304.8232 MAE=1289.0845 MAE=1276.4613 MAE=1259.2396 MAE=1242.4783 MAE=1229.8147 MAE=1210.8958 Epoch: 40/500MAE=1190.4221 MAE=1177.0272 MAE=1166.2898 MAE=1199.1882 MAE=1135.8846 MAE=1096.9941 MAE=1044.5836 MAE=1055.4502 MAE=1061.4122 MAE=1013.4131 Epoch: 50/500MAE=1004.9944 MAE=995.3588 MAE=934.2

### QM8

In [1]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --topk_ratio=0.1 --pooling='TopK'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --topk_ratio=0.3 --pooling='TopK'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --topk_ratio=0.5 --pooling='TopK'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --topk_ratio=0.7 --pooling='TopK'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=qm8 --run_times=5 --patience=10 --epochs=150 --topk_ratio=0.9 --pooling='TopK'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: qm8, include 21783 molecules and 12 regression tasks

Splitting, finish 1/1  
Epoch: 1/150MAE=0.0902 MAE=0.0551 MAE=0.0433 MAE=0.0346 MAE=0.0348 MAE=0.0339 MAE=0.0320 MAE=0.0324 MAE=0.0304 Epoch: 10/150MAE=0.0306 MAE=0.0297 MAE=0.0291 MAE=0.0296 MAE=0.0310 MAE=0.0299 MAE=0.0313 MAE=0.0294 MAE=0.0291 MAE=0.0282 Epoch: 20/150MAE=0.0282 MAE=0.0280 MAE=0.0279 MAE=0.0278 MAE=0.0277 MAE=0.0277 MAE=0.0278 MAE=0.0277 MAE=0.0277 MAE=0.0279 Epoch: 30/150MAE=0.0275 MAE=0.0275 MAE=0.0276 MAE=0.0275 MAE=0.0274 MAE=0.0274 MAE=0.0274 MAE=0.0273 MAE=0.0273 MAE=0.0274 Epoch: 40/150MAE=0.0275 MAE=0.0274 MAE=0.0273 MAE=0.0273 MAE=0.0273 MAE=0.0274 MAE=0.0274 MAE=0.0274 MAE=0.0273 MAE=0.0274 Epoch: 50/150MAE=0.0274 MAE=0.0274 MAE=0.0274 MAE=0.0273 MAE=0.0279 
********************1's fold 1's run over********************
MAE: 0.028 +/- 0.000

Epoch: 1/150MAE=0.1366 MAE=0.0976 MAE=0.0

### BACE

In [1]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num 2 --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.1 --pooling='TopK'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num 2 --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.3 --pooling='TopK'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num 2 --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.5 --pooling='TopK'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num 2 --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.7 --pooling='TopK'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=bace --cuda_num 2 --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.9 --pooling='TopK'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: bace, include 1513 molecules and 1 classification tasks

Splitting, finish 1/1  
Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150Epoch: 40/150
********************1's fold 1's run over********************
AUROC: 0.776 +/- 0.000
AUPRC: 0.735 +/- 0.000

Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150
********************1's fold 2's run over********************
AUROC: 0.772 +/- 0.004
AUPRC: 0.724 +/- 0.010

Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150Epoch: 40/150
********************1's fold 3's run over********************
AUROC: 0.776 +/- 0.007
AUPRC: 0.735 +/- 0.018

Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150
********************1's fold 4's run over********************
AUROC: 0.782 +/- 0.012
AUPRC: 0.745 +/- 0.023

Epoch: 1/150Epoch: 10/150Epoch: 20/150Epoch: 30/150
********************1's fold 5's run over********************
AUROC: 0.7

### ESOL

In [2]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.1 --pooling='TopK'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.3 --pooling='TopK'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.5 --pooling='TopK'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.7 --pooling='TopK'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=esol --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.9 --pooling='TopK'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: esol, include 1127 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/150RMSE=1.7704 RMSE=1.5174 RMSE=1.4740 RMSE=1.5567 RMSE=1.4802 RMSE=1.4888 RMSE=1.4905 RMSE=1.4422 RMSE=1.4523 Epoch: 10/150RMSE=1.4645 RMSE=1.3990 RMSE=1.3678 RMSE=1.4552 RMSE=1.4062 RMSE=1.3734 RMSE=1.3606 RMSE=1.3975 RMSE=1.4020 RMSE=1.4076 Epoch: 20/150RMSE=1.3628 RMSE=1.4620 RMSE=1.6449 RMSE=1.4167 RMSE=1.4597 RMSE=1.5001 RMSE=1.4915 RMSE=1.5172 RMSE=1.5140 RMSE=1.4895 Epoch: 30/150RMSE=1.5052 RMSE=1.5034 RMSE=1.5070 RMSE=1.5286 RMSE=1.5254 RMSE=1.5304 RMSE=1.5273 RMSE=1.5363 RMSE=1.5219 RMSE=1.5302 Epoch: 40/150RMSE=1.5365 RMSE=1.5977 
********************1's fold 1's run over********************
RMSE: 1.598 +/- 0.000

Epoch: 1/150RMSE=1.5523 RMSE=1.5152 RMSE=1.4807 RMSE=1.4174 RMSE=1.4962 RMSE=1.4658 RMSE=1.4411 RMSE=1.4230 RMSE=1.4473 Epoch: 10/150RMSE=1.4491 RMSE=1.4327

### Freesolv

In [1]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.1 --pooling='TopK'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.3 --pooling='TopK'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.5 --pooling='TopK'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.7 --pooling='TopK'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=freesolv --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.9 --pooling='TopK'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: freesolv, include 639 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/150RMSE=5.2529 RMSE=4.7835 RMSE=3.9368 RMSE=3.6258 RMSE=2.9207 RMSE=3.0483 RMSE=3.3643 RMSE=3.0781 RMSE=3.6829 Epoch: 10/150RMSE=3.5803 RMSE=3.5417 RMSE=3.4914 RMSE=2.9709 RMSE=3.5161 RMSE=3.5412 RMSE=3.5544 RMSE=3.5960 RMSE=3.5984 RMSE=3.6111 Epoch: 20/150RMSE=3.6224 RMSE=3.5889 RMSE=3.5509 RMSE=3.5584 RMSE=3.5880 RMSE=3.6018 RMSE=3.9856 
********************1's fold 1's run over********************
RMSE: 3.986 +/- 0.000

Epoch: 1/150RMSE=5.0937 RMSE=4.6670 RMSE=3.6626 RMSE=2.7539 RMSE=2.3496 RMSE=1.9296 RMSE=1.7342 RMSE=1.7721 RMSE=1.6790 Epoch: 10/150RMSE=1.7397 RMSE=1.7819 RMSE=1.6749 RMSE=1.5588 RMSE=2.1204 RMSE=1.7132 RMSE=1.7974 RMSE=1.6553 RMSE=1.6832 RMSE=1.9717 Epoch: 20/150RMSE=1.8210 RMSE=1.7127 RMSE=1.8297 RMSE=1.6860 RMSE=1.8096 RMSE=1.8456 RMSE=1.8398 RMSE=1.84

### Lipophilicity

In [3]:
print("++++++++++++++++++++++0.1++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.1 --pooling='TopK'
print("++++++++++++++++++++++0.3++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.3 --pooling='TopK'
print("++++++++++++++++++++++0.5++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.5 --pooling='TopK'
print("++++++++++++++++++++++0.7++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.7 --pooling='TopK'
print("++++++++++++++++++++++0.9++++++++++++++++++++++++")
!python /data1/Pooling/Graph_Pooling_Benchmark/Regression/run_regression.py --dataset=lipo --run_times=5 --patience=20 --epochs=150 --topk_ratio=0.9 --pooling='TopK'

++++++++++++++++++++++0.1++++++++++++++++++++++++
Generating dataset...
Packaging molecules, finish 100.0%

Current dataset: lipo, include 4200 molecules and 1 regression tasks

Splitting, finish 1/1  
Epoch: 1/150RMSE=1.3203 RMSE=1.2309 RMSE=1.2293 RMSE=1.2294 RMSE=1.2034 RMSE=1.1929 RMSE=1.2144 RMSE=1.1928 RMSE=1.2082 Epoch: 10/150RMSE=1.1783 RMSE=1.2219 RMSE=1.2130 RMSE=1.2108 RMSE=1.1951 RMSE=1.1694 RMSE=1.1686 RMSE=1.1575 RMSE=1.1688 RMSE=1.1944 Epoch: 20/150RMSE=1.1747 RMSE=1.1726 RMSE=1.1551 RMSE=1.1567 RMSE=1.1605 RMSE=1.1560 RMSE=1.1420 RMSE=1.1503 RMSE=1.1518 RMSE=1.1717 Epoch: 30/150RMSE=1.1558 RMSE=1.1657 RMSE=1.1729 RMSE=1.1705 RMSE=1.1671 RMSE=1.1669 RMSE=1.1707 RMSE=1.1655 RMSE=1.1686 RMSE=1.1712 Epoch: 40/150RMSE=1.1718 RMSE=1.1714 RMSE=1.1751 RMSE=1.1783 RMSE=1.1746 RMSE=1.1757 RMSE=1.1788 RMSE=1.0852 
********************1's fold 1's run over********************
RMSE: 1.085 +/- 0.000

Epoch: 1/150RMSE=1.2369 RMSE=1.1868 RMSE=1.2046 RMSE=1.1709 RMSE=1.1304 RMSE=1.1454 