<a href="https://colab.research.google.com/github/ghommidhWassim/GNN-variants/blob/main/nodeFeatCorrGCN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"
!pip install torchvision
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.6.0+cu124.html


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for torch-geometric (pyproject.toml) ... [?25l[?25hdone
2.6.0+cu124
12.4
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (fr

In [11]:
# Standard libraries
import numpy as np
from scipy import sparse
import seaborn as sns
import pandas as pd
import time
import json
# Plotting libraries
import matplotlib.pyplot as plt
import networkx as nx
from matplotlib import cm
from IPython.display import Javascript  # Restrict height of output cell.

# PyTorch
import torch
import torch.nn.functional as F
from torch.nn import Linear
import torch.nn as nn
from torch_sparse import spmm
# import pyg_lib
import torch_sparse
from torch_geometric.utils import to_networkx
from sklearn.metrics import f1_score

# PyTorch geometric
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid,Amazon
from torch_geometric.loader import ClusterData, ClusterLoader
from torch_geometric.transforms import NormalizeFeatures, RandomNodeSplit
import torch_geometric.transforms as T
from torch_geometric.data import Data
from torch_geometric import seed_everything
from torch.profiler import profile, record_function, ProfilerActivity
import os.path as osp
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import GraphSAGE
from sklearn.metrics import accuracy_score
from transformers import AutoModelForCausalLM
from torch.profiler import profile, record_function, ProfilerActivity

In [3]:
def clean_gpu_memory():
    """Cleans GPU memory without fully resetting the CUDA context"""
    import gc
    gc.collect()  # Python garbage collection
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # PyTorch cache
        torch.cuda.reset_peak_memory_stats()  # Reset tracking
        print(f"Memory after cleanup: {torch.cuda.memory_allocated()/1024**2:.2f} MB")

def f1_micro(data, mask):
    model.eval()
    out = model(data.x.to(device), data.edge_index.to(device))
    pred = out.argmax(dim=1)
    y_true = data.y[mask].cpu().numpy()
    y_pred = pred[mask.to(device)].cpu().numpy()
    return f1_score(y_true, y_pred, average='micro')


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Initial allocated memory: {torch.cuda.memory_allocated() / (1024**2):.2f} MB")

def dataset_load():
  print(f"Using device: {device}")
  dataset = Planetoid(root='data/Planetoid', name='PubMed', transform=NormalizeFeatures())
  num_features = dataset.num_features
  num_classes = dataset.num_classes
  data = dataset[0].to(device)  # Get the first graph object.
  return num_features, data, num_classes, device,dataset

Initial allocated memory: 0.00 MB


In [5]:
num_features, data, num_classes, device, dataset = dataset_load()
print(f'Number of nodes:          {data.num_nodes}')
print(f'Number of edges:          {data.num_edges}')
print(f'Average node degree:      {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.3f}')
print(f'Has isolated nodes:       {data.has_isolated_nodes()}')
print(f'Has self-loops:           {data.has_self_loops()}')
print(f'Is undirected:            {data.is_undirected()}')

Using device: cuda


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...
Done!


Number of nodes:          19717
Number of edges:          88648
Average node degree:      4.50
Number of training nodes: 60
Training node label rate: 0.003
Has isolated nodes:       False
Has self-loops:           False
Is undirected:            True


In [6]:
class GCN(torch.nn.Module):
  def __init__(self, hidden_channels):
    super().__init__()
    self.conv1 = GCNConv(num_features, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, num_classes)

  def forward(self, x, edge_index):
    x = self.conv1(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p=0.5, training=self.training)
    x = self.conv2(x, edge_index)
    return F.log_softmax(x, dim=-1)

In [7]:
def train(data, mask):
  model.train()
  optimizer.zero_grad()  # Clear gradients.
  out = model(data.x, data.edge_index)  # Perform a single forward pass.
  loss = criterion(out[mask], data.y[mask])  # Compute the loss solely based on the training nodes.
  loss.backward()  # Derive gradients.
  optimizer.step()  # Update parameters based on gradients.
  return loss

def test(data, mask):
  model.eval()
  out = model(data.x.to(device), data.edge_index.to(device))
  pred = out.argmax(dim=1)
  correct = pred[mask.to(device)] == data.y[mask.to(device)]
  acc = int(correct.sum()) / int(mask.sum())
  return acc

In [14]:
import torch
from torch_geometric.utils import subgraph, to_networkx
import numpy as np
from scipy.stats import pearsonr

def compute_corr_matrix(features):
    # features: [num_nodes, num_features] numpy array
    n = features.shape[0]
    corr_matrix = np.ones((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            corr, _ = pearsonr(features[i], features[j])
            corr_matrix[i, j] = corr
            corr_matrix[j, i] = corr
    return corr_matrix

def select_least_correlated_nodes(nodes_idx, features_np, num_select):
    # nodes_idx: list of node indices (int) to consider neighbors for
    # features_np: numpy array of features of those nodes
    n = len(nodes_idx)
    corr_matrix = compute_corr_matrix(features_np)

    selected_indices = set()
    for i in range(n):
        # sort neighbors by ascending correlation (less correlated first)
        corr_values = corr_matrix[i]
        sorted_indices = np.argsort(np.abs(corr_values))  # abs because correlation ranges [-1,1]
        # remove self index
        sorted_indices = sorted_indices[sorted_indices != i]

        # select top-k least correlated neighbors (or all available)
        k = min(num_select, len(sorted_indices))
        for idx in sorted_indices[:k]:
            selected_indices.add(nodes_idx[idx])
    return list(selected_indices)

def custom_partition(data, num_neighbors=4, num_hops=2):
    all_nodes = set(range(data.num_nodes))
    used_nodes = set()
    subgraphs = []

    x_np = data.x.cpu().numpy()

    while len(used_nodes) < data.num_nodes:
        # Remaining nodes to pick from
        candidate_nodes = list(all_nodes - used_nodes)
        if not candidate_nodes:
            break

        # Pick a random seed node not used yet
        seed = np.random.choice(candidate_nodes)
        current_hop_nodes = [seed]
        selected_nodes = set([seed])  # Include starting node

        for hop in range(num_hops):
            neighbors = []
            # get neighbors of current hop nodes
            for node in current_hop_nodes:
                neigh = data.edge_index[1][data.edge_index[0] == node].cpu().numpy()
                neighbors.extend(neigh.tolist())
            neighbors = list(set(neighbors) - used_nodes - selected_nodes)
            if not neighbors:
                break

            # get features of neighbors and current hop nodes combined
            nodes_to_consider = current_hop_nodes + neighbors
            feats = x_np[nodes_to_consider]

            # select least correlated neighbors among neighbors
            selected_neighbors = select_least_correlated_nodes(neighbors, x_np[neighbors], num_neighbors)

            selected_nodes.update(selected_neighbors)
            current_hop_nodes = selected_neighbors

        used_nodes.update(selected_nodes)

        # Build subgraph with selected nodes
        sub_nodes = list(selected_nodes)
        edge_index_sub, _ = subgraph(sub_nodes, data.edge_index, relabel_nodes=True, num_nodes=data.num_nodes)

        x_sub = data.x[sub_nodes]
        y_sub = data.y[sub_nodes]
        mask_sub = {}
        for key in ['train_mask', 'val_mask', 'test_mask']:
            if hasattr(data, key):
                mask = getattr(data, key)
                mask_sub[key] = mask[sub_nodes]

        subgraph_data = Data(
            x=x_sub,
            edge_index=edge_index_sub,
            y=y_sub,
            **mask_sub
        )
        subgraphs.append(subgraph_data)

    return subgraphs


In [48]:
subgraphs = custom_partition(data, num_neighbors=5, num_hops=2)

from torch_geometric.loader import DataLoader
loader = DataLoader(subgraphs, batch_size=128, shuffle=True)


In [49]:
def avg_neighbors_per_node(subgraphs):
    total_neighbors = 0
    total_nodes = 0

    for g in subgraphs:
        edge_index = g.edge_index
        num_nodes = g.num_nodes
        num_edges = edge_index.size(1)
        # Each edge connects two nodes, but we count for undirected graph
        total_neighbors += num_edges
        total_nodes += num_nodes

    avg = total_neighbors / total_nodes
    print(f"Average neighbors per node: {avg:.2f}")
    return avg
avg_neighbors_per_node(subgraphs)
print(f"Number of subgraphs: {len(subgraphs)}")


Average neighbors per node: 1.71
Number of subgraphs: 1071


In [None]:
data = data.cpu()
cluster_data = ClusterData(data, num_parts=128)
train_loader = ClusterLoader(cluster_data, batch_size=32, shuffle=True)


Computing METIS partitioning...
Done!


In [43]:
def train_batch(loader):
    model.train()
    total_loss = 0
    i = 1
    for sub_data in loader:
        sub_data = sub_data.to(device)
        batch_mem = sum(t.element_size() * t.nelement() for t in [sub_data.x, sub_data.edge_index, sub_data.y])
        #print(f"Batch {i}: {batch_mem/1024**2:.2f} MB cluster data")

        out = model(sub_data.x, sub_data.edge_index)
        #print(f"  + Forward pass: {torch.cuda.max_memory_allocated()/1024**2:.2f} MB")
        loss = criterion(out[sub_data.train_mask], sub_data.y[sub_data.train_mask])
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
        i += 1
    return total_loss / i  # Optional: average loss


In [50]:

model = GCN(hidden_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss().to(device)

display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))
data=data.to(device)
res=[]
for i in range(10):
  clean_gpu_memory()
  start_time= time.time()
  for epoch in range(1, 101):
    loss = train_batch(loader)
    if epoch % 10 == 0:
      train_acc = test(data, data.train_mask)
      val_acc = test(data, data.val_mask)
      val_f1 = f1_micro(data, data.val_mask)
      print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Val F1 (micro): {val_f1:.4f}')
  test_acc = test(data, data.test_mask)
  res.append(test_acc)
  end_time = time.time()
  print(f"Training time: {end_time - start_time:.2f} seconds")


<IPython.core.display.Javascript object>

Memory after cleanup: 50.07 MB
Epoch: 010, Train Acc: 0.9643, Val Acc: 0.6960, Val F1 (micro): 0.6960
Epoch: 020, Train Acc: 0.9929, Val Acc: 0.7460, Val F1 (micro): 0.7460
Epoch: 030, Train Acc: 0.9929, Val Acc: 0.7180, Val F1 (micro): 0.7180
Epoch: 040, Train Acc: 0.9786, Val Acc: 0.7080, Val F1 (micro): 0.7080
Epoch: 050, Train Acc: 0.9857, Val Acc: 0.7580, Val F1 (micro): 0.7580
Epoch: 060, Train Acc: 0.9857, Val Acc: 0.7520, Val F1 (micro): 0.7520
Epoch: 070, Train Acc: 0.9929, Val Acc: 0.7360, Val F1 (micro): 0.7360
Epoch: 080, Train Acc: 0.9929, Val Acc: 0.7320, Val F1 (micro): 0.7320
Epoch: 090, Train Acc: 0.9929, Val Acc: 0.7200, Val F1 (micro): 0.7200
Epoch: 100, Train Acc: 0.9929, Val Acc: 0.7320, Val F1 (micro): 0.7320
Training time: 10.86 seconds
Memory after cleanup: 49.72 MB
Epoch: 010, Train Acc: 0.9929, Val Acc: 0.7240, Val F1 (micro): 0.7240
Epoch: 020, Train Acc: 0.9929, Val Acc: 0.7140, Val F1 (micro): 0.7140
Epoch: 030, Train Acc: 0.9929, Val Acc: 0.7200, Val F1 (m

In [51]:
print(res)
print(sum(res)/10)

[0.753, 0.754, 0.741, 0.733, 0.736, 0.718, 0.748, 0.762, 0.724, 0.733]
0.7402


In [52]:
test_acc = test(data, data.test_mask)

test_f1 = f1_micro(data, data.test_mask)
print(f'Test F1 Score (micro): {test_f1:.4f}')
print(f'Test test accuracy (micro):  {test_acc:.4f}')

Test F1 Score (micro): 0.7330
Test test accuracy (micro):  0.7330


In [53]:
print(f"Allocated memory after tensor creation: {torch.cuda.memory_allocated() / (1024**2):.2f} MB")
print(f"Reserved memory after tensor creation: {torch.cuda.memory_reserved() / (1024**2):.2f} MB")
print(f"Peak allocated memory: {torch.cuda.max_memory_allocated() / (1024**2):.2f} MB")


Allocated memory after tensor creation: 49.72 MB
Reserved memory after tensor creation: 90.00 MB
Peak allocated memory: 57.77 MB


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "clusterGCN",
    "accuracy": test_acc,
    "f1_micro":test_f1,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "mem_MB":peak_memory_mb
}

with open("clusterGCN_pubmed_results.json", "w") as f:
    json.dump(metrics, f)

**cora dataset**

In [54]:
clean_gpu_memory()

def dataset_load():
  print(f"Using device: {device}")
  dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())
  num_features = dataset.num_features
  num_classes = dataset.num_classes
  data = dataset[0].to(device)  # Get the first graph object.
  return num_features, data, num_classes, device,dataset

num_features, data, num_classes, device, dataset = dataset_load()
subgraphs = custom_partition(data, num_neighbors=5, num_hops=2)

loader = DataLoader(subgraphs, batch_size=128, shuffle=True)


model = GCN(hidden_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss().to(device)

display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))
data=data.to(device)

res=[]
for i in range(10):
  clean_gpu_memory()
  start_time= time.time()
  for epoch in range(1, 101):
    loss = train_batch(loader)
    if epoch % 10 == 0:
      train_acc = test(data, data.train_mask)
      val_acc = test(data, data.val_mask)
      val_f1 = f1_micro(data, data.val_mask)
      print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Val F1 (micro): {val_f1:.4f}')

  end_time = time.time()
  print(f"Training time: {end_time - start_time:.2f} seconds")
  test_acc = test(data, data.test_mask)
  res.append(test_acc)


test_acc = test(data, data.test_mask)

test_f1 = f1_micro(data, data.test_mask)
print(f'Test F1 Score (micro): {test_f1:.4f}')
print(f'Test test accuracy (micro):  {test_acc:.4f}')

Memory after cleanup: 49.72 MB
Using device: cuda


<IPython.core.display.Javascript object>

Memory after cleanup: 66.54 MB
Epoch: 010, Train Acc: 0.9643, Val Acc: 0.7800, Val F1 (micro): 0.7800
Epoch: 020, Train Acc: 0.9786, Val Acc: 0.7860, Val F1 (micro): 0.7860
Epoch: 030, Train Acc: 0.9786, Val Acc: 0.7820, Val F1 (micro): 0.7820
Epoch: 040, Train Acc: 0.9929, Val Acc: 0.7720, Val F1 (micro): 0.7720
Epoch: 050, Train Acc: 0.9857, Val Acc: 0.7720, Val F1 (micro): 0.7720
Epoch: 060, Train Acc: 0.9929, Val Acc: 0.7800, Val F1 (micro): 0.7800
Epoch: 070, Train Acc: 0.9929, Val Acc: 0.7780, Val F1 (micro): 0.7780
Epoch: 080, Train Acc: 1.0000, Val Acc: 0.7780, Val F1 (micro): 0.7780
Epoch: 090, Train Acc: 1.0000, Val Acc: 0.7760, Val F1 (micro): 0.7760
Epoch: 100, Train Acc: 0.9929, Val Acc: 0.7800, Val F1 (micro): 0.7800
Training time: 10.51 seconds
Memory after cleanup: 49.67 MB
Epoch: 010, Train Acc: 0.9929, Val Acc: 0.7760, Val F1 (micro): 0.7760
Epoch: 020, Train Acc: 0.9857, Val Acc: 0.7500, Val F1 (micro): 0.7500
Epoch: 030, Train Acc: 0.9929, Val Acc: 0.7680, Val F1 (m

In [55]:
print(res)
print(sum(res)/10)

[0.779, 0.771, 0.772, 0.78, 0.756, 0.769, 0.772, 0.759, 0.75, 0.763]
0.7671000000000001


In [56]:
print(f"Allocated memory after tensor creation: {torch.cuda.memory_allocated() / (1024**2):.2f} MB")
print(f"Reserved memory after tensor creation: {torch.cuda.memory_reserved() / (1024**2):.2f} MB")
print(f"Peak allocated memory: {torch.cuda.max_memory_allocated() / (1024**2):.2f} MB")


Allocated memory after tensor creation: 49.67 MB
Reserved memory after tensor creation: 90.00 MB
Peak allocated memory: 57.72 MB


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "clusterGCN",
    "accuracy": test_acc,
    "f1_micro":test_f1,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "mem_MB":peak_memory_mb
}

with open("clusterGCN_Cora_results.json", "w") as f:
    json.dump(metrics, f)

**citeser dataset**

In [57]:
clean_gpu_memory()

def dataset_load():
  print(f"Using device: {device}")
  dataset = Planetoid(root='data/Planetoid', name='CiteSeer', transform=NormalizeFeatures())
  num_features = dataset.num_features
  num_classes = dataset.num_classes
  data = dataset[0].to(device)  # Get the first graph object.
  return num_features, data, num_classes, device,dataset

num_features, data, num_classes, device, dataset = dataset_load()
subgraphs = custom_partition(data, num_neighbors=5, num_hops=2)

loader = DataLoader(subgraphs, batch_size=128, shuffle=True)


model = GCN(hidden_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss().to(device)

display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))
data=data.to(device)
res=[]
for i in range(10):
  clean_gpu_memory()
  start_time= time.time()
  for epoch in range(1, 101):
    loss = train_batch(loader)
    if epoch % 10 == 0:
      train_acc = test(data, data.train_mask)
      val_acc = test(data, data.val_mask)
      val_f1 = f1_micro(data, data.val_mask)
      print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Val F1 (micro): {val_f1:.4f}')

  end_time = time.time()
  print(f"Training time: {end_time - start_time:.2f} seconds")
  test_acc = test(data, data.test_mask)
  res.append(test_acc)


test_acc = test(data, data.test_mask)

test_f1 = f1_micro(data, data.test_mask)
print(f'Test F1 Score (micro): {test_f1:.4f}')
print(f'Test test accuracy (micro):  {test_acc:.4f}')

Memory after cleanup: 49.67 MB
Using device: cuda


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!
  corr, _ = pearsonr(features[i], features[j])


<IPython.core.display.Javascript object>

Memory after cleanup: 130.57 MB
Epoch: 010, Train Acc: 0.9667, Val Acc: 0.6660, Val F1 (micro): 0.6660
Epoch: 020, Train Acc: 0.9750, Val Acc: 0.6680, Val F1 (micro): 0.6680
Epoch: 030, Train Acc: 0.9833, Val Acc: 0.7040, Val F1 (micro): 0.7040
Epoch: 040, Train Acc: 0.9917, Val Acc: 0.6820, Val F1 (micro): 0.6820
Epoch: 050, Train Acc: 0.9750, Val Acc: 0.6900, Val F1 (micro): 0.6900
Epoch: 060, Train Acc: 0.9917, Val Acc: 0.6860, Val F1 (micro): 0.6860
Epoch: 070, Train Acc: 0.9917, Val Acc: 0.6860, Val F1 (micro): 0.6860
Epoch: 080, Train Acc: 0.9917, Val Acc: 0.6860, Val F1 (micro): 0.6860
Epoch: 090, Train Acc: 0.9917, Val Acc: 0.6740, Val F1 (micro): 0.6740
Epoch: 100, Train Acc: 0.9833, Val Acc: 0.6700, Val F1 (micro): 0.6700
Training time: 16.56 seconds
Memory after cleanup: 117.05 MB
Epoch: 010, Train Acc: 0.9917, Val Acc: 0.6880, Val F1 (micro): 0.6880
Epoch: 020, Train Acc: 0.9917, Val Acc: 0.6840, Val F1 (micro): 0.6840
Epoch: 030, Train Acc: 0.9917, Val Acc: 0.6720, Val F1 

In [58]:
print(res)
print(sum(res)/10)
print(f"Allocated memory after tensor creation: {torch.cuda.memory_allocated() / (1024**2):.2f} MB")
print(f"Reserved memory after tensor creation: {torch.cuda.memory_reserved() / (1024**2):.2f} MB")
print(f"Peak allocated memory: {torch.cuda.max_memory_allocated() / (1024**2):.2f} MB")


[0.674, 0.662, 0.657, 0.671, 0.644, 0.595, 0.666, 0.63, 0.668, 0.649]
0.6516000000000001
Allocated memory after tensor creation: 117.05 MB
Reserved memory after tensor creation: 156.00 MB
Peak allocated memory: 127.21 MB


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "clusterGCN",
    "accuracy": test_acc,
    "f1_micro":test_f1,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "mem_MB":peak_memory_mb
}

with open("clusterGCN_CiteSeer_results.json", "w") as f:
    json.dump(metrics, f)

**Amazon dataset**

In [59]:
clean_gpu_memory()

def dataset_load():
  print(f"Using device: {device}")
  dataset = Amazon(
        root='data/Amazon',
        name='Computers',
        transform=T.Compose([
        NormalizeFeatures(),          # feature‑wise ℓ₂ normalisation
        RandomNodeSplit(              # ⇦ add a split transform
                split='train_rest',       # 10% val, 10% test by default
                num_val=0.1,
                num_test=0.1,
                num_splits=1,
            )
        ])
    )
  num_features = dataset.num_features
  num_classes = dataset.num_classes
  data = dataset[0].to(device)  # Get the first graph object.
  return num_features, data, num_classes, device,dataset

num_features, data, num_classes, device, dataset = dataset_load()
subgraphs = custom_partition(data, num_neighbors=5, num_hops=2)

loader = DataLoader(subgraphs, batch_size=128, shuffle=True)

model = GCN(hidden_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss().to(device)

display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))
data=data.to(device)

res=[]
for i in range(10):
  clean_gpu_memory()
  start_time= time.time()
  for epoch in range(1, 101):
    loss = train_batch(loader)
    if epoch % 10 == 0:
      train_acc = test(data, data.train_mask)
      val_acc = test(data, data.val_mask)
      val_f1 = f1_micro(data, data.val_mask)
      print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Val F1 (micro): {val_f1:.4f}')

  end_time = time.time()
  print(f"Training time: {end_time - start_time:.2f} seconds")
  test_acc = test(data, data.test_mask)
  res.append(test_acc)

test_acc = test(data, data.test_mask)

test_f1 = f1_micro(data, data.test_mask)
print(f'Test F1 Score (micro): {test_f1:.4f}')
print(f'Test test accuracy (micro):  {test_acc:.4f}')

Memory after cleanup: 117.05 MB
Using device: cuda


Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_computers.npz
Processing...
Done!
  corr, _ = pearsonr(features[i], features[j])


<IPython.core.display.Javascript object>

Memory after cleanup: 166.81 MB
Epoch: 010, Train Acc: 0.6537, Val Acc: 0.6793, Val F1 (micro): 0.6793
Epoch: 020, Train Acc: 0.7433, Val Acc: 0.7549, Val F1 (micro): 0.7549
Epoch: 030, Train Acc: 0.7874, Val Acc: 0.7920, Val F1 (micro): 0.7920
Epoch: 040, Train Acc: 0.8193, Val Acc: 0.8160, Val F1 (micro): 0.8160
Epoch: 050, Train Acc: 0.8464, Val Acc: 0.8422, Val F1 (micro): 0.8422
Epoch: 060, Train Acc: 0.8504, Val Acc: 0.8451, Val F1 (micro): 0.8451
Epoch: 070, Train Acc: 0.8672, Val Acc: 0.8655, Val F1 (micro): 0.8655
Epoch: 080, Train Acc: 0.8738, Val Acc: 0.8647, Val F1 (micro): 0.8647
Epoch: 090, Train Acc: 0.8876, Val Acc: 0.8785, Val F1 (micro): 0.8785
Epoch: 100, Train Acc: 0.8787, Val Acc: 0.8705, Val F1 (micro): 0.8705
Training time: 24.53 seconds
Memory after cleanup: 113.57 MB
Epoch: 010, Train Acc: 0.8912, Val Acc: 0.8749, Val F1 (micro): 0.8749
Epoch: 020, Train Acc: 0.8902, Val Acc: 0.8778, Val F1 (micro): 0.8778
Epoch: 030, Train Acc: 0.8977, Val Acc: 0.8865, Val F1 

In [60]:
print(res)
print(sum(res)/10)
print(f"Allocated memory after tensor creation: {torch.cuda.memory_allocated() / (1024**2):.2f} MB")
print(f"Reserved memory after tensor creation: {torch.cuda.memory_reserved() / (1024**2):.2f} MB")
print(f"Peak allocated memory: {torch.cuda.max_memory_allocated() / (1024**2):.2f} MB")


[0.8552727272727273, 0.885090909090909, 0.8887272727272727, 0.8836363636363637, 0.901090909090909, 0.9032727272727272, 0.8974545454545455, 0.9054545454545454, 0.9061818181818182, 0.896]
0.8922181818181819
Allocated memory after tensor creation: 113.57 MB
Reserved memory after tensor creation: 538.00 MB
Peak allocated memory: 378.43 MB


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "clusterGCN",
    "accuracy": test_acc,
    "f1_micro":test_f1,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "mem_MB":peak_memory_mb
}

with open("clusterGCN_Amazon_results.json", "w") as f:
    json.dump(metrics, f)