<a href="https://colab.research.google.com/github/ghommidhWassim/GNN-variants/blob/main/graphSAGE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"
!pip install torchvision
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.6.0+cu124.html


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for torch-geometric (pyproject.toml) ... [?25l[?25hdone
2.6.0+cu124
12.4
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (fr

In [2]:
# Standard libraries
import numpy as np
from scipy import sparse
import seaborn as sns
import pandas as pd
import time
# Plotting libraries
import matplotlib.pyplot as plt
import networkx as nx
from matplotlib import cm
from IPython.display import Javascript  # Restrict height of output cell.

# PyTorch
import torch
import torch.nn.functional as F
from torch.nn import Linear
import torch.nn as nn
from torch_sparse import spmm
# import pyg_lib
import torch_sparse

# PyTorch geometric
from torch_geometric.nn import GCNConv,SAGEConv
from torch_geometric.datasets import Planetoid,Amazon
from torch_geometric.transforms import NormalizeFeatures, RandomNodeSplit
import torch_geometric.transforms as T
from torch_geometric import seed_everything
import torch
import os.path as osp
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import NeighborLoader
import json
from sklearn.metrics import f1_score


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def dataset_load():
  print(f"Using device: {device}")
  dataset = Planetoid(root='data/Planetoid', name='PubMed', transform=NormalizeFeatures())
  num_features = dataset.num_features
  num_classes = dataset.num_classes
  data = dataset[0].to(device)  # Get the first graph object.
  return num_features, data, num_classes, device,dataset

def clean_gpu_memory():
    """Cleans GPU memory without fully resetting the CUDA context"""
    import gc
    gc.collect()  # Python garbage collection
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # PyTorch cache
        torch.cuda.reset_peak_memory_stats()  # Reset tracking
        print(f"Memory after cleanup: {torch.cuda.memory_allocated()/1024**2:.2f} MB")

num_features, data, num_classes, device, dataset = dataset_load()
print(f'Number of nodes:          {data.num_nodes}')
print(f'Number of edges:          {data.num_edges}')
print(f'Average node degree:      {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.3f}')
print(f'Has isolated nodes:       {data.has_isolated_nodes()}')
print(f'Has self-loops:           {data.has_self_loops()}')
print(f'Is undirected:            {data.is_undirected()}')
num_features

Using device: cuda


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...
Done!


Number of nodes:          19717
Number of edges:          88648
Average node degree:      4.50
Number of training nodes: 60
Training node label rate: 0.003
Has isolated nodes:       False
Has self-loops:           False
Is undirected:            True


500

In [4]:
print(f"Allocated memory : {torch.cuda.memory_allocated() / (1024**2):.2f} MB")
print(f"Reserved memory : {torch.cuda.memory_reserved() / (1024**2):.2f} MB")
print(f"Peak allocated memory: {torch.cuda.max_memory_allocated() / (1024**2):.2f} MB")


Allocated memory : 39.56 MB
Reserved memory : 62.00 MB
Peak allocated memory: 45.02 MB


In [5]:
train_loader = NeighborLoader(
    data,
    input_nodes=data.train_mask,
    num_neighbors=[10, 10],  # s = 10 per layer (2 layers)
    batch_size=128,
    shuffle=True,
)


In [6]:
print(f"Allocated memory : {torch.cuda.memory_allocated() / (1024**2):.2f} MB")
print(f"Reserved memory : {torch.cuda.memory_reserved() / (1024**2):.2f} MB")
print(f"Peak allocated memory: {torch.cuda.max_memory_allocated() / (1024**2):.2f} MB")

Allocated memory : 39.56 MB
Reserved memory : 62.00 MB
Peak allocated memory: 45.02 MB


In [7]:
class testGraphSAGE(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers, dropout=0.5):
        super().__init__()
        self.num_layers = num_layers
        self.dropout = dropout

        self.convs = torch.nn.ModuleList()
        # First layer: in_channels -> hidden_channels
        self.convs.append(SAGEConv(in_channels, hidden_channels))
        # Intermediate layers: hidden_channels -> hidden_channels
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))
        # Last layer: hidden_channels -> out_channels (optional, if no linear layers)
        self.convs.append(SAGEConv(hidden_channels, out_channels))

        # Optional MLP head (for further transformation)
        self.lin1 = Linear(out_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        for conv in self.convs[:-1]:
            x = F.relu(conv(x, edge_index))
            x = F.dropout(x, p=self.dropout, training=self.training)

        # Last layer (no ReLU/Dropout for classification)
        x = self.convs[-1](x, edge_index)

        # Optional MLP head
        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.lin2(x)

        return F.log_softmax(x, dim=1)

In [8]:
hidden_channels = 64

model = testGraphSAGE(
    in_channels=dataset.num_features,  # Input feature dimension
    hidden_channels=64,               # Hidden layer size
    num_layers=2,                     # Number of SAGEConv layers
    out_channels=dataset.num_classes,  # Output dimension (number of classes)
    dropout=0.5,                      # Dropout rate                         # Jumping Knowledge (optional: "cat", "max", "lstm")
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.NLLLoss()  # Negative Log Likelihood (used with log_softmax)


In [9]:
# Minibatch training function
def train():
    model.train()
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        loss = criterion(out[batch.train_mask], batch.y[batch.train_mask])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

# Full-batch evaluation (for simplicity)
def evaluate(mask):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        pred = out[mask].argmax(dim=1)
        acc = (pred == data.y[mask]).float().mean().item()
    return acc

In [10]:
# Training loop

res=[]
for i in range(10):
  start_time = time.time()

  allocated_mem = []
  reserved_mem = []
  peak_mem = []
  for epoch in range(1, 101):
      #torch.cuda.reset_peak_memory_stats()  # reset peak tracking
      loss = train()
      val_acc = evaluate(data.val_mask)
      print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}")

      alloc = torch.cuda.memory_allocated() / 1024**2
      reserv = torch.cuda.memory_reserved() / 1024**2
      peak = torch.cuda.max_memory_allocated() / 1024**2

      allocated_mem.append(alloc)
      reserved_mem.append(reserv)
      peak_mem.append(peak)

  end_time = time.time()
  print(f"Training time: {end_time - start_time:.2f} seconds")
  res.append(evaluate(data.test_mask))

# Test accuracy
test_acc = evaluate(data.test_mask)
print(f"Test Accuracy: {test_acc:.4f}")


model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
    preds = out.argmax(dim=1)  # predicted class indices
    true_labels = data.y

    # Apply mask to get only test node predictions
    test_preds = preds[data.test_mask].cpu().numpy()
    test_labels = true_labels[data.test_mask].cpu().numpy()

    # Compute F1 score (micro)
    f1_micro = f1_score(test_labels, test_preds, average='micro')
    print(f"F1 Score (micro): {f1_micro:.4f}")


Epoch: 001, Loss: 1.0928, Val Acc: 0.1960
Epoch: 002, Loss: 1.1312, Val Acc: 0.3880
Epoch: 003, Loss: 1.1230, Val Acc: 0.6020
Epoch: 004, Loss: 1.0677, Val Acc: 0.5160
Epoch: 005, Loss: 1.0841, Val Acc: 0.5300
Epoch: 006, Loss: 1.0520, Val Acc: 0.6840
Epoch: 007, Loss: 1.0427, Val Acc: 0.6740
Epoch: 008, Loss: 1.0127, Val Acc: 0.6260
Epoch: 009, Loss: 0.9654, Val Acc: 0.6000
Epoch: 010, Loss: 0.9272, Val Acc: 0.5880
Epoch: 011, Loss: 0.8416, Val Acc: 0.5980
Epoch: 012, Loss: 0.7853, Val Acc: 0.6240
Epoch: 013, Loss: 0.7431, Val Acc: 0.6480
Epoch: 014, Loss: 0.6535, Val Acc: 0.6700
Epoch: 015, Loss: 0.5705, Val Acc: 0.6960
Epoch: 016, Loss: 0.4701, Val Acc: 0.7060
Epoch: 017, Loss: 0.4148, Val Acc: 0.7320
Epoch: 018, Loss: 0.3565, Val Acc: 0.7440
Epoch: 019, Loss: 0.3549, Val Acc: 0.7340
Epoch: 020, Loss: 0.2946, Val Acc: 0.7260
Epoch: 021, Loss: 0.2482, Val Acc: 0.7340
Epoch: 022, Loss: 0.1713, Val Acc: 0.7600
Epoch: 023, Loss: 0.1402, Val Acc: 0.7880
Epoch: 024, Loss: 0.1086, Val Acc:

In [11]:
print(res)
print(sum(res)/10)

[0.76500004529953, 0.7660000324249268, 0.7460000514984131, 0.7460000514984131, 0.7710000276565552, 0.7870000600814819, 0.7760000228881836, 0.7400000095367432, 0.7290000319480896, 0.7720000147819519]
0.7598000347614289


In [None]:
test_acc = evaluate(data.test_mask)
print(f"Test Accuracy: {test_acc:.4f}")


Test Accuracy: 0.7120


In [None]:
print(f"Allocated memory : {torch.cuda.memory_allocated() / (1024**2):.2f} MB")
print(f"Reserved memory : {torch.cuda.memory_reserved() / (1024**2):.2f} MB")
print(f"Peak allocated memory: {torch.cuda.max_memory_allocated() / (1024**2):.2f} MB")


Allocated memory : 57.19 MB
Reserved memory : 330.00 MB
Peak allocated memory: 303.26 MB


In [None]:
summ=torch.cuda.memory_summary()

In [None]:
import math

def calculate_graphsage_memory(
    batch_size: int,
    hidden_dim: int,
    num_layers: int,
    num_neighbors: int,
    num_features: int = None,
    num_classes: int = None,
    dtype_bytes: int = 4,  # float32=4, float16=2
    verbose: bool = True
) -> dict:
    """
    Calculate memory consumption for GraphSAGE according to LADIES paper's formulation:
    - Embedding storage: O(b * K * s^{L-1})
    - Weight matrices: O(L * K^2)

    Where:
        b = batch_size,
        K = hidden_dim,
        s = num_neighbors (per layer),
        L = num_layers

    Args:
        batch_size (int): Number of seed nodes (b).
        hidden_dim (int): Hidden dimension size (K).
        num_layers (int): Number of layers (L).
        num_neighbors (int): Neighbor sample size per layer (s).
        num_features (int): Input feature dimension (optional, for first layer weights).
        num_classes (int): Output dimension (optional, for last layer weights).
        dtype_bytes (int): Bytes per parameter (4 for float32, 2 for float16).
        verbose (bool): Print detailed breakdown.

    Returns:
        dict: Memory components in bytes and MB.
    """
    # --- Embedding Storage (O(b * K * s^{L-1})) ---
    embedding_memory_bytes = batch_size * hidden_dim * (num_neighbors ** (num_layers - 1)) * dtype_bytes

    # --- Weight Matrices (O(L * K^2)) ---
    # First layer: (num_features -> hidden_dim) if provided
    if num_features is not None:
        weight_memory_bytes = num_features * hidden_dim * dtype_bytes
        remaining_layers = num_layers - 1
    else:
        weight_memory_bytes = 0
        remaining_layers = num_layers

    # Hidden layers: L-1 layers of (K x K)
    weight_memory_bytes += remaining_layers * (hidden_dim * hidden_dim) * dtype_bytes

    # Output layer: (K -> num_classes) if provided
    if num_classes is not None:
        weight_memory_bytes += hidden_dim * num_classes * dtype_bytes

    total_memory_bytes = embedding_memory_bytes + weight_memory_bytes

    # Convert to MB
    embedding_memory_mb = embedding_memory_bytes / (1024 ** 2)
    weight_memory_mb = weight_memory_bytes / (1024 ** 2)
    total_memory_mb = total_memory_bytes / (1024 ** 2)

    if verbose:
        print("\n=== GraphSAGE Memory Breakdown ===")
        print(f"Batch size (b): {batch_size}")
        print(f"Hidden dim (K): {hidden_dim}")
        print(f"Layers (L): {num_layers}")
        print(f"Neighbors (s): {num_neighbors}")
        print(f"Data type: {'float32' if dtype_bytes == 4 else 'float16'}")
        print("\nEmbedding Storage:")
        print(f"- Formula: O(b * K * s^(L-1)) = {batch_size} * {hidden_dim} * {num_neighbors}^({num_layers}-1)")
        print(f"- Memory: {embedding_memory_bytes:,} bytes ({embedding_memory_mb:.2f} MB)")
        print("\nWeight Matrices:")
        print(f"- Formula: O(L * K^2) = {num_layers} * {hidden_dim}^2")
        if num_features is not None:
            print(f"  (First layer: {num_features} -> {hidden_dim})")
        if num_classes is not None:
            print(f"  (Last layer: {hidden_dim} -> {num_classes})")
        print(f"- Memory: {weight_memory_bytes:,} bytes ({weight_memory_mb:.2f} MB)")
        print("\nTotal Memory:")
        print(f"- Total: {total_memory_bytes:,} bytes ({total_memory_mb:.2f} MB)")

    return {
        'embedding_bytes': embedding_memory_bytes,
        'weight_bytes': weight_memory_bytes,
        'total_bytes': total_memory_bytes,
        'embedding_mb': embedding_memory_mb,
        'weight_mb': weight_memory_mb,
        'total_mb': total_memory_mb,
        'formula': {
            'embedding': f'O(b*K*s^(L-1)) = {batch_size}*{hidden_dim}*{num_neighbors}^({num_layers-1})',
            'weights': f'O(L*K^2) = {num_layers}*{hidden_dim}^2'
        }
    }


# Example usage
if __name__ == "__main__":
    # PubMed dataset example
    memory_stats = calculate_graphsage_memory(
        batch_size=128,
        hidden_dim=64,
        num_layers=2,
        num_neighbors=10,
        num_features=500,  # PubMed node features
        num_classes=3,     # PubMed classes
        dtype_bytes=4      # float32
    )


=== GraphSAGE Memory Breakdown ===
Batch size (b): 128
Hidden dim (K): 64
Layers (L): 2
Neighbors (s): 10
Data type: float32

Embedding Storage:
- Formula: O(b * K * s^(L-1)) = 128 * 64 * 10^(2-1)
- Memory: 327,680 bytes (0.31 MB)

Weight Matrices:
- Formula: O(L * K^2) = 2 * 64^2
  (First layer: 500 -> 64)
  (Last layer: 64 -> 3)
- Memory: 145,152 bytes (0.14 MB)

Total Memory:
- Total: 472,832 bytes (0.45 MB)


In [None]:
memory_stats["embedding_mb"]

0.3125

In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "graphSAGE",
    "accuracy": test_acc,
    "f1_micro":f1_micro,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "embedding_storage":memory_stats["embedding_mb"],
    "Weight_Matrices":memory_stats["weight_mb"],
    "Total_Memory":memory_stats["total_mb"]
}

with open("graphSAGE_pubmed_results.json", "w") as f:
    json.dump(metrics, f)

**CORA** **Dataset**

In [12]:
clean_gpu_memory()
def dataset_load():
  print(f"Using device: {device}")
  dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())
  num_features = dataset.num_features
  num_classes = dataset.num_classes
  data = dataset[0].to(device)  # Get the first graph object.
  return num_features, data, num_classes, device,dataset

num_features, data, num_classes, device, dataset = dataset_load()
print(f'Number of features:       {num_features}')
print(f'Number of classes:       {num_classes}')

print(f'Number of nodes:          {data.num_nodes}')
print(f'Number of edges:          {data.num_edges}')
print(f'Average node degree:      {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.3f}')
print(f'Has isolated nodes:       {data.has_isolated_nodes()}')
print(f'Has self-loops:           {data.has_self_loops()}')
print(f'Is undirected:            {data.is_undirected()}')

train_loader = NeighborLoader(
    data,
    input_nodes=data.train_mask,
    num_neighbors=[10, 10],  # s = 10 per layer (2 layers)
    batch_size=128,
    shuffle=True,
)
hidden_channels = 64

model = testGraphSAGE(
    in_channels=dataset.num_features,  # Input feature dimension
    hidden_channels=64,               # Hidden layer size
    num_layers=2,                     # Number of SAGEConv layers
    out_channels=dataset.num_classes,  # Output dimension (number of classes)
    dropout=0.5,                      # Dropout rate                         # Jumping Knowledge (optional: "cat", "max", "lstm")
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.NLLLoss()  # Negative Log Likelihood (used with log_softmax)
# Training loop


res=[]
for i in range(10):
  clean_gpu_memory()
  start_time = time.time()

  allocated_mem = []
  reserved_mem = []
  peak_mem = []
  for epoch in range(1, 101):
      #torch.cuda.reset_peak_memory_stats()  # reset peak tracking
      loss = train()
      val_acc = evaluate(data.val_mask)
      print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}")

      alloc = torch.cuda.memory_allocated() / 1024**2
      reserv = torch.cuda.memory_reserved() / 1024**2
      peak = torch.cuda.max_memory_allocated() / 1024**2

      allocated_mem.append(alloc)
      reserved_mem.append(reserv)
      peak_mem.append(peak)
  end_time = time.time()
  print(f"Training time: {end_time - start_time:.2f} seconds")
  res.append(evaluate(data.test_mask))
# Test accuracy
test_acc = evaluate(data.test_mask)
print(f"Test Accuracy: {test_acc:.4f}")

model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
    preds = out.argmax(dim=1)  # predicted class indices
    true_labels = data.y

    # Apply mask to get only test node predictions
    test_preds = preds[data.test_mask].cpu().numpy()
    test_labels = true_labels[data.test_mask].cpu().numpy()

    # Compute F1 score (micro)
    f1_micro = f1_score(test_labels, test_preds, average='micro')
    print(f"F1 Score (micro): {f1_micro:.4f}")


Memory after cleanup: 57.19 MB
Using device: cuda


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


Number of features:       1433
Number of classes:       7
Number of nodes:          2708
Number of edges:          10556
Average node degree:      3.90
Number of training nodes: 140
Training node label rate: 0.052
Has isolated nodes:       False
Has self-loops:           False
Is undirected:            True
Memory after cleanup: 72.89 MB
Epoch: 001, Loss: 1.9260, Val Acc: 0.1620
Epoch: 002, Loss: 1.9570, Val Acc: 0.1620
Epoch: 003, Loss: 1.9143, Val Acc: 0.1620
Epoch: 004, Loss: 1.9021, Val Acc: 0.1620
Epoch: 005, Loss: 1.9473, Val Acc: 0.1620
Epoch: 006, Loss: 1.9437, Val Acc: 0.1620
Epoch: 007, Loss: 1.8912, Val Acc: 0.2200
Epoch: 008, Loss: 1.8552, Val Acc: 0.0820
Epoch: 009, Loss: 1.8408, Val Acc: 0.0900
Epoch: 010, Loss: 1.6817, Val Acc: 0.1800
Epoch: 011, Loss: 1.7051, Val Acc: 0.2540
Epoch: 012, Loss: 1.4854, Val Acc: 0.2220
Epoch: 013, Loss: 1.4164, Val Acc: 0.1860
Epoch: 014, Loss: 1.4187, Val Acc: 0.2380
Epoch: 015, Loss: 1.2736, Val Acc: 0.2920
Epoch: 016, Loss: 1.0406, Val 

In [13]:
print(res)
print(sum(res)/10)

[0.7280000448226929, 0.7420000433921814, 0.734000027179718, 0.7600000500679016, 0.7540000081062317, 0.7420000433921814, 0.7250000238418579, 0.7520000338554382, 0.7290000319480896, 0.718000054359436]
0.7384000360965729


In [None]:
if __name__ == "__main__":
    # Cora dataset example
    memory_stats = calculate_graphsage_memory(
        batch_size=128,
        hidden_dim=64,
        num_layers=2,
        num_neighbors=10,
        num_features=1433,  # CORA node features
        num_classes=7,     # CORA classes
        dtype_bytes=4      # float32
    )


=== GraphSAGE Memory Breakdown ===
Batch size (b): 128
Hidden dim (K): 64
Layers (L): 2
Neighbors (s): 10
Data type: float32

Embedding Storage:
- Formula: O(b * K * s^(L-1)) = 128 * 64 * 10^(2-1)
- Memory: 327,680 bytes (0.31 MB)

Weight Matrices:
- Formula: O(L * K^2) = 2 * 64^2
  (First layer: 1433 -> 64)
  (Last layer: 64 -> 7)
- Memory: 385,024 bytes (0.37 MB)

Total Memory:
- Total: 712,704 bytes (0.68 MB)


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "graphSAGE",
    "accuracy": test_acc,
    "f1_micro":f1_micro,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "embedding_storage":memory_stats["embedding_mb"],
    "Weight_Matrices":memory_stats["weight_mb"],
    "Total_Memory":memory_stats["total_mb"]
}

with open("graphSAGE_cora_results.json", "w") as f:
    json.dump(metrics, f)

**CiteSeer DATASET**

In [14]:
clean_gpu_memory()
def dataset_load():
  print(f"Using device: {device}")
  dataset = Planetoid(root='data/Planetoid', name='CiteSeer', transform=NormalizeFeatures())
  num_features = dataset.num_features
  num_classes = dataset.num_classes
  data = dataset[0].to(device)  # Get the first graph object.
  return num_features, data, num_classes, device,dataset

num_features, data, num_classes, device, dataset = dataset_load()
print(f'Number of features:       {num_features}')
print(f'Number of classes:       {num_classes}')

print(f'Number of nodes:          {data.num_nodes}')
print(f'Number of edges:          {data.num_edges}')
print(f'Average node degree:      {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.3f}')
print(f'Has isolated nodes:       {data.has_isolated_nodes()}')
print(f'Has self-loops:           {data.has_self_loops()}')
print(f'Is undirected:            {data.is_undirected()}')

train_loader = NeighborLoader(
    data,
    input_nodes=data.train_mask,
    num_neighbors=[10, 10],  # s = 10 per layer (2 layers)
    batch_size=128,
    shuffle=True,
)
hidden_channels = 64

model = testGraphSAGE(
    in_channels=dataset.num_features,  # Input feature dimension
    hidden_channels=64,               # Hidden layer size
    num_layers=2,                     # Number of SAGEConv layers
    out_channels=dataset.num_classes,  # Output dimension (number of classes)
    dropout=0.5,                      # Dropout rate                         # Jumping Knowledge (optional: "cat", "max", "lstm")
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.NLLLoss()  # Negative Log Likelihood (used with log_softmax)
# Training loop
res=[]
for i in range (10):
  clean_gpu_memory()
  start_time = time.time()

  allocated_mem = []
  reserved_mem = []
  peak_mem = []
  for epoch in range(1, 101):
      #torch.cuda.reset_peak_memory_stats()  # reset peak tracking
      loss = train()
      val_acc = evaluate(data.val_mask)
      print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}")

      alloc = torch.cuda.memory_allocated() / 1024**2
      reserv = torch.cuda.memory_reserved() / 1024**2
      peak = torch.cuda.max_memory_allocated() / 1024**2

      allocated_mem.append(alloc)
      reserved_mem.append(reserv)
      peak_mem.append(peak)
  end_time = time.time()
  print(f"Training time: {end_time - start_time:.2f} seconds")
  res.append(evaluate(data.test_mask))
# Test accuracy
test_acc = evaluate(data.test_mask)
print(f"Test Accuracy: {test_acc:.4f}")

model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
    preds = out.argmax(dim=1)  # predicted class indices
    true_labels = data.y

    # Apply mask to get only test node predictions
    test_preds = preds[data.test_mask].cpu().numpy()
    test_labels = true_labels[data.test_mask].cpu().numpy()

    # Compute F1 score (micro)
    f1_micro = f1_score(test_labels, test_preds, average='micro')
    print(f"F1 Score (micro): {f1_micro:.4f}")


Memory after cleanup: 34.18 MB
Using device: cuda


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


Number of features:       3703
Number of classes:       6
Number of nodes:          3327
Number of edges:          9104
Average node degree:      2.74
Number of training nodes: 120
Training node label rate: 0.036
Has isolated nodes:       True
Has self-loops:           False
Is undirected:            True
Memory after cleanup: 65.35 MB
Epoch: 001, Loss: 1.7990, Val Acc: 0.1720
Epoch: 002, Loss: 1.8023, Val Acc: 0.2220
Epoch: 003, Loss: 1.7810, Val Acc: 0.2320
Epoch: 004, Loss: 1.7893, Val Acc: 0.2320
Epoch: 005, Loss: 1.7745, Val Acc: 0.2400
Epoch: 006, Loss: 1.7776, Val Acc: 0.3320
Epoch: 007, Loss: 1.7705, Val Acc: 0.3820
Epoch: 008, Loss: 1.7481, Val Acc: 0.3080
Epoch: 009, Loss: 1.7380, Val Acc: 0.3060
Epoch: 010, Loss: 1.7119, Val Acc: 0.3320
Epoch: 011, Loss: 1.6732, Val Acc: 0.3540
Epoch: 012, Loss: 1.6503, Val Acc: 0.3480
Epoch: 013, Loss: 1.5779, Val Acc: 0.3360
Epoch: 014, Loss: 1.5182, Val Acc: 0.3260
Epoch: 015, Loss: 1.4503, Val Acc: 0.3400
Epoch: 016, Loss: 1.3900, Val Ac

In [15]:
print(res)
print(sum(res)/10)

[0.5870000123977661, 0.6110000014305115, 0.6290000081062317, 0.6230000257492065, 0.6490000486373901, 0.6080000400543213, 0.6620000600814819, 0.6370000243186951, 0.6470000147819519, 0.6290000081062317]
0.6282000243663788


In [None]:
if __name__ == "__main__":
    # PubMed dataset example
    memory_stats = calculate_graphsage_memory(
        batch_size=128,
        hidden_dim=64,
        num_layers=2,
        num_neighbors=10,
        num_features=3703,  # CiteSeer node features
        num_classes=6,     # CiteSeer classes
        dtype_bytes=4      # float32
    )


=== GraphSAGE Memory Breakdown ===
Batch size (b): 128
Hidden dim (K): 64
Layers (L): 2
Neighbors (s): 10
Data type: float32

Embedding Storage:
- Formula: O(b * K * s^(L-1)) = 128 * 64 * 10^(2-1)
- Memory: 327,680 bytes (0.31 MB)

Weight Matrices:
- Formula: O(L * K^2) = 2 * 64^2
  (First layer: 3703 -> 64)
  (Last layer: 64 -> 6)
- Memory: 965,888 bytes (0.92 MB)

Total Memory:
- Total: 1,293,568 bytes (1.23 MB)


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "graphSAGE",
    "accuracy": test_acc,
    "f1_micro":f1_micro,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "embedding_storage":memory_stats["embedding_mb"],
    "Weight_Matrices":memory_stats["weight_mb"],
    "Total_Memory":memory_stats["total_mb"]
}

with open("graphSAGE_citeeser_results.json", "w") as f:
    json.dump(metrics, f)

** Amazon DATASET**

In [17]:
clean_gpu_memory()
def dataset_load():
  print(f"Using device: {device}")
  dataset = Amazon(
        root='data/Amazon',
        name='Computers',
        transform=T.Compose([
        NormalizeFeatures(),          # feature‑wise ℓ₂ normalisation
        RandomNodeSplit(              # ⇦ add a split transform
                split='train_rest',       # 10% val, 10% test by default
                num_val=0.1,
                num_test=0.1,
                num_splits=1,
            )
        ])
    )
  num_features = dataset.num_features
  num_classes = dataset.num_classes
  data = dataset[0].to(device)  # Get the first graph object.
  return num_features, data, num_classes, device,dataset

num_features, data, num_classes, device, dataset = dataset_load()
print(f'Number of features:       {num_features}')
print(f'Number of classes:       {num_classes}')

print(f'Number of nodes:          {data.num_nodes}')
print(f'Number of edges:          {data.num_edges}')
print(f'Average node degree:      {data.num_edges / data.num_nodes:.2f}')
print(f"Training nodes: {data.train_mask.sum().item()}")
print(f"Validation nodes: {data.val_mask.sum().item()}")
print(f"Test nodes: {data.test_mask.sum().item()}")
#print(f'Number of training nodes: {data.train_mask.sum()}')
#print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.3f}')
#print(f'Has isolated nodes:       {data.has_isolated_nodes()}')
#print(f'Has self-loops:           {data.has_self_loops()}')
#print(f'Is undirected:            {data.is_undirected()}')
train_loader = NeighborLoader(
    data,
    input_nodes=data.train_mask,
    num_neighbors=[10, 10],  # s = 10 per layer (2 layers)
    batch_size=128,
    shuffle=True,
)
hidden_channels = 64

model = testGraphSAGE(
    in_channels=dataset.num_features,  # Input feature dimension
    hidden_channels=64,               # Hidden layer size
    num_layers=2,                     # Number of SAGEConv layers
    out_channels=dataset.num_classes,  # Output dimension (number of classes)
    dropout=0.5,                      # Dropout rate                         # Jumping Knowledge (optional: "cat", "max", "lstm")
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.NLLLoss()  # Negative Log Likelihood (used with log_softmax)
# Training loop
res=[]
for i in range(10):
  clean_gpu_memory()
  start_time = time.time()

  allocated_mem = []
  reserved_mem = []
  peak_mem = []
  for epoch in range(1, 101):
      #torch.cuda.reset_peak_memory_stats()  # reset peak tracking
      loss = train()
      val_acc = evaluate(data.val_mask)
      print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}")

      alloc = torch.cuda.memory_allocated() / 1024**2
      reserv = torch.cuda.memory_reserved() / 1024**2
      peak = torch.cuda.max_memory_allocated() / 1024**2

      allocated_mem.append(alloc)
      reserved_mem.append(reserv)
      peak_mem.append(peak)
  end_time = time.time()
  print(f"Training time: {end_time - start_time:.2f} seconds")
  res.append(evaluate(data.test_mask))

# Test accuracy
test_acc = evaluate(data.test_mask)
print(f"Test Accuracy: {test_acc:.4f}")

model.eval()
with torch.no_grad():
    out = model(data.x, data.edge_index)
    preds = out.argmax(dim=1)  # predicted class indices
    true_labels = data.y

    # Apply mask to get only test node predictions
    test_preds = preds[data.test_mask].cpu().numpy()
    test_labels = true_labels[data.test_mask].cpu().numpy()

    # Compute F1 score (micro)
    f1_micro = f1_score(test_labels, test_preds, average='micro')
    print(f"F1 Score (micro): {f1_micro:.4f}")


Memory after cleanup: 102.81 MB
Using device: cuda
Number of features:       767
Number of classes:       10
Number of nodes:          13752
Number of edges:          491722
Average node degree:      35.76
Training nodes: 11002
Validation nodes: 1375
Test nodes: 1375
Memory after cleanup: 102.82 MB
Epoch: 001, Loss: 1.7392, Val Acc: 0.5622
Epoch: 002, Loss: 1.2240, Val Acc: 0.6204
Epoch: 003, Loss: 1.0690, Val Acc: 0.6509
Epoch: 004, Loss: 1.0125, Val Acc: 0.7047
Epoch: 005, Loss: 0.9025, Val Acc: 0.7193
Epoch: 006, Loss: 0.8403, Val Acc: 0.7360
Epoch: 007, Loss: 0.7968, Val Acc: 0.7884
Epoch: 008, Loss: 0.7609, Val Acc: 0.7862
Epoch: 009, Loss: 0.7538, Val Acc: 0.7905
Epoch: 010, Loss: 0.7164, Val Acc: 0.7855
Epoch: 011, Loss: 0.7044, Val Acc: 0.7796
Epoch: 012, Loss: 0.6958, Val Acc: 0.7942
Epoch: 013, Loss: 0.6887, Val Acc: 0.7927
Epoch: 014, Loss: 0.6777, Val Acc: 0.7942
Epoch: 015, Loss: 0.6862, Val Acc: 0.7913
Epoch: 016, Loss: 0.6642, Val Acc: 0.7956
Epoch: 017, Loss: 0.6638, Va

In [18]:
print(res)
print(sum(res)/10)

[0.8087272644042969, 0.8021817803382874, 0.8050909042358398, 0.7999999523162842, 0.8174545168876648, 0.8065454363822937, 0.7963635921478271, 0.7985454201698303, 0.8029090762138367, 0.7963635921478271]
0.8034181535243988


In [None]:
if __name__ == "__main__":
    # amazon dataset example
    memory_stats = calculate_graphsage_memory(
        batch_size=128,
        hidden_dim=64,
        num_layers=2,
        num_neighbors=10,
        num_features=767,  # AMAZON node features
        num_classes=10,     # AMAZON classes
        dtype_bytes=4      # float32
    )


=== GraphSAGE Memory Breakdown ===
Batch size (b): 128
Hidden dim (K): 64
Layers (L): 2
Neighbors (s): 10
Data type: float32

Embedding Storage:
- Formula: O(b * K * s^(L-1)) = 128 * 64 * 10^(2-1)
- Memory: 327,680 bytes (0.31 MB)

Weight Matrices:
- Formula: O(L * K^2) = 2 * 64^2
  (First layer: 767 -> 64)
  (Last layer: 64 -> 10)
- Memory: 215,296 bytes (0.21 MB)

Total Memory:
- Total: 542,976 bytes (0.52 MB)


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "graphSAGE",
    "accuracy": test_acc,
     "f1_micro":f1_micro,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "embedding_storage":memory_stats["embedding_mb"],
    "Weight_Matrices":memory_stats["weight_mb"],
    "Total_Memory":memory_stats["total_mb"]
}

with open("graphSAGE_amazon_results.json", "w") as f:
    json.dump(metrics, f)