<a href="https://colab.research.google.com/github/ghommidhWassim/GNN-variants/blob/main/test_gcn_variants.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [69]:
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [70]:
!python -c "import torch; print(torch.__version__)"


2.6.0+cu124


In [71]:
!python -c "import torch; print(torch.version.cuda)"



12.4


In [72]:
pip install torchvision



In [73]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.6.0+cu124.html



Looking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html


In [74]:
# Standard libraries
import numpy as np
from scipy import sparse
import seaborn as sns
import pandas as pd

# Plotting libraries
import matplotlib.pyplot as plt
import networkx as nx
from matplotlib import cm
from IPython.display import Javascript  # Restrict height of output cell.

# PyTorch
import torch
import torch.nn.functional as F
from torch.nn import Linear

# import pyg_lib
import torch_sparse

# PyTorch geometric
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import ClusterData, ClusterLoader
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.data import Data
from torch_geometric import seed_everything

In [75]:
random_seed = 42
torch.manual_seed(1234567)
seed_everything(42)
plt.style.use('dark_background')
num_epochs = 101

In [76]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
dataset = Planetoid(root='data/Planetoid', name='PubMed', transform=NormalizeFeatures())
num_features = dataset.num_features
num_classes = dataset.num_classes
data = dataset[0].to(device)  # Get the first graph object.
data

Using device: cuda


Data(x=[19717, 500], edge_index=[2, 88648], y=[19717], train_mask=[19717], val_mask=[19717], test_mask=[19717])

In [77]:
print(f'Number of nodes:          {data.num_nodes}')
print(f'Number of edges:          {data.num_edges}')
print(f'Average node degree:      {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.3f}')
print(f'Has isolated nodes:       {data.has_isolated_nodes()}')
print(f'Has self-loops:           {data.has_self_loops()}')
print(f'Is undirected:            {data.is_undirected()}')

Number of nodes:          19717
Number of edges:          88648
Average node degree:      4.50
Number of training nodes: 60
Training node label rate: 0.003
Has isolated nodes:       False
Has self-loops:           False
Is undirected:            True


In [78]:
class GCN(torch.nn.Module):
  def __init__(self, hidden_channels):
    super().__init__()
    self.conv1 = GCNConv(num_features, hidden_channels)
    self.conv2 = GCNConv(hidden_channels, num_classes)

  def forward(self, x, edge_index):
    x = self.conv1(x, edge_index)
    x = x.relu()
    x = F.dropout(x, p=0.5, training=self.training)
    x = self.conv2(x, edge_index)
    return x

**STANDARD GCN**

In [79]:
model = GCN(hidden_channels=16).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
print(model)

GCN(
  (conv1): GCNConv(500, 16)
  (conv2): GCNConv(16, 3)
)


In [80]:
def train(data, mask):
  model.train()
  optimizer.zero_grad()  # Clear gradients.
  out = model(data.x, data.edge_index)  # Perform a single forward pass.
  loss = criterion(out[mask], data.y[mask])  # Compute the loss solely based on the training nodes.
  loss.backward()  # Derive gradients.
  optimizer.step()  # Update parameters based on gradients.
  return loss

def test(data, mask):
  model.eval()
  out = model(data.x, data.edge_index)
  pred = out.argmax(dim=1)  # Use the class with highest probability.
  correct = pred[mask] == data.y[mask]  # Check against ground-truth labels.
  acc = int(correct.sum()) / int(mask.sum())  # Derive ratio of correct predictions.
  return acc

In [81]:
model = GCN(hidden_channels=16).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

for epoch in range(1, num_epochs):
  loss = train(data, data.train_mask)
  if epoch % 10 == 0:
    train_acc = test(data, data.train_mask)
    val_acc = test(data, data.val_mask)
    print(f'Epoch: {epoch:03d}, Train: {train_acc:.4f}')

<IPython.core.display.Javascript object>

Epoch: 010, Train: 0.9000
Epoch: 020, Train: 0.9000
Epoch: 030, Train: 0.9333
Epoch: 040, Train: 0.9333
Epoch: 050, Train: 0.9500
Epoch: 060, Train: 0.9667
Epoch: 070, Train: 0.9667
Epoch: 080, Train: 0.9833
Epoch: 090, Train: 1.0000
Epoch: 100, Train: 1.0000


In [82]:
test_acc = test(data, data.test_mask)
test_acc

0.771

**CLUSTER GCN**

In [83]:
data = data.cpu()
cluster_data = ClusterData(data, num_parts=128)
train_loader = ClusterLoader(cluster_data, batch_size=32, shuffle=True)
criterion = torch.nn.CrossEntropyLoss().to(device)

total_num_nodes = 0
#for step, sub_data in enumerate(train_loader):
 # print(f'Batch: {step + 1} has {sub_data.num_nodes} nodes')
  #print(sub_data)
  #print()
  #total_num_nodes += sub_data.num_nodes

print(f'Iterated over {total_num_nodes} of {data.num_nodes} nodes!')

Computing METIS partitioning...


Iterated over 0 of 19717 nodes!


Done!


In [84]:
def train_batch(loader):
  model.train()
  for sub_data in train_loader:  # Iterate over each mini-batch.
    sub_data = sub_data.to(device)
    out = model(sub_data.x, sub_data.edge_index)  # Perform a single forward pass.
    loss = criterion(out[sub_data.train_mask], sub_data.y[sub_data.train_mask])  # Compute the loss solely based on the training nodes.
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    optimizer.zero_grad()  # Clear gradients.

In [85]:

model = GCN(hidden_channels=16).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))
data=data.to(device)
for epoch in range(1, num_epochs):
  loss = train_batch(train_loader)
  if epoch % 10 == 0:
    train_acc = test(data, data.train_mask)
    val_acc = test(data, data.val_mask)
    print(f'Epoch: {epoch:03d}, Train: {train_acc:.4f}')

<IPython.core.display.Javascript object>

Epoch: 010, Train: 0.9167
Epoch: 020, Train: 0.9667
Epoch: 030, Train: 0.9833
Epoch: 040, Train: 1.0000
Epoch: 050, Train: 0.9833
Epoch: 060, Train: 1.0000
Epoch: 070, Train: 1.0000
Epoch: 080, Train: 1.0000
Epoch: 090, Train: 1.0000
Epoch: 100, Train: 1.0000


In [86]:
test_acc = test(data, data.test_mask)
test_acc

0.767

In [92]:
run_profiling(model, data, loader=train_loader)  # Or None for full-batch


Epoch 0 Profile:



Epoch 1 Profile:



Epoch 2 Profile:



Epoch 3 Profile:



Epoch 4 Profile:
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls   Total FLOPs  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                           forward_pass         0.00%       0.000us         0.00

**GraphSAGE**

In [88]:
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import GraphSAGE
from sklearn.metrics import accuracy_score
from transformers import AutoModelForCausalLM

torch.cuda.memory._record_memory_history(max_entries=100000)

# Set random seed for reproducibility
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)


# Create neighbor loader for mini-batch training
train_loader = NeighborLoader(
    data,
    num_neighbors=[10, 10],  # 2 layers with 10 neighbors each
    batch_size=1024,
    input_nodes=data.train_mask,
    shuffle=True
)

# Initialize GraphSAGE model (similar hidden size to GCN)
model = GraphSAGE(
    in_channels=dataset.num_features,  # 500 for PubMed
    hidden_channels=16,               # Same as GCN
    num_layers=2,                     # 2-layer model
    out_channels=dataset.num_classes, # 3 for PubMed
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Same LR as GCN

def train():
    model.train()
    total_loss = total_correct = total_examples = 0

    for batch in train_loader:
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        loss = F.cross_entropy(out[batch.train_mask], batch.y[batch.train_mask])
        loss.backward()
        optimizer.step()

        total_loss += float(loss) * batch.train_mask.sum()
        total_correct += int((out[batch.train_mask].argmax(dim=-1) == batch.y[batch.train_mask]).sum())
        total_examples += batch.train_mask.sum()

    return total_loss / total_examples, total_correct / total_examples

@torch.no_grad()
def test(mask):
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=-1)
    acc = accuracy_score(data.y[mask].cpu(), pred[mask].cpu())
    return acc

# Training loop (same number of epochs as GCN)
for epoch in range(1, 101):
    loss, train_acc = train()
    if epoch % 10 == 0:
        val_acc = test(data.val_mask)
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, Val: {val_acc:.4f}')

# Final testing
test_acc = test(data.test_mask)
print(f'Test Accuracy: {test_acc:.4f}')
torch.cuda.memory._dump_snapshot("profile.pkl")
torch.cuda.memory._record_memory_history(enabled=None)

Epoch: 010, Loss: 0.9164, Train: 0.9667, Val: 0.7160
Epoch: 020, Loss: 0.5277, Train: 0.9833, Val: 0.7400
Epoch: 030, Loss: 0.2005, Train: 0.9833, Val: 0.7560
Epoch: 040, Loss: 0.0689, Train: 1.0000, Val: 0.7600
Epoch: 050, Loss: 0.0272, Train: 1.0000, Val: 0.7640
Epoch: 060, Loss: 0.0146, Train: 1.0000, Val: 0.7640
Epoch: 070, Loss: 0.0089, Train: 1.0000, Val: 0.7680
Epoch: 080, Loss: 0.0073, Train: 1.0000, Val: 0.7660
Epoch: 090, Loss: 0.0059, Train: 1.0000, Val: 0.7680
Epoch: 100, Loss: 0.0047, Train: 1.0000, Val: 0.7720
Test Accuracy: 0.7460


In [89]:
from torch.profiler import profile, record_function, ProfilerActivity
import torch

def train_step(batch, model, optimizer, criterion):
    optimizer.zero_grad()
    with record_function("forward_pass"):
        out = model(batch.x, batch.edge_index)
    with record_function("loss_calculation"):
        loss = criterion(out[batch.train_mask], batch.y[batch.train_mask])
    with record_function("backward_pass"):
        loss.backward()
        optimizer.step()

def run_profiling(model, data, loader=None, epochs=5):
    # Warm-up (important for CUDA)
    for _ in range(2):
        if loader:
            for batch in loader:
                train_step(batch, model, optimizer, criterion)
        else:
            train_step(data, model, optimizer, criterion)

    # Profiling proper
    with profile(
        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
        schedule=torch.profiler.schedule(
            wait=1,  # Skip first epoch
            warmup=1,  # Warmup profiler
            active=3  # Profile next 3 epochs
        ),
        on_trace_ready=torch.profiler.tensorboard_trace_handler('./logs'),
        record_shapes=True,
        profile_memory=True,
        with_stack=True,
        with_flops=True  # Measure FLOPs if available
    ) as prof:

        for epoch in range(epochs):
            model.train()
            with record_function(f"epoch_{epoch}"):
                if loader:
                    for batch in loader:
                        train_step(batch, model, optimizer, criterion)
                else:
                    train_step(data, model, optimizer, criterion)

            prof.step()

            # Print summary every epoch
            print(f"\nEpoch {epoch} Profile:")
            print(prof.key_averages().table(
                sort_by="self_cuda_time_total" if torch.cuda.is_available() else "self_cpu_time_total",
                row_limit=15
            ))
            print(prof.key_averages().table(
            sort_by="self_cuda_memory_usage",
            row_limit=20
            ))

# Usage
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

print("Profiling GraphSAGE...")
run_profiling(model, data, loader=train_loader)  # Or None for full-batch

Profiling GraphSAGE...

Epoch 0 Profile:



Epoch 1 Profile:



Epoch 2 Profile:



Epoch 3 Profile:



Epoch 4 Profile:
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls   Total FLOPs  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                           forward_pass         0.00%    

In [90]:
print(prof.key_averages().table(
    sort_by="cuda_time_total" if torch.cuda.is_available() else "cpu_time_total",
    row_limit=10
))

NameError: name 'prof' is not defined