<a href="https://colab.research.google.com/github/ghommidhWassim/GNN-variants/blob/main/LADIES.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"
!pip install torchvision
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.6.0+cu124.html


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for torch-geometric (pyproject.toml) ... [?25l[?25hdone
2.6.0+cu124
12.4
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (fr

In [None]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.6.0+cu124.html


Looking in links: https://data.pyg.org/whl/torch-2.6.0+cu124.html


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import scipy.sparse as sp
from torch_geometric.datasets import Planetoid, Amazon
from torch_geometric.transforms import NormalizeFeatures, RandomNodeSplit
import torch_geometric.transforms as T
from torch_geometric.utils import to_scipy_sparse_matrix
from sklearn.metrics import accuracy_score
import random
from sklearn.metrics import f1_score
import json,time
import gc
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [None]:
def clean_gpu_memory():
    """Cleans GPU memory without fully resetting the CUDA context"""
    import gc
    gc.collect()  # Python garbage collection
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # PyTorch cache
        torch.cuda.reset_peak_memory_stats()  # Reset tracking
        print(f"Memory after cleanup: {torch.cuda.memory_allocated()/1024**2:.2f} MB")


In [None]:
clean_gpu_memory()

Memory after cleanup: 0.00 MB


In [None]:


# Ensure reproducibility
def seed_everything(seed=42):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

seed_everything()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def estimate_ladies_memory_MB(K, L, slayer):
    bytes_per_float = 4  # float32
    total_floats = L * K * slayer + L * K * K
    total_bytes = total_floats * bytes_per_float
    return total_bytes / (1024 ** 2)

# ------------------- Load Dataset -------------------
def dataset_load():
    print(f"Using device: {device}")
    dataset = Planetoid(root='data/Planetoid', name='PubMed', transform=NormalizeFeatures())
    data = dataset[0].to(device)
    return dataset.num_features, data, dataset.num_classes

num_features, data, num_classes = dataset_load()

# ------------------- Prepare Adjacency -------------------
adj = to_scipy_sparse_matrix(data.edge_index, num_nodes=data.num_nodes)
lap_matrix = adj + sp.eye(adj.shape[0])

def row_normalize(mx):
    rowsum = np.array(mx.sum(1)).flatten()
    rowsum[rowsum == 0] = 1  # Avoid division by zero
    r_inv = np.power(rowsum, -1)
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)


lap_matrix = row_normalize(lap_matrix)

# ------------------- Sampler -------------------
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert scipy sparse matrix to torch sparse tensor"""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse_coo_tensor(indices, values, shape, device=device)
def evaluate(model, features, adjs, labels, nodes):
    model.eval()
    with torch.no_grad():
        out = model(features, adjs)
        preds = out[nodes].argmax(dim=1).cpu()
        targets = labels[nodes].cpu()

        acc = (preds == targets).float().mean().item()
        f1_micro = f1_score(targets, preds, average='micro')

    return acc, f1_micro

def ladies_sampler(seed, batch_nodes, samp_num_list, num_nodes, lap_matrix, depth):
    np.random.seed(seed)
    previous_nodes = batch_nodes.cpu().numpy()
    adjs = []
    for d in range(depth):
        U = lap_matrix[previous_nodes, :]
        pi = np.array(np.sum(U.multiply(U), axis=0))[0]
        p = pi / np.sum(pi)
        s_num = np.min([np.sum(p > 0), samp_num_list[d]])
        after_nodes = np.random.choice(num_nodes, s_num, p=p, replace=False)
        after_nodes = np.unique(np.concatenate((after_nodes, batch_nodes.cpu().numpy())))
        adj = U[:, after_nodes].multiply(1 / p[after_nodes])
        adj = row_normalize(adj)
        adjs.append(sparse_mx_to_torch_sparse_tensor(adj))
        previous_nodes = after_nodes
    adjs.reverse()
    return adjs, torch.tensor(previous_nodes, device=device), batch_nodes

# ------------------- Model -------------------
class GCN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers):
        super().__init__()
        self.convs = nn.ModuleList()
        self.convs.append(nn.Linear(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(nn.Linear(hidden_channels, hidden_channels))
        self.convs.append(nn.Linear(hidden_channels, out_channels))

    def forward(self, x, adjs):
        for i, (conv, adj) in enumerate(zip(self.convs[:-1], adjs)):
            x = conv(x)
            x = torch.sparse.mm(adj, x)
            x = F.relu(x)
        x = self.convs[-1](x)
        return x

# ------------------- Training -------------------
model = GCN(num_features, 64, num_classes, num_layers=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

train_nodes = torch.where(data.train_mask)[0]
valid_nodes = torch.where(data.val_mask)[0]
labels = data.y
features = data.x
batch_size = 128
samp_num_list = [64, 64]
depth = len(samp_num_list)
start_time = time.time()

for epoch in range(1, 101):
    model.train()
    optimizer.zero_grad()

    idx = torch.randperm(train_nodes.size(0), device=device)[:batch_size]
    batch_nodes = train_nodes[idx]

    adjs, input_nodes, output_nodes = ladies_sampler(
        seed=np.random.randint(0, 100000),
        batch_nodes=batch_nodes,
        samp_num_list=samp_num_list,
        num_nodes=data.num_nodes,
        lap_matrix=lap_matrix,
        depth=depth
    )

    out = model(features[input_nodes], adjs)
    loss = criterion(out[output_nodes], labels[output_nodes])

    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        full_adj = sparse_mx_to_torch_sparse_tensor(row_normalize(adj + sp.eye(adj.shape[0])))
        acc, f1_micro = evaluate(model, features, [full_adj]*depth, labels, valid_nodes)
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Val Acc: {acc:.4f} | F1-micro: {f1_micro:.4f}")


end_time = time.time()
test_nodes = torch.where(data.test_mask)[0]
full_adj = sparse_mx_to_torch_sparse_tensor(row_normalize(adj + sp.eye(adj.shape[0])))
test_acc, test_f1 = evaluate(model, features, [full_adj]*depth, labels, test_nodes)

print(f"Test Accuracy: {test_acc:.4f} | Test F1-micro: {test_f1:.4f}")

# LADIES theoretical memory
K = 64         # hidden dimension
L = 2          # number of layers
slayer = 64    # number of sampled nodes per layer
mem_MB = estimate_ladies_memory_MB(K, L, slayer)

print(f"Theoretical LADIES memory usage: {mem_MB:.2f} MB (embedding + transformation weights)")

print(f"Training time: {end_time - start_time:.2f} seconds")

# ------------------- GPU Usage -------------------
print(f"GPU memory allocated: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
print(f"Max GPU memory used:  {torch.cuda.max_memory_allocated()/1024**2:.2f} MB")


Using device: cuda


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...
Done!


Epoch 010 | Loss: 0.9537 | Val Acc: 0.7080 | F1-micro: 0.7080
Epoch 020 | Loss: 0.6219 | Val Acc: 0.7440 | F1-micro: 0.7440
Epoch 030 | Loss: 0.2689 | Val Acc: 0.7600 | F1-micro: 0.7600
Epoch 040 | Loss: 0.1102 | Val Acc: 0.7660 | F1-micro: 0.7660
Epoch 050 | Loss: 0.0542 | Val Acc: 0.7580 | F1-micro: 0.7580
Epoch 060 | Loss: 0.0345 | Val Acc: 0.7560 | F1-micro: 0.7560
Epoch 070 | Loss: 0.0155 | Val Acc: 0.7700 | F1-micro: 0.7700
Epoch 080 | Loss: 0.0173 | Val Acc: 0.7540 | F1-micro: 0.7540
Epoch 090 | Loss: 0.0102 | Val Acc: 0.7620 | F1-micro: 0.7620
Epoch 100 | Loss: 0.0079 | Val Acc: 0.7700 | F1-micro: 0.7700
Test Accuracy: 0.7450 | Test F1-micro: 0.7450
Theoretical LADIES memory usage: 0.06 MB (embedding + transformation weights)
Training time: 2.21 seconds
GPU memory allocated: 61.21 MB
Max GPU memory used:  83.03 MB


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "Ladies",
    "accuracy": test_acc,
    "f1_micro":f1_micro,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "mem_MB":mem_MB
}

with open("Ladies_pubmed_results.json", "w") as f:
    json.dump(metrics, f)

**Cora**

In [None]:
clean_gpu_memory()
def dataset_load():
    print(f"Using device: {device}")
    dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())
    data = dataset[0].to(device)
    return dataset.num_features, data, dataset.num_classes

num_features, data, num_classes = dataset_load()
adj = to_scipy_sparse_matrix(data.edge_index, num_nodes=data.num_nodes)
lap_matrix = adj + sp.eye(adj.shape[0])
lap_matrix = row_normalize(lap_matrix)
model = GCN(num_features, 64, num_classes, num_layers=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

train_nodes = torch.where(data.train_mask)[0]
valid_nodes = torch.where(data.val_mask)[0]
labels = data.y
features = data.x
batch_size = 128
samp_num_list = [64, 64]
depth = len(samp_num_list)
start_time = time.time()

for epoch in range(1, 101):
    model.train()
    optimizer.zero_grad()

    idx = torch.randperm(train_nodes.size(0), device=device)[:batch_size]
    batch_nodes = train_nodes[idx]

    adjs, input_nodes, output_nodes = ladies_sampler(
        seed=np.random.randint(0, 100000),
        batch_nodes=batch_nodes,
        samp_num_list=samp_num_list,
        num_nodes=data.num_nodes,
        lap_matrix=lap_matrix,
        depth=depth
    )

    out = model(features[input_nodes], adjs)
    loss = criterion(out[output_nodes], labels[output_nodes])

    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        full_adj = sparse_mx_to_torch_sparse_tensor(row_normalize(adj + sp.eye(adj.shape[0])))
        acc, f1_micro = evaluate(model, features, [full_adj]*depth, labels, valid_nodes)
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Val Acc: {acc:.4f} | F1-micro: {f1_micro:.4f}")


end_time = time.time()
test_nodes = torch.where(data.test_mask)[0]
full_adj = sparse_mx_to_torch_sparse_tensor(row_normalize(adj + sp.eye(adj.shape[0])))
test_acc, test_f1 = evaluate(model, features, [full_adj]*depth, labels, test_nodes)

print(f"Test Accuracy: {test_acc:.4f} | Test F1-micro: {test_f1:.4f}")

# LADIES theoretical memory
K = 64         # hidden dimension
L = 2          # number of layers
slayer = 64    # number of sampled nodes per layer
mem_MB = estimate_ladies_memory_MB(K, L, slayer)

print(f"Theoretical LADIES memory usage: {mem_MB:.2f} MB (embedding + transformation weights)")

print(f"Training time: {end_time - start_time:.2f} seconds")

# ------------------- GPU Usage -------------------
print(f"GPU memory allocated: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
print(f"Max GPU memory used:  {torch.cuda.max_memory_allocated()/1024**2:.2f} MB")


Memory after cleanup: 58.40 MB
Using device: cuda


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


Epoch 010 | Loss: 1.9201 | Val Acc: 0.3540 | F1-micro: 0.3540
Epoch 020 | Loss: 1.8112 | Val Acc: 0.3320 | F1-micro: 0.3320
Epoch 030 | Loss: 1.7370 | Val Acc: 0.3460 | F1-micro: 0.3460
Epoch 040 | Loss: 1.6298 | Val Acc: 0.2460 | F1-micro: 0.2460
Epoch 050 | Loss: 1.5298 | Val Acc: 0.2880 | F1-micro: 0.2880
Epoch 060 | Loss: 1.4164 | Val Acc: 0.2760 | F1-micro: 0.2760
Epoch 070 | Loss: 1.3651 | Val Acc: 0.2340 | F1-micro: 0.2340
Epoch 080 | Loss: 1.2404 | Val Acc: 0.2820 | F1-micro: 0.2820
Epoch 090 | Loss: 1.2279 | Val Acc: 0.2420 | F1-micro: 0.2420
Epoch 100 | Loss: 1.2857 | Val Acc: 0.2060 | F1-micro: 0.2060
Test Accuracy: 0.2380 | Test F1-micro: 0.2380
Theoretical LADIES memory usage: 0.06 MB (embedding + transformation weights)
Training time: 1.38 seconds
GPU memory allocated: 33.23 MB
Max GPU memory used:  75.76 MB


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "Ladies",
    "accuracy": test_acc,
    "f1_micro":f1_micro,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "mem_MB":mem_MB
}

with open("Ladies_cora_results.json", "w") as f:
    json.dump(metrics, f)

**Citeser**

In [None]:
clean_gpu_memory()
def dataset_load():
    print(f"Using device: {device}")
    dataset = Planetoid(root='data/Planetoid', name='CiteSeer', transform=NormalizeFeatures())
    data = dataset[0].to(device)
    return dataset.num_features, data, dataset.num_classes

num_features, data, num_classes = dataset_load()
adj = to_scipy_sparse_matrix(data.edge_index, num_nodes=data.num_nodes)
lap_matrix = adj + sp.eye(adj.shape[0])
lap_matrix = row_normalize(lap_matrix)
model = GCN(num_features, 64, num_classes, num_layers=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

train_nodes = torch.where(data.train_mask)[0]
valid_nodes = torch.where(data.val_mask)[0]
labels = data.y
features = data.x
batch_size = 128
samp_num_list = [64, 64]
depth = len(samp_num_list)
start_time = time.time()

for epoch in range(1, 101):
    model.train()
    optimizer.zero_grad()

    idx = torch.randperm(train_nodes.size(0), device=device)[:batch_size]
    batch_nodes = train_nodes[idx]

    adjs, input_nodes, output_nodes = ladies_sampler(
        seed=np.random.randint(0, 100000),
        batch_nodes=batch_nodes,
        samp_num_list=samp_num_list,
        num_nodes=data.num_nodes,
        lap_matrix=lap_matrix,
        depth=depth
    )

    out = model(features[input_nodes], adjs)
    loss = criterion(out[output_nodes], labels[output_nodes])

    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        full_adj = sparse_mx_to_torch_sparse_tensor(row_normalize(adj + sp.eye(adj.shape[0])))
        acc, f1_micro = evaluate(model, features, [full_adj]*depth, labels, valid_nodes)
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Val Acc: {acc:.4f} | F1-micro: {f1_micro:.4f}")


end_time = time.time()
test_nodes = torch.where(data.test_mask)[0]
full_adj = sparse_mx_to_torch_sparse_tensor(row_normalize(adj + sp.eye(adj.shape[0])))
test_acc, test_f1 = evaluate(model, features, [full_adj]*depth, labels, test_nodes)

print(f"Test Accuracy: {test_acc:.4f} | Test F1-micro: {test_f1:.4f}")

# LADIES theoretical memory
K = 64         # hidden dimension
L = 2          # number of layers
slayer = 64    # number of sampled nodes per layer
mem_MB = estimate_ladies_memory_MB(K, L, slayer)

print(f"Theoretical LADIES memory usage: {mem_MB:.2f} MB (embedding + transformation weights)")

print(f"Training time: {end_time - start_time:.2f} seconds")

# ------------------- GPU Usage -------------------
print(f"GPU memory allocated: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
print(f"Max GPU memory used:  {torch.cuda.max_memory_allocated()/1024**2:.2f} MB")


Memory after cleanup: 32.95 MB
Using device: cuda


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


Epoch 010 | Loss: 1.6401 | Val Acc: 0.4960 | F1-micro: 0.4960
Epoch 020 | Loss: 1.1947 | Val Acc: 0.5660 | F1-micro: 0.5660
Epoch 030 | Loss: 0.6185 | Val Acc: 0.5860 | F1-micro: 0.5860
Epoch 040 | Loss: 0.2521 | Val Acc: 0.6100 | F1-micro: 0.6100
Epoch 050 | Loss: 0.1264 | Val Acc: 0.5960 | F1-micro: 0.5960
Epoch 060 | Loss: 0.0935 | Val Acc: 0.6160 | F1-micro: 0.6160
Epoch 070 | Loss: 0.0679 | Val Acc: 0.6300 | F1-micro: 0.6300
Epoch 080 | Loss: 0.0593 | Val Acc: 0.6260 | F1-micro: 0.6260
Epoch 090 | Loss: 0.0573 | Val Acc: 0.6240 | F1-micro: 0.6240
Epoch 100 | Loss: 0.0594 | Val Acc: 0.6220 | F1-micro: 0.6220
Test Accuracy: 0.6340 | Test F1-micro: 0.6340
Theoretical LADIES memory usage: 0.06 MB (embedding + transformation weights)
Training time: 1.39 seconds
GPU memory allocated: 67.58 MB
Max GPU memory used:  86.30 MB


In [None]:
peak_memory_mb=f"{torch.cuda.max_memory_allocated()/1024**2:.2f}"
total_train_time=f"{end_time - start_time:.2f}"

metrics = {
    "model": "Ladies",
    "accuracy": test_acc,
    "f1_micro":f1_micro,
    "peak_memory_MB": peak_memory_mb,
    "train_time_sec": total_train_time,
    "mem_MB":mem_MB
}

with open("Ladies_citeser_results.json", "w") as f:
    json.dump(metrics, f)

**Amazon dataset**

In [None]:
clean_gpu_memory()


Memory after cleanup: 67.32 MB


In [None]:
def stratified_split(data, train_ratio=0.6, val_ratio=0.2, seed=42):
    y = data.y.cpu().numpy()
    idx = np.arange(len(y))
    train_idx, temp_idx = train_test_split(
        idx, stratify=y, train_size=train_ratio, random_state=seed)
    val_idx, test_idx = train_test_split(
        temp_idx, stratify=y[temp_idx], test_size=0.5, random_state=seed)

    data.train_mask = torch.zeros(len(y), dtype=torch.bool)
    data.val_mask   = torch.zeros(len(y), dtype=torch.bool)
    data.test_mask  = torch.zeros(len(y), dtype=torch.bool)
    data.train_mask[train_idx] = True
    data.val_mask[val_idx]     = True
    data.test_mask[test_idx]   = True
    return data

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def estimate_ladies_memory_MB(K, L, slayer):
    bytes_per_float = 4  # float32
    total_floats = L * K * slayer + L * K * K
    total_bytes = total_floats * bytes_per_float
    return total_bytes / (1024 ** 2)

# ------------------- Load Dataset -------------------
dataset = Amazon(root='data/Amazon', name='Computers', transform=NormalizeFeatures())
data    = dataset[0]
data    = stratified_split(data)        # create masks
data    = data.to(device)
num_features, num_classes = dataset.num_features, dataset.num_classes


# ------------------- Prepare Adjacency -------------------
adj = to_scipy_sparse_matrix(data.edge_index, num_nodes=data.num_nodes)
lap_matrix = adj + sp.eye(adj.shape[0])

def row_normalize(mx):
    rowsum = np.array(mx.sum(1)).flatten()
    rowsum[rowsum == 0] = 1  # Avoid division by zero
    r_inv = np.power(rowsum, -1)
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)


lap_matrix = row_normalize(lap_matrix)

# ------------------- Sampler -------------------
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert scipy sparse matrix to torch sparse tensor"""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse_coo_tensor(indices, values, shape, device=device)
def evaluate(model, features, adjs, labels, nodes):
    model.eval()
    with torch.no_grad():
        out = model(features, adjs)
        preds = out[nodes].argmax(dim=1).cpu()
        targets = labels[nodes].cpu()

        acc = (preds == targets).float().mean().item()
        f1_micro = f1_score(targets, preds, average='micro')

    return acc, f1_micro

def ladies_sampler(seed, batch_nodes, samp_num_list, num_nodes, lap_matrix, depth):
    np.random.seed(seed)
    previous_nodes = batch_nodes.cpu().numpy()
    adjs = []
    for d in range(depth):
        U = lap_matrix[previous_nodes, :]
        pi = np.array(np.sum(U.multiply(U), axis=0))[0]
        p = pi / np.sum(pi)
        s_num = np.min([np.sum(p > 0), samp_num_list[d]])
        after_nodes = np.random.choice(num_nodes, s_num, p=p, replace=False)
        after_nodes = np.unique(np.concatenate((after_nodes, batch_nodes.cpu().numpy())))
        adj = U[:, after_nodes].multiply(1 / p[after_nodes])
        adj = row_normalize(adj)
        adjs.append(sparse_mx_to_torch_sparse_tensor(adj))
        previous_nodes = after_nodes
    adjs.reverse()
    return adjs, torch.tensor(previous_nodes, device=device), batch_nodes

# ------------------- Model -------------------
class GCN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers):
        super().__init__()
        self.convs = nn.ModuleList()
        self.convs.append(nn.Linear(in_channels, hidden_channels))
        for _ in range(num_layers - 2):
            self.convs.append(nn.Linear(hidden_channels, hidden_channels))
        self.convs.append(nn.Linear(hidden_channels, out_channels))

    def forward(self, x, adjs):
        for i, (conv, adj) in enumerate(zip(self.convs[:-1], adjs)):
            x = conv(x)
            x = torch.sparse.mm(adj, x)
            x = F.relu(x)
        x = self.convs[-1](x)
        return x

# ------------------- Training -------------------
model = GCN(num_features, 64, num_classes, num_layers=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

train_nodes = torch.where(data.train_mask)[0]
valid_nodes = torch.where(data.val_mask)[0]
labels = data.y
features = data.x
batch_size = 128
samp_num_list = [64, 64]
depth = len(samp_num_list)
start_time = time.time()

for epoch in range(1, 101):
    model.train()
    optimizer.zero_grad()

    idx = torch.randperm(train_nodes.size(0), device=device)[:batch_size]
    batch_nodes = train_nodes[idx]

    adjs, input_nodes, output_nodes = ladies_sampler(
        seed=np.random.randint(0, 100000),
        batch_nodes=batch_nodes,
        samp_num_list=samp_num_list,
        num_nodes=data.num_nodes,
        lap_matrix=lap_matrix,
        depth=depth
    )

    out = model(features[input_nodes], adjs)
    loss = criterion(out[output_nodes], labels[output_nodes])

    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        full_adj = sparse_mx_to_torch_sparse_tensor(row_normalize(adj + sp.eye(adj.shape[0])))
        acc, f1_micro = evaluate(model, features, [full_adj]*depth, labels, valid_nodes)
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Val Acc: {acc:.4f} | F1-micro: {f1_micro:.4f}")


end_time = time.time()
test_nodes = torch.where(data.test_mask)[0]
full_adj = sparse_mx_to_torch_sparse_tensor(row_normalize(adj + sp.eye(adj.shape[0])))
test_acc, test_f1 = evaluate(model, features, [full_adj]*depth, labels, test_nodes)

print(f"Test Accuracy: {test_acc:.4f} | Test F1-micro: {test_f1:.4f}")`