In [1]:
import os
import sys
import pickle
import random

import networkx as nx
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.loader import NeighborLoader
from torch_geometric.nn import SAGEConv
from torch_geometric.utils import to_undirected
from torch_geometric.utils.convert import from_networkx

# Add "src" path to Python path
sys.path.append(os.path.abspath("../src"))

# Import custom graph formatting function
from graph_formatting_utils import format_graph_for_graphsage
from models import GraphSAGE
from losses import unsupervised_loss
from train import train_in_cpu


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.4 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\Usuario\pyenvs\python311_graphsage\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\Usuario\pyenvs\python311_graphsage\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\Usuario\pyenvs\python311_graphsage\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_lo

In [2]:
# Check CUDA status
print("CUDA Available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU detected")
print("CUDA Device Count:", torch.cuda.device_count())

CUDA Available: False
GPU Name: No GPU detected
CUDA Device Count: 0


In [3]:
# This should be removed from this notebook
with open("../data/multihop_graph_w_sem_embeddings.pkl", "rb") as f:
    G = pickle.load(f)

cleaned_G = format_graph_for_graphsage(G, embedding_dim=1024)

In [5]:
# Convert the NetworkX graph to a PyTorch Geometric Data object
data = from_networkx(cleaned_G)

# Ensure the graph is undirected
data.edge_index = to_undirected(data.edge_index)

# Create data attribute "x" containing the embeddings of each node complying with the PyTorch Geometric API
data.x = data.embedding

# Instantiate the GraphSAGE model
model = GraphSAGE(
    in_channels=1024,   # Input features (BGE-M3 embeddings)
    hidden_channels=512,  # First hidden layer (alto para máxima capacidad)
    out_channels=256,   # Output embeddings (más ricos)
    num_layers=2        # Mantenemos 2 capas (2 hops)
)

# Set device for model training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the device
model = model.to(device)

# Instantiate the NeighborLoader for mini-batch training
train_loader = NeighborLoader(
    data,
    num_neighbors=[25, 15],  # 25 neighbors for the first layer, 15 for the second
    batch_size=512,  # Batch size
    shuffle=True
)

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Define scaler if GPU is available
scaler = torch.cuda.amp.GradScaler() if torch.cuda.is_available() else None

In [None]:
if torch.cuda.is_available():
    train_in_cpu(model, train_loader, optimizer, num_epochs=5, loss_fn=unsupervised_loss, debug=True)

elif torch.cuda.is_available():
    pass # Not yet implemented


In [None]:
train_in_cpu(model, train_loader, optimizer, num_epochs=5, loss_fn=unsupervised_loss, debug=True)

Training epochs:   0%|          | 0/5 [00:00<?, ?it/s]

[DEBUG] Batch shapes - batch.x: torch.Size([2773, 1024]), batch.edge_index: torch.Size([2, 8382])
[DEBUG] Model output (z) shape: torch.Size([2773, 256])
[DEBUG] First 5 embeddings: tensor([[-0.0101, -0.4896,  0.1358,  ..., -0.7491,  0.0576,  0.1241],
        [ 0.0436, -0.3235, -0.1622,  ..., -0.5725, -0.0701, -0.1153],
        [ 0.1624, -0.2986, -0.0804,  ..., -0.6393, -0.1481, -0.5903],
        [ 0.3485, -0.3122, -0.0451,  ..., -0.5101,  0.2627, -0.1359],
        [ 0.2253,  0.0853, -0.2239,  ..., -0.7649,  0.4128, -0.1385]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 25.5706




[DEBUG] Accumulated loss so far: 25.5706
[DEBUG] Batch shapes - batch.x: torch.Size([2798, 1024]), batch.edge_index: torch.Size([2, 8838])
[DEBUG] Model output (z) shape: torch.Size([2798, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.0621, -0.0660, -0.2876,  ..., -0.4607, -0.0407, -0.0907],
        [ 0.2154, -0.0383, -0.1937,  ..., -0.6689,  0.2169, -0.5168],
        [ 0.0945, -0.0185,  0.0920,  ..., -0.6374,  0.5040, -0.1033],
        [ 0.3072, -0.1978,  0.0140,  ..., -0.8093, -0.0475, -0.3139],
        [ 0.1989, -0.3348, -0.1228,  ..., -0.2762, -0.0823,  0.1049]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 14.5771




[DEBUG] Accumulated loss so far: 40.1477
[DEBUG] Batch shapes - batch.x: torch.Size([2829, 1024]), batch.edge_index: torch.Size([2, 9014])
[DEBUG] Model output (z) shape: torch.Size([2829, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.4718,  0.0713,  0.0891,  ..., -0.3726, -0.4004, -0.0769],
        [ 0.1594,  0.0526, -0.0022,  ..., -0.5512, -0.0227, -0.2478],
        [ 0.1313, -0.1231,  0.0910,  ..., -0.4452, -0.0829, -0.0795],
        [ 0.1714, -0.0217,  0.1649,  ..., -0.8307, -0.2780, -0.3727],
        [ 0.2161, -0.2287,  0.0990,  ..., -0.4482, -0.1093, -0.0223]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 7.8850




[DEBUG] Accumulated loss so far: 48.0328
[DEBUG] Batch shapes - batch.x: torch.Size([2854, 1024]), batch.edge_index: torch.Size([2, 8864])
[DEBUG] Model output (z) shape: torch.Size([2854, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.5466, -0.0970,  0.1330,  ..., -0.4868, -0.5332, -0.0880],
        [ 0.2894, -0.1147,  0.0031,  ..., -0.4096, -0.4167,  0.0618],
        [ 0.1758, -0.1613, -0.0102,  ..., -0.5733, -0.3134, -0.1464],
        [ 0.3503,  0.0716,  0.0202,  ..., -0.6801, -0.2435, -0.5106],
        [-0.1137, -0.1539,  0.0058,  ..., -0.3366, -0.1248, -0.0309]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 4.5439




[DEBUG] Accumulated loss so far: 52.5766
[DEBUG] Batch shapes - batch.x: torch.Size([2890, 1024]), batch.edge_index: torch.Size([2, 8871])
[DEBUG] Model output (z) shape: torch.Size([2890, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.3719, -0.2395,  0.4243,  ..., -0.4526, -0.5999, -0.0178],
        [ 0.4648,  0.1823,  0.1841,  ..., -0.4102, -0.5580, -0.0431],
        [ 0.1148,  0.0546,  0.1070,  ..., -0.5404, -0.4084, -0.4012],
        [ 0.1310,  0.1657,  0.2092,  ..., -0.7350, -0.4314, -0.2093],
        [ 0.2275, -0.0559, -0.0530,  ..., -0.4931, -0.4034, -0.3024]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 3.3183




[DEBUG] Accumulated loss so far: 55.8949
[DEBUG] Batch shapes - batch.x: torch.Size([2852, 1024]), batch.edge_index: torch.Size([2, 8660])
[DEBUG] Model output (z) shape: torch.Size([2852, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.1374, -0.0617,  0.2583,  ..., -0.6804,  0.1509, -0.5688],
        [ 0.0921, -0.1647,  0.4435,  ..., -0.6013, -0.6714, -0.0429],
        [-0.1905, -0.2266,  0.4538,  ..., -0.4621, -0.4210,  0.2639],
        [ 0.1401,  0.3312,  0.1466,  ..., -0.2685, -0.5533, -0.4255],
        [ 0.2251, -0.0286,  0.0260,  ..., -0.3888, -0.3370, -0.1145]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.9067




[DEBUG] Accumulated loss so far: 58.8016
[DEBUG] Batch shapes - batch.x: torch.Size([2406, 1024]), batch.edge_index: torch.Size([2, 6183])
[DEBUG] Model output (z) shape: torch.Size([2406, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.0459, -0.0556,  0.2159,  ..., -0.5213,  0.0624, -0.2174],
        [ 0.2345, -0.0442,  0.1402,  ..., -0.2845, -0.3533, -0.1663],
        [ 0.3217, -0.1812,  0.2958,  ..., -0.4938, -0.2418, -0.0506],
        [ 0.0911,  0.1471,  0.2066,  ..., -0.1141, -0.6457,  0.1680],
        [-0.0434,  0.0999,  0.2399,  ..., -0.5188, -0.1741, -0.1716]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 3.7911


Training epochs:  20%|██        | 1/5 [06:39<26:38, 399.60s/it, avg_loss=8.9418, mean_norm=3.4058, std_norm=1.0688]

[DEBUG] Accumulated loss so far: 62.5927
[DEBUG] Number of embeddings collected: 7
[DEBUG] Epoch embeddings shape: torch.Size([19402, 256])
[DEBUG] Embeddings norms statistics -> min: 0.4390, max: 6.4891, mean: 3.4058, std: 1.0688




[DEBUG] Batch shapes - batch.x: torch.Size([2806, 1024]), batch.edge_index: torch.Size([2, 8644])
[DEBUG] Model output (z) shape: torch.Size([2806, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.0470,  0.2023,  0.0781,  ..., -0.6321, -0.3384, -0.2242],
        [ 0.0562,  0.0536,  0.3626,  ..., -0.2599, -0.3160, -0.0556],
        [ 0.2222,  0.2649, -0.1964,  ..., -0.2492, -0.2320, -0.0896],
        [ 0.1363, -0.0483,  0.1352,  ..., -0.6678, -0.2166, -0.4793],
        [ 0.2180,  0.0561,  0.0741,  ..., -0.4968, -0.1927, -0.4303]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.5357




[DEBUG] Accumulated loss so far: 2.5357
[DEBUG] Batch shapes - batch.x: torch.Size([2806, 1024]), batch.edge_index: torch.Size([2, 8591])
[DEBUG] Model output (z) shape: torch.Size([2806, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.1985, -0.0076,  0.1607,  ..., -0.0387, -0.2682,  0.0510],
        [-0.0918,  0.1029,  0.5126,  ..., -0.4833, -0.0418,  0.0230],
        [ 0.2078,  0.1090,  0.1198,  ..., -0.6740, -0.3909, -0.3028],
        [-0.0549,  0.0479,  0.1723,  ..., -0.3135, -0.2090, -0.3726],
        [ 0.0584,  0.0595,  0.3689,  ..., -0.2239, -0.5520, -0.0312]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.4742




[DEBUG] Accumulated loss so far: 5.0099
[DEBUG] Batch shapes - batch.x: torch.Size([2845, 1024]), batch.edge_index: torch.Size([2, 8720])
[DEBUG] Model output (z) shape: torch.Size([2845, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2772,  0.1450,  0.5209,  ..., -0.1903, -0.5816,  0.0809],
        [ 0.0891,  0.1393,  0.2236,  ..., -0.3898, -0.1523, -0.3916],
        [-0.1833,  0.1204,  0.4451,  ..., -0.4590, -0.2104,  0.0066],
        [ 0.1333, -0.1049,  0.1234,  ..., -0.0477, -0.3847, -0.0121],
        [ 0.2881, -0.0772,  0.5542,  ..., -0.3071, -0.4960, -0.0125]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.3597




[DEBUG] Accumulated loss so far: 7.3696
[DEBUG] Batch shapes - batch.x: torch.Size([2879, 1024]), batch.edge_index: torch.Size([2, 8917])
[DEBUG] Model output (z) shape: torch.Size([2879, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2268,  0.0492,  0.2546,  ..., -0.3265, -0.5711, -0.1367],
        [ 0.1798,  0.2035,  0.4192,  ..., -0.2950, -0.4752, -0.1757],
        [ 0.2353,  0.1532,  0.2346,  ..., -0.4805, -0.2565, -0.4424],
        [ 0.1850, -0.1047,  0.1528,  ..., -0.1120, -0.2240, -0.2068],
        [ 0.1903,  0.0449,  0.6091,  ..., -0.1657, -0.0735, -0.6901]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.2515




[DEBUG] Accumulated loss so far: 9.6211
[DEBUG] Batch shapes - batch.x: torch.Size([2748, 1024]), batch.edge_index: torch.Size([2, 8647])
[DEBUG] Model output (z) shape: torch.Size([2748, 256])
[DEBUG] First 5 embeddings: tensor([[-0.0263,  0.2051,  0.1432,  ..., -0.5264, -0.0757, -0.5416],
        [ 0.3643,  0.1926,  0.4340,  ..., -0.4697, -0.2573, -0.5157],
        [ 0.1040, -0.1549,  0.1190,  ..., -0.3709, -0.6279, -0.2750],
        [ 0.1676,  0.2346,  0.1887,  ..., -0.5384, -0.4016, -0.2142],
        [ 0.2395,  0.0887,  0.5063,  ..., -0.5507, -0.1983, -0.4372]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.2429




[DEBUG] Accumulated loss so far: 11.8640
[DEBUG] Batch shapes - batch.x: torch.Size([2824, 1024]), batch.edge_index: torch.Size([2, 8565])
[DEBUG] Model output (z) shape: torch.Size([2824, 256])
[DEBUG] First 5 embeddings: tensor([[-0.1220,  0.1716,  0.5887,  ..., -0.2364, -0.4449, -0.3976],
        [ 0.1003,  0.2963,  0.1157,  ..., -0.5032, -0.1963, -0.4133],
        [ 0.0928,  0.0538,  0.0787,  ..., -0.1653, -0.3859, -0.4108],
        [ 0.2056,  0.0204,  0.0605,  ..., -0.1704, -0.4365, -0.3054],
        [ 0.1153,  0.1878,  0.1990,  ..., -0.5844, -0.4910, -0.3941]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.4104




[DEBUG] Accumulated loss so far: 14.2744
[DEBUG] Batch shapes - batch.x: torch.Size([2359, 1024]), batch.edge_index: torch.Size([2, 6387])
[DEBUG] Model output (z) shape: torch.Size([2359, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.3541,  0.1138,  0.3918,  ..., -0.2143, -0.3387, -0.1860],
        [ 0.0600, -0.1749,  0.5074,  ..., -0.2238, -0.6953, -0.1951],
        [ 0.2597, -0.1498,  0.6086,  ..., -0.3014, -0.5990, -0.1343],
        [ 0.3109,  0.0443,  0.5209,  ..., -0.3601, -0.3076, -0.2531],
        [-0.0160, -0.1513,  0.5545,  ..., -0.3378, -0.0336,  0.0291]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.9731


Training epochs:  40%|████      | 2/5 [13:09<19:42, 394.04s/it, avg_loss=2.4639, mean_norm=3.6657, std_norm=1.4974]

[DEBUG] Accumulated loss so far: 17.2474
[DEBUG] Number of embeddings collected: 7
[DEBUG] Epoch embeddings shape: torch.Size([19267, 256])
[DEBUG] Embeddings norms statistics -> min: 0.4373, max: 7.1074, mean: 3.6657, std: 1.4974




[DEBUG] Batch shapes - batch.x: torch.Size([2885, 1024]), batch.edge_index: torch.Size([2, 8910])
[DEBUG] Model output (z) shape: torch.Size([2885, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.1922,  0.0493,  0.6151,  ..., -0.2620, -0.6148, -0.0654],
        [ 0.0585,  0.0452,  0.2979,  ..., -0.3822, -0.4804, -0.0854],
        [ 0.1152,  0.0284,  0.3339,  ..., -0.7122, -0.6350, -0.2686],
        [ 0.1766, -0.0100,  0.4114,  ..., -0.3708, -0.4349, -0.2257],
        [ 0.2152,  0.3070,  0.2967,  ..., -0.0657, -0.5135, -0.4032]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.2317




[DEBUG] Accumulated loss so far: 2.2317
[DEBUG] Batch shapes - batch.x: torch.Size([2766, 1024]), batch.edge_index: torch.Size([2, 8310])
[DEBUG] Model output (z) shape: torch.Size([2766, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2061, -0.1660,  0.4211,  ..., -0.2679, -0.1810, -0.5953],
        [ 0.3445,  0.0125,  0.3468,  ..., -0.2850, -0.4822, -0.2517],
        [ 0.2063,  0.0667,  0.2685,  ..., -0.5549, -0.4558, -0.5605],
        [ 0.0751, -0.0067,  0.0717,  ..., -0.2074, -0.5124, -0.2897],
        [ 0.0501,  0.0539, -0.0565,  ..., -0.1868, -0.8410, -0.0753]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.3331




[DEBUG] Accumulated loss so far: 4.5648
[DEBUG] Batch shapes - batch.x: torch.Size([2786, 1024]), batch.edge_index: torch.Size([2, 8534])
[DEBUG] Model output (z) shape: torch.Size([2786, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2338,  0.1606,  0.4001,  ..., -0.4662, -0.6151, -0.4738],
        [ 0.2193,  0.0864,  0.5152,  ..., -0.1697, -0.5510, -0.3216],
        [ 0.2114,  0.0993,  0.5347,  ..., -0.4274, -0.5056, -0.4078],
        [ 0.4386,  0.0636,  0.5588,  ..., -0.0753, -0.4516, -0.1543],
        [ 0.2411,  0.0446,  0.4528,  ..., -0.3935, -0.2746, -0.2295]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.1492




[DEBUG] Accumulated loss so far: 6.7141
[DEBUG] Batch shapes - batch.x: torch.Size([2750, 1024]), batch.edge_index: torch.Size([2, 8348])
[DEBUG] Model output (z) shape: torch.Size([2750, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.1617,  0.4305,  0.4720,  ..., -0.2742, -0.4756, -0.2028],
        [ 0.3053,  0.0822,  0.6215,  ..., -0.2653, -0.4871, -0.1889],
        [ 0.1339,  0.0067,  0.3717,  ..., -0.5554, -0.6573, -0.5196],
        [ 0.3784,  0.0445,  0.3911,  ..., -0.4906, -0.6839, -0.2329],
        [ 0.3523,  0.0582,  0.3530,  ..., -0.3209, -0.3444, -0.3927]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.1726




[DEBUG] Accumulated loss so far: 8.8867
[DEBUG] Batch shapes - batch.x: torch.Size([2892, 1024]), batch.edge_index: torch.Size([2, 9157])
[DEBUG] Model output (z) shape: torch.Size([2892, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.3835,  0.2286,  0.3655,  ..., -0.4844, -0.6460, -0.7159],
        [ 0.2711,  0.0919,  0.3687,  ..., -0.5370, -0.0794, -0.6587],
        [ 0.2786,  0.0799,  0.3664,  ..., -0.2929, -0.6123, -0.3540],
        [ 0.2715,  0.2884,  0.3384,  ..., -0.6011, -0.3424, -0.5151],
        [ 0.3214,  0.2963,  0.4704,  ..., -0.4967, -0.4264, -0.3744]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.8960




[DEBUG] Accumulated loss so far: 10.7826
[DEBUG] Batch shapes - batch.x: torch.Size([2852, 1024]), batch.edge_index: torch.Size([2, 8886])
[DEBUG] Model output (z) shape: torch.Size([2852, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.1694,  0.1298,  0.4012,  ..., -0.5644, -0.3847, -0.6111],
        [ 0.0459, -0.2942,  0.4340,  ..., -0.3680, -0.4757, -0.2629],
        [ 0.2078,  0.2097,  0.4605,  ..., -0.4894, -0.7084, -0.7512],
        [ 0.2556, -0.0276,  0.3014,  ..., -0.3587, -0.5148, -0.6329],
        [ 0.3010, -0.0667,  0.2679,  ..., -0.7648, -0.7865, -0.6452]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.8431




[DEBUG] Accumulated loss so far: 12.6258
[DEBUG] Batch shapes - batch.x: torch.Size([2382, 1024]), batch.edge_index: torch.Size([2, 6133])
[DEBUG] Model output (z) shape: torch.Size([2382, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2537, -0.2252,  0.2051,  ..., -0.4556, -0.7526, -0.4772],
        [ 0.3726,  0.0623,  0.3618,  ..., -0.0409, -0.5009, -0.2764],
        [ 0.1556, -0.2455,  0.3042,  ..., -0.3108, -0.5647, -0.0567],
        [ 0.3227,  0.1791,  0.2187,  ..., -0.5935, -0.6921, -0.3183],
        [ 0.1611, -0.0269,  0.3571,  ..., -0.3375, -0.5286, -0.4139]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.8144


Training epochs:  60%|██████    | 3/5 [19:34<12:59, 389.98s/it, avg_loss=2.2057, mean_norm=4.2927, std_norm=2.1364]

[DEBUG] Accumulated loss so far: 15.4401
[DEBUG] Number of embeddings collected: 7
[DEBUG] Epoch embeddings shape: torch.Size([19313, 256])
[DEBUG] Embeddings norms statistics -> min: 0.4367, max: 8.0040, mean: 4.2927, std: 2.1364




[DEBUG] Batch shapes - batch.x: torch.Size([2889, 1024]), batch.edge_index: torch.Size([2, 8780])
[DEBUG] Model output (z) shape: torch.Size([2889, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2837, -0.0332,  0.0774,  ..., -0.6588, -0.8168, -0.3877],
        [ 0.2594,  0.1550,  0.2262,  ..., -0.3599, -0.3778, -0.3628],
        [ 0.1017,  0.1797,  0.5124,  ..., -0.3922, -0.5404, -0.6578],
        [ 0.1667, -0.0942,  0.3699,  ..., -0.2805, -0.5095, -0.1990],
        [ 0.1517,  0.0402,  0.3128,  ..., -0.1600, -0.3909, -0.5813]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.9579




[DEBUG] Accumulated loss so far: 1.9579
[DEBUG] Batch shapes - batch.x: torch.Size([2800, 1024]), batch.edge_index: torch.Size([2, 8638])
[DEBUG] Model output (z) shape: torch.Size([2800, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.5413,  0.0833,  0.3952,  ..., -0.5503, -0.1974, -0.6337],
        [ 0.3844,  0.0475,  0.3939,  ..., -0.2546, -0.4917, -0.4577],
        [ 0.2234, -0.1316,  0.2326,  ..., -0.3068, -0.6977, -0.3254],
        [ 0.4140, -0.0426,  0.2887,  ..., -0.2586, -0.4843, -0.3446],
        [ 0.3110,  0.0289,  0.1681,  ..., -0.3870, -0.7904, -0.3347]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.8429




[DEBUG] Accumulated loss so far: 3.8008
[DEBUG] Batch shapes - batch.x: torch.Size([2784, 1024]), batch.edge_index: torch.Size([2, 8771])
[DEBUG] Model output (z) shape: torch.Size([2784, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.3537, -0.0163,  0.3220,  ..., -0.3257, -0.6041, -0.4898],
        [ 0.3427, -0.1527,  0.2004,  ..., -0.3842, -0.5421, -0.6784],
        [ 0.2279, -0.1471,  0.2607,  ..., -0.2943, -0.7689, -0.0661],
        [ 0.2902,  0.0279,  0.5068,  ..., -0.2260, -0.6080, -0.4051],
        [ 0.2588, -0.1658,  0.0357,  ..., -0.3046, -0.4985, -0.7199]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.7025




[DEBUG] Accumulated loss so far: 5.5033
[DEBUG] Batch shapes - batch.x: torch.Size([2854, 1024]), batch.edge_index: torch.Size([2, 8867])
[DEBUG] Model output (z) shape: torch.Size([2854, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2827,  0.0645,  0.2641,  ..., -0.4948, -0.6407, -0.5664],
        [ 0.2881,  0.1753,  0.4288,  ..., -0.5652, -0.6045, -0.5630],
        [ 0.2985, -0.2441,  0.1205,  ..., -0.3582, -0.7482, -0.5016],
        [ 0.3161,  0.0875,  0.3107,  ..., -0.5806, -0.3449, -0.6780],
        [ 0.3337,  0.0611,  0.4244,  ..., -0.5459, -0.6108, -0.1281]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.8153




[DEBUG] Accumulated loss so far: 7.3186
[DEBUG] Batch shapes - batch.x: torch.Size([2802, 1024]), batch.edge_index: torch.Size([2, 8800])
[DEBUG] Model output (z) shape: torch.Size([2802, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2668,  0.0871,  0.3107,  ..., -0.3322, -0.1926, -0.5892],
        [ 0.3428, -0.0014,  0.6682,  ..., -0.6321, -0.3858, -0.3626],
        [ 0.2744,  0.0833,  0.2790,  ..., -0.6371, -0.4449, -0.6226],
        [ 0.3945,  0.0615,  0.3867,  ..., -0.5298, -0.4566, -0.4558],
        [ 0.0965, -0.0627,  0.3224,  ..., -0.2823, -0.4309, -0.4407]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.6906




[DEBUG] Accumulated loss so far: 9.0092
[DEBUG] Batch shapes - batch.x: torch.Size([2804, 1024]), batch.edge_index: torch.Size([2, 8573])
[DEBUG] Model output (z) shape: torch.Size([2804, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.3041,  0.0573,  0.4575,  ..., -0.5686, -0.1946, -0.5967],
        [ 0.2456,  0.1383,  0.4430,  ..., -0.1955, -0.5967, -0.4208],
        [ 0.3367, -0.1472,  0.0857,  ..., -0.1636, -0.8679, -0.5800],
        [ 0.2215,  0.2464,  0.1431,  ..., -0.6139, -0.3824, -0.5148],
        [ 0.2453, -0.0729,  0.2741,  ..., -0.4497, -0.5401, -0.5545]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.7818




[DEBUG] Accumulated loss so far: 10.7910
[DEBUG] Batch shapes - batch.x: torch.Size([2423, 1024]), batch.edge_index: torch.Size([2, 6444])
[DEBUG] Model output (z) shape: torch.Size([2423, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2191,  0.2102,  0.4886,  ..., -0.5531, -0.5146, -0.2580],
        [ 0.1924,  0.2220,  0.1624,  ..., -0.3235, -0.4914, -0.6537],
        [ 0.3377,  0.1252,  0.4028,  ..., -0.4688, -0.5944, -0.3222],
        [ 0.3292, -0.2777,  0.3547,  ..., -0.4991, -0.6519, -0.5868],
        [ 0.4829,  0.2937,  0.4211,  ..., -0.5163, -0.5608, -0.4707]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.3903


Training epochs:  80%|████████  | 4/5 [26:20<06:36, 396.20s/it, avg_loss=1.8830, mean_norm=4.7564, std_norm=2.6219]

[DEBUG] Accumulated loss so far: 13.1813
[DEBUG] Number of embeddings collected: 7
[DEBUG] Epoch embeddings shape: torch.Size([19356, 256])
[DEBUG] Embeddings norms statistics -> min: 0.4354, max: 9.1650, mean: 4.7564, std: 2.6219




[DEBUG] Batch shapes - batch.x: torch.Size([2795, 1024]), batch.edge_index: torch.Size([2, 8560])
[DEBUG] Model output (z) shape: torch.Size([2795, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.3277, -0.0101,  0.4154,  ..., -0.3088, -0.5340, -0.5087],
        [ 0.3976,  0.2775,  0.2818,  ..., -0.2362, -0.4160, -0.4564],
        [ 0.3065,  0.1928,  0.2304,  ..., -0.7395, -0.1649, -0.4074],
        [ 0.1729,  0.1074,  0.1990,  ..., -0.1659, -0.5409, -0.4502],
        [ 0.1113, -0.0098,  0.2369,  ..., -0.4866, -0.4722, -0.7185]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.7487




[DEBUG] Accumulated loss so far: 1.7487
[DEBUG] Batch shapes - batch.x: torch.Size([2785, 1024]), batch.edge_index: torch.Size([2, 8340])
[DEBUG] Model output (z) shape: torch.Size([2785, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2336, -0.1675,  0.4509,  ..., -0.1411, -0.5777, -0.8033],
        [ 0.3440,  0.1273,  0.1850,  ..., -0.6093, -0.4617, -0.6069],
        [ 0.3207,  0.0521,  0.2820,  ..., -0.6605, -0.4404, -0.6414],
        [ 0.2256,  0.1049,  0.2061,  ..., -0.3810, -0.5818, -0.4300],
        [ 0.4286, -0.0547, -0.0583,  ..., -0.5105, -0.6870, -0.7705]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.8330




[DEBUG] Accumulated loss so far: 3.5817
[DEBUG] Batch shapes - batch.x: torch.Size([2930, 1024]), batch.edge_index: torch.Size([2, 8998])
[DEBUG] Model output (z) shape: torch.Size([2930, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.4096,  0.0749,  0.4121,  ..., -0.6657, -0.3841, -0.7085],
        [ 0.3127,  0.1978,  0.3142,  ..., -0.8632, -0.4962, -0.8640],
        [ 0.6495,  0.2107,  0.1181,  ..., -0.1865, -0.2582, -0.5027],
        [ 0.5478,  0.2318,  0.3735,  ..., -0.6707, -0.6845, -0.5810],
        [ 0.4532,  0.0515,  0.4222,  ..., -0.3597, -0.4222, -0.6993]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.7217




[DEBUG] Accumulated loss so far: 5.3034
[DEBUG] Batch shapes - batch.x: torch.Size([2797, 1024]), batch.edge_index: torch.Size([2, 8619])
[DEBUG] Model output (z) shape: torch.Size([2797, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.3510, -0.1994,  0.2283,  ..., -0.4365, -0.8389, -0.4603],
        [ 0.3388,  0.0074,  0.4425,  ..., -0.1792, -0.5969, -0.5290],
        [ 0.4168,  0.0289,  0.3785,  ..., -0.3978, -0.4384, -0.4695],
        [ 0.3633, -0.0554,  0.1131,  ..., -0.3959, -0.6639, -0.4465],
        [ 0.4129,  0.0451,  0.6237,  ..., -0.2535, -0.2577, -0.4858]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.6373




[DEBUG] Accumulated loss so far: 6.9407
[DEBUG] Batch shapes - batch.x: torch.Size([2716, 1024]), batch.edge_index: torch.Size([2, 8338])
[DEBUG] Model output (z) shape: torch.Size([2716, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.5027,  0.0290,  0.5306,  ..., -0.5743, -0.3836, -0.8134],
        [ 0.3461, -0.0043,  0.5708,  ..., -0.7560, -0.2998, -0.2275],
        [ 0.4561,  0.0297,  0.5090,  ..., -0.4550, -0.0548, -0.4251],
        [ 0.3477, -0.0216,  0.2632,  ..., -0.4864, -0.5904, -0.3621],
        [ 0.5156,  0.0961,  0.4605,  ..., -0.4799, -0.5555, -0.4197]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.6771




[DEBUG] Accumulated loss so far: 8.6178
[DEBUG] Batch shapes - batch.x: torch.Size([2872, 1024]), batch.edge_index: torch.Size([2, 9101])
[DEBUG] Model output (z) shape: torch.Size([2872, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.4203, -0.2545,  0.4813,  ..., -0.7020, -0.6237, -0.3962],
        [ 0.3759, -0.2042,  0.1762,  ..., -0.3605, -0.4915, -0.5231],
        [ 0.4724, -0.0147,  0.2795,  ..., -0.4084, -0.5542, -0.5146],
        [ 0.4535, -0.1075,  0.4356,  ..., -0.5183, -0.6925, -0.4122],
        [ 0.6190, -0.1299,  0.4555,  ..., -0.6974, -0.2556, -0.5016]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 1.5323




[DEBUG] Accumulated loss so far: 10.1500
[DEBUG] Batch shapes - batch.x: torch.Size([2444, 1024]), batch.edge_index: torch.Size([2, 6446])
[DEBUG] Model output (z) shape: torch.Size([2444, 256])
[DEBUG] First 5 embeddings: tensor([[ 0.2809,  0.0512,  0.5101,  ..., -0.6815, -0.4657, -0.6115],
        [ 0.4843, -0.0774,  0.4034,  ..., -0.6615, -0.3689, -0.7927],
        [ 0.2194,  0.0791,  0.3112,  ..., -0.3385, -0.4928, -0.3674],
        [ 0.7366, -0.0839,  0.3669,  ..., -0.7824, -0.4686, -0.7601],
        [ 0.2341,  0.1427,  0.5700,  ..., -0.4824, -0.4742, -0.4194]],
       grad_fn=<SliceBackward0>)
[DEBUG] Loss value for this batch: 2.2345


In [16]:
# Save embeddings as .npy file

## 1) Get the device from any parameter in the model
#device = next(model.parameters()).device
#
## 2) Now move your data.x and data.edge_index to that device:
#data_x = data.x.to(device)
#data_edge_index = data.edge_index.to(device)
#
## 3) Forward pass with torch.no_grad():
#with torch.no_grad():
#    embeddings = model(data_x, data_edge_index)
#
#embeddings_np = embeddings.cpu().numpy()
#np.save("../data/graphsage_embeddings.npy", embeddings_np)
#print("Embeddings saved to graphsage_embeddings.npy")


In [17]:
## Add Graph_SAGE embeddings to the baseline graph
#
## 1) Move data to the same device as the model
#device = next(model.parameters()).device
#data_x = data.x.to(device)
#data_edge_index = data.edge_index.to(device)
#
## 2) Obtain final embeddings from the trained model
#with torch.no_grad():
#    final_emb = model(data_x, data_edge_index)  # shape [num_nodes, embedding_dim]
#    final_emb_np = final_emb.cpu().numpy()
#
## 3) Add them back to the cleaned_G graph
#list_of_nodes = list(G.nodes())  # Must match the node ordering in data
#for i, node in enumerate(list_of_nodes):
#    # Store as a NumPy array (or you could store as a list if you prefer)
#    G.nodes[node]["SAGE_embedding"] = final_emb_np[i]
#
#print("SAGE embeddings added to G under 'SAGE_embedding' attribute.")
#

SAGE embeddings added to G under 'SAGE_embedding' attribute.


In [18]:
#with open(f"../data/multihop_graph_w_sage{num_epochs}_embeddings_1hop.pkl", "wb") as f:
#    pickle.dump(G, f)

In [19]:
## Save the model's state_dict to disk
#torch.save(model.state_dict(), f"../data/graphsage_{num_epochs}_model_1hop.pth")
#print("Model saved to graphsage_model.pth")
#

Model saved to graphsage_model.pth


In [20]:
#def get_new_node_embedding(model, new_feature, device, self_loop=True):
#    """
#    Generate an embedding for a new node using the trained GraphSAGE model.
#    
#    Parameters:
#      model (torch.nn.Module): The trained GraphSAGE model.
#      new_feature (torch.Tensor): The new node's initial features of shape (1024,).
#      device (torch.device): The device (e.g., cuda) on which the model is located.
#      self_loop (bool, default=True): If True, adds a self-loop edge (node connected to itself).
#                                      This simulates neighbor aggregation when no neighbors exist.
#    
#    Returns:
#      torch.Tensor: The new node's embedding of shape (out_channels,).
#    """
#    # Set the model to evaluation mode.
#    model.eval()
#    
#    # Move the new node's features to the correct device.
#    new_feature = new_feature.to(device)
#    
#    if self_loop:
#        # Create a self-loop edge_index. This indicates that the node is connected to itself.
#        # The edge_index tensor must have shape [2, num_edges]; here we create a single edge (0,0).
#        edge_index = torch.tensor([[0], [0]], dtype=torch.long, device=device)
#    else:
#        # Alternatively, if you prefer no edges, you can pass an empty edge_index.
#        # Note: Without a self-loop, the model may not transform the features as intended.
#        edge_index = torch.empty((2, 0), dtype=torch.long, device=device)
#    
#    # GraphSAGE expects a batch dimension, so unsqueeze new_feature to shape [1, 1024].
#    with torch.no_grad():
#        new_embedding = model(new_feature.unsqueeze(0), edge_index)
#    
#    # Remove the batch dimension to return a tensor of shape [out_channels].
#    return new_embedding.squeeze(0)
#
## Example usage:
## Assuming 'model' is your trained GraphSAGE and you have a new node feature vector of size 1024.
#new_feature = torch.randn(1024)  # Replace with the actual feature vector for the new node.
#device = next(model.parameters()).device  # Get the device from the model.
#new_node_embedding = get_new_node_embedding(model, new_feature, device, self_loop=True)
#print("New node embedding shape:", new_node_embedding.shape)
#

New node embedding shape: torch.Size([256])
