In [1]:

import os

import torch

from torch_geometric.nn import Node2Vec

# Train on the GPU when PyTorch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device


'cuda'

In [2]:
from scipy.spatial import distance

In [3]:
def adjacency_to_edge_index(file):
    """Convert an adjacency-list text source into a two-row edge index.

    Line ``i`` of ``file`` holds the comma-separated integer ids of the nodes
    that node ``i`` has an edge to.

    Args:
        file: Any iterable of strings, e.g. an open text file or a list of
            lines. Each item is treated as one adjacency line.

    Returns:
        A list ``[sources, targets]`` of two equal-length integer lists, where
        ``sources[k] -> targets[k]`` is the k-th edge in file order.

    Raises:
        ValueError: If a non-blank token cannot be parsed as an integer.
    """
    sources, targets = [], []
    for indx, line in enumerate(file):
        for token in line.split(","):
            token = token.strip()
            # A blank line (node without neighbours) or a trailing comma
            # produces an empty token; skip it instead of failing in int("").
            # The line index still advances, so later node ids stay aligned.
            if not token:
                continue
            sources.append(indx)
            targets.append(int(token))

    return [sources, targets]

In [4]:
# Directory holding the adjacency-list datasets. It is rooted at
# $CVF_PROJECT_DIR; when that variable is unset the path is relative to the
# current working directory.
dataset_dir = os.path.join(
    os.getenv("CVF_PROJECT_DIR", ""), "cvf-analysis", "v2", "datasets", "coloring"
)
# Alternative dataset files (uncomment one to switch):
# edge_index_file = "tiny_graph_test_pt_adj_list.txt"
# edge_index_file = "small_graph_test_pt_adj_list.txt"
edge_index_file = "graph_1_pt_adj_list.txt"

# Open the file in a context manager so the handle is closed as soon as the
# edge list has been parsed, instead of staying open for the kernel's lifetime.
with open(os.path.join(dataset_dir, edge_index_file), "r") as f:
    edge_index = torch.tensor(adjacency_to_edge_index(f)).to(device)


In [5]:
# Inspect the parsed edge index: row 0 holds source node ids (the line index
# in the adjacency file), row 1 holds the corresponding target node ids.
edge_index

tensor([[   0,    0,    0,  ..., 5117, 5118, 5119],
        [ 512,  256,  128,  ..., 5117, 5118, 5119]], device='cuda:0')

In [6]:
# Node2Vec hyper-parameters, grouped in one place so they are easy to tune.
# p and q are node2vec's return / in-out walk parameters.
node2vec_kwargs = dict(
    embedding_dim=64,
    walks_per_node=10,
    walk_length=10,
    context_size=10,
    p=1.0,
    q=1.0,
    num_negative_samples=1,
)

# Build the embedding model over the graph and move it to the chosen device.
model = Node2Vec(edge_index, **node2vec_kwargs)
model = model.to(device)


In [7]:
# Batch loader provided by the model; train() unpacks each batch into a
# (pos_rw, neg_rw) pair and feeds it to model.loss.
loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
# Dense Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Alternative optimizer, currently disabled.
# NOTE(review): SparseAdam presumably requires the model to be built with
# sparse embedding gradients - confirm before enabling.
# optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)


def train():
    """Run one pass over ``loader`` and return the mean per-batch loss.

    Uses the module-level ``model``, ``loader``, ``optimizer`` and ``device``.
    For every batch the gradients are reset, the model's loss on the
    (pos_rw, neg_rw) pair is back-propagated, and the optimizer takes a step.

    Returns:
        The sum of the per-batch loss values divided by ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        batch_loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        batch_loss.backward()
        optimizer.step()
        # .item() converts the scalar loss tensor to a plain Python number.
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

In [8]:
# @torch.no_grad()
# def test():
#     model.eval()
#     z = model()
#     acc = model.test(
#         train_z=z[data.train_mask],
#         train_y=data.y[data.train_mask],
#         test_z=z[data.test_mask],
#         test_y=data.y[data.test_mask],
#         max_iter=150,
#     )
#     return acc

In [9]:
# Train for a fixed number of epochs, printing the mean batch loss returned
# by train() after each one (epochs are numbered from 1).
num_epochs = 100
for epoch in range(1, num_epochs + 1):
    loss = train()
    # Accuracy reporting stays disabled while test() is commented out:
    # acc = test()
    # print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Acc: {acc:.4f}')
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')


Epoch: 001, Loss: 4.0315
Epoch: 002, Loss: 3.5715
Epoch: 003, Loss: 3.1638
Epoch: 004, Loss: 2.8416
Epoch: 005, Loss: 2.5700
Epoch: 006, Loss: 2.3328
Epoch: 007, Loss: 2.1249
Epoch: 008, Loss: 1.9375
Epoch: 009, Loss: 1.7866
Epoch: 010, Loss: 1.6458
Epoch: 011, Loss: 1.5267
Epoch: 012, Loss: 1.4216
Epoch: 013, Loss: 1.3238
Epoch: 014, Loss: 1.2437
Epoch: 015, Loss: 1.1748
Epoch: 016, Loss: 1.1138
Epoch: 017, Loss: 1.0601
Epoch: 018, Loss: 1.0195
Epoch: 019, Loss: 0.9786
Epoch: 020, Loss: 0.9475
Epoch: 021, Loss: 0.9193
Epoch: 022, Loss: 0.8965
Epoch: 023, Loss: 0.8805
Epoch: 024, Loss: 0.8654
Epoch: 025, Loss: 0.8541
Epoch: 026, Loss: 0.8415
Epoch: 027, Loss: 0.8330
Epoch: 028, Loss: 0.8258
Epoch: 029, Loss: 0.8181
Epoch: 030, Loss: 0.8115
Epoch: 031, Loss: 0.8083
Epoch: 032, Loss: 0.8049
Epoch: 033, Loss: 0.8020
Epoch: 034, Loss: 0.7988
Epoch: 035, Loss: 0.7958
Epoch: 036, Loss: 0.7929
Epoch: 037, Loss: 0.7906
Epoch: 038, Loss: 0.7891
Epoch: 039, Loss: 0.7865
Epoch: 040, Loss: 0.7846


In [10]:
# Calling the model with no arguments returns its embedding parameter
# (see the "Parameter containing" output); display it for inspection.
model()

Parameter containing:
tensor([[-0.0207, -0.1132, -0.0707,  ...,  0.4755,  0.4100, -0.2593],
        [ 0.1342, -0.1445,  0.0655,  ...,  0.2325,  0.4112, -0.4656],
        [-0.1490, -0.0810, -0.1624,  ...,  0.2866,  0.3027,  0.0756],
        ...,
        [-0.0076,  0.0551,  0.2964,  ..., -0.0704, -0.6598,  0.0297],
        [-0.3267, -0.1770,  0.2828,  ..., -0.2829,  0.1600, -0.1809],
        [-0.2600, -0.4171, -0.1889,  ...,  0.3057, -0.5799,  0.0369]],
       device='cuda:0', requires_grad=True)

In [16]:
# Row indices (node ids) of the two embeddings to compare.
indx1, indx2 = 0, 4

model.eval()
# Detach the embedding matrix from autograd and copy it to host memory as a
# NumPy array so SciPy can operate on it.
params = model().detach().cpu().numpy()
# Cosine distance between the two selected embedding rows.
cosine_distance = distance.cosine(params[indx1], params[indx2])
cosine_distance

0.6629246499697329