In [1]:

import os

import torch

from torch_geometric.nn import Node2Vec

# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device


'cuda'

In [2]:
from scipy.spatial import distance

In [3]:
def adjacency_to_edge_index(file):
    """Convert an adjacency-list text stream into a COO-style edge index.

    Line ``i`` of ``file`` (0-based) lists the neighbours of node ``i`` as
    comma-separated integers. Every neighbour ``j`` found on line ``i``
    contributes one directed edge ``i -> j``.

    Blank lines (nodes without neighbours) and empty fields such as a
    trailing comma are skipped instead of raising ``ValueError``; the line
    still counts towards the node numbering.

    Args:
        file: Any iterable of text lines (an open text file, a list of
            strings, ...).

    Returns:
        A two-element list ``[sources, targets]`` of equal-length lists of
        ints, where ``sources[k] -> targets[k]`` is the k-th edge.

    Raises:
        ValueError: If a non-empty field is not a valid integer.
    """
    sources, targets = [], []
    for indx, line in enumerate(file):
        for field in line.split(","):
            field = field.strip()
            if not field:
                # "".split(",") yields [""], so an empty line or a trailing
                # comma would otherwise reach int("") and crash.
                continue
            sources.append(indx)
            targets.append(int(field))

    return [sources, targets]

In [4]:
# Location of the graph-coloring datasets, rooted at $CVF_PROJECT_DIR
# (falls back to a path relative to the working directory when unset).
dataset_dir = os.path.join(
    os.getenv("CVF_PROJECT_DIR", ""), "cvf-analysis", "v2", "datasets", "coloring"
)
# Alternative input files:
# edge_index_file = "tiny_graph_test_pt_adj_list.txt"
# edge_index_file = "small_graph_test_pt_adj_list.txt"
edge_index_file = "graph_1_pt_adj_list.txt"

# Read the adjacency list inside a context manager so the file handle is
# closed as soon as parsing finishes (the original left it open).
with open(os.path.join(dataset_dir, edge_index_file), "r") as f:
    edge_index = torch.tensor(adjacency_to_edge_index(f)).to(device)


In [5]:
edge_index

tensor([[   0,    0,    0,  ..., 5117, 5118, 5119],
        [ 512,  256,  128,  ..., 5117, 5118, 5119]], device='cuda:0')

In [None]:
# Node2Vec hyperparameters, gathered in one place so they are easy to tune.
# NOTE(review): the saved `model()` output further down shows 2-column
# embeddings, so it was not produced with the embedding_dim=4 set here (this
# cell is also marked as not executed) — re-run all cells before trusting it.
node2vec_kwargs = dict(
    embedding_dim=4,
    walks_per_node=5,
    walk_length=5,
    context_size=3,
    p=1.0,
    q=1.0,
    num_negative_samples=1,
)

model = Node2Vec(edge_index, **node2vec_kwargs)
model = model.to(device)


In [7]:
# Mini-batch loader supplied by the model; each batch is consumed by train()
# as a (positive walks, negative walks) pair.
loader = model.loader(
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

# Adam over every parameter of the model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

# Alternative optimizer, currently unused:
# optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)


def train():
    """Run one pass over ``loader`` and return the mean per-batch loss.

    Uses the notebook-level ``model``, ``loader``, ``optimizer`` and
    ``device``. For each batch the positive and negative walks are moved to
    ``device``, the model loss is back-propagated and one optimizer step is
    taken.

    Returns:
        float: Sum of the batch losses divided by the number of batches.
    """
    model.train()
    running_loss = 0
    for positive_walks, negative_walks in loader:
        optimizer.zero_grad()
        batch_loss = model.loss(
            positive_walks.to(device),
            negative_walks.to(device),
        )
        batch_loss.backward()
        optimizer.step()
        # .item() extracts a plain Python number from the loss tensor.
        running_loss += batch_loss.item()
    n_batches = len(loader)
    return running_loss / n_batches

In [8]:
# @torch.no_grad()
# def test():
#     model.eval()
#     z = model()
#     acc = model.test(
#         train_z=z[data.train_mask],
#         train_y=data.y[data.train_mask],
#         test_z=z[data.test_mask],
#         test_y=data.y[data.test_mask],
#         max_iter=150,
#     )
#     return acc

In [9]:
# Train for epochs 1..100 and report the mean batch loss after each epoch.
# Accuracy reporting is omitted because the evaluation cell is commented out.
for epoch in range(1, 100 + 1):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')


Epoch: 001, Loss: 1.2312
Epoch: 002, Loss: 1.1926
Epoch: 003, Loss: 1.1629
Epoch: 004, Loss: 1.1437
Epoch: 005, Loss: 1.1252
Epoch: 006, Loss: 1.1079
Epoch: 007, Loss: 1.0962
Epoch: 008, Loss: 1.0872
Epoch: 009, Loss: 1.0775
Epoch: 010, Loss: 1.0690
Epoch: 011, Loss: 1.0637
Epoch: 012, Loss: 1.0563
Epoch: 013, Loss: 1.0521
Epoch: 014, Loss: 1.0504
Epoch: 015, Loss: 1.0456
Epoch: 016, Loss: 1.0426
Epoch: 017, Loss: 1.0419
Epoch: 018, Loss: 1.0374
Epoch: 019, Loss: 1.0351
Epoch: 020, Loss: 1.0354
Epoch: 021, Loss: 1.0328
Epoch: 022, Loss: 1.0333
Epoch: 023, Loss: 1.0355
Epoch: 024, Loss: 1.0312
Epoch: 025, Loss: 1.0318
Epoch: 026, Loss: 1.0311
Epoch: 027, Loss: 1.0300
Epoch: 028, Loss: 1.0279
Epoch: 029, Loss: 1.0294
Epoch: 030, Loss: 1.0305
Epoch: 031, Loss: 1.0282
Epoch: 032, Loss: 1.0279
Epoch: 033, Loss: 1.0270
Epoch: 034, Loss: 1.0281
Epoch: 035, Loss: 1.0285
Epoch: 036, Loss: 1.0275
Epoch: 037, Loss: 1.0281
Epoch: 038, Loss: 1.0267
Epoch: 039, Loss: 1.0293
Epoch: 040, Loss: 1.0300


In [10]:
model()

Parameter containing:
tensor([[-0.1518, -0.2566],
        [ 0.0505, -0.0944],
        [-0.0475, -0.0400],
        ...,
        [-0.6885,  1.1569],
        [ 0.4284,  1.2810],
        [ 0.3399, -1.1968]], device='cuda:0', requires_grad=True)

In [32]:
# Compare the learned embeddings of two nodes, identified by row index.
indx1, indx2 = 1140, 593

model.eval()
# Pull the embedding matrix out of the autograd graph and onto the host as a
# NumPy array so SciPy can consume it.
params = model().detach().cpu().numpy()
# SciPy's cosine *distance* is 1 - cosine similarity, so values near 2 mean
# the two vectors point in nearly opposite directions.
cosine_distance = distance.cosine(
    params[indx1],
    params[indx2],
)
cosine_distance

1.9824907842560924