In [1]:

import os

import torch

from torch_geometric.nn import Node2Vec

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device


'cuda'

In [2]:
from scipy.spatial import distance

In [3]:
def adjacency_to_edge_index(file):
    """Convert adjacency-list lines into a pair of parallel edge lists.

    The position of a line (0-based) is the source node id, and every
    whitespace-separated integer on that line is one of its neighbours.

    Args:
        file: Any iterable of text lines, e.g. an open text file.

    Returns:
        A two-element list ``[sources, targets]``; ``sources[k]`` and
        ``targets[k]`` together describe the k-th edge.

    Raises:
        ValueError: If a token on a line is not a valid integer.
    """
    sources, targets = [], []
    for node, line in enumerate(file):
        # split() with no argument collapses runs of whitespace and returns
        # an empty list for a blank line, so a node without neighbours (or a
        # line with stray spaces/tabs) no longer triggers int('').
        for neighbour in line.split():
            sources.append(node)
            targets.append(int(neighbour))

    return [sources, targets]

In [4]:
# dataset_dir = os.path.join(
#     os.getenv("CVF_PROJECT_DIR", ""), "cvf-analysis", "v2", "datasets", "coloring"
# )
# # edge_index_file = "tiny_graph_test_pt_adj_list.txt"
# # edge_index_file = "small_graph_test_pt_adj_list.txt"
# edge_index_file = "graph_1_pt_adj_list.txt"

# Directory holding the coloring datasets, resolved under the project root
# named by the CVF_PROJECT_DIR environment variable (the path is relative to
# the working directory when the variable is unset).
dataset_dir = os.path.join(
    os.getenv("CVF_PROJECT_DIR", ""), "cvf-analysis", "v2", "datasets", "coloring"
)

edge_index_file = "graph_1_adj_list.txt"

# A context manager guarantees the file handle is closed once parsing is done
# (the handle was previously left open for the lifetime of the kernel).
with open(os.path.join(dataset_dir, edge_index_file), "r") as f:
    # dtype is pinned to long so that a file describing no edges still yields
    # an integer index tensor instead of an empty float tensor.
    edge_index = torch.tensor(
        adjacency_to_edge_index(f), dtype=torch.long
    ).to(device)


In [5]:
# Inspect the parsed edges: row 0 holds source node ids, row 1 the matching
# neighbour ids.
edge_index

tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
       device='cuda:0')

In [6]:
# Seed PyTorch's RNG before the model is built so that re-running this cell
# starts from the same random state; without a seed every run of the notebook
# produces different embeddings.
# NOTE(review): full determinism on CUDA / with multiple loader workers may
# need further settings - see the PyTorch reproducibility notes.
SEED = 0
torch.manual_seed(SEED)

# Node2Vec over the parsed graph. With p == q == 1.0 the walk has no
# return/in-out bias; embeddings are 2-dimensional.
model = Node2Vec(
    edge_index,
    embedding_dim=2,
    walks_per_node=10,
    walk_length=10,
    context_size=5,
    p=1.0,
    q=1.0,
    num_negative_samples=5,
).to(device)


In [7]:
# Adam over every parameter of the model.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# The model supplies its own loader; iterating it yields
# (positive walks, negative walks) pairs that train() consumes.
loader = model.loader(
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

# optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)


def train():
    """Run one pass over ``loader`` and return the mean per-batch loss.

    Uses the notebook-level ``model``, ``loader``, ``optimizer`` and
    ``device``. For each batch the gradients are reset, the model's loss on
    the positive/negative walks is back-propagated, and the optimizer takes
    one step.

    Returns:
        The sum of the batch losses divided by ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for positive_walks, negative_walks in loader:
        optimizer.zero_grad()
        batch_loss = model.loss(positive_walks.to(device), negative_walks.to(device))
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

In [None]:
# @torch.no_grad()
# def test():
#     model.eval()
#     z = model()
#     acc = model.test(
#         train_z=z[data.train_mask],
#         train_y=data.y[data.train_mask],
#         test_z=z[data.test_mask],
#         test_y=data.y[data.test_mask],
#         max_iter=150,
#     )
#     return acc

In [9]:
# Train for 100 epochs (numbered 1..100) and print the value returned by
# train() - the mean per-batch loss - after each one.
for epoch in range(1, 101):
    loss = train()
    # Accuracy reporting is disabled: it relies on a test() helper that is
    # currently commented out in this notebook.
    # acc = test()
    # print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Acc: {acc:.4f}')
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')


Epoch: 001, Loss: 1.8301
Epoch: 002, Loss: 1.7986
Epoch: 003, Loss: 1.8182
Epoch: 004, Loss: 1.8049
Epoch: 005, Loss: 1.7554
Epoch: 006, Loss: 1.7510
Epoch: 007, Loss: 1.7451
Epoch: 008, Loss: 1.7341
Epoch: 009, Loss: 1.6605
Epoch: 010, Loss: 1.7295
Epoch: 011, Loss: 1.6544
Epoch: 012, Loss: 1.6771
Epoch: 013, Loss: 1.6356
Epoch: 014, Loss: 1.6422
Epoch: 015, Loss: 1.6717
Epoch: 016, Loss: 1.5794
Epoch: 017, Loss: 1.5810
Epoch: 018, Loss: 1.6151
Epoch: 019, Loss: 1.6098
Epoch: 020, Loss: 1.6170
Epoch: 021, Loss: 1.6153
Epoch: 022, Loss: 1.5137
Epoch: 023, Loss: 1.5508
Epoch: 024, Loss: 1.5301
Epoch: 025, Loss: 1.5061
Epoch: 026, Loss: 1.5414
Epoch: 027, Loss: 1.5009
Epoch: 028, Loss: 1.5051
Epoch: 029, Loss: 1.5000
Epoch: 030, Loss: 1.4864
Epoch: 031, Loss: 1.5054
Epoch: 032, Loss: 1.4814
Epoch: 033, Loss: 1.4717
Epoch: 034, Loss: 1.4686
Epoch: 035, Loss: 1.4421
Epoch: 036, Loss: 1.4553
Epoch: 037, Loss: 1.4444
Epoch: 038, Loss: 1.4503
Epoch: 039, Loss: 1.4224
Epoch: 040, Loss: 1.4497


In [10]:
# Calling the model with no arguments displays the learned embedding table
# (per the output below: one row per node, one column per embedding dimension).
model()

Parameter containing:
tensor([[-1.5354,  0.2052],
        [-1.5025,  0.5224],
        [-0.1541,  0.2071],
        [ 0.1569,  0.1130],
        [-0.2320, -0.2065],
        [-0.3274,  0.7681],
        [ 0.1755, -0.8286],
        [-0.0501,  0.0788],
        [ 0.0115,  0.1823],
        [-0.3455, -0.4390]], device='cuda:0', requires_grad=True)

In [11]:
# Ids of the two nodes whose embeddings are compared.
indx1, indx2 = 0, 6

model.eval()
# Take the embedding table out of autograd, move it to host memory and view
# it as a NumPy array so SciPy can consume it.
params = model().detach().cpu().numpy()
# SciPy's cosine *distance* between the two embedding rows.
cosine_distance = distance.cosine(params[indx1], params[indx2])
cosine_distance

1.334983280343413