**Node2Vec: Pretraining Embeddings for Spatial Embeddings**

In [15]:
import os.path as osp
import sys
import numpy as np
from load_data import DataLoader

import matplotlib.pyplot as plt
import torch
from sklearn.manifold import TSNE

from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec

path = "../data/"

dataset = DataLoader(baseline=False, train_test_split=0.8)
dataset.load_data()
print(dataset.train[0])
print(dataset.test[0])


Data(x=[325, 2, 12], edge_index=[2, 2694], edge_attr=[2694], y=[325, 2, 12])
Data(x=[325, 2, 12], edge_index=[2, 2694], edge_attr=[2694], y=[325, 2, 12])


In [34]:
data = dataset.train[0]
data_test = dataset.test[0]

def create_mask(data, ratio):
    mask = np.zeros(len(data.edge_index), dtype=bool)
    num_samples = int(len(data.edge_index) * ratio)
    indices = np.random.choice(len(data.edge_index), size=num_samples, replace=False)
    mask[indices] = True
    return mask

train_ratio = 0.8
test_ratio = 0.2

train_mask = create_mask(data, train_ratio)
test_mask = create_mask(data, test_ratio)


device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Node2Vec(
    data.edge_index,
    embedding_dim=64,
    walk_length=80,
    context_size=10,
    walks_per_node=80,
    num_negative_samples=1,
    p=1.0,
    q=1.0,
    sparse=True,
).to(device)

num_workers = 4 if sys.platform == 'linux' else 0
loader = model.loader(batch_size=128, shuffle=True, num_workers=num_workers)
optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

def train():
    model.train()
    total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

@torch.no_grad()
def test():
    model.eval()
    z = model()
    print(z[train_mask].shape)
    print(data.y[train_mask].shape)
    # y_train_reshaped = data.y[train_mask].reshape(((data.y[train_mask].shape[0],data.y[train_mask].shape[1]*data.y[train_mask].shape[2])))
    # y_test_reshaped = data.y[test_mask].reshape(((data.y[test_mask].shape[0],data.y[test_mask].shape[1]*data.y[test_mask].shape[2])))
    y_train_reshaped = data.y[train_mask].flatten()
    y_test_reshaped = data.y[test_mask].flatten()
    acc = model.test(
        train_z=z[train_mask],
        train_y=y_train_reshaped,
        test_z=z[test_mask],
        test_y=y_test_reshaped,
        max_iter=10,
    )
    return acc


for epoch in range(1, 10):
    loss = train()
    # acc = test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

Epoch: 001, Loss: 5.5056
Epoch: 002, Loss: 4.8848
Epoch: 003, Loss: 4.2975
Epoch: 004, Loss: 3.8227
Epoch: 005, Loss: 3.4731


In [25]:
# save embeddings to file
model.eval()
z = model().detach().numpy()
z = TSNE(n_components=2).fit_transform(z)
y = data.y.cpu().numpy()
np.save('embeddings.npy', z)

In [26]:
embed = np.load('embeddings.npy')
print(embed)

[[-9.1225064e-01 -1.9715682e+00]
 [-1.0490501e+00 -2.9864621e+00]
 [-1.2788947e-01 -3.0772957e-01]
 [ 1.8967636e+00  3.8060780e+00]
 [ 2.1169746e-01 -1.1647007e+00]
 [-2.7308071e-01 -4.3984351e+00]
 [-3.2505026e+00 -1.4962916e+00]
 [ 2.1576391e-01  4.6761475e+00]
 [-2.3654196e+00  1.6467147e+00]
 [ 1.1878657e+00 -4.2408261e+00]
 [ 2.6954250e+00  3.3188729e+00]
 [-1.1838104e+00 -4.0710993e+00]
 [ 9.6199691e-01 -7.6095909e-01]
 [ 2.2327115e-01  1.2942369e+00]
 [-6.3916963e-01 -8.5321164e-01]
 [-7.1245171e-02  1.3850993e+00]
 [ 1.5459650e+00  2.6340703e-02]
 [-1.6878901e+00  2.4687309e+00]
 [-2.8035355e+00 -1.3945698e+00]
 [ 9.4565916e-01 -2.8903973e-01]
 [-3.9831150e+00 -2.6723046e+00]
 [ 1.5625806e+00 -1.8684888e+00]
 [-1.1956438e-01  2.0055354e+00]
 [-2.4236634e+00  9.8866194e-02]
 [-1.5407826e+00 -1.2890596e+00]
 [ 3.2737122e+00  3.1178802e-01]
 [-5.6716621e-01 -1.1660002e-01]
 [-5.2352130e-01 -2.1201630e+00]
 [ 1.9861468e+00  1.2114702e+00]
 [ 1.5165595e+00 -3.3582988e+00]
 [-3.80678