In [1]:
import mgclient
import torch

# Open a Bolt connection to the Memgraph instance and get a cursor for queries.
conn = mgclient.connect(host="localhost", port=7687)
cursor = conn.cursor()

# Subject name -> integer class label used as the training target.
subject_map = {
    'Case_Based': 0,
    'Genetic_Algorithms': 1,
    'Neural_Networks': 2,
    'Probabilistic_Methods': 3,
    'Reinforcement_Learning': 4,
    'Rule_Learning': 5,
    'Theory': 6
}

# ORDER BY n.id is required for correctness: a later cell remaps node ids to
# 0..N-1 with torch.unique, whose output is sorted ascending, so node index k
# means "the k-th smallest id". Without an explicit ordering the database may
# return rows in any order and row k of x / y would describe a different node.
# NOTE(review): this assumes every node appears in at least one edge, so that
# the ids seen in edge_index are exactly the ids returned here -- confirm.
cursor.execute("MATCH (n) RETURN n.features, n.subject ORDER BY n.id")
r_node_properties = cursor.fetchall()

# One row per node: feature vectors as float32, subjects as integer labels.
# Subjects missing from subject_map fall back to the extra label 7.
x = torch.tensor([features for features, _ in r_node_properties], dtype=torch.float32)
y = torch.tensor([subject_map.get(subject, 7) for _, subject in r_node_properties])

# Fetch every directed relationship as (source id, relationship type, target id).
cursor.execute("MATCH (n)-[r]->(m) RETURN n.id, type(r), m.id")
r_edge_index = cursor.fetchall()

# Drop the relationship type and transpose to shape (2, num_edges):
# row 0 holds source ids, row 1 holds target ids (raw database ids).
edge_index = torch.tensor(
    [(src, dst) for (src, _rel_type, dst) in r_edge_index],
    dtype=torch.long,
).t().contiguous()

In [2]:
# Inspect the raw edge tensor: 2 rows (source ids, target ids), one column per
# relationship. Values are still the database node ids, not 0-based indices.
edge_index

tensor([[  31349,  686532, 1129442,  ...,  928873,   15076, 1111265],
        [  31336,   31336,   31336,  ...,   24043,   24043,   24043]])

In [3]:
# Remap the raw node ids to consecutive indices: `unique_values` holds the
# distinct ids, and `indices_edge_index` has the same shape as `edge_index`
# with each id replaced by its position in `unique_values`.
unique_values, indices_edge_index = edge_index.unique(return_inverse=True)

print(indices_edge_index)

tensor([[ 463, 1802, 2390,  ..., 1886,  278, 2094],
        [ 462,  462,  462,  ...,  376,  376,  376]])


In [4]:
from torch_geometric.data import Data
from torch_geometric.transforms import RandomNodeSplit
from torch_geometric.transforms import ToUndirected

# Seed the global torch RNG so the random train/val/test split below (and any
# later torch-based randomness) is reproducible on Restart & Run All.
torch.manual_seed(42)

# Every remapped edge endpoint must address an existing row of x / y;
# fail here with a clear message instead of deep inside a later cell.
assert indices_edge_index.numel() == 0 or int(indices_edge_index.max()) < x.size(0), \
    "edge_index references a node index with no matching feature row"

# Bundle features, labels and the re-indexed edges into one graph object.
data = Data(x=x, y=y, edge_index=indices_edge_index)
# Attach random train/val/test masks with 20 training nodes per class.
data = RandomNodeSplit('random', num_train_per_class=20)(data)
# Apply the ToUndirected transform to the graph's edges.
data = ToUndirected()(data)

In [5]:
# Number of nodes in each split. Tensor.sum() counts the True entries in one
# vectorised call; the builtin sum() would iterate the mask element by element.
data.train_mask.sum(), data.val_mask.sum(), data.test_mask.sum()

(tensor(140), tensor(500), tensor(1000))

In [6]:
from torch_geometric.datasets import Planetoid

# Load the 'Cora' Planetoid dataset, using /tmp/Cora as its root directory.
# NOTE(review): `dataset` is not referenced by any cell visible here -- confirm
# whether a later cell needs it.
dataset = Planetoid(name='Cora', root='/tmp/Cora')

In [7]:
from torch_geometric.nn import Node2Vec
from torch.optim import SparseAdam

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Hyperparameters for the Node2Vec embedding model.
node2vec_params = dict(
    embedding_dim=128,
    walk_length=20,
    context_size=10,
    walks_per_node=10,
    num_negative_samples=1,
    sparse=True,  # optimised with SparseAdam below
)

# Build the model over the graph's edges and move it to the chosen device.
model = Node2Vec(data.edge_index, **node2vec_params)
model = model.to(device)

# Batches come from the model's own loader; each batch is unpacked in the
# training cell as a (positive walks, negative walks) pair.
loader = model.loader(num_workers=8, shuffle=True, batch_size=128)
optimizer = SparseAdam(model.parameters(), lr=0.01)

In [8]:
# Train for 10 epochs and report the mean batch loss after each one.
for epoch in range(1, 11):
    model.train()

    total_loss = 0
    for positive_walks, negative_walks in loader:
        # Standard step: clear gradients, compute the loss on this batch of
        # positive/negative walks, backpropagate, update the parameters.
        optimizer.zero_grad()
        positive_walks = positive_walks.to(device)
        negative_walks = negative_walks.to(device)
        loss = model.loss(positive_walks, negative_walks)
        loss.backward()
        optimizer.step()
        total_loss = total_loss + loss.item()

    mean_loss = total_loss / len(loader)
    print(f'Epoch: {epoch:02d}, Loss: {mean_loss:.4f}')

Epoch: 01, Loss: 8.1461
Epoch: 02, Loss: 6.0581
Epoch: 03, Loss: 4.9558
Epoch: 04, Loss: 4.1438
Epoch: 05, Loss: 3.4827
Epoch: 06, Loss: 2.9640
Epoch: 07, Loss: 2.5497
Epoch: 08, Loss: 2.2196
Epoch: 09, Loss: 1.9519
Epoch: 10, Loss: 1.7411
