In [None]:
import mgclient
import torch

# Connect to the local graph database and open a cursor for the queries below.
conn = mgclient.connect(host="localhost", port=7687)
cursor = conn.cursor()

# Maps the `subject` node property to an integer class label.
subject_map = {
    'Case_Based': 0,
    'Genetic_Algorithms': 1,
    'Neural_Networks': 2,
    'Probabilistic_Methods': 3,
    'Reinforcement_Learning': 4,
    'Rule_Learning': 5,
    'Theory': 6
}

# Node features and labels.
# ORDER BY n.id makes the row order deterministic and ascending by node id.
# The later torch.unique(edge_index, return_inverse=True) remapping numbers
# nodes in ascending id order as well, so row i of x / y describes node i of
# the remapped edge index. Without the ORDER BY the database may return nodes
# in any order and features would be attached to the wrong nodes.
# NOTE(review): the alignment still assumes every node appears in at least one
# edge -- confirm for this dataset.
cursor.execute("MATCH (n) RETURN n.features, n.subject ORDER BY n.id")
r_node_properties = cursor.fetchall()
x = torch.tensor([features for features, _ in r_node_properties], dtype=torch.float32)
# Subjects missing from subject_map fall into the extra class 7.
y = torch.tensor([subject_map.get(subject, 7) for _, subject in r_node_properties])

# Directed edges as (source id, relationship type, target id) rows.
cursor.execute("MATCH (n)-[r]->(m) RETURN n.id, type(r), m.id")
r_edge_index = cursor.fetchall()
# Drop the relationship type and transpose the (source, target) pairs so that
# row 0 holds source ids and row 1 holds target ids.
edge_index = torch.tensor(
    [(src, dst) for (src, _rel_type, dst) in r_edge_index], dtype=torch.long
).t().contiguous()

In [None]:
# Inspect the raw edge tensor: row 0 holds source node ids, row 1 target node ids.
edge_index

In [None]:
# Collapse the raw node ids in edge_index to consecutive indices.
# unique_values holds the distinct ids in ascending order, and
# indices_edge_index has the same shape as edge_index with every id replaced
# by its position in unique_values.
unique_values, indices_edge_index = torch.unique(
    edge_index,
    sorted=True,
    return_inverse=True,
)

print(indices_edge_index)

In [None]:
from torch_geometric.data import Data
from torch_geometric.transforms import RandomNodeSplit

# Bundle features, labels and the remapped edge index into one graph object,
# then attach random train / val / test node masks (20 training nodes per class).
node_split = RandomNodeSplit(split='random', num_train_per_class=20)
data = node_split(Data(x=x, y=y, edge_index=indices_edge_index))

In [None]:
# Number of nodes in each split. Tensor.sum() counts the True entries in a
# single vectorised call; the builtin sum() would loop over the mask one
# element at a time in Python.
data.train_mask.sum(), data.val_mask.sum(), data.test_mask.sum()

In [None]:
from torch_geometric.nn import Node2Vec
from torch.optim import SparseAdam

# Train on the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Node2Vec embedding model over the remapped graph.
# num_nodes is passed explicitly so the embedding table has one row per node
# in `data`, rather than a size inferred from the largest index in edge_index.
# This keeps later lookups with torch.arange(data.num_nodes) in range even if
# the highest-numbered nodes have no edges.
model = Node2Vec(
    data.edge_index,
    embedding_dim=256,
    walk_length=5,
    context_size=5,
    walks_per_node=30,
    num_negative_samples=1,
    num_nodes=data.num_nodes,
    sparse=True,
).to(device)

# Batches of random-walk samples produced by the model itself.
loader = model.loader(batch_size=128, shuffle=True, num_workers=8)
# sparse=True yields sparse gradients, hence SparseAdam. The parameters are
# materialised into a list because model.parameters() is a one-shot generator;
# some PyTorch releases iterate it during validation and would then see an
# empty parameter list.
optimizer = SparseAdam(list(model.parameters()), lr=0.01)

In [None]:
# Train the embedding model for 100 epochs and print the mean batch loss of each epoch.
for epoch in range(1, 101):
    # Switch the model to training mode at the start of every epoch.
    model.train()

    total_loss = 0
    # Each batch is a (pos_rw, neg_rw) pair -- presumably positive and negative
    # random-walk samples; confirm against the Node2Vec loader documentation.
    for pos_rw, neg_rw in loader:
        optimizer.zero_grad()
        # Move both samples to the model's device before computing the loss.
        loss = model.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer.step()
        # Accumulate a plain Python number rather than the loss tensor.
        total_loss += loss.item()

    # Dividing by len(loader) gives the average loss per batch.
    print('Epoch: {:02d}, Loss: {:.4f}'.format(epoch, total_loss / len(loader)))

In [None]:
from sklearn.manifold import TSNE

# Switch to evaluation mode before reading out the learned embeddings.
model.eval()

# Labels as a NumPy array for boolean masking in the plotting cell.
# NOTE(review): this rebinds the notebook-level `y` from a tensor to an
# ndarray; re-running earlier cells afterwards will see the ndarray.
y = data.y.cpu().numpy()
# One embedding row per node, looked up on the model's device.
embedding = model(torch.arange(data.num_nodes, device=device))
# Project to 2-D for plotting. The tensor must be detached from autograd and
# moved to host memory first: Tensor.numpy() raises for CUDA tensors, so the
# original detach().numpy() failed whenever device == 'cuda'.
embedding_2 = TSNE(n_components=2).fit_transform(embedding.detach().cpu().numpy())

In [None]:
from matplotlib import pyplot as plt

plt.style.use('fivethirtyeight')


@torch.no_grad()
def output_vis():
    """Scatter-plot the 2-D embedding, one colour per subject class.

    Reads the notebook-level `embedding_2` (2-D coordinates per node), `y`
    (integer class label per node) and `subject_map` (label name -> class id).
    Draws one scatter series per entry of `subject_map`, adds a legend in the
    upper-left corner and shows the figure. Nodes whose label is not a value
    of `subject_map` are not drawn.
    """
    for label, class_id in subject_map.items():
        # Boolean mask selecting the nodes that belong to this class.
        members = y == class_id
        plt.scatter(embedding_2[members, 0], embedding_2[members, 1], label=label, s=15)

    plt.legend(loc=2, prop={'size': 7})
    plt.show()


output_vis()