In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import torch

from torch_geometric.nn import Node2Vec
from torch_geometric.utils.convert import from_networkx

from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

In [None]:
def make_deterministic(random_seed = 123):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every visible CUDA device), and switches cuDNN to its
    deterministic, non-benchmarking mode.

    Args:
        random_seed: Integer seed applied to every generator.
    """
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    # Seed every GPU rather than only the current one; this call is a no-op
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

make_deterministic()

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"target device: {device}")

In [None]:
from misc.loader import get_lazega_network
import networkx as nx

# "friendship" layer of the Lazega network.
nx_net = get_lazega_network().layers["friendship"]

# Rebuild the graph from its node and edge lists only, so no node/edge
# attributes are carried over into the conversion below. The graph type
# follows the source layer instead of being hard-coded, so an undirected
# layer no longer needs a manual edit here.
no_data_graph = nx.DiGraph() if nx_net.is_directed() else nx.Graph()
no_data_graph.add_nodes_from(nx_net.nodes())
no_data_graph.add_edges_from(nx_net.edges())

# Convert to a PyTorch Geometric data object without grouping any attributes.
tg_net = from_networkx(no_data_graph, None, None)

# Sanity checks: sizes and node ids before and after the conversion.
print("edges: ", len(nx_net.edges()), "nodes: ", len(nx_net.nodes()))
print(np.unique(nx_net.nodes()))
print(tg_net)
print(torch.unique(tg_net.edge_index))


In [None]:
# Node2Vec embedding model over the converted graph's edge index.
model = Node2Vec(
    tg_net.edge_index,
    embedding_dim=64,
    walk_length=20,
    context_size=10,
    walks_per_node=10,
    num_negative_samples=1,
    p=1,
    q=1,
    # SparseAdam (below) raises on dense gradients, so the embedding table
    # must be created with sparse gradients enabled.
    sparse=True,
).to(device)

# Total number of scalar values across all trainable parameters.
params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"number of trainable parameters: {params}")

# The loader yields (positive, negative) random-walk batches consumed by
# `model.loss` during training.
loader = model.loader(batch_size=50, shuffle=True, num_workers=4)
optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

In [None]:
def train():
    """Run one optimisation pass over `loader` and return the mean batch loss.

    Uses the notebook-level `model`, `loader`, `optimizer` and `device`.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    model.train()
    running_loss = 0
    for positive_walks, negative_walks in loader:
        optimizer.zero_grad()
        batch_loss = model.loss(
            positive_walks.to(device),
            negative_walks.to(device),
        )
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    n_batches = len(loader)
    return running_loss / n_batches

# Train for 100 epochs and report the mean batch loss on every 10th epoch.
for epoch in range(1, 101):
    loss = train()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch:02d}, Loss: {loss:.4f}")

In [None]:
# Calling the model with no arguments returns its embedding tensor; detach it
# from autograd and move it to host memory so it can be used as a NumPy array.
embedding_tensor = model()
embeddings = embedding_tensor.detach().cpu().numpy()
embeddings.shape

In [None]:
num_clusters = 5

# NOTE: despite its name, `labels` is the fitted KMeans estimator; the
# per-row cluster assignments live in `labels.labels_`.
labels = KMeans(n_clusters=num_clusters, n_init=300)
labels.fit(embeddings)
print(np.unique(labels.labels_))

## Visualisation of embeddings

In [None]:
# Cluster assignment of every embedding row, taken from the fitted estimator.
embedding_labels = labels.labels_

# Map each distinct cluster id to its own colour sampled from the "jet" colormap.
cluster_labels = np.unique(embedding_labels)
# `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` takes the same (name, lut) arguments and is still available.
cluster_colors = plt.get_cmap("jet", len(cluster_labels))
color_map = {cl: cluster_colors(idx) for idx, cl in enumerate(cluster_labels)}

# Assign a colour to each node of the network: one RGBA row per label,
# in the same order as `embedding_labels`.
embedding_colors = np.array([color_map[label] for label in embedding_labels])

In [None]:
def plot_embeddings(embedding_points, embedding_labels, cluster_color_map):
    """Scatter-plot a 2-component t-SNE projection of the embeddings by cluster.

    Args:
        embedding_points: Array of embedding vectors passed to
            ``TSNE.fit_transform``.
        embedding_labels: Array of cluster ids aligned with the rows of
            `embedding_points`; compared element-wise against each cluster id
            to build a row mask.
        cluster_color_map: Mapping from cluster id to the colour used for
            that cluster's points.
    """
    projected = TSNE(n_components=2).fit_transform(embedding_points)
    plt.figure(figsize=(10, 8))
    for cluster_id, cluster_color in cluster_color_map.items():
        # Rows of the projection that belong to the current cluster.
        in_cluster = embedding_labels == cluster_id
        plt.scatter(
            projected[in_cluster, 0],
            projected[in_cluster, 1],
            s=20,
            color=cluster_color,
        )
    plt.axis("off")
    plt.show()

plot_embeddings(embeddings, embedding_labels, color_map)

In [None]:
def plot_network(network, color_map):
    """Draw `network` with a spring layout, colouring nodes by `color_map`.

    Args:
        network: networkx graph to draw.
        color_map: Value forwarded unchanged as ``node_color``; the call
            below passes the per-node colour array, not a dict.
    """
    plt.figure(figsize=(10, 8))
    # Same as nx.draw_spring: compute spring-layout positions, then draw.
    node_positions = nx.spring_layout(network)
    nx.draw(
        network,
        pos=node_positions,
        node_size=30,
        arrows=False,
        node_color=color_map,
    )
    plt.show()

plot_network(nx_net, embedding_colors)
