# ML Graph Embedding methods

In [9]:
#import libraries
import torch
import time
from torch_geometric.nn import Node2Vec

In [10]:
# Load the graph from the file
server_graph_data_filepath = './data/server_graph_data.pth'
# Later cells call `.to(...)` and read `.edge_index` on the loaded object, so the
# file holds a pickled graph object rather than a plain tensor/state dict.
# Request full unpickling explicitly instead of relying on torch.load's default
# for `weights_only`, which differs between PyTorch releases.
# NOTE(review): full unpickling can execute arbitrary code -- only load files
# that come from a trusted source.
graph_data = torch.load(server_graph_data_filepath, weights_only=False)

  graph_data = torch.load(server_graph_data_filepath)


In [11]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Move the loaded graph onto the selected device.
graph_data = graph_data.to(device)

cuda


In [None]:
# Build the Node2Vec model: biased random walks over the graph's edges are fed
# to a Skip-Gram objective that learns one embedding vector per node.
node2vec = Node2Vec(
    edge_index=graph_data.edge_index,  # edge list of the graph
    embedding_dim=3,                   # size of each node embedding
    # --- random-walk sampling ---
    walks_per_node=10,                 # walks started from every node
    walk_length=20,                    # length of each walk
    p=0.25,  # return parameter: p < 1 makes stepping back to the previous node more likely
    q=4.0,   # in-out parameter: q > 1 keeps the walk near the previous node (BFS-like, local)
    # --- Skip-Gram objective ---
    context_size=10,                   # window size
    num_negative_samples=1,            # negative samples per positive one
    sparse=True,                       # sparse gradients for the embedding table
)
node2vec = node2vec.to(device)

# Sparse embedding gradients are paired with the SparseAdam optimizer.
optimizer = torch.optim.SparseAdam(params=list(node2vec.parameters()), lr=0.01)

# Training loop
def train(model=None, opt=None, batch_size=8):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Object exposing ``train()``, ``loader(batch_size=..., shuffle=...)``,
            ``loss(pos_rw, neg_rw)`` and ``parameters()``. Defaults to the
            notebook-level ``node2vec`` model.
        opt: Optimizer that is zeroed and stepped once per batch. Defaults to
            the notebook-level ``optimizer``.
        batch_size: Batch size forwarded to ``model.loader``.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    # Fall back to the notebook globals so existing `train()` calls keep working.
    model = node2vec if model is None else model
    opt = optimizer if opt is None else opt
    # Batches are moved to the device that holds the model's parameters.
    target_device = next(model.parameters()).device

    model.train()
    loader = model.loader(batch_size=batch_size, shuffle=True)
    total_loss = 0.0
    for pos_rw, neg_rw in loader:
        pos_rw = pos_rw.to(target_device)
        neg_rw = neg_rw.to(target_device)

        opt.zero_grad()
        loss = model.loss(pos_rw, neg_rw)
        loss.backward()
        opt.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Train for 100 epochs; the loss is reported on every 10th epoch and the
# wall-clock duration (in seconds) of every epoch is printed.
for epoch in range(1, 101):
    start = time.time()
    loss = train()
    if epoch % 10 == 0:
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    end = time.time()

    # The measured span covers the training pass and the optional loss report.
    print(end - start)

# Pull the learned embedding table out of the trained Node2Vec model.
node_embeddings = node2vec.embedding.weight.data

print("Node Embeddings Shape:", node_embeddings.size())


112.99993205070496


In [13]:
# Display the embedding tensor taken from `node2vec.embedding.weight`.
node_embeddings

tensor([[-0.1104,  0.1179, -0.0875],
        [-0.0621,  0.1049, -0.2224],
        [-0.0053,  0.1343, -0.0652],
        ...,
        [-0.1721,  0.1979, -0.1593],
        [-0.1009,  0.2103, -0.1458],
        [-0.2406,  0.1375, -0.0170]], device='cuda:0')

In [14]:
# Persist the Node2Vec embedding tensor to a file in the working directory.
node2vector_embedding_filepath = 'node2vector_embeddings.pt'
torch.save(obj=node_embeddings, f=node2vector_embedding_filepath)

In [19]:
# DeepWalk is expressed as Node2Vec with unbiased random walks, i.e. p = q = 1.
# These are also the library defaults; they are spelled out here so the intent
# is visible in the code.
deepwalk = Node2Vec(
    edge_index=graph_data.edge_index,
    embedding_dim=64,
    walks_per_node=10,
    walk_length=40,   # longer walks than the Node2Vec run
    p=1.0,
    q=1.0,
    context_size=10,
    sparse=True,
)
deepwalk = deepwalk.to(device)

# Sparse embedding gradients are paired with the SparseAdam optimizer.
optimizer = torch.optim.SparseAdam(params=list(deepwalk.parameters()), lr=0.01)

# Training loop
def train(model=None, opt=None, batch_size=8):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Object exposing ``train()``, ``loader(batch_size=..., shuffle=...)``,
            ``loss(pos_rw, neg_rw)`` and ``parameters()``. Defaults to the
            notebook-level ``deepwalk`` model.
        opt: Optimizer that is zeroed and stepped once per batch. Defaults to
            the notebook-level ``optimizer``.
        batch_size: Batch size forwarded to ``model.loader``.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    # Fall back to the notebook globals so existing `train()` calls keep working.
    model = deepwalk if model is None else model
    opt = optimizer if opt is None else opt
    # Batches are moved to the device that holds the model's parameters.
    target_device = next(model.parameters()).device

    model.train()
    loader = model.loader(batch_size=batch_size, shuffle=True)
    total_loss = 0.0
    for pos_rw, neg_rw in loader:
        pos_rw = pos_rw.to(target_device)
        neg_rw = neg_rw.to(target_device)

        opt.zero_grad()
        loss = model.loss(pos_rw, neg_rw)
        loss.backward()
        opt.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Train for 100 epochs; the loss is reported on every 10th epoch and the
# wall-clock duration (in seconds) of every epoch is printed.
for epoch in range(1, 101):
    start = time.time()
    loss = train()
    if epoch % 10 == 0:
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    end = time.time()

    # The measured span covers the training pass and the optional loss report.
    print(end - start)

# Obtain the node embeddings learned by the DeepWalk model
node_embeddings_deepwalk = deepwalk.embedding.weight.data

# Report the shape of the DeepWalk tensor; the original printed the shape of
# `node_embeddings`, which is the Node2Vec tensor from the earlier cell.
print("Node Embeddings Shape:", node_embeddings_deepwalk.shape)


5.163858652114868
5.252846002578735
5.350556373596191
5.3410985469818115
5.288780689239502
5.229307174682617
5.213980436325073
5.224516153335571
5.2328200340271
Epoch: 010, Loss: 0.7364
5.233344316482544
5.23323917388916
5.200892925262451
5.253555536270142
5.340257167816162
5.333372354507446
5.201008081436157
5.21733546257019
5.264699935913086
5.3019468784332275
Epoch: 020, Loss: 0.7291
5.2039642333984375
5.20359992980957
5.1994545459747314
5.242571115493774
5.2554755210876465
5.207273721694946
5.374294757843018
5.247932195663452
5.219162464141846
5.22107720375061
Epoch: 030, Loss: 0.7257
5.250631809234619
5.331761598587036
5.212571144104004
5.192092657089233
5.237618684768677
5.297112941741943
5.334291934967041
5.345263242721558
5.226499319076538
5.150882244110107
Epoch: 040, Loss: 0.7246
5.272575378417969
5.39014196395874
5.401442289352417
5.345121383666992
5.278644561767578
5.278395414352417
5.317960262298584
5.405659914016724
5.384824752807617
5.3386406898498535
Epoch: 050, Loss: 0

In [16]:
# Display the embedding tensor taken from `deepwalk.embedding.weight`.
node_embeddings_deepwalk

tensor([[ 0.1174, -0.0395, -0.0959,  ..., -0.0124, -0.1057,  0.1665],
        [ 0.1506,  0.0084,  0.1439,  ..., -0.1527, -0.0970,  0.1060],
        [-0.0003,  0.0802,  0.0756,  ...,  0.0113, -0.0057,  0.0005],
        ...,
        [-0.0594,  0.0526, -0.0823,  ...,  0.0192,  0.0278,  0.1358],
        [-0.1261,  0.1444,  0.0894,  ..., -0.0533, -0.0411, -0.0324],
        [ 0.2370, -0.0750, -0.0027,  ..., -0.0609,  0.0380, -0.0327]],
       device='cuda:0')

In [18]:
# Persist the DeepWalk embeddings. The original saved `node_embeddings` (the
# Node2Vec tensor) here, so the DeepWalk file held the wrong model's vectors.
deepwalk_embedding_filepath = 'deepwalk_embeddings.pt'
torch.save(node_embeddings_deepwalk, deepwalk_embedding_filepath)