# ML Graph Embedding methods

In [1]:
#import libraries
import torch
import time
from torch_geometric.nn import Node2Vec

In [2]:
print(torch.version.cuda)

11.8


In [3]:
# Load the graph from the file
server_graph_data_filepath = './data/server_graph_data.pth'
graph_data = torch.load(server_graph_data_filepath)

  graph_data = torch.load(server_graph_data_filepath)


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = 'cpu'
print(device)
graph_data = graph_data.to(device)

cuda


In [5]:
# Initialize the Node2Vec model
node2vec = Node2Vec(
    graph_data.edge_index,       # Edge list
    embedding_dim=3,      # Size of embeddings
    walk_length=20,        # Length of each random walk
    context_size=10,       # Window size for Skip-Gram
    walks_per_node=10,     # Number of walks per node
    num_negative_samples=1,  # Number of negative samples for Skip-Gram
    p=0.25,  # Return parameter: encourages staying close to the starting node
    q=4.0,   # In-out parameter: encourages exploring further away
    sparse=True            # Use sparse gradients for efficiency
).to(device)

# Define the optimizer
optimizer = torch.optim.SparseAdam(list(node2vec.parameters()), lr=0.01)

# Training loop
def train():
    node2vec.train()
    total_loss = 0
    loader = node2vec.loader(batch_size=8, shuffle=True)
    for pos_rw, neg_rw in loader:
        pos_rw = pos_rw.to(device)
        neg_rw = neg_rw.to(device)
        
        optimizer.zero_grad()
        loss = node2vec.loss(pos_rw, neg_rw)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Run training for multiple epochs
for epoch in range(1, 101):
    # Save timestamp
    start = time.time()
    loss = train()
    if epoch % 10 == 0:
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
    # Save timestamp
    end = time.time()
    
    print(end - start)

# Obtain the node embeddings
node_embeddings = node2vec.embedding.weight.data

print("Node Embeddings Shape:", node_embeddings.shape)


135.73618745803833
130.99125599861145
129.40948152542114
127.02247142791748
124.91007542610168
123.32488918304443
121.82983803749084
121.6709771156311
121.91338753700256
Epoch: 010, Loss: 0.7759
121.6263575553894
121.31929612159729
121.51817274093628
120.66063284873962
120.74512791633606
120.56543374061584
120.4960515499115
120.55703234672546
120.96491742134094
120.94094729423523
Epoch: 020, Loss: 0.7695
120.7212336063385
120.11761331558228
120.26626205444336
120.29437780380249
120.40866780281067
120.33241963386536
119.86544489860535
119.94821524620056
120.18908286094666
119.97679734230042
Epoch: 030, Loss: 0.7690
119.84493684768677
119.8982183933258
120.04901552200317
119.38574004173279
119.92442488670349
120.31553220748901
120.44609451293945
119.51584243774414
119.482337474823
119.26975631713867
Epoch: 040, Loss: 0.7683
119.60675239562988
119.69831418991089
119.157066822052
119.69274067878723
119.18375968933105
119.0699234008789
119.5644862651825
120.14952087402344
118.92179465293884

In [6]:
node_embeddings

tensor([[-0.1627,  0.0256,  0.1298],
        [-0.2320, -0.0875,  0.1060],
        [-0.2535,  0.3937,  0.0748],
        ...,
        [-0.1484, -0.1693, -0.0323],
        [ 1.7283, -0.0232, -0.8917],
        [-0.1192, -0.2461,  0.0336]], device='cuda:0')

In [7]:
node2vector_embedding_filepath = 'node2vector_embeddings.pt'
torch.save(node_embeddings, node2vector_embedding_filepath)

In [8]:
# Initialize the DeepWalk model by setting p and q to 1 (unbiased random walks)
deepwalk = Node2Vec(
    graph_data.edge_index,
    embedding_dim=64,
    walk_length=40, # Longer walk length for DeepWalk
    context_size=10,
    walks_per_node=10,
    sparse=True
).to(device)

# Define the optimizer
optimizer = torch.optim.SparseAdam(list(deepwalk.parameters()), lr=0.01)

# Training loop
def train():
    deepwalk.train()
    total_loss = 0
    loader = deepwalk.loader(batch_size=8, shuffle=True)
    for pos_rw, neg_rw in loader:
        pos_rw = pos_rw.to(device)
        neg_rw = neg_rw.to(device)
        
        optimizer.zero_grad()
        loss = deepwalk.loss(pos_rw, neg_rw)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

# Run training for multiple epochs
for epoch in range(1, 101):
    start = time.time()
    loss = train()
    if epoch % 10 == 0:
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')
        
    # Save timestamp
    end = time.time()
    
    print(end - start)

# Obtain the node embeddings
node_embeddings_deepwalk = deepwalk.embedding.weight.data

print("Node Embeddings Shape:", node_embeddings_deepwalk.shape)

5.767935037612915
5.873707056045532
6.007341623306274
7.0716023445129395
7.07146430015564
7.120197057723999
7.030393123626709
7.1353559494018555
7.134816884994507
Epoch: 010, Loss: 0.7360
7.025185823440552
7.079774379730225
7.025813817977905
7.14076828956604
7.079728841781616
7.0712809562683105
7.111080646514893
7.058572769165039
7.111705303192139
7.0954365730285645
Epoch: 020, Loss: 0.7288
6.99012565612793
7.09937858581543
6.995908498764038
7.118033170700073
7.116052865982056
7.028337717056274
7.146016597747803
7.083376884460449
7.144520282745361
7.165513515472412
Epoch: 030, Loss: 0.7255
6.93779730796814
7.138479709625244
7.085843801498413
7.058143854141235
7.117651462554932
7.010565280914307
7.082873106002808
7.090676784515381
7.058588743209839
7.075692176818848
Epoch: 040, Loss: 0.7240
7.000749349594116
7.124554872512817
7.134103298187256
7.064037799835205
7.0668556690216064
6.922300100326538
7.085397243499756
7.281412124633789
7.504371166229248
7.128816843032837
Epoch: 050, Loss: 

In [9]:
node_embeddings_deepwalk

tensor([[-1.8289e-01,  2.0963e-01,  9.8698e-02,  ...,  1.5152e-02,
         -1.9687e-01,  6.6291e-02],
        [ 4.3452e-02, -2.3687e-01, -4.3718e-02,  ..., -6.0400e-02,
         -7.0542e-02,  3.7939e-02],
        [ 2.9276e-03, -1.3018e-01,  9.8565e-02,  ..., -4.3489e-02,
         -2.1407e-01,  2.0897e-01],
        ...,
        [-4.0324e-02,  1.0460e-01,  4.2056e-02,  ...,  1.1005e-01,
          6.1901e-02,  1.4247e-04],
        [ 4.0746e-01, -4.9000e-01,  7.5014e-01,  ...,  2.6305e-01,
         -1.2246e-02,  2.0184e-01],
        [ 1.7585e-01, -1.7938e-01,  1.2310e-01,  ...,  1.9934e-01,
         -4.7857e-03, -9.7514e-02]], device='cuda:0')

In [10]:
deepwalk_embedding_filepath = 'deepwalk_embeddings.pt'
torch.save(node_embeddings_deepwalk, deepwalk_embedding_filepath)