In [None]:
%load_ext autoreload
%autoreload 2   

In [None]:
import torch
import torch.nn as nn
from torch_geometric.datasets import KarateClub

# Load the Zachary Karate Club dataset and unpack its single graph
dataset = KarateClub()
data = dataset[0]
num_nodes, edge_index = data.num_nodes, data.edge_index

# Dense similarity matrix S: 1 at every (source, target) pair listed in
# edge_index, then the identity is added on top for the self-loops.
# NOTE(review): assumes edge_index lists both directions of each undirected
# edge, which is what makes S symmetric -- confirm against the dataset.
S = torch.zeros(num_nodes, num_nodes)
S[tuple(edge_index)] = 1
S += torch.eye(num_nodes)


# Define the model with embedding lookup
class EmbeddingFactorization(nn.Module):
    """Learnable lookup table that stores one embedding vector per node.

    Args:
        num_nodes: Number of rows in the embedding table.
        embedding_dim: Length of each embedding vector.
    """

    def __init__(self, num_nodes, embedding_dim):
        super().__init__()
        # The table itself is the only trainable parameter of the model.
        self.embedding = nn.Embedding(num_nodes, embedding_dim)

    def forward(self, node_ids):
        """Return the embedding rows selected by ``node_ids``."""
        return self.embedding(node_ids)


# Hyperparameters, node index tensor, model and optimizer
embedding_dim = 16  # Length of each node embedding
node_ids = torch.arange(0, num_nodes)  # Node indices 0 .. num_nodes - 1
model = EmbeddingFactorization(num_nodes, embedding_dim)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)


# Training function
def train():
    """Run one full-batch optimization step and return its loss.

    Operates on the module-level ``model``, ``optimizer``, ``node_ids`` and
    ``S``: embeds every node, reconstructs the similarity matrix as
    ``Z Z^T`` and minimizes the squared Frobenius distance to ``S``.

    Returns:
        float: the loss ``||Z Z^T - S||_F^2`` computed before the parameter
        update performed by this call.
    """
    model.train()
    optimizer.zero_grad()

    # Embeddings for all nodes and the similarity they reconstruct
    Z = model(node_ids)
    S_hat = torch.matmul(Z, Z.t())

    # The sum of squared residuals IS the squared Frobenius norm. Computing it
    # directly avoids the deprecated torch.norm and the needless
    # sqrt-then-square round trip.
    loss = (S_hat - S).pow(2).sum()

    # Backprop and parameter update
    loss.backward()
    optimizer.step()
    return loss.item()


# Train the model, reporting the loss every 20th epoch
for epoch in range(200):
    loss = train()
    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Final embeddings and reconstruction quality; no autograd graph is needed
with torch.no_grad():
    Z = model(node_ids)
    print("Final embeddings shape:", Z.shape)
    print("Sample embeddings:\n", Z[:5])  # First 5 nodes

    # Frobenius norm of the residual (not squared, unlike the training loss).
    # torch.linalg.matrix_norm replaces the deprecated torch.norm.
    S_hat = torch.matmul(Z, Z.t())
    reconstruction_error = torch.linalg.matrix_norm(S_hat - S, ord="fro").item()
print(f"Final reconstruction error: {reconstruction_error:.4f}")

In [None]:
import torch
import torch.nn as nn
from torch_geometric.datasets import KarateClub

# Load the Zachary Karate Club dataset and unpack its single graph
dataset = KarateClub()
data = dataset[0]
num_nodes, edge_index = data.num_nodes, data.edge_index

# Dense adjacency matrix A: 1 at every (source, target) pair listed in
# edge_index. No self-loops are added, so A can be used directly for
# neighborhood computation.
A = torch.zeros(num_nodes, num_nodes)
A[tuple(edge_index)] = 1


# Compute Jaccard similarity matrix
def compute_jaccard_similarity(A):
    """Compute the pairwise Jaccard similarity of node neighborhoods.

    The neighbor set N(i) of node i is the set of column indices holding a
    non-zero entry in row i of ``A``. For every pair (i, j)::

        S[i, j] = |N(i) & N(j)| / |N(i) | N(j)|

    Pairs whose union is empty get similarity 0. The diagonal is computed
    like any other pair, so ``S[i, i]`` is 1 for every node with at least
    one neighbor and 0 for isolated nodes.

    Args:
        A: Square 2-D adjacency tensor; any non-zero entry counts as an edge.

    Returns:
        A symmetric ``(n, n)`` tensor of similarities in [0, 1], where
        ``n = A.shape[0]``.

    Raises:
        ValueError: If ``A`` is not a square 2-D tensor.
    """
    if A.dim() != 2 or A.shape[0] != A.shape[1]:
        raise ValueError(
            f"A must be a square 2-D tensor, got shape {tuple(A.shape)}"
        )

    # Size everything from the argument rather than a module-level global.
    n = A.shape[0]

    # Build each neighbor set once instead of once per (i, j) pair.
    neighborhoods = [set(row.nonzero(as_tuple=True)[0].tolist()) for row in A]

    S = torch.zeros((n, n))
    for i, neighbors_i in enumerate(neighborhoods):
        for j in range(i, n):  # Upper triangle (incl. diagonal), then mirror
            neighbors_j = neighborhoods[j]
            union = len(neighbors_i | neighbors_j)
            if union > 0:  # Avoid division by zero for two isolated nodes
                sim = len(neighbors_i & neighbors_j) / union
                S[i, j] = sim
                S[j, i] = sim
    return S


# Build the Jaccard similarity target and preview its top-left 5x5 corner
S = compute_jaccard_similarity(A)
print(
    "Jaccard similarity matrix sample:\n",
    S[:5, :5],
)


# Define the model with embedding lookup
class EmbeddingFactorization(nn.Module):
    """Embedding-lookup model: one trainable vector per node.

    Args:
        num_nodes: Number of rows in the embedding table.
        embedding_dim: Length of each embedding vector.
    """

    def __init__(self, num_nodes, embedding_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_nodes, embedding_dim)

    def forward(self, node_ids):
        """Look up and return the embeddings for ``node_ids``."""
        return self.embedding(node_ids)


# Hyperparameters, node index tensor, model and optimizer
embedding_dim = 16  # Length of each node embedding
node_ids = torch.arange(0, num_nodes)  # Node indices 0 .. num_nodes - 1
model = EmbeddingFactorization(num_nodes, embedding_dim)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)


# Training function
def train():
    """Run one full-batch optimization step and return its loss.

    Operates on the module-level ``model``, ``optimizer``, ``node_ids`` and
    ``S``: embeds every node, reconstructs the similarity matrix as
    ``Z Z^T`` and minimizes the squared Frobenius distance to ``S``.

    Returns:
        float: the loss ``||Z Z^T - S||_F^2`` computed before the parameter
        update performed by this call.
    """
    model.train()
    optimizer.zero_grad()

    # Embeddings for all nodes and the similarity they reconstruct
    Z = model(node_ids)
    S_hat = torch.matmul(Z, Z.t())

    # The sum of squared residuals IS the squared Frobenius norm. Computing it
    # directly avoids the deprecated torch.norm and the needless
    # sqrt-then-square round trip.
    loss = (S_hat - S).pow(2).sum()

    # Backprop and parameter update
    loss.backward()
    optimizer.step()
    return loss.item()


# Train the model, reporting the loss every 20th epoch
for epoch in range(200):
    loss = train()
    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Final embeddings and reconstruction quality; no autograd graph is needed
with torch.no_grad():
    Z = model(node_ids)
    print("Final embeddings shape:", Z.shape)
    print("Sample embeddings:\n", Z[:5])

    # Frobenius norm of the residual (not squared, unlike the training loss).
    # torch.linalg.matrix_norm replaces the deprecated torch.norm.
    S_hat = torch.matmul(Z, Z.t())
    reconstruction_error = torch.linalg.matrix_norm(S_hat - S, ord="fro").item()
print(f"Final reconstruction error: {reconstruction_error:.4f}")