# Method Comparison

This notebook compares the four embedding methods available in LSME on a small barbell graph, using 2D PCA projections, silhouette scores, and computation time.

## Methods
1. **Stochastic**: Permutation-averaged signature matrices + encoder
2. **Deterministic**: Edge probability vectors
3. **Random Walk**: Transition probabilities from random walks
4. **Eigenvalue**: Spectral properties of transition matrices

In [None]:
import time

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

from lsme import LSME

## 1. Create Test Graph

We'll use a barbell graph, which has clear structural roles: two cliques joined by a path (the bridge).

In [None]:
# Create barbell graph: two cliques connected by a path.
# The sizes are named so the role thresholds below stay in sync with the graph.
CLIQUE_SIZE = 10   # nodes in each clique
PATH_LENGTH = 10   # nodes on the connecting path

G = nx.barbell_graph(CLIQUE_SIZE, PATH_LENGTH)
print(f"Nodes: {G.number_of_nodes()}")
print(f"Edges: {G.number_of_edges()}")

# barbell_graph numbers the left clique first, then the path, then the right
# clique, so the role of a node follows directly from its integer label.
first_clique_2_node = CLIQUE_SIZE + PATH_LENGTH

# Assign structural roles
roles = []
for n in G.nodes():
    if n < CLIQUE_SIZE:
        roles.append('clique_1')
    elif n >= first_clique_2_node:
        roles.append('clique_2')
    else:
        roles.append('bridge')

# Store roles as node attributes
for n, role in zip(G.nodes(), roles):
    G.nodes[n]['role'] = role

In [None]:
# Draw the barbell graph with every node coloured by its structural role
color_map = {
    'clique_1': '#1f77b4',  # blue
    'bridge': '#2ca02c',    # green
    'clique_2': '#d62728',  # red
}

# Seeded layout so node positions are the same on every run
pos = nx.spring_layout(G, seed=42)
colors = [color_map[attrs['role']] for _, attrs in G.nodes(data=True)]

plt.figure(figsize=(12, 6))
nx.draw(G, pos, with_labels=True, node_size=300, node_color=colors)
plt.title("Barbell Graph\nBlue=Clique1, Green=Bridge, Red=Clique2")
plt.show()

## 2. Run All Methods

In [None]:
# Fit every LSME method on G, recording the result dict and elapsed time per method.
methods = ['stochastic', 'deterministic', 'random_walk', 'eigenvalue']

# Extra constructor arguments per method; methods not listed here are built
# from the shared settings only (method, max_hops, verbose).
method_kwargs = {
    'stochastic': dict(n_samples=100, embedding_dim=32, encoder_epochs=50,
                       random_state=42),
    'random_walk': dict(rw_length=10, sample_size=100, random_state=42),
}

results = {}
times = {}

for method in methods:
    print(f"\nRunning {method}...")
    # perf_counter is the clock intended for measuring elapsed intervals;
    # unlike time.time() it is unaffected by system clock adjustments.
    start = time.perf_counter()

    lsme = LSME(method=method, max_hops=3, verbose=False,
                **method_kwargs.get(method, {}))

    results[method] = lsme.fit_transform(G)
    times[method] = time.perf_counter() - start

    # Embedding length is read off node 0's vector
    emb_dim = results[method]['embeddings'][0].shape[0]
    print(f"  Embedding dim: {emb_dim}")
    print(f"  Time: {times[method]:.2f}s")

## 3. Visual Comparison: 2D Projections

In [None]:
# One scatter panel per method on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 12))

# Fix a node order so embedding rows and point colours line up
nodes = sorted(G.nodes())
colors = [color_map[G.nodes[n]['role']] for n in nodes]

# axes.ravel() walks the grid row by row: top-left, top-right, bottom-left, bottom-right
for ax, method in zip(axes.ravel(), methods):
    # Stack the per-node embedding vectors into one matrix (one row per node)
    node_embeddings = results[method]['embeddings']
    X = np.array([node_embeddings[n] for n in nodes])

    # Project onto the first two principal components
    pca = PCA(n_components=2)
    X_2d = pca.fit_transform(X)
    var_pc1, var_pc2 = pca.explained_variance_ratio_

    # Scatter the projected nodes, coloured by role
    ax.scatter(X_2d[:, 0], X_2d[:, 1], c=colors, s=100, alpha=0.7)
    ax.set_xlabel(f'PC1 ({var_pc1:.1%})')
    ax.set_ylabel(f'PC2 ({var_pc2:.1%})')
    ax.set_title(f'{method.title()} Method\nDim: {X.shape[1]}, Time: {times[method]:.2f}s')

plt.suptitle('2D Projections of Embeddings by Method', fontsize=14)
plt.tight_layout()
plt.show()

## 4. Quantitative Comparison: Silhouette Scores

In [None]:
# Silhouette score of each method's embeddings, using the structural roles as cluster labels
y = np.array([G.nodes[node]['role'] for node in nodes])

scores = {}
for method in methods:
    node_embeddings = results[method]['embeddings']
    X = np.array([node_embeddings[node] for node in nodes])
    score = silhouette_score(X, y)
    scores[method] = score
    print(f"{method:15s}: silhouette = {score:.4f}")

In [None]:
# Bar chart comparison: clustering quality (left) and computation time (right)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# One colour per method, shared by both panels so bars are easy to match up
bar_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
score_values = [scores[m] for m in methods]

# Silhouette scores
ax1.bar(methods, score_values, color=bar_colors)
ax1.set_ylabel('Silhouette Score')
ax1.set_title('Clustering Quality (higher is better)')
# Silhouette scores range over [-1, 1]. Keep the original 0..1 view when all
# scores are non-negative, but widen it when any method scores below zero so
# that its bar is not cut off by the axis limit.
if min(score_values, default=0) < 0:
    ax1.set_ylim(-1, 1)
    ax1.axhline(0, color='black', linewidth=0.8)
else:
    ax1.set_ylim(0, 1)

# Computation time
ax2.bar(methods, [times[m] for m in methods], color=bar_colors)
ax2.set_ylabel('Time (seconds)')
ax2.set_title('Computation Time (lower is better)')

plt.tight_layout()
plt.show()

## 5. Summary Table

In [None]:
# Gather embedding size, clustering quality and run time for every method
# into a single table. Scores and times are pre-formatted as strings so the
# printed table shows a fixed number of decimals.
summary = pd.DataFrame({
    'Method': methods,
    'Embedding Dim': [results[m]['embeddings'][0].shape[0] for m in methods],
    'Silhouette Score': [f"{scores[m]:.4f}" for m in methods],
    'Time (s)': [f"{times[m]:.2f}" for m in methods],
})

print(summary.to_string(index=False))

## Recommendations

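These are general guidelines; check them against the silhouette scores and timings measured above, which depend on the graph and on the parameters chosen for each method.

- **Stochastic**: Best quality, use when accuracy matters most
- **Deterministic**: Fast and reliable, good for quick analysis
- **Random Walk**: A sampling-based option that balances speed against the variability introduced by random walks
- **Eigenvalue**: Fastest, use for spectral properties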