In [None]:
#
# Claude response for the following:  Create python code for the Landmark MDS algorithm.  The code should accept a single argument, g, which is a graph.
#
import numpy as np
import networkx as nx
from scipy.sparse.csgraph import dijkstra
from scipy.sparse import csr_matrix
from sklearn.decomposition import PCA

def _as_csr_adjacency(g):
    """Return ``g`` as a square CSR adjacency matrix, whatever supported form it came in."""
    # Function-scope import: the file's import block only pulls in csr_matrix.
    from scipy.sparse import issparse

    # NumPy / SciPy inputs are checked first; they are the cheap, common cases.
    if isinstance(g, np.ndarray):
        adj = csr_matrix(g)
    elif issparse(g):
        # Accept every sparse format (CSC, COO, sparse arrays, ...), not only csr_matrix.
        adj = g.tocsr()
    elif isinstance(g, nx.Graph):
        # Rows/columns follow the graph's node iteration order; edge weights are
        # read from the 'weight' attribute.
        adj = nx.to_scipy_sparse_array(g, weight='weight', format='csr')
    else:
        raise ValueError("Graph must be NetworkX graph, scipy sparse matrix, or numpy array")

    if len(adj.shape) != 2 or adj.shape[0] != adj.shape[1]:
        raise ValueError(f"Adjacency matrix must be square, got shape {adj.shape}")
    return adj


def landmark_mds(g, num_landmarks=None, dimensions=2, random_state=None):
    """
    Landmark Multidimensional Scaling (L-MDS) algorithm for graph embedding.

    A small random set of landmark nodes is embedded with classical MDS on the
    landmark-to-landmark shortest-path distances; every node is then placed by
    distance-based triangulation against those landmarks. Euclidean distances
    between the returned coordinates therefore approximate graph distances,
    in the same units as the edge weights.

    Parameters:
    -----------
    g : networkx.Graph or scipy.sparse matrix or numpy.ndarray
        Input graph. Can be a NetworkX graph, sparse matrix (any format), or
        dense adjacency matrix. Matrices must be square; zero entries mean
        "no edge". The graph is treated as undirected.
    num_landmarks : int, optional
        Number of landmark nodes to select. If None, uses
        max(int(sqrt(n)), dimensions + 1), where n is the number of nodes.
        Values larger than n are clipped to n.
    dimensions : int, default=2
        Number of dimensions for the embedding.
    random_state : int, numpy.random.Generator or None, default=None
        Seed (or generator) for landmark selection. None keeps the historical
        behaviour of drawing from NumPy's global random state.

    Returns:
    --------
    coords : numpy.ndarray
        Coordinates of all nodes in the embedded space (n_nodes x dimensions).
        Axes for which the landmark distances carry no positive variance are
        filled with zeros.
    landmarks : numpy.ndarray
        Indices of selected landmark nodes (row indices of the adjacency matrix).

    Raises:
    -------
    ValueError
        If ``g`` has an unsupported type or is not square, or if
        ``dimensions`` or ``num_landmarks`` is smaller than 1.
    """
    if dimensions < 1:
        raise ValueError("dimensions must be a positive integer")

    adj_matrix = _as_csr_adjacency(g)
    n = adj_matrix.shape[0]
    if n == 0:
        # Nothing to embed; keep the documented return shapes.
        return np.zeros((0, dimensions)), np.zeros(0, dtype=int)

    # Set number of landmarks if not specified
    if num_landmarks is None:
        num_landmarks = max(int(np.sqrt(n)), dimensions + 1)
    num_landmarks = min(num_landmarks, n)
    if num_landmarks < 1:
        raise ValueError("num_landmarks must be a positive integer")

    # Step 1: Select landmark nodes (random selection, without replacement)
    if random_state is None:
        landmarks = np.random.choice(n, size=num_landmarks, replace=False)
    else:
        rng = np.random.default_rng(random_state)
        landmarks = rng.choice(n, size=num_landmarks, replace=False)

    # Step 2: Shortest-path distances from every node to every landmark.
    # One multi-source Dijkstra call returns a (k, n) matrix; transpose to (n, k).
    distances = np.ascontiguousarray(
        dijkstra(adj_matrix, directed=False, indices=landmarks).T
    )

    # Handle infinite distances (disconnected components): substitute twice the
    # largest finite distance so unreachable nodes are pushed far away but stay
    # finite. A landmark is always at distance 0 from itself, so a finite entry
    # exists whenever n >= 1.
    unreachable = np.isinf(distances)
    if unreachable.any():
        distances[unreachable] = 2 * distances[~unreachable].max()

    d_squared = distances ** 2

    # Step 3: Classical MDS on the k x k landmark-to-landmark block.
    landmark_sq = d_squared[landmarks]
    landmark_col_mean = landmark_sq.mean(axis=0)
    gram = -0.5 * (
        landmark_sq
        - landmark_col_mean
        - landmark_sq.mean(axis=1, keepdims=True)
        + landmark_sq.mean()
    )
    gram = 0.5 * (gram + gram.T)  # remove round-off asymmetry before eigh

    eigvals, eigvecs = np.linalg.eigh(gram)  # ascending eigenvalues
    top = np.argsort(eigvals)[::-1][:dimensions]
    top_vals = eigvals[top]

    # Only clearly positive eigenvalues define usable axes; dividing by the
    # square root of a ~0 eigenvalue would just amplify numerical noise.
    tol = 1e-10 * np.abs(eigvals).max()
    usable = top_vals > tol
    inv_scale = np.zeros(top_vals.shape)
    inv_scale[usable] = 1.0 / np.sqrt(top_vals[usable])

    # Columns are v_i / sqrt(lambda_i) (the pseudo-inverse of the landmark
    # coordinates); columns without a usable eigenvalue stay zero.
    projection = np.zeros((num_landmarks, dimensions))
    projection[:, :top.size] = eigvecs[:, top] * inv_scale

    # Step 4: Distance-based triangulation of all nodes (landmarks included):
    # x = -1/2 * pinv(L) @ (delta_x - delta_mean).
    coords = -0.5 * (d_squared - landmark_col_mean) @ projection

    return coords, landmarks


def landmark_mds_with_triangulation(g, num_landmarks=None, dimensions=2):
    """
    Backward-compatible alias for ``landmark_mds``.

    This function returns exactly what ``landmark_mds(g, num_landmarks,
    dimensions)`` returns; it applies no additional refinement. An earlier
    revision recomputed the landmark distance matrix here (one extra Dijkstra
    run per landmark) but never used it, so that dead work has been removed
    without changing the result.

    Parameters:
    -----------
    g : networkx.Graph or scipy.sparse matrix or numpy.ndarray
        Input graph.
    num_landmarks : int, optional
        Number of landmark nodes to select.
    dimensions : int, default=2
        Number of dimensions for the embedding.

    Returns:
    --------
    coords : numpy.ndarray
        Coordinates of all nodes in the embedded space.
    landmarks : numpy.ndarray
        Indices of selected landmark nodes.
    """
    coords, landmarks = landmark_mds(g, num_landmarks, dimensions)
    return coords, landmarks


# Demo: embed the karate club graph when this file is run as a script
if __name__ == "__main__":
    G = nx.karate_club_graph()

    # 10 random landmarks, 2-D embedding
    coordinates, landmark_nodes = landmark_mds(G, num_landmarks=10, dimensions=2)

    # Assemble the report first, then emit it in a single write.
    report = [
        f"Graph has {G.number_of_nodes()} nodes",
        f"Selected {len(landmark_nodes)} landmarks: {landmark_nodes}",
        f"Embedding shape: {coordinates.shape}",
        "\nFirst 5 node coordinates:",
        str(coordinates[:5]),
    ]
    print("\n".join(report))

Graph has 34 nodes
Selected 10 landmarks: [32  3 33  4 17 31  1 22  0  7]
Embedding shape: (34, 2)

First 5 node coordinates:
[[-14.36595125   1.26474765]
 [ -4.47001774   0.66542487]
 [ 18.15190753  18.52941397]
 [-11.72855528  11.79381333]
 [-34.57646537  -5.23432186]]
