graspologic-org · daxpryce · Aug 11, 2021 · Aug 10, 2021 · Aug 10, 2021 · Aug 10, 2021
diff --git a/docs/reference/reference/pipeline.rst b/docs/reference/reference/pipeline.rst
@@ -10,5 +10,5 @@ Embed
 -----
 .. automodule:: graspologic.pipeline.embed
 .. autoclass:: graspologic.pipeline.embed.embeddings.Embeddings
-.. autofunction:: graspologic.pipeline.embed.adjacency_spectral_embedding.adjacency_spectral_embedding
-
+.. autofunction:: graspologic.pipeline.embed.adjacency_spectral_embedding
+.. autofunction:: graspologic.pipeline.embed.laplacian_spectral_embedding
diff --git a/graspologic/embed/lse.py b/graspologic/embed/lse.py
@@ -17,8 +17,8 @@ class LaplacianSpectralEmbed(BaseSpectralEmbed):
 
     The laplacian spectral embedding (LSE) is a k-dimensional Euclidean representation
     of the graph based on its Laplacian matrix. It relies on an SVD to reduce
-    the dimensionality to the specified k, or if k is unspecified, can find a number
-    of dimensions automatically.
+    the dimensionality to the specified ``n_components``, or if ``n_components`` is
+    unspecified, can find a number of dimensions automatically.
 
     Parameters
     ----------
@@ -121,7 +121,7 @@ def __init__(
         self,
         form: str = "DAD",
         n_components: Optional[int] = None,
-        n_elbows: int = 2,
+        n_elbows: Optional[int] = 2,
         algorithm: str = "randomized",
         n_iter: int = 5,
         check_lcc: bool = True,

diff --git a/graspologic/pipeline/embed/__init__.py b/graspologic/pipeline/embed/__init__.py
@@ -12,3 +12,4 @@
 
 from .adjacency_spectral_embedding import adjacency_spectral_embedding
 from .embeddings import Embeddings, EmbeddingsView
+from .laplacian_spectral_embedding import laplacian_spectral_embedding
diff --git a/graspologic/pipeline/embed/adjacency_spectral_embedding.py b/graspologic/pipeline/embed/adjacency_spectral_embedding.py
@@ -9,13 +9,13 @@
 from beartype import beartype
 
 from graspologic.embed import AdjacencySpectralEmbed
-from graspologic.preconditions import (
-    check_argument,
-    check_argument_types,
-    check_optional_argument_types,
-    is_real_weighted,
+from graspologic.preconditions import check_argument, is_real_weighted
+from graspologic.utils import (
+    augment_diagonal,
+    is_fully_connected,
+    pass_to_ranks,
+    remove_loops,
 )
-from graspologic.utils import is_fully_connected, pass_to_ranks
 
 from . import __SVD_SOLVER_TYPES  # from the module init
 from ._elbow import _index_of_elbow
@@ -53,7 +53,7 @@ def adjacency_spectral_embedding(
 
     Parameters
     ----------
-    graph : Union[nx.Graph, nx.DiGraph, nx.OrderedGraph, nx.OrderedDiGraph]
+    graph : Union[nx.Graph, nx.OrderedGraph, nx.DiGraph, nx.OrderedDiGraph]
         An undirected or directed graph. The graph **must**:
 
         - be fully numerically weighted (every edge must have a real, numeric weight
@@ -103,6 +103,11 @@ def adjacency_spectral_embedding(
     -------
     Embeddings
 
+    Raises
+    ------
+    beartype.roar.BeartypeCallHintPepParamException if parameters do not match type hints
+    ValueError if values are not within appropriate ranges or allowed values
+
     See Also
     --------
     graspologic.pipeline.embed.Embeddings
@@ -171,14 +176,20 @@ def adjacency_spectral_embedding(
         # not all of the weights are real numbers, if they exist at all
         # this weight=1.0 treatment actually happens in nx.to_scipy_sparse_matrix()
 
-    graph_as_csr = nx.to_scipy_sparse_matrix(graph, weight=weight_attribute)
+    node_labels = np.array(list(graph.nodes()))
+
+    graph_as_csr = nx.to_scipy_sparse_matrix(
+        graph, weight=weight_attribute, nodelist=node_labels
+    )
 
     if not is_fully_connected(graph):
         warnings.warn("More than one connected component detected")
 
-    node_labels = np.array(list(graph.nodes()))
+    graph_sans_loops = remove_loops(graph_as_csr)
+
+    ranked_graph = pass_to_ranks(graph_sans_loops)
 
-    graph_as_csr = pass_to_ranks(graph_as_csr)
+    augmented_graph = augment_diagonal(ranked_graph)
 
     embedder = AdjacencySpectralEmbed(
         n_components=dimensions,
@@ -187,9 +198,9 @@ def adjacency_spectral_embedding(
         n_iter=svd_solver_iterations,
         svd_seed=svd_seed,
         concat=False,
-        diag_aug=True,
+        diag_aug=False,
     )
-    results = embedder.fit_transform(graph_as_csr)
+    results = embedder.fit_transform(augmented_graph)
 
     if elbow_cut is None:
         if graph.is_directed():

diff --git a/graspologic/pipeline/embed/embeddings.py b/graspologic/pipeline/embed/embeddings.py
@@ -49,18 +49,14 @@ def __init__(self, labels: np.ndarray, embeddings: np.ndarray):
             The node labels that are positionally correlated with the embeddings.
             The dtype of labels is any object stored in a networkx Graph object,
             though type uniformity will be required
-        embeddings
+        embeddings : np.ndarray
+            The embedded values generated by the embedding technique.
+
+        Raises
+        ------
+        beartype.roar.BeartypeCallHintPepParamException if the types are invalid
+        ValueError if the row count of labels does not equal the row count of embeddings
         """
-        if labels is None:
-            raise ValueError("labels cannot be None")
-        if embeddings is None:
-            raise ValueError("embeddings cannot be None")
-        if not isinstance(labels, np.ndarray):
-            raise TypeError(f"labels must be numpy.ndarray, got: {type(labels)}")
-        if not isinstance(embeddings, np.ndarray):
-            raise TypeError(
-                f"embeddings must be numpy.ndarray, got: {type(embeddings)}"
-            )
         if labels.shape[0] != embeddings.shape[0]:
             raise ValueError(
                 f"labels and embeddings must have the same number of "

diff --git a/graspologic/pipeline/embed/laplacian_spectral_embedding.py b/graspologic/pipeline/embed/laplacian_spectral_embedding.py
@@ -0,0 +1,231 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import numbers
+import warnings
+from typing import Optional, Union
+
+import networkx as nx
+import numpy as np
+from beartype import beartype
+
+from graspologic.embed import LaplacianSpectralEmbed
+from graspologic.preconditions import check_argument, is_real_weighted
+from graspologic.utils import is_fully_connected, pass_to_ranks, remove_loops
+
+from . import __SVD_SOLVER_TYPES  # from the module init
+from ._elbow import _index_of_elbow
+from .embeddings import Embeddings
+
+__FORMS = ["DAD", "I-DAD", "R-DAD"]
+
+
+@beartype
+def laplacian_spectral_embedding(
+    graph: Union[nx.Graph, nx.OrderedGraph, nx.DiGraph, nx.OrderedDiGraph],
+    form: str = "R-DAD",
+    dimensions: int = 100,
+    elbow_cut: Optional[int] = None,
+    svd_solver_algorithm: str = "randomized",
+    svd_solver_iterations: int = 5,
+    svd_seed: Optional[int] = None,
+    weight_attribute: str = "weight",
+    regularizer: Optional[numbers.Real] = None,
+) -> Embeddings:
+    r"""
+    Given a directed or undirected networkx graph (*not* multigraph), generate an
+    Embeddings object.
+
+    The laplacian spectral embedding process is similar to the adjacency spectral
+    embedding process, with the key differentiator being that the LSE process looks
+    further into the latent space when it captures changes, whereas the ASE process
+    is egocentric and focused on immediate differentiators in a node's periphery.
+
+    All weights will be rescaled based on their relative rank in the graph,
+    which is beneficial in minimizing anomalous results if some edge weights are
+    extremely atypical of the rest of the graph.
+
+    Parameters
+    ----------
+    graph : Union[nx.Graph, nx.OrderedGraph, nx.DiGraph, nx.OrderedDiGraph]
+        An undirected or directed graph. The graph **must**:
+
+        - be fully numerically weighted (every edge must have a real, numeric weight
+          or else it will be treated as an unweighted graph)
+        - be a basic graph (meaning it should not be a multigraph; if you have a
+          multigraph you must first decide how you want to handle the weights of the
+          edges between two nodes, whether summed, averaged, last-wins,
+          maximum-weight-only, etc)
+    form : str (default="R-DAD")
+        Specifies the type of Laplacian normalization to use. Allowed values are:
+        { "DAD", "I-DAD", "R-DAD" }
+    dimensions : int (default=100)
+        Dimensions to use for the svd solver.
+        For undirected graphs, if ``elbow_cut==None``, you will receive an embedding
+        that has ``nodes`` rows and ``dimensions`` columns.
+        For directed graphs, if ``elbow_cut==None``, you will receive an embedding that
+        has ``nodes`` rows and ``2*dimensions`` columns.
+        If ``elbow_cut`` is not ``None``, the embedding is truncated at the
+        ``elbow_cut``-th elbow, but the provided ``dimensions`` will still be used in
+        the creation of the SVD.
+    elbow_cut : Optional[int] (default=None)
+        Using a process described by Zhu & Ghodsi in their paper "Automatic
+        dimensionality selection from the scree plot via the use of profile likelihood",
+        truncate the dimensionality of the return on the ``elbow_cut``-th elbow.
+        By default this value is ``None`` but can be used to reduce the dimensionality
+        of the returned tensors.
+    svd_solver_algorithm : str (default="randomized")
+        allowed values: {'randomized', 'full', 'truncated'}
+
+        SVD solver to use:
+
+            - 'randomized'
+                Computes randomized svd using
+                :func:`sklearn.utils.extmath.randomized_svd`
+            - 'full'
+                Computes full svd using :func:`scipy.linalg.svd`
+                Does not support ``graph`` input of type scipy.sparse.csr_matrix
+            - 'truncated'
+                Computes truncated svd using :func:`scipy.sparse.linalg.svds`
+    svd_solver_iterations : int (default=5)
+        Number of iterations for randomized SVD solver. Not used by 'full' or
+        'truncated'. The default is larger than the default in randomized_svd
+        to handle sparse matrices that may have large slowly decaying spectrum.
+    svd_seed : Optional[int] (default=None)
+        Used to seed the PRNG used in the ``randomized`` svd solver algorithm.
+    weight_attribute : str (default="weight")
+        The edge dictionary key that contains the weight of the edge.
+    regularizer : Optional[numbers.Real] (default=None)
+        Only used when form="R-DAD". Must be None or nonnegative.
+        Constant to be added to the diagonal of degree matrix. If None, average
+        node degree is added. If int or float, must be >= 0.
+
+    Returns
+    -------
+    Embeddings
+
+    Raises
+    ------
+    beartype.roar.BeartypeCallHintPepParamException if parameters do not match type hints
+    ValueError if values are not within appropriate ranges or allowed values
+
+    See Also
+    --------
+    graspologic.pipeline.embed.Embeddings
+    graspologic.embed.LaplacianSpectralEmbed
+    graspologic.embed.select_svd
+    graspologic.utils.to_laplacian
+
+    Notes
+    -----
+    The singular value decomposition:
+
+    .. math:: A = U \Sigma V^T
+
+    is used to find an orthonormal basis for a matrix, which in our case is the
+    Laplacian matrix of the graph. These basis vectors (in the matrices U or V) are
+    ordered according to the amount of variance they explain in the original matrix.
+    By selecting a subset of these basis vectors (through our choice of dimensionality
+    reduction) we can find a lower dimensional space in which to represent the graph.
+
+    References
+    ----------
+    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
+       Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
+       Journal of the American Statistical Association, Vol. 107(499), 2012.
+
+    .. [2] Von Luxburg, Ulrike. "A tutorial on spectral clustering," Statistics
+        and computing, Vol. 17(4), pp. 395-416, 2007.
+
+    .. [3] Rohe, Karl, Sourav Chatterjee, and Bin Yu. "Spectral clustering and
+        the high-dimensional stochastic blockmodel," The Annals of Statistics,
+        Vol. 39(4), pp. 1878-1915, 2011.
+
+    .. [4] Zhu, M. and Ghodsi, A. (2006). Automatic dimensionality selection from the
+        scree plot via the use of profile likelihood. Computational Statistics & Data
+        Analysis, 51(2), pp.918-930.
+
+    """
+    check_argument(
+        form in __FORMS, f"form must be one of the values in {','.join(__FORMS)}"
+    )
+
+    check_argument(dimensions >= 1, "dimensions must be positive")
+
+    check_argument(elbow_cut is None or elbow_cut >= 1, "elbow_cut must be positive")
+
+    check_argument(
+        svd_solver_algorithm in __SVD_SOLVER_TYPES,
+        f"svd_solver_algorithm must be one of the values in {','.join(__SVD_SOLVER_TYPES)}",
+    )
+
+    check_argument(svd_solver_iterations >= 1, "svd_solver_iterations must be positive")
+
+    check_argument(
+        svd_seed is None or 0 <= svd_seed <= 2 ** 32 - 1,
+        "svd_seed must be a nonnegative, 32-bit integer",
+    )
+
+    check_argument(
+        regularizer is None or regularizer >= 0, "regularizer must be nonnegative"
+    )
+
+    check_argument(
+        not graph.is_multigraph(),
+        "Multigraphs are not supported; you must determine how to represent at most "
+        "one edge between any two nodes, and handle the corresponding weights "
+        "accordingly",
+    )
+
+    if not is_real_weighted(graph, weight_attribute=weight_attribute):
+        warnings.warn(
+            f"Graphs with edges that do not have a real numeric weight set for every "
+            f"{weight_attribute} attribute on every edge are treated as an unweighted "
+            f"graph - which presumes all weights are `1.0`. If this is incorrect, "
+            f"please add a '{weight_attribute}' attribute to every edge with a real, "
+            f"numeric value (e.g. an integer or a float) and call this function again."
+        )
+        weight_attribute = None  # this supersedes what the user said, because
+        # not all of the weights are real numbers, if they exist at all
+        # this weight=1.0 treatment actually happens in nx.to_scipy_sparse_matrix()
+
+    node_labels = np.array(list(graph.nodes()))
+
+    graph_as_csr = nx.to_scipy_sparse_matrix(
+        graph, weight=weight_attribute, nodelist=node_labels
+    )
+
+    if not is_fully_connected(graph):
+        warnings.warn("More than one connected component detected")
+
+    graph_sans_loops = remove_loops(graph_as_csr)
+
+    ranked_graph = pass_to_ranks(graph_sans_loops)
+
+    embedder = LaplacianSpectralEmbed(
+        form=form,
+        n_components=dimensions,
+        n_elbows=None,  # in the short term, we do our own elbow finding
+        algorithm=svd_solver_algorithm,
+        n_iter=svd_solver_iterations,
+        svd_seed=svd_seed,
+        regularizer=regularizer,  # only applies when form="R-DAD"
+        concat=False,
+    )
+    results = embedder.fit_transform(ranked_graph)
+
+    if elbow_cut is None:
+        if graph.is_directed():
+            results = np.concatenate(results, axis=1)
+    else:
+        column_index = _index_of_elbow(embedder.singular_values_, elbow_cut)
+        if graph.is_directed():
+            left, right = results
+            left = left[:, :column_index]
+            right = right[:, :column_index]
+            results = np.concatenate((left, right), axis=1)
+        else:
+            results = results[:, :column_index]
+
+    embeddings = Embeddings(node_labels, results)
+    return embeddings
diff --git a/tests/pipeline/embed/test_embeddings.py b/tests/pipeline/embed/test_embeddings.py
@@ -4,6 +4,7 @@
 import unittest
 
 import numpy as np
+from beartype.roar import BeartypeCallHintPepParamException
 
 from graspologic.pipeline.embed import Embeddings
 
@@ -43,3 +44,13 @@ def test_view(self):
         self.assertSetEqual(set(view.keys()), set(expected.keys()))
         for key in expected:
             np.testing.assert_array_equal(expected[key], view[key])
+
+    def test_argument_types(self):
+        with self.assertRaises(BeartypeCallHintPepParamException):
+            Embeddings(None, None)
+        with self.assertRaises(BeartypeCallHintPepParamException):
+            Embeddings(np.array(["hello"]), None)
+        with self.assertRaises(BeartypeCallHintPepParamException):
+            Embeddings(["hello"], [1.0])
+        with self.assertRaises(ValueError):
+            Embeddings(np.array(["hello"]), np.array([[1.1, 1.2], [2.1, 2.2]]))