Documentation and default parameter updates #838

Merged 3 commits on Sep 16, 2021
graspologic/embed/lse.py (3 changes: 2 additions & 1 deletion)

@@ -23,7 +23,8 @@ class LaplacianSpectralEmbed(BaseSpectralEmbed):
     Parameters
     ----------
     form : {'DAD' (default), 'I-DAD', 'R-DAD'}, optional
-        Specifies the type of Laplacian normalization to use.
+        Specifies the type of Laplacian normalization to use. See
+        :func:`~graspologic.utils.to_laplacian` for more details regarding form.

     n_components : int or None, default = None
         Desired dimensionality of output data. If "full",
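Not part of the diff, but for readers skimming this change: the sketch below shows how the ``form`` argument documented above is typically exercised. It assumes the public ``LaplacianSpectralEmbed`` API and a simulated adjacency matrix from ``graspologic.simulations.er_np``; treat it as an illustrative sketch rather than code from this PR.

```python
# Illustrative sketch (not from this PR): embed one simulated graph with each
# documented Laplacian form and compare the resulting latent positions.
import numpy as np
from graspologic.embed import LaplacianSpectralEmbed
from graspologic.simulations import er_np

A = er_np(50, 0.3)  # 50-node Erdos-Renyi adjacency matrix

for form in ("DAD", "I-DAD", "R-DAD"):
    lse = LaplacianSpectralEmbed(form=form, n_components=2)
    X_hat = lse.fit_transform(A)  # one row of latent positions per node
    print(form, X_hat.shape)
```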
graspologic/embed/n2v.py (57 changes: 32 additions & 25 deletions)

@@ -15,62 +15,69 @@
 def node2vec_embed(
     graph: Union[nx.Graph, nx.DiGraph],
     num_walks: int = 10,
-    walk_length: int = 80,
+    walk_length: int = 40,
     return_hyperparameter: float = 1.0,
     inout_hyperparameter: float = 1.0,
     dimensions: int = 128,
-    window_size: int = 10,
+    window_size: int = 2,
     workers: int = 8,
-    iterations: int = 1,
+    iterations: int = 3,
     interpolate_walk_lengths_by_node_degree: bool = True,
     random_seed: Optional[int] = None,
 ) -> Tuple[np.array, List[Any]]:
     """
-    Generates a node2vec embedding from a given graph. Will follow the word2vec algorithm to create the embedding.
+    Generates a node2vec embedding from a given graph. Will follow the word2vec
+    algorithm to create the embedding.

     Parameters
     ----------

     graph: Union[nx.Graph, nx.DiGraph]
-        A networkx graph or digraph. A multigraph should be turned into a non-multigraph so that the calling user
-        properly handles the multi-edges (i.e. aggregate weights or take last edge weight).
-        If the graph is unweighted, the weight of each edge will default to 1.
+        A networkx graph or digraph. A multigraph should be turned into a
+        non-multigraph so that the calling user properly handles the multi-edges
+        (i.e. aggregate weights or take last edge weight). If the graph is unweighted,
+        the weight of each edge will default to 1.
     num_walks : int
         Number of walks per source. Default is 10.
     walk_length: int
-        Length of walk per source. Default is 80.
+        Length of walk per source. Default is 40.
     return_hyperparameter : float
         Return hyperparameter (p). Default is 1.0
     inout_hyperparameter : float
         Inout hyperparameter (q). Default is 1.0
     dimensions : int
         Dimensionality of the word vectors. Default is 128.
     window_size : int
-        Maximum distance between the current and predicted word within a sentence. Default is 10.
+        Maximum distance between the current and predicted word within a sentence.
+        Default is 2.
     workers : int
         Use these many worker threads to train the model. Default is 8.
     iterations : int
-        Number of epochs in stochastic gradient descent (SGD)
+        Number of epochs in stochastic gradient descent (SGD). Default is 3.
     interpolate_walk_lengths_by_node_degree : bool
         Use a dynamic walk length that corresponds to each nodes
-        degree. If the node is in the bottom 20 percentile, default to a walk length of 1. If it is in the top 10
-        percentile, use ``walk_length``. If it is in the 20-80 percentiles, linearly interpolate between 1 and ``walk_length``.
-        This will reduce lower degree nodes from biasing your resulting embedding. If a low degree node has the same
-        number of walks as a high degree node (which it will if this setting is not on), then the lower degree nodes
-        will take a smaller breadth of random walks when compared to the high degree nodes. This will result in your
-        lower degree walks dominating your higher degree nodes.
+        degree. If the node is in the bottom 20 percentile, default to a walk length of
+        1. If it is in the top 10 percentile, use ``walk_length``. If it is in the
+        20-80 percentiles, linearly interpolate between 1 and ``walk_length``.
+        This will reduce lower degree nodes from biasing your resulting embedding. If a
+        low degree node has the same number of walks as a high degree node (which it
+        will if this setting is not on), then the lower degree nodes will take a
+        smaller breadth of random walks when compared to the high degree nodes. This
+        will result in your lower degree walks dominating your higher degree nodes.
     random_seed : int
-        Seed to be used for reproducible results. Default is None and will produce a random output. Note that for a fully
-        deterministically-reproducible run, you must also limit to a single worker thread (`workers=1`), to eliminate
-        ordering jitter from OS thread scheduling. In addition the environment variable ``PYTHONHASHSEED`` must be set
-        to control hash randomization.
+        Seed to be used for reproducible results. Default is None and will produce a
+        random output. Note that for a fully deterministically-reproducible run, you
+        must also limit to a single worker thread (`workers=1`), to eliminate ordering
+        jitter from OS thread scheduling. In addition the environment variable
+        ``PYTHONHASHSEED`` must be set to control hash randomization.

     Returns
     -------
     Tuple[np.array, List[Any]]
-        A tuple containing a matrix, with each row index corresponding to the embedding for each node. The tuple
-        also contains a vector containing the corresponding vertex labels for each row in the matrix.
-        The matrix and vector are positionally correlated.
+        A tuple containing a matrix, with each row index corresponding to the embedding
+        for each node. The tuple also contains a vector containing the corresponding
+        vertex labels for each row in the matrix. The matrix and vector are
+        positionally correlated.

     Notes
     -----
@@ -82,8 +89,8 @@ def node2vec_embed(

     References
     ----------
-    .. [1] Aditya Grover and Jure Leskovec "node2vec: Scalable Feature Learning for Networks."
-       Knowledge Discovery and Data Mining, 2016.
+    .. [1] Aditya Grover and Jure Leskovec "node2vec: Scalable Feature Learning for
+       Networks." Knowledge Discovery and Data Mining, 2016.
     """

     _preconditions(
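Since this PR changes several node2vec defaults (``walk_length``, ``window_size``, ``iterations``), here is a brief usage sketch. It is not part of the diff; it simply exercises the documented signature, including the settings the ``random_seed`` docstring says are needed for a fully deterministic run. The toy graph is an arbitrary choice for illustration.

```python
# Sketch only (not from this PR): calling node2vec_embed with the new defaults,
# plus the documented requirements for a deterministic run (single worker,
# fixed seed, and a controlled PYTHONHASHSEED).
import os
os.environ["PYTHONHASHSEED"] = "0"  # note: hash randomization is only truly
                                    # controlled if this is set before the
                                    # interpreter starts; shown for illustration

import networkx as nx
from graspologic.embed import node2vec_embed

graph = nx.barbell_graph(10, 2)  # arbitrary toy graph

embedding, node_labels = node2vec_embed(
    graph,
    num_walks=10,     # unchanged default
    walk_length=40,   # new default (was 80)
    window_size=2,    # new default (was 10)
    iterations=3,     # new default (was 1)
    workers=1,        # single worker thread for deterministic ordering
    random_seed=1234,
)
print(embedding.shape, len(node_labels))  # one embedding row per node label
```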
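The ``interpolate_walk_lengths_by_node_degree`` behavior described above can also be illustrated. The function below is an assumption, not the library's implementation: the docstring specifies walk length 1 below the 20th degree percentile, ``walk_length`` in the top decile, and linear interpolation for the 20-80 band; this sketch simply interpolates across the whole middle range.

```python
# Rough illustration (assumed, not graspologic's code) of the documented
# degree-based walk-length rule: bottom 20th percentile -> 1, 90th percentile
# and above -> walk_length, anything in between -> linear interpolation.
def interpolated_walk_length(degree_percentile: float, walk_length: int) -> int:
    if degree_percentile <= 20:
        return 1
    if degree_percentile >= 90:
        return walk_length
    fraction = (degree_percentile - 20) / (90 - 20)
    return int(round(1 + fraction * (walk_length - 1)))

for pct in (5, 20, 50, 80, 95):
    print(pct, interpolated_walk_length(pct, walk_length=40))
```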
graspologic/pipeline/embed/laplacian_spectral_embedding.py (3 changes: 2 additions & 1 deletion)

@@ -58,7 +58,8 @@ def laplacian_spectral_embedding(
         maximum-weight-only, etc)
     form : str (default="R-DAD")
         Specifies the type of Laplacian normalization to use. Allowed values are:
-        { "DAD", "I-DAD", "R-DAD" }
+        { "DAD", "I-DAD", "R-DAD" }. See
+        :func:`~graspologic.utils.to_laplacian` for more details regarding form.
     dimensions : int (default=100)
         Dimensions to use for the svd solver.
         For undirected graphs, if ``elbow_cut==None``, you will receive an embedding
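For readers unfamiliar with the three ``form`` values referenced in both docstrings, the sketch below shows the standard normalized-Laplacian variants they usually denote. The exact regularizer handling in :func:`~graspologic.utils.to_laplacian` may differ, so treat this as an approximation rather than the library's implementation.

```python
# Approximate meaning of the three "form" values (not graspologic's exact code).
# DAD:   D^{-1/2} A D^{-1/2}
# I-DAD: I - D^{-1/2} A D^{-1/2}
# R-DAD: regularized DAD, where (assumed here) the average degree scaled by a
#        regularizer is added to each node's degree before normalizing.
import numpy as np

def laplacian(A: np.ndarray, form: str = "R-DAD", regularizer: float = 1.0) -> np.ndarray:
    degrees = A.sum(axis=1)
    if form == "R-DAD":
        degrees = degrees + regularizer * degrees.mean()
    with np.errstate(divide="ignore"):
        inv_sqrt = np.where(degrees > 0, 1.0 / np.sqrt(degrees), 0.0)
    D_inv_sqrt = np.diag(inv_sqrt)
    dad = D_inv_sqrt @ A @ D_inv_sqrt
    if form == "I-DAD":
        return np.eye(A.shape[0]) - dad
    return dad  # "DAD" or "R-DAD"
```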
tests/embed/test_n2v.py (3 changes: 2 additions & 1 deletion)

@@ -62,8 +62,9 @@ def test_n2v_directed_undirected_returns_same_clustering(self):
         undirected_embedding = gc.embed.node2vec_embed(graph, random_seed=1)
         directed_embedding = gc.embed.node2vec_embed(graph_directed, random_seed=1)

-        k = KMeans(n_clusters=2)
+        k = KMeans(n_clusters=2, random_state=1234)
         undirected_labels = k.fit_predict(undirected_embedding[0])
+        k = KMeans(n_clusters=2, random_state=1234)
         directed_labels = k.fit_predict(directed_embedding[0])

         expected_labels = np.zeros(40, dtype=int)
tests/layouts/test_auto.py (2 changes: 1 addition & 1 deletion)

@@ -50,7 +50,7 @@ def test_layout_umap_directed_weighted(self):
         graph = nx.erdos_renyi_graph(10, 0.7, directed=True)

         for s, t in graph.edges():
-            graph.edges[s, t]["weight"] = numpy.random.randint(0, 10)
+            graph.edges[s, t]["weight"] = numpy.random.randint(1, 10)

         _, node_positions = layout_umap(graph=graph)
