graspologic-org · nicaurvi · Jun 23, 2021 · May 18, 2021 · May 18, 2021 · May 18, 2021
diff --git a/graspologic/embed/n2v.py b/graspologic/embed/n2v.py
@@ -13,17 +13,17 @@
 
 
 def node2vec_embed(
-    graph: Union[nx.Graph, nx.DiGraph],
-    num_walks: int = 10,
-    walk_length: int = 80,
-    return_hyperparameter: float = 1.0,
-    inout_hyperparameter: float = 1.0,
-    dimensions: int = 128,
-    window_size: int = 10,
-    workers: int = 8,
-    iterations: int = 1,
-    interpolate_walk_lengths_by_node_degree: bool = True,
-    random_seed: Optional[int] = None,
+        graph: Union[nx.Graph, nx.DiGraph],
+        num_walks: int = 10,
+        walk_length: int = 80,
+        return_hyperparameter: float = 1.0,
+        inout_hyperparameter: float = 1.0,
+        dimensions: int = 128,
+        window_size: int = 10,
+        workers: int = 8,
+        iterations: int = 1,
+        interpolate_walk_lengths_by_node_degree: bool = True,
+        random_seed: Optional[int] = None,
 ) -> Tuple[np.array, List[Any]]:
     """
     Generates a node2vec embedding from a given graph. Will follow the word2vec algorithm to create the embedding.
@@ -130,7 +130,7 @@ def node2vec_embed(
         f"Completed. Ending time is {str(end)} Elapsed time is {str(start - end)}"
     )
 
-    labels = node2vec_graph.original_graph.nodes()
+    labels = list(node2vec_graph.original_graph.nodes())
     remapped_labels = node2vec_graph.label_map_to_string
 
     return (
@@ -154,16 +154,16 @@ def _assert_is_nonnegative_float(name: str, value: float):
 
 
 def _preconditions(
-    graph: Union[nx.Graph, nx.DiGraph],
-    num_walks: int,
-    walk_length: int,
-    return_hyperparameter: float,
-    inout_hyperparameter: float,
-    dimensions: int,
-    window_size: int,
-    workers: int,
-    iterations: int,
-    interpolate_walk_lengths_by_node_degree: bool,
+        graph: Union[nx.Graph, nx.DiGraph],
+        num_walks: int,
+        walk_length: int,
+        return_hyperparameter: float,
+        inout_hyperparameter: float,
+        dimensions: int,
+        window_size: int,
+        workers: int,
+        iterations: int,
+        interpolate_walk_lengths_by_node_degree: bool,
 ):
     if not isinstance(graph, nx.Graph):
         raise TypeError("graph must be a networkx Graph or DiGraph")
@@ -187,12 +187,12 @@ def _preconditions(
 
 
 def _learn_embeddings(
-    walks: List[Any],
-    dimensions: int,
-    window_size: int,
-    workers: int,
-    iterations: int,
-    random_seed: Optional[int],
+        walks: List[Any],
+        dimensions: int,
+        window_size: int,
+        workers: int,
+        iterations: int,
+        random_seed: Optional[int],
 ):
     """
     Learn embeddings by optimizing the skip-gram objective using SGD.
@@ -235,11 +235,11 @@ class _Node2VecGraph:
     """
 
     def __init__(
-        self,
-        graph: nx.Graph,
-        return_hyperparameter: float,
-        inout_hyperparameter: float,
-        random_state: Optional[np.random.RandomState] = None,
+            self,
+            graph: nx.Graph,
+            return_hyperparameter: float,
+            inout_hyperparameter: float,
+            random_state: Optional[np.random.RandomState] = None,
     ):
         self.original_graph: nx.Graph = graph
 
@@ -254,10 +254,10 @@ def __init__(
         self.random_state = random_state
 
     def node2vec_walk(
-        self,
-        walk_length: int,
-        start_node: Any,
-        degree_percentiles: Optional[np.ndarray],
+            self,
+            walk_length: int,
+            start_node: Any,
+            degree_percentiles: Optional[np.ndarray],
     ):
         """
         Simulate a random walk starting from start node.
@@ -313,7 +313,7 @@ def node2vec_walk(
 
     @staticmethod
     def _get_walk_length_interpolated(
-        degree: int, percentiles: list, max_walk_length: int
+            degree: int, percentiles: list, max_walk_length: int
     ):
         """
         Given a node's degree, determine the length of a walk that should be used. If the degree is less than the
@@ -345,10 +345,10 @@ def _get_walk_length_interpolated(
         return math.floor(new_walk_length)
 
     def _simulate_walks(
-        self,
-        num_walks: int,
-        walk_length: int,
-        interpolate_walk_lengths_by_node_degree: bool = False,
+            self,
+            num_walks: int,
+            walk_length: int,
+            interpolate_walk_lengths_by_node_degree: bool = False,
     ):
         """
         Repeatedly simulate random walks from each node.
@@ -513,7 +513,7 @@ def _alias_setup(probabilities: List[float]):
 
 
 def _alias_draw(
-    probabilities: List[float], alias: List[float], random_state: np.random.RandomState
+        probabilities: List[float], alias: List[float], random_state: np.random.RandomState
 ):
     """
     Draw sample from a non-uniform discrete distribution using alias sampling.

diff --git a/graspologic/utils/utils.py b/graspologic/utils/utils.py
@@ -945,7 +945,9 @@ def remap_labels(
 
 
 def remap_node_ids(
-    graph: nx.Graph, weight_attribute: str = "weight"
+        graph: nx.Graph,
+        weight_attribute: str = "weight",
+        weight_default: int = 1
 ) -> Tuple[nx.Graph, Dict[Any, str]]:
     """
     Given a graph with arbitrarily types node ids, return a new graph that contains the exact same edgelist
@@ -957,7 +959,8 @@ def remap_node_ids(
         A graph that has node ids of arbitrary types.
     weight_attribute : str,
         Default is ``weight``. An optional attribute to specify which column in your graph contains the weight value.
-
+    weight_default : int,
+        Default is ``1``. The weight assigned to any edge that does not have the ``weight_attribute`` attribute.
     Returns
     -------
     Tuple[nx.Graph, Dict[Any, str]]
@@ -974,7 +977,7 @@ def remap_node_ids(
     node_id_dict = dict()
     graph_remapped = type(graph)()
 
-    for source, target, weight in graph.edges(data=weight_attribute):
+    for source, target, weight in graph.edges(data=weight_attribute, default=weight_default):
         if source not in node_id_dict:
             node_id_dict[source] = str(len(node_id_dict.keys()))
 

diff --git a/tests/embed/test_n2v.py b/tests/embed/test_n2v.py
@@ -106,6 +106,32 @@ def test_node2vec_embedding_florentine_graph_correct_shape_is_returned(self):
         # vocab list should have exactly 34 elements
         self.assertEqual(len(vocab_list), 15)
 
+    def test_node2vec_embedding_unweighted_florentine_graph_correct_shape_is_returned(self):
+        graph = nx.florentine_families_graph()
+        # the graph is embedded as-is: this test adds no weight attribute to its edges
+        model = gc.embed.node2vec_embed(graph)
+        model_matrix: np.ndarray = model[0]
+        vocab_list = model[1]
+        self.assertIsNotNone(model)
+        self.assertIsNotNone(model[0])
+        self.assertIsNotNone(model[1])
+
+        # model matrix should be 15 x 128 (128 is the default ``dimensions``)
+        self.assertEqual(model_matrix.shape[0], 15)
+        self.assertEqual(model_matrix.shape[1], 128)
+
+        # vocab list should have exactly 15 elements
+        self.assertEqual(len(vocab_list), 15)
+
+    def test_node2vec_same_labels_are_returned(self):
+        """The returned labels are the graph's node ids, in the graph's node order."""
+        graph = nx.florentine_families_graph()
+        node_ids = list(graph.nodes())
+        _, labels = gc.embed.node2vec_embed(graph)
+
+        # comparing whole lists also fails on missing or extra labels, not only on reordering
+        self.assertEqual(node_ids, list(labels))
+
     def test_node2vec_embedding_barbell_graph_correct_shape_is_returned(self):
         graph = nx.barbell_graph(25, 2)
         for s, t in graph.edges():