Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Default weight to 1 for unweighted graph during n2v #789

Merged
merged 13 commits into from
Jun 23, 2021
Merged
86 changes: 43 additions & 43 deletions graspologic/embed/n2v.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@


def node2vec_embed(
graph: Union[nx.Graph, nx.DiGraph],
num_walks: int = 10,
walk_length: int = 80,
return_hyperparameter: float = 1.0,
inout_hyperparameter: float = 1.0,
dimensions: int = 128,
window_size: int = 10,
workers: int = 8,
iterations: int = 1,
interpolate_walk_lengths_by_node_degree: bool = True,
random_seed: Optional[int] = None,
graph: Union[nx.Graph, nx.DiGraph],
nicaurvi marked this conversation as resolved.
Show resolved Hide resolved
num_walks: int = 10,
walk_length: int = 80,
return_hyperparameter: float = 1.0,
inout_hyperparameter: float = 1.0,
dimensions: int = 128,
window_size: int = 10,
workers: int = 8,
iterations: int = 1,
interpolate_walk_lengths_by_node_degree: bool = True,
random_seed: Optional[int] = None,
) -> Tuple[np.array, List[Any]]:
"""
Generates a node2vec embedding from a given graph. Will follow the word2vec algorithm to create the embedding.
Expand Down Expand Up @@ -130,7 +130,7 @@ def node2vec_embed(
f"Completed. Ending time is {str(end)} Elapsed time is {str(start - end)}"
)

labels = node2vec_graph.original_graph.nodes()
labels = list(node2vec_graph.original_graph.nodes())
nicaurvi marked this conversation as resolved.
Show resolved Hide resolved
remapped_labels = node2vec_graph.label_map_to_string

return (
Expand All @@ -154,16 +154,16 @@ def _assert_is_nonnegative_float(name: str, value: float):


def _preconditions(
graph: Union[nx.Graph, nx.DiGraph],
num_walks: int,
walk_length: int,
return_hyperparameter: float,
inout_hyperparameter: float,
dimensions: int,
window_size: int,
workers: int,
iterations: int,
interpolate_walk_lengths_by_node_degree: bool,
graph: Union[nx.Graph, nx.DiGraph],
num_walks: int,
walk_length: int,
return_hyperparameter: float,
inout_hyperparameter: float,
dimensions: int,
window_size: int,
workers: int,
iterations: int,
interpolate_walk_lengths_by_node_degree: bool,
):
if not isinstance(graph, nx.Graph):
raise TypeError("graph must be a networkx Graph or DiGraph")
Expand All @@ -187,12 +187,12 @@ def _preconditions(


def _learn_embeddings(
walks: List[Any],
dimensions: int,
window_size: int,
workers: int,
iterations: int,
random_seed: Optional[int],
walks: List[Any],
dimensions: int,
window_size: int,
workers: int,
iterations: int,
random_seed: Optional[int],
):
"""
Learn embeddings by optimizing the skip-gram objective using SGD.
Expand Down Expand Up @@ -235,11 +235,11 @@ class _Node2VecGraph:
"""

def __init__(
self,
graph: nx.Graph,
return_hyperparameter: float,
inout_hyperparameter: float,
random_state: Optional[np.random.RandomState] = None,
self,
graph: nx.Graph,
return_hyperparameter: float,
inout_hyperparameter: float,
random_state: Optional[np.random.RandomState] = None,
):
self.original_graph: nx.Graph = graph

Expand All @@ -254,10 +254,10 @@ def __init__(
self.random_state = random_state

def node2vec_walk(
self,
walk_length: int,
start_node: Any,
degree_percentiles: Optional[np.ndarray],
self,
walk_length: int,
start_node: Any,
degree_percentiles: Optional[np.ndarray],
):
"""
Simulate a random walk starting from start node.
Expand Down Expand Up @@ -313,7 +313,7 @@ def node2vec_walk(

@staticmethod
def _get_walk_length_interpolated(
degree: int, percentiles: list, max_walk_length: int
degree: int, percentiles: list, max_walk_length: int
):
"""
Given a node's degree, determine the length of a walk that should be used. If the degree is less than the
Expand Down Expand Up @@ -345,10 +345,10 @@ def _get_walk_length_interpolated(
return math.floor(new_walk_length)

def _simulate_walks(
self,
num_walks: int,
walk_length: int,
interpolate_walk_lengths_by_node_degree: bool = False,
self,
num_walks: int,
walk_length: int,
interpolate_walk_lengths_by_node_degree: bool = False,
):
"""
Repeatedly simulate random walks from each node.
Expand Down Expand Up @@ -513,7 +513,7 @@ def _alias_setup(probabilities: List[float]):


def _alias_draw(
probabilities: List[float], alias: List[float], random_state: np.random.RandomState
probabilities: List[float], alias: List[float], random_state: np.random.RandomState
):
"""
Draw sample from a non-uniform discrete distribution using alias sampling.
Expand Down
9 changes: 6 additions & 3 deletions graspologic/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,7 +945,9 @@ def remap_labels(


def remap_node_ids(
graph: nx.Graph, weight_attribute: str = "weight"
graph: nx.Graph,
weight_attribute: str = "weight",
weight_default: int = 1
nicaurvi marked this conversation as resolved.
Show resolved Hide resolved
) -> Tuple[nx.Graph, Dict[Any, str]]:
"""
Given a graph with arbitrarily typed node ids, return a new graph that contains the exact same edgelist
Expand All @@ -957,7 +959,8 @@ def remap_node_ids(
A graph that has node ids of arbitrary types.
weight_attribute : str,
Default is ``weight``. An optional attribute to specify which column in your graph contains the weight value.

weight_default : int,
Default is ``1``. The weight assigned to an edge that does not have the weight attribute.
Returns
-------
Tuple[nx.Graph, Dict[Any, str]]
Expand All @@ -974,7 +977,7 @@ def remap_node_ids(
node_id_dict = dict()
graph_remapped = type(graph)()

for source, target, weight in graph.edges(data=weight_attribute):
for source, target, weight in graph.edges(data=weight_attribute, default=weight_default):
nicaurvi marked this conversation as resolved.
Show resolved Hide resolved
if source not in node_id_dict:
node_id_dict[source] = str(len(node_id_dict.keys()))

Expand Down
26 changes: 26 additions & 0 deletions tests/embed/test_n2v.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,32 @@ def test_node2vec_embedding_florentine_graph_correct_shape_is_returned(self):
# vocab list should have exactly 15 elements
self.assertEqual(len(vocab_list), 15)

def test_node2vec_embedding_unweighted_florentine_graph_correct_shape_is_returned(self):
    """
    Embed the Florentine families graph exactly as networkx builds it (this test adds
    no weight attribute to any edge) and check the shape of the returned embedding.
    """
    graph = nx.florentine_families_graph()

    model = gc.embed.node2vec_embed(graph)

    # Validate the result before indexing into it, so a missing result is reported
    # as an assertion failure rather than a TypeError.
    self.assertIsNotNone(model)
    self.assertIsNotNone(model[0])
    self.assertIsNotNone(model[1])

    model_matrix: np.ndarray = model[0]
    vocab_list = model[1]

    # model matrix should be 15 x 128 (one row per node, default ``dimensions``)
    self.assertEqual(model_matrix.shape[0], 15)
    self.assertEqual(model_matrix.shape[1], 128)

    # vocab list should have exactly 15 elements
    self.assertEqual(len(vocab_list), 15)

def test_node2vec_same_labels_are_returned(self):
    """
    The labels returned by ``node2vec_embed`` must be the graph's own node ids,
    in the same order as ``graph.nodes()``.
    """
    graph = nx.florentine_families_graph()
    node_ids = list(graph.nodes())

    # Only the labels are under test; the embedding matrix is ignored.
    _, labels = gc.embed.node2vec_embed(graph)

    # Compare lengths first: an element-wise loop alone would let extra labels
    # pass unnoticed and would raise IndexError on missing ones.
    self.assertEqual(len(node_ids), len(labels))

    for expected, actual in zip(node_ids, labels):
        self.assertEqual(expected, actual)

def test_node2vec_embedding_barbell_graph_correct_shape_is_returned(self):
graph = nx.barbell_graph(25, 2)
for s, t in graph.edges():
Expand Down