Skip to content

Commit

Permalink
Fix broken convert test of networkx (#1496)
Browse files Browse the repository at this point in the history
- add copy method to `NeighborDict`
- Fix broken convert test of networkx
  • Loading branch information
acezen committed Apr 29, 2022
1 parent c833e37 commit b035d0a
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 123 deletions.
3 changes: 3 additions & 0 deletions python/graphscope/nx/classes/dict_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,9 @@ def item_equal(a, b):

return len(self) == len(other) and item_equal(self, other)

def copy(self):
return self


class NeighborAttrDict(UserDict):
"""Wrapper for attributes of edge."""
Expand Down
148 changes: 148 additions & 0 deletions python/graphscope/nx/convert_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

import networkx.convert_matrix
from networkx.convert_matrix import from_pandas_edgelist as _from_pandas_edgelist
from networkx.convert_matrix import to_numpy_array as _to_numpy_array
from networkx.convert_matrix import to_numpy_matrix as _to_numpy_matrix
from networkx.convert_matrix import to_scipy_sparse_matrix as _to_scipy_sparse_matrix

from graphscope import nx
from graphscope.nx.utils.compat import import_as_graphscope_nx
Expand Down Expand Up @@ -91,3 +94,148 @@ def from_pandas_edgelist(
g.add_edges_from(edges)

return g


@patch_docstring(_to_numpy_array)
def to_numpy_array(
G,
nodelist=None,
dtype=None,
order=None,
multigraph_weight=sum,
weight="weight",
nonedge=0.0,
):
import numpy as np

if nodelist is None:
nodelist = list(G)
nodeset = G
nlen = len(G)
else:
nlen = len(nodelist)
nodeset = set(G.nbunch_iter(nodelist))
if nlen != len(nodeset):
for n in nodelist:
if n not in G:
raise nx.NetworkXError(f"Node {n} in nodelist is not in G")
raise nx.NetworkXError("nodelist contains duplicates.")

undirected = not G.is_directed()
index = dict(zip(sorted(nodelist), range(nlen)))

if G.is_multigraph():
# Handle MultiGraphs and MultiDiGraphs
A = np.full((nlen, nlen), np.nan, order=order)
# use numpy nan-aware operations
operator = {sum: np.nansum, min: np.nanmin, max: np.nanmax}
try:
op = operator[multigraph_weight]
except Exception as e:
raise ValueError("multigraph_weight must be sum, min, or max") from e

for u, v, attrs in G.edges(data=True):
if (u in nodeset) and (v in nodeset):
i, j = index[u], index[v]
e_weight = attrs.get(weight, 1)
A[i, j] = op([e_weight, A[i, j]])
if undirected:
A[j, i] = A[i, j]
else:
# Graph or DiGraph, this is much faster than above
A = np.full((nlen, nlen), np.nan, order=order)
for u, nbrdict in G.adjacency():
for v, d in nbrdict.items():
try:
A[index[u], index[v]] = d.get(weight, 1)
except KeyError:
# This occurs when there are fewer desired nodes than
# there are nodes in the graph: len(nodelist) < len(G)
pass

A[np.isnan(A)] = nonedge
A = np.asarray(A, dtype=dtype)
return A


@patch_docstring(_to_numpy_matrix)
def to_numpy_matrix(
G,
nodelist=None,
dtype=None,
order=None,
multigraph_weight=sum,
weight="weight",
nonedge=0.0,
):
import numpy as np

A = to_numpy_array(
G,
nodelist=nodelist,
dtype=dtype,
order=order,
multigraph_weight=multigraph_weight,
weight=weight,
nonedge=nonedge,
)
M = np.asmatrix(A, dtype=dtype)
return M


@patch_docstring(_to_scipy_sparse_matrix)
def to_scipy_sparse_matrix(G, nodelist=None, dtype=None, weight="weight", format="csr"):
import scipy as sp
import scipy.sparse # call as sp.sparse

if len(G) == 0:
raise nx.NetworkXError("Graph has no nodes or edges")

if nodelist is None:
nodelist = sorted(G)
nlen = len(G)
else:
nlen = len(nodelist)
if nlen == 0:
raise nx.NetworkXError("nodelist has no nodes")
nodeset = set(G.nbunch_iter(nodelist))
if nlen != len(nodeset):
for n in nodelist:
if n not in G:
raise nx.NetworkXError(f"Node {n} in nodelist is not in G")
raise nx.NetworkXError("nodelist contains duplicates.")
if nlen < len(G):
G = G.subgraph(nodelist)

index = dict(zip(nodelist, range(nlen)))
coefficients = zip(
*((index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1))
)
try:
row, col, data = coefficients
except ValueError:
# there is no edge in the subgraph
row, col, data = [], [], []

if G.is_directed():
M = sp.sparse.coo_matrix((data, (row, col)), shape=(nlen, nlen), dtype=dtype)
else:
# symmetrize matrix
d = data + data
r = row + col
c = col + row
# selfloop entries get double counted when symmetrizing
# so we subtract the data on the diagonal
selfloops = list(nx.selfloop_edges(G, data=weight, default=1))
if selfloops:
diag_index, diag_data = zip(*((index[u], -wt) for u, v, wt in selfloops))
d += diag_data
r += diag_index
c += diag_index
M = sp.sparse.coo_matrix((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
try:
return M.asformat(format)
# From Scipy 1.1.0, asformat will throw a ValueError instead of an
# AttributeError if the format if not recognized.
except (AttributeError, ValueError) as e:
raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from e
46 changes: 38 additions & 8 deletions python/graphscope/nx/tests/convert/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
#

import pytest
from networkx.tests.test_convert import TestConvert
from networkx.tests.test_convert import TestConvert as _TestConvert
from networkx.utils import edges_equal
from networkx.utils import nodes_equal

import graphscope.nx as nx
from graphscope.nx.convert import from_dict_of_dicts
Expand All @@ -28,24 +30,52 @@
from graphscope.nx.convert import to_networkx_graph
from graphscope.nx.generators.classic import barbell_graph
from graphscope.nx.generators.classic import cycle_graph
from graphscope.nx.tests.utils import assert_edges_equal
from graphscope.nx.tests.utils import assert_graphs_equal
from graphscope.nx.tests.utils import assert_nodes_equal
from graphscope.nx.utils.compat import with_graphscope_nx_context


@pytest.mark.skip("AttributeError: 'NeighborDict' object has no attribute 'copy'")
@pytest.mark.usefixtures("graphscope_session")
@with_graphscope_nx_context(TestConvert)
@with_graphscope_nx_context(_TestConvert)
class TestConvert:
def test_attribute_dict_integrity(self):
# we must not replace dict-like graph data structures with dicts
G = nx.Graph()
G.add_nodes_from("abc")
H = to_networkx_graph(G, create_using=nx.Graph)
assert list(H.nodes) == list(G.nodes)
assert sorted(list(H.nodes)) == sorted(list(G.nodes))
H = nx.Graph(G)
assert list(H.nodes) == list(G.nodes)
assert sorted(list(H.nodes)) == sorted(list(G.nodes))

def test_graph(self):
g = nx.cycle_graph(10)
G = nx.Graph()
G.add_nodes_from(g)
G.add_weighted_edges_from((u, v, u) for u, v in g.edges())

# Dict of dicts
dod = to_dict_of_dicts(G)
GG = from_dict_of_dicts(dod, create_using=nx.Graph)
assert nodes_equal(sorted(G.nodes()), sorted(GG.nodes()))
assert edges_equal(sorted(G.edges()), sorted(GG.edges()))
GW = to_networkx_graph(dod, create_using=nx.Graph)
assert nodes_equal(sorted(G.nodes()), sorted(GW.nodes()))
assert edges_equal(sorted(G.edges()), sorted(GW.edges()))
GI = nx.Graph(dod)
assert nodes_equal(sorted(G.nodes()), sorted(GI.nodes()))
assert edges_equal(sorted(G.edges()), sorted(GI.edges()))

# Dict of lists
dol = to_dict_of_lists(G)
GG = from_dict_of_lists(dol, create_using=nx.Graph)
# dict of lists throws away edge data so set it to none
enone = [(u, v, {}) for (u, v, d) in G.edges(data=True)]
assert nodes_equal(sorted(G.nodes()), sorted(GG.nodes()))
assert edges_equal(enone, sorted(GG.edges(data=True)))
GW = to_networkx_graph(dol, create_using=nx.Graph)
assert nodes_equal(sorted(G.nodes()), sorted(GW.nodes()))
assert edges_equal(enone, sorted(GW.edges(data=True)))
GI = nx.Graph(dol)
assert nodes_equal(sorted(G.nodes()), sorted(GI.nodes()))
assert edges_equal(enone, sorted(GI.edges(data=True)))

def test_custom_node_attr_dict_safekeeping(self):
pass
125 changes: 18 additions & 107 deletions python/graphscope/nx/tests/convert/test_convert_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,134 +24,45 @@
np = pytest.importorskip("numpy")
np_assert_equal = np.testing.assert_equal

from networkx.tests.test_convert_numpy import TestConvertNumpyArray
from networkx.tests.test_convert_numpy import TestConvertNumpyMatrix
# fmt: off
from networkx.tests.test_convert_numpy import \
TestConvertNumpyArray as _TestConvertNumpyArray
from networkx.tests.test_convert_numpy import \
TestConvertNumpyMatrix as _TestConvertNumpyMatrix
from networkx.utils import edges_equal

import graphscope.nx as nx
from graphscope.nx.generators.classic import barbell_graph
from graphscope.nx.generators.classic import cycle_graph
from graphscope.nx.generators.classic import path_graph
from graphscope.nx.tests.utils import assert_graphs_equal
from graphscope.nx.utils.compat import with_graphscope_nx_context

# fmt: on


@pytest.mark.skip("AttributeError: 'NeighborDict' object has no attribute 'copy'")
@pytest.mark.usefixtures("graphscope_session")
@with_graphscope_nx_context(TestConvertNumpyMatrix)
@with_graphscope_nx_context(_TestConvertNumpyMatrix)
class TestConvertNumpyMatrix:
def assert_equal(self, G1, G2):
assert sorted(G1.nodes()) == sorted(G2.nodes())
assert edges_equal(sorted(G1.edges()), sorted(G2.edges()))

def test_from_numpy_matrix_type(self):
pass

def test_from_numpy_matrix_dtype(self):
pass

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_graph_matrix(self):
"""Conversion from digraph to matrix to digraph."""
A = nx.to_numpy_matrix(self.G2)
self.identity_conversion(self.G2, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_digraph_matrix(self):
"""Conversion from digraph to matrix to digraph."""
A = nx.to_numpy_matrix(self.G2)
self.identity_conversion(self.G2, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_weighted_graph_matrix(self):
"""Conversion from weighted graph to matrix to weighted graph."""
A = nx.to_numpy_matrix(self.G3)
self.identity_conversion(self.G3, A, nx.Graph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_weighted_digraph_matrix(self):
"""Conversion from weighted digraph to matrix to weighted digraph."""
A = nx.to_numpy_matrix(self.G4)
self.identity_conversion(self.G4, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_nodelist(self):
"""Conversion from graph to matrix to graph with nodelist."""
P4 = path_graph(4)
P3 = path_graph(3)
nodelist = list(P3)
A = nx.to_numpy_matrix(P4, nodelist=nodelist)
GA = nx.Graph(A)
self.assert_equal(GA, P3)

# Make nodelist ambiguous by containing duplicates.
nodelist += [nodelist[0]]
pytest.raises(nx.NetworkXError, nx.to_numpy_matrix, P3, nodelist=nodelist)


@pytest.mark.skip("AttributeError: 'NeighborDict' object has no attribute 'copy'")
@pytest.mark.usefixtures("graphscope_session")
@with_graphscope_nx_context(TestConvertNumpyArray)
@with_graphscope_nx_context(_TestConvertNumpyArray)
class TestConvertNumpyArray:
def assert_equal(self, G1, G2):
assert sorted(G1.nodes()) == sorted(G2.nodes())
assert edges_equal(sorted(G1.edges()), sorted(G2.edges()))

def test_from_numpy_array_type(self):
pass

def test_from_numpy_array_dtype(self):
pass

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_graph_array(self):
"Conversion from graph to array to graph."
A = nx.to_numpy_array(self.G1)
A = np.asarray(A)
self.identity_conversion(self.G1, A, nx.Graph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_digraph_array(self):
"""Conversion from digraph to array to digraph."""
A = nx.to_numpy_array(self.G2)
A = np.asarray(A)
self.identity_conversion(self.G2, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_weighted_graph_array(self):
"""Conversion from weighted graph to array to weighted graph."""
A = nx.to_numpy_array(self.G3)
A = np.asarray(A)
self.identity_conversion(self.G3, A, nx.Graph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_weighted_digraph_array(self):
"""Conversion from weighted digraph to array to weighted digraph."""
A = nx.to_numpy_array(self.G4)
A = np.asarray(A)
self.identity_conversion(self.G4, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_nodelist(self):
"""Conversion from graph to matrix to graph with nodelist."""
P4 = path_graph(4)
P3 = path_graph(3)
nodelist = list(P3)
A = nx.to_numpy_array(P4, nodelist=nodelist)
GA = nx.Graph(A)
self.assert_equal(GA, P3)

# Make nodelist ambiguous by containing duplicates.
nodelist += [nodelist[0]]
pytest.raises(nx.NetworkXError, nx.to_numpy_array, P3, nodelist=nodelist)
Loading

0 comments on commit b035d0a

Please sign in to comment.