Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix broken convert test of networkx #1496

Merged
merged 5 commits into from
Apr 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/graphscope/nx/classes/dict_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,9 @@ def item_equal(a, b):

return len(self) == len(other) and item_equal(self, other)

def copy(self):
    """Return this object itself; no new container is created.

    Because the same object is handed back, any mutation made through the
    returned reference is visible through the original one as well.

    NOTE(review): presumably this exists so that callers which invoke
    ``.copy()`` on the adjacency wrapper keep working without materialising
    the underlying data -- confirm that no caller relies on getting an
    independent copy.
    """
    return self


class NeighborAttrDict(UserDict):
"""Wrapper for attributes of edge."""
Expand Down
148 changes: 148 additions & 0 deletions python/graphscope/nx/convert_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

import networkx.convert_matrix
from networkx.convert_matrix import from_pandas_edgelist as _from_pandas_edgelist
from networkx.convert_matrix import to_numpy_array as _to_numpy_array
from networkx.convert_matrix import to_numpy_matrix as _to_numpy_matrix
from networkx.convert_matrix import to_scipy_sparse_matrix as _to_scipy_sparse_matrix

from graphscope import nx
from graphscope.nx.utils.compat import import_as_graphscope_nx
Expand Down Expand Up @@ -91,3 +94,148 @@ def from_pandas_edgelist(
g.add_edges_from(edges)

return g


@patch_docstring(_to_numpy_array)
def to_numpy_array(
    G,
    nodelist=None,
    dtype=None,
    order=None,
    multigraph_weight=sum,
    weight="weight",
    nonedge=0.0,
):
    # Build the dense (nlen x nlen) adjacency array of G.
    #
    # Parameters
    #   G                 graph whose adjacency is exported.
    #   nodelist          nodes to include; row/column i corresponds to
    #                     nodelist[i].  When None, all nodes of G are used in
    #                     sorted order.
    #   dtype, order      forwarded to numpy (result dtype / memory layout).
    #   multigraph_weight one of the builtins sum, min or max; how parallel
    #                     edges of a multigraph are combined.
    #   weight            edge attribute holding the value; missing -> 1.
    #   nonedge           value stored where no edge exists.
    #
    # Raises
    #   nx.NetworkXError  nodelist has duplicates or nodes that are not in G.
    #   ValueError        multigraph_weight is not sum, min or max.
    #
    # The user-facing docstring is supplied by the patch_docstring decorator,
    # so the contract is described in comments here.
    import numpy as np

    if nodelist is None:
        # No order was requested: sort so that the row/column order does not
        # depend on the order in which G happens to yield its nodes.
        nodelist = sorted(G)
        nodeset = G
        nlen = len(G)
    else:
        nlen = len(nodelist)
        nodeset = set(G.nbunch_iter(nodelist))
        if nlen != len(nodeset):
            # Distinguish "unknown node" from "duplicate node" for the caller.
            for n in nodelist:
                if n not in G:
                    raise nx.NetworkXError(f"Node {n} in nodelist is not in G")
            raise nx.NetworkXError("nodelist contains duplicates.")

    undirected = not G.is_directed()
    # Keep the caller's ordering: an explicit nodelist defines which node each
    # row/column stands for, so it must not be re-sorted here.
    index = {node: pos for pos, node in enumerate(nodelist)}

    # NaN marks "no edge seen yet"; it is replaced by `nonedge` at the end.
    A = np.full((nlen, nlen), np.nan, order=order)

    if G.is_multigraph():
        # Handle MultiGraphs and MultiDiGraphs.
        # numpy nan-aware reducers ignore the NaN placeholder of empty cells.
        operator = {sum: np.nansum, min: np.nanmin, max: np.nanmax}
        try:
            op = operator[multigraph_weight]
        except (KeyError, TypeError) as e:
            # KeyError: unsupported callable; TypeError: unhashable argument.
            raise ValueError("multigraph_weight must be sum, min, or max") from e

        for u, v, attrs in G.edges(data=True):
            if (u in nodeset) and (v in nodeset):
                i, j = index[u], index[v]
                e_weight = attrs.get(weight, 1)
                A[i, j] = op([e_weight, A[i, j]])
                if undirected:
                    A[j, i] = A[i, j]
    else:
        # Graph or DiGraph, this is much faster than above.
        for u, nbrdict in G.adjacency():
            if u not in index:
                # u was not requested (len(nodelist) < len(G)): skip its row.
                continue
            i = index[u]
            for v, d in nbrdict.items():
                if v in index:
                    A[i, index[v]] = d.get(weight, 1)

    A[np.isnan(A)] = nonedge
    A = np.asarray(A, dtype=dtype)
    return A


@patch_docstring(_to_numpy_matrix)
def to_numpy_matrix(
    G,
    nodelist=None,
    dtype=None,
    order=None,
    multigraph_weight=sum,
    weight="weight",
    nonedge=0.0,
):
    # Same result as to_numpy_array, but wrapped as a numpy matrix.
    # Every argument is passed straight through to to_numpy_array; the
    # user-facing docstring is supplied by the patch_docstring decorator.
    import numpy as np

    array_options = {
        "nodelist": nodelist,
        "dtype": dtype,
        "order": order,
        "multigraph_weight": multigraph_weight,
        "weight": weight,
        "nonedge": nonedge,
    }
    dense = to_numpy_array(G, **array_options)
    return np.asmatrix(dense, dtype=dtype)


@patch_docstring(_to_scipy_sparse_matrix)
def to_scipy_sparse_matrix(G, nodelist=None, dtype=None, weight="weight", format="csr"):
    # Export the adjacency of G as a scipy sparse matrix in the requested
    # `format`.  Row/column i corresponds to nodelist[i]; when nodelist is
    # None all nodes of G are used in sorted order.  Edges lacking the
    # `weight` attribute count as 1.
    #
    # Raises nx.NetworkXError for an empty graph, an empty nodelist, a
    # nodelist with duplicates or unknown nodes, or an unknown format.
    # The user-facing docstring is supplied by the patch_docstring decorator.
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if len(G) == 0:
        raise nx.NetworkXError("Graph has no nodes or edges")

    if nodelist is None:
        nodelist = sorted(G)
        nlen = len(G)
    else:
        nlen = len(nodelist)
        if nlen == 0:
            raise nx.NetworkXError("nodelist has no nodes")
        nodeset = set(G.nbunch_iter(nodelist))
        if nlen != len(nodeset):
            # Tell the caller whether the mismatch is an unknown node or a
            # repeated one.
            for n in nodelist:
                if n not in G:
                    raise nx.NetworkXError(f"Node {n} in nodelist is not in G")
            raise nx.NetworkXError("nodelist contains duplicates.")
        if nlen < len(G):
            # Restrict to the requested nodes so only their edges are visited.
            G = G.subgraph(nodelist)

    index = dict(zip(nodelist, range(nlen)))
    triples = [
        (index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1)
    ]
    if triples:
        row, col, data = zip(*triples)
    else:
        # there is no edge in the subgraph
        row, col, data = [], [], []

    if G.is_directed():
        values, rows, cols = data, row, col
    else:
        # symmetrize matrix: store every edge in both directions
        values = data + data
        rows = row + col
        cols = col + row
        # selfloop entries get double counted when symmetrizing
        # so we subtract the data on the diagonal
        selfloops = list(nx.selfloop_edges(G, data=weight, default=1))
        if selfloops:
            diag_index = tuple(index[u] for u, _, _ in selfloops)
            diag_data = tuple(-wt for _, _, wt in selfloops)
            values += diag_data
            rows += diag_index
            cols += diag_index

    M = sp.sparse.coo_matrix((values, (rows, cols)), shape=(nlen, nlen), dtype=dtype)
    try:
        return M.asformat(format)
    # From Scipy 1.1.0, asformat will throw a ValueError instead of an
    # AttributeError if the format is not recognized.
    except (AttributeError, ValueError) as e:
        raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from e
46 changes: 38 additions & 8 deletions python/graphscope/nx/tests/convert/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
#

import pytest
from networkx.tests.test_convert import TestConvert
from networkx.tests.test_convert import TestConvert as _TestConvert
from networkx.utils import edges_equal
from networkx.utils import nodes_equal

import graphscope.nx as nx
from graphscope.nx.convert import from_dict_of_dicts
Expand All @@ -28,24 +30,52 @@
from graphscope.nx.convert import to_networkx_graph
from graphscope.nx.generators.classic import barbell_graph
from graphscope.nx.generators.classic import cycle_graph
from graphscope.nx.tests.utils import assert_edges_equal
from graphscope.nx.tests.utils import assert_graphs_equal
from graphscope.nx.tests.utils import assert_nodes_equal
from graphscope.nx.utils.compat import with_graphscope_nx_context


@pytest.mark.skip("AttributeError: 'NeighborDict' object has no attribute 'copy'")
@pytest.mark.usefixtures("graphscope_session")
@with_graphscope_nx_context(TestConvert)
@with_graphscope_nx_context(_TestConvert)
class TestConvert:
def test_attribute_dict_integrity(self):
# we must not replace dict-like graph data structures with dicts
G = nx.Graph()
G.add_nodes_from("abc")
H = to_networkx_graph(G, create_using=nx.Graph)
assert list(H.nodes) == list(G.nodes)
assert sorted(list(H.nodes)) == sorted(list(G.nodes))
H = nx.Graph(G)
assert list(H.nodes) == list(G.nodes)
assert sorted(list(H.nodes)) == sorted(list(G.nodes))

def test_graph(self):
g = nx.cycle_graph(10)
G = nx.Graph()
G.add_nodes_from(g)
G.add_weighted_edges_from((u, v, u) for u, v in g.edges())

# Dict of dicts
dod = to_dict_of_dicts(G)
GG = from_dict_of_dicts(dod, create_using=nx.Graph)
assert nodes_equal(sorted(G.nodes()), sorted(GG.nodes()))
assert edges_equal(sorted(G.edges()), sorted(GG.edges()))
GW = to_networkx_graph(dod, create_using=nx.Graph)
assert nodes_equal(sorted(G.nodes()), sorted(GW.nodes()))
assert edges_equal(sorted(G.edges()), sorted(GW.edges()))
GI = nx.Graph(dod)
assert nodes_equal(sorted(G.nodes()), sorted(GI.nodes()))
assert edges_equal(sorted(G.edges()), sorted(GI.edges()))

# Dict of lists
dol = to_dict_of_lists(G)
GG = from_dict_of_lists(dol, create_using=nx.Graph)
# dict of lists throws away edge data so set it to none
enone = [(u, v, {}) for (u, v, d) in G.edges(data=True)]
assert nodes_equal(sorted(G.nodes()), sorted(GG.nodes()))
assert edges_equal(enone, sorted(GG.edges(data=True)))
GW = to_networkx_graph(dol, create_using=nx.Graph)
assert nodes_equal(sorted(G.nodes()), sorted(GW.nodes()))
assert edges_equal(enone, sorted(GW.edges(data=True)))
GI = nx.Graph(dol)
assert nodes_equal(sorted(G.nodes()), sorted(GI.nodes()))
assert edges_equal(enone, sorted(GI.edges(data=True)))

def test_custom_node_attr_dict_safekeeping(self):
pass
125 changes: 18 additions & 107 deletions python/graphscope/nx/tests/convert/test_convert_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,134 +24,45 @@
np = pytest.importorskip("numpy")
np_assert_equal = np.testing.assert_equal

from networkx.tests.test_convert_numpy import TestConvertNumpyArray
from networkx.tests.test_convert_numpy import TestConvertNumpyMatrix
# fmt: off
from networkx.tests.test_convert_numpy import \
TestConvertNumpyArray as _TestConvertNumpyArray
from networkx.tests.test_convert_numpy import \
TestConvertNumpyMatrix as _TestConvertNumpyMatrix
from networkx.utils import edges_equal

import graphscope.nx as nx
from graphscope.nx.generators.classic import barbell_graph
from graphscope.nx.generators.classic import cycle_graph
from graphscope.nx.generators.classic import path_graph
from graphscope.nx.tests.utils import assert_graphs_equal
from graphscope.nx.utils.compat import with_graphscope_nx_context

# fmt: on


@pytest.mark.skip("AttributeError: 'NeighborDict' object has no attribute 'copy'")
@pytest.mark.usefixtures("graphscope_session")
@with_graphscope_nx_context(TestConvertNumpyMatrix)
@with_graphscope_nx_context(_TestConvertNumpyMatrix)
class TestConvertNumpyMatrix:
def assert_equal(self, G1, G2):
assert sorted(G1.nodes()) == sorted(G2.nodes())
assert edges_equal(sorted(G1.edges()), sorted(G2.edges()))

def test_from_numpy_matrix_type(self):
pass

def test_from_numpy_matrix_dtype(self):
pass

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_graph_matrix(self):
"""Conversion from digraph to matrix to digraph."""
A = nx.to_numpy_matrix(self.G2)
self.identity_conversion(self.G2, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_digraph_matrix(self):
"""Conversion from digraph to matrix to digraph."""
A = nx.to_numpy_matrix(self.G2)
self.identity_conversion(self.G2, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_weighted_graph_matrix(self):
"""Conversion from weighted graph to matrix to weighted graph."""
A = nx.to_numpy_matrix(self.G3)
self.identity_conversion(self.G3, A, nx.Graph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_weighted_digraph_matrix(self):
"""Conversion from weighted digraph to matrix to weighted digraph."""
A = nx.to_numpy_matrix(self.G4)
self.identity_conversion(self.G4, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_nodelist(self):
"""Conversion from graph to matrix to graph with nodelist."""
P4 = path_graph(4)
P3 = path_graph(3)
nodelist = list(P3)
A = nx.to_numpy_matrix(P4, nodelist=nodelist)
GA = nx.Graph(A)
self.assert_equal(GA, P3)

# Make nodelist ambiguous by containing duplicates.
nodelist += [nodelist[0]]
pytest.raises(nx.NetworkXError, nx.to_numpy_matrix, P3, nodelist=nodelist)


@pytest.mark.skip("AttributeError: 'NeighborDict' object has no attribute 'copy'")
@pytest.mark.usefixtures("graphscope_session")
@with_graphscope_nx_context(TestConvertNumpyArray)
@with_graphscope_nx_context(_TestConvertNumpyArray)
class TestConvertNumpyArray:
def assert_equal(self, G1, G2):
assert sorted(G1.nodes()) == sorted(G2.nodes())
assert edges_equal(sorted(G1.edges()), sorted(G2.edges()))

def test_from_numpy_array_type(self):
pass

def test_from_numpy_array_dtype(self):
pass

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_graph_array(self):
"Conversion from graph to array to graph."
A = nx.to_numpy_array(self.G1)
A = np.asarray(A)
self.identity_conversion(self.G1, A, nx.Graph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_digraph_array(self):
"""Conversion from digraph to array to digraph."""
A = nx.to_numpy_array(self.G2)
A = np.asarray(A)
self.identity_conversion(self.G2, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_weighted_graph_array(self):
"""Conversion from weighted graph to array to weighted graph."""
A = nx.to_numpy_array(self.G3)
A = np.asarray(A)
self.identity_conversion(self.G3, A, nx.Graph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_identity_weighted_digraph_array(self):
"""Conversion from weighted digraph to array to weighted digraph."""
A = nx.to_numpy_array(self.G4)
A = np.asarray(A)
self.identity_conversion(self.G4, A, nx.DiGraph())

@pytest.mark.skipif(
os.environ.get("DEPLOYMENT", None) != "standalone", reason="edge order."
)
def test_nodelist(self):
"""Conversion from graph to matrix to graph with nodelist."""
P4 = path_graph(4)
P3 = path_graph(3)
nodelist = list(P3)
A = nx.to_numpy_array(P4, nodelist=nodelist)
GA = nx.Graph(A)
self.assert_equal(GA, P3)

# Make nodelist ambiguous by containing duplicates.
nodelist += [nodelist[0]]
pytest.raises(nx.NetworkXError, nx.to_numpy_array, P3, nodelist=nodelist)
Loading