Skip to content

Commit

Permalink
Return da.Array rather than dd.Series for non-ufunc elementwi…
Browse files Browse the repository at this point in the history
…se functions on ``dd.Series`` (#8558)
  • Loading branch information
jsignell committed Apr 7, 2022
1 parent f80c3e4 commit e07c98c
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 43 deletions.
42 changes: 12 additions & 30 deletions dask/array/ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,16 @@
from dask.array.core import Array, apply_infer_dtype, asarray, blockwise, elemwise
from dask.base import is_dask_collection, normalize_function
from dask.highlevelgraph import HighLevelGraph
from dask.utils import (
derived_from,
funcname,
is_dataframe_like,
is_index_like,
is_series_like,
)
from dask.utils import derived_from, funcname


def __array_wrap__(numpy_ufunc, x, *args, **kwargs):
return x.__array_wrap__(numpy_ufunc(x, *args, **kwargs))


def wrap_elemwise(numpy_ufunc, array_wrap=False, source=np):
def wrap_elemwise(numpy_ufunc, source=np):
"""Wrap up numpy function into dask.array"""

def wrapped(*args, **kwargs):
dsk = [arg for arg in args if hasattr(arg, "_elemwise")]
if len(dsk) > 0:
is_dataframe = (
is_dataframe_like(dsk[0])
or is_series_like(dsk[0])
or is_index_like(dsk[0])
)
if array_wrap and is_dataframe:
return dsk[0]._elemwise(__array_wrap__, numpy_ufunc, *args, **kwargs)
else:
return dsk[0]._elemwise(numpy_ufunc, *args, **kwargs)
return dsk[0]._elemwise(numpy_ufunc, *args, **kwargs)
else:
return numpy_ufunc(*args, **kwargs)

Expand Down Expand Up @@ -283,21 +265,21 @@ def outer(self, A, B, **kwargs):

# non-ufunc elementwise functions
clip = wrap_elemwise(np.clip)
isreal = wrap_elemwise(np.isreal, array_wrap=True)
iscomplex = wrap_elemwise(np.iscomplex, array_wrap=True)
real = wrap_elemwise(np.real, array_wrap=True)
imag = wrap_elemwise(np.imag, array_wrap=True)
fix = wrap_elemwise(np.fix, array_wrap=True)
i0 = wrap_elemwise(np.i0, array_wrap=True)
sinc = wrap_elemwise(np.sinc, array_wrap=True)
nan_to_num = wrap_elemwise(np.nan_to_num, array_wrap=True)
isreal = wrap_elemwise(np.isreal)
iscomplex = wrap_elemwise(np.iscomplex)
real = wrap_elemwise(np.real)
imag = wrap_elemwise(np.imag)
fix = wrap_elemwise(np.fix)
i0 = wrap_elemwise(np.i0)
sinc = wrap_elemwise(np.sinc)
nan_to_num = wrap_elemwise(np.nan_to_num)


@derived_from(np)
def angle(x, deg=0):
deg = bool(deg)
if hasattr(x, "_elemwise"):
return x._elemwise(__array_wrap__, np.angle, x, deg)
return x._elemwise(np.angle, x, deg)
return np.angle(x, deg=deg)


Expand Down
24 changes: 11 additions & 13 deletions dask/dataframe/tests/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,29 +143,29 @@ def test_ufunc(pandas_input, ufunc):
@pytest.mark.parametrize(
"ufunc",
[
pytest.param(
"isreal", marks=pytest.mark.filterwarnings("ignore::FutureWarning")
),
"isreal",
"iscomplex",
pytest.param("real", marks=pytest.mark.filterwarnings("ignore::FutureWarning")),
pytest.param("imag", marks=pytest.mark.filterwarnings("ignore::FutureWarning")),
"real",
"imag",
"angle",
"fix",
"i0",
"sinc",
"nan_to_num",
],
)
def test_ufunc_array_wrap(ufunc):
def test_ufunc_wrapped(ufunc):
"""
some np.ufuncs doesn't call __array_wrap__
(or __array_ufunc__ starting from numpy v.1.13.0), it should work as below
- da.ufunc(dd.Series) => dd.Series
- da.ufunc(dd.Series) => da.Array
- da.ufunc(pd.Series) => np.ndarray
- np.ufunc(dd.Series) => np.ndarray
- np.ufunc(pd.Series) => np.ndarray
"""
from dask.array.utils import assert_eq as da_assert_eq

if ufunc == "fix":
pytest.skip("fix calls floor in a way that we do not yet support")

Expand All @@ -178,8 +178,8 @@ def test_ufunc_array_wrap(ufunc):
ds = dd.from_pandas(s, 3)

# applying Dask ufunc doesn't trigger computation
assert isinstance(dafunc(ds), dd.Series)
assert_eq(dafunc(ds), pd.Series(npfunc(s), index=s.index))
assert isinstance(dafunc(ds), da.Array)
da_assert_eq(dafunc(ds), npfunc(s))

assert isinstance(npfunc(ds), np.ndarray)
np.testing.assert_equal(npfunc(ds), npfunc(s))
Expand All @@ -198,10 +198,8 @@ def test_ufunc_array_wrap(ufunc):
ddf = dd.from_pandas(df, 3)

# applying Dask ufunc doesn't trigger computation
assert isinstance(dafunc(ddf), dd.DataFrame)
# result may be read-only ndarray
exp = pd.DataFrame(npfunc(df).copy(), columns=df.columns, index=df.index)
assert_eq(dafunc(ddf), exp)
assert isinstance(dafunc(ddf), da.Array)
da_assert_eq(dafunc(ddf), npfunc(df))

assert isinstance(npfunc(ddf), np.ndarray)
np.testing.assert_array_equal(npfunc(ddf), npfunc(df))
Expand Down

0 comments on commit e07c98c

Please sign in to comment.