Skip to content

Commit

Permalink
Drop vendored copy of indices and uses thereof (#56)
Browse files Browse the repository at this point in the history
* Rewrite `center_of_mass` to use `arange`

Instead of multiplying the image by the output of the vendored `indices`
function, rewrite `center_of_mass` to just use `arange` and rely on Dask to
handle the broadcasting when multiplying with the image. This should be a bit
more efficient and avoids the need for a compatibility function providing
`indices` in `center_of_mass`.

* Rewrite `_ravel_shape_indices` without `indices`

Instead of relying on a vendored copy of `indices` to construct the
raveled shape indices, just use `arange` for each dimension and rely on
Dask to handle the broadcasting for us. Also handle any scalar
multiplication that would occur within the arguments to `arange`. This
creates a much simpler Dask graph and avoids relying on a vendored copy
of `indices`.

* Drop note in `_ravel_shape_indices`

As we are now using just `arange` to construct the indices in
`_ravel_shape_indices`, this note no longer applies. Plus, this code
handles chunking better than a naive `reshape` would. So this really
isn't a workaround anymore, but an efficient solution to the problem.

* Drop vendored copy of `indices`

As we are no longer using `indices` in `ndmeasure`, we no longer need to
include a vendored copy of `indices`. So go ahead and drop this function
from the `_compat` module.

* Drop tests for vendored `indices`

As we no longer include a vendored copy of `indices` in `ndmeasure`,
there is no need to have tests for it. So go ahead and drop the tests.
  • Loading branch information
jakirkham committed Sep 3, 2018
1 parent b0cfe21 commit cbbcea8
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 149 deletions.
20 changes: 11 additions & 9 deletions dask_image/ndmeasure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,20 @@ def center_of_mass(input, labels=None, index=None):

input_mtch_sum = sum(input, labels, index)

input_i = _compat._indices(
input.shape, chunks=input.chunks
)
input_wt_mtch_sum = []
for i in _pycompat.irange(input.ndim):
sl = input.ndim * [None]
sl[i] = slice(None)
sl = tuple(sl)

input_i = dask.array.arange(input.shape[i], chunks=input.chunks[i])
input_wt = input * input_i[sl]

input_i_wt = input[None] * input_i
input_wt_mtch_sum.append(sum(input_wt, labels, index))

input_i_wt_mtch_sum = []
for i in _pycompat.irange(len(input_i_wt)):
input_i_wt_mtch_sum.append(sum(input_i_wt[i], labels, index))
input_i_wt_mtch_sum = dask.array.stack(input_i_wt_mtch_sum, axis=-1)
input_wt_mtch_sum = dask.array.stack(input_wt_mtch_sum, axis=-1)

com_lbl = input_i_wt_mtch_sum / input_mtch_sum[..., None]
com_lbl = input_wt_mtch_sum / input_mtch_sum[..., None]

return com_lbl

Expand Down
62 changes: 0 additions & 62 deletions dask_image/ndmeasure/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,65 +29,3 @@ def _asarray(a):
a = dask.array.from_array(a, a.shape)

return a


def _indices(dimensions, dtype=int, chunks=None):
    """
    Implements NumPy's ``indices`` for Dask Arrays.

    Generates a grid of indices covering the dimensions provided.

    The final array has the shape ``(len(dimensions), *dimensions)``. The
    chunks are used to specify the chunking for axis 1 up to
    ``len(dimensions)``. The 0th axis always has chunks of length 1.

    Parameters
    ----------
    dimensions : sequence of ints
        The shape of the index grid.
    dtype : dtype, optional
        Type to use for the array. Default is ``int``.
    chunks : sequence of ints
        The number of samples on each block. Note that the last block will
        have fewer samples if ``len(array) % chunks != 0``.

    Returns
    -------
    grid : dask array

    Raises
    ------
    ValueError
        If ``chunks`` is not supplied, or if ``chunks`` and ``dimensions``
        do not have the same length.

    Notes
    -----
    Borrowed from my Dask Array contribution.
    """
    if chunks is None:
        raise ValueError("Must supply a chunks= keyword argument")

    dimensions = tuple(dimensions)
    dtype = numpy.dtype(dtype)
    chunks = tuple(chunks)

    # One chunk specification is required per grid dimension.
    if len(dimensions) != len(chunks):
        raise ValueError("Need same number of chunks as dimensions.")

    grid = []
    # A zero-length dimension makes the product 0, so no per-axis ranges are
    # built and the empty-array fallback below is used instead.
    if numpy.prod(dimensions):
        for i in _pycompat.irange(len(dimensions)):
            # Indexer that places the range along axis ``i`` and inserts
            # length-1 axes everywhere else.
            s = len(dimensions) * [None]
            s[i] = slice(None)
            s = tuple(s)

            r = dask.array.arange(dimensions[i], dtype=dtype, chunks=chunks[i])
            r = r[s]

            # Expand every other axis to its full length.
            for j in itertools.chain(_pycompat.irange(i),
                                     _pycompat.irange(i + 1, len(dimensions))):
                r = r.repeat(dimensions[j], axis=j)

            grid.append(r)

    if grid:
        grid = dask.array.stack(grid)
    else:
        # Nothing was generated (no dimensions, or a zero-length one);
        # return an empty array of the expected shape and chunking.
        grid = dask.array.empty(
            (len(dimensions),) + dimensions, dtype=dtype, chunks=(1,) + chunks
        )

    return grid
22 changes: 7 additions & 15 deletions dask_image/ndmeasure/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,15 @@ def _get_label_matches(labels, index):
def _ravel_shape_indices(dimensions, dtype=int, chunks=None):
"""
Gets the raveled indices shaped like input.
Normally this could have been solved with `arange` and `reshape`.
Unfortunately that doesn't work out of the box with older versions
of Dask. So we try to solve this by using indices and computing
the raveled index from that.
"""

dtype = numpy.dtype(dtype)

indices = _compat._indices(
dimensions, dtype=dtype, chunks=chunks
)

indices = list(indices)
for i in _pycompat.irange(len(indices)):
indices[i] *= dtype.type(numpy.prod(indices[i].shape[i + 1:]))
indices = dask.array.stack(indices).sum(axis=0)
indices = sum([
dask.array.arange(
0, numpy.prod(dimensions[i:]), numpy.prod(dimensions[i + 1:]),
dtype=dtype, chunks=c
)[i * (None,) + (slice(None),) + (len(dimensions) - i - 1) * (None,)]
for i, c in enumerate(chunks)
])

return indices

Expand Down
63 changes: 0 additions & 63 deletions tests/test_dask_image/test_ndmeasure/test__compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
# -*- coding: utf-8 -*-


import distutils.version as ver

import pytest

import numpy as np
Expand All @@ -15,10 +13,6 @@
import dask_image.ndmeasure._compat


dask_0_14_0 = ver.LooseVersion(dask.__version__) >= ver.LooseVersion("0.14.0")
dask_0_14_1 = ver.LooseVersion(dask.__version__) >= ver.LooseVersion("0.14.1")


@pytest.mark.parametrize("x", [
list(range(5)),
np.random.randint(10, size=(15, 16)),
Expand All @@ -33,60 +27,3 @@ def test_asarray(x):
x = np.asarray(x)

dau.assert_eq(d, x)


def test_indices_no_chunks():
    """Calling ``_indices`` without ``chunks`` must raise ``ValueError``."""
    indices = dask_image.ndmeasure._compat._indices
    with pytest.raises(ValueError):
        indices((1,))


def test_indices_wrong_chunks():
    """An empty ``chunks`` for a 1-D ``dimensions`` must raise ``ValueError``."""
    indices = dask_image.ndmeasure._compat._indices
    with pytest.raises(ValueError):
        indices((1,), chunks=())


@pytest.mark.parametrize(
    "dimensions, dtype, chunks",
    [
        ((), int, ()),
        ((), float, ()),
        ((0,), float, (1,)),
        ((0, 1, 2), float, (1, 1, 2)),
    ]
)
def test_empty_indicies(dimensions, dtype, chunks):
    """Compare ``_indices`` against ``np.indices`` for the parametrized cases.

    Shape and dtype are always compared; the values are compared as well,
    except that an ``IndexError`` leads to a skip when ``dimensions`` is
    non-empty and the ``dask_0_14_0`` flag is false.
    """
    expected = np.indices(dimensions, dtype)
    result = dask_image.ndmeasure._compat._indices(
        dimensions, dtype, chunks=chunks
    )

    assert result.shape == expected.shape
    assert result.dtype == expected.dtype

    try:
        dau.assert_eq(result, expected)
    except IndexError:
        # Only tolerate the failure for non-empty ``dimensions`` on an old
        # Dask; anything else is a genuine error and is re-raised.
        if not len(dimensions) or dask_0_14_0:
            raise
        pytest.skip(
            "Dask pre-0.14.0 is unable to compute this empty array."
        )


def test_indicies():
    """Check ``_indices`` against ``np.indices`` on small non-empty grids."""
    # Each case: positional arguments shared by both implementations,
    # followed by the chunking passed only to the Dask version.
    cases = [
        (((1,),), (1,)),
        (((1,), float), (1,)),
        (((2, 1),), (2, 1)),
        (((2, 3),), (1, 2)),
    ]

    for args, chunks in cases:
        darr = dask_image.ndmeasure._compat._indices(*args, chunks=chunks)
        nparr = np.indices(*args)
        dau.assert_eq(darr, nparr)

0 comments on commit cbbcea8

Please sign in to comment.