Skip to content

Commit

Permalink
Add braycurtis
Browse files Browse the repository at this point in the history
Implements a Dask array function for computing the Bray-Curtis distance
between two 1-D arrays. Also tests it by comparing to the SciPy
implementation of the same name.
  • Loading branch information
jakirkham committed Sep 27, 2017
1 parent 22252bc commit e77eb42
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 0 deletions.
32 changes: 32 additions & 0 deletions dask_distance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,38 @@
from . import _utils


#######################################
# #
# Numeric vector distance functions #
# #
#######################################


@_utils._broadcast_uv_wrapper
def braycurtis(u, v):
    r"""
    Finds the Bray-Curtis distance between two 1-D arrays.

    .. math::

       \frac{ \sum_{i} \lvert u_{i} - v_{i} \rvert }{ \sum_{i} \lvert u_{i} + v_{i} \rvert }

    Args:
        u:           1-D array or collection of 1-D arrays
        v:           1-D array or collection of 1-D arrays

    Returns:
        float:       Bray-Curtis distance

    Note:
        The denominator is not checked for zero; whatever the array
        division yields in that case is returned unchanged.
    """

    # NOTE: the docstring is a raw string so that the LaTeX backslashes
    # (``\sum``, ``\lvert``, ...) are not parsed as (invalid) escape
    # sequences by Python.

    # Work in floating point regardless of the input dtype.
    u = u.astype(float)
    v = v.astype(float)

    # Reduce over the last axis only, leaving any leading axes intact.
    # NOTE(review): presumably the wrapper has already arranged u and v so
    # that the last axis indexes vector components -- confirm in _utils.
    result = abs(u - v).sum(axis=-1) / abs(u + v).sum(axis=-1)

    return result


#####################################################
# #
# Boolean vector distance/dissimilarity functions #
Expand Down
86 changes: 86 additions & 0 deletions tests/test_dask_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,92 @@
import dask_distance


@pytest.mark.parametrize("funcname", ["braycurtis"])
@pytest.mark.parametrize(
    "et, u, v",
    [
        (ValueError, np.zeros((2,), dtype=bool), np.zeros((3,), dtype=bool)),
        (ValueError, np.zeros((1, 2, 1), dtype=bool), np.zeros((3,), dtype=bool)),
        (ValueError, np.zeros((2,), dtype=bool), np.zeros((1, 3, 1), dtype=bool)),
    ],
)
def test_1d_dist_err(funcname, et, u, v):
    """
    Checks that a distance function raises on mismatched inputs.

    Each case pairs arrays whose shapes differ (in length and/or rank) and
    names the exception type ``et`` the call is expected to raise.
    """

    # Look the function up by name so more distances can be added to the
    # parametrization without touching the test body.
    dist = getattr(dask_distance, funcname)

    with pytest.raises(et):
        dist(u, v)


@pytest.mark.parametrize("funcname", ["braycurtis"])
@pytest.mark.parametrize("seed", [0, 137, 34])
@pytest.mark.parametrize("size, chunks", [(10, 5)])
def test_1d_dist(funcname, seed, size, chunks):
    """
    Compares a 1-D distance against the SciPy function of the same name.

    Two seeded random vectors with entries in [-1, 1) are fed to SciPy as
    NumPy arrays and to ``dask_distance`` as chunked Dask arrays; the two
    results must agree (NaNs compare equal).
    """

    np.random.seed(seed)

    # Draw order matters for reproducibility: u first, then v.
    u_np = 2 * np.random.random((size,)) - 1
    v_np = 2 * np.random.random((size,)) - 1

    u_da = da.from_array(u_np, chunks=chunks)
    v_da = da.from_array(v_np, chunks=chunks)

    expected = getattr(spdist, funcname)(u_np, v_np)
    actual = getattr(dask_distance, funcname)(u_da, v_da)

    # ``[()]`` unwraps the 0-d array produced for a single pair of vectors.
    assert np.allclose(np.array(actual)[()], expected, equal_nan=True)


@pytest.mark.parametrize("funcname", ["braycurtis"])
@pytest.mark.parametrize("seed", [0, 137, 34])
@pytest.mark.parametrize(
    "u_shape, u_chunks, v_shape, v_chunks",
    [
        ((2, 10), (1, 5), (3, 10), (1, 5)),
    ],
)
def test_2d_dist(funcname, seed, u_shape, u_chunks, v_shape, v_chunks):
    """
    Compares a distance on collections of vectors against SciPy's ``cdist``.

    Two seeded random 2-D arrays with entries in [-1, 1) are passed to
    ``cdist`` (using ``funcname`` as the metric) and, as chunked Dask
    arrays, to the ``dask_distance`` function; results must agree (NaNs
    compare equal).
    """

    np.random.seed(seed)

    # Draw order matters for reproducibility: u first, then v.
    u_np = 2 * np.random.random(u_shape) - 1
    v_np = 2 * np.random.random(v_shape) - 1

    u_da = da.from_array(u_np, chunks=u_chunks)
    v_da = da.from_array(v_np, chunks=v_chunks)

    dist = getattr(dask_distance, funcname)

    expected = spdist.cdist(u_np, v_np, funcname)
    actual = dist(u_da, v_da)

    assert np.allclose(np.array(actual)[()], expected, equal_nan=True)


@pytest.mark.parametrize(
"funcname", [
"dice",
Expand Down

0 comments on commit e77eb42

Please sign in to comment.