Skip to content

Commit

Permalink
Add wminkowski
Browse files Browse the repository at this point in the history
Implements a Dask array function for computing the weighted Minkowski
distance between two 1-D arrays. Also tests it by comparing to the SciPy
implementation of the same name.
  • Loading branch information
jakirkham committed Sep 29, 2017
1 parent cba9c62 commit 3de52f8
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 0 deletions.
43 changes: 43 additions & 0 deletions dask_distance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,49 @@ def sqeuclidean(u, v):
return result


def wminkowski(u, v, p, w):
    r"""
    Finds the weighted Minkowski distance between two 1-D arrays.

    .. math::

        \left(
            \sum_{i} \lvert w_{i} \cdot (u_{i} - v_{i}) \rvert^{p}
        \right)^{
            \frac{1}{p}
        }

    Args:
        u:           1-D array or collection of 1-D arrays
        v:           1-D array or collection of 1-D arrays
        p:           degree of the norm to use
        w:           1-D array of weights

    Returns:
        float:       weighted Minkowski distance

    Raises:
        ValueError:  if ``w`` is not 1-D
    """

    # NOTE: the docstring is a raw string so the LaTeX backslashes are kept
    # verbatim instead of being parsed as (invalid) string escape sequences.

    p = _compat._asarray(p)
    w = _compat._asarray(w)

    if w.ndim != 1:
        raise ValueError("w must have a dimension of 1.")

    U, V = _utils._broadcast_uv(u, v)

    # Tile the weights along two new leading axes so that W lines up with
    # the first two axes of U; the weights themselves stay on the last axis.
    W = w[None, None].repeat(U.shape[0], axis=0).repeat(U.shape[1], axis=1)

    # Do all arithmetic in floating point (also keeps ``1 / p`` a true
    # division regardless of the input dtypes).
    U = U.astype(float)
    V = V.astype(float)
    p = p.astype(float)
    W = W.astype(float)

    # Weighted p-norm of the difference, reduced over the last axis.
    result = (abs(W * (U - V)) ** p).sum(axis=-1) ** (1 / p)

    # Hand the result back through the helper paired with _broadcast_uv.
    # NOTE(review): presumably this restores the output shape implied by the
    # original ``u``/``v`` arguments -- confirm against _utils.
    result = _utils._unbroadcast_uv(u, v, result)

    return result


#####################################################
# #
# Boolean vector distance/dissimilarity functions #
Expand Down
6 changes: 6 additions & 0 deletions tests/test_dask_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
("minkowski", {"p": 3}),
("minkowski", {"p": 1.4}),
("sqeuclidean", {}),
("wminkowski", {"p": 3, "w": 1}),
]
)
@pytest.mark.parametrize("et, u, v", [
Expand All @@ -51,6 +52,8 @@ def test_1d_dist_err(funcname, kw, et, u, v):
("minkowski", {"p": 3}),
("minkowski", {"p": 1.4}),
("sqeuclidean", {}),
("wminkowski", {"p": 4}),
("wminkowski", {"p": 1.6}),
]
)
@pytest.mark.parametrize(
Expand All @@ -77,6 +80,9 @@ def test_1d_dist(funcname, kw, seed, size, chunks):
sp_func = getattr(spdist, funcname)
da_func = getattr(dask_distance, funcname)

if funcname == "wminkowski":
kw["w"] = 2 * np.random.random((size,)) - 1

a_r = sp_func(a_u, a_v, **kw)
d_r = da_func(d_u, d_v, **kw)

Expand Down

0 comments on commit 3de52f8

Please sign in to comment.