Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify mean chunk functions to return dicts rather than arrays #4513

Merged
merged 2 commits into from Feb 25, 2019
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 20 additions & 18 deletions dask/array/reductions.py
Expand Up @@ -22,7 +22,7 @@
from ..compatibility import getargspec, builtins
from ..base import tokenize
from ..highlevelgraph import HighLevelGraph
from ..utils import ignoring, funcname, Dispatch
from ..utils import ignoring, funcname, Dispatch, deepmap
from .. import config

# Generic functions to support chunks of different types
Expand Down Expand Up @@ -331,26 +331,27 @@ def nannumel(x, **kwargs):
def mean_chunk(x, sum=chunk.sum, numel=numel, dtype='f8', **kwargs):
    """Compute the per-chunk pieces needed to form a mean.

    Parameters
    ----------
    x : array-like
        The chunk to reduce.
    sum : callable
        Called as ``sum(x, dtype=dtype, **kwargs)`` to get the chunk total.
    numel : callable
        Called as ``numel(x, dtype=dtype, **kwargs)`` to get the element
        count that goes with that total.
    dtype : dtype-like
        Forwarded to both ``sum`` and ``numel``.
    **kwargs
        Forwarded unchanged to both ``sum`` and ``numel``.

    Returns
    -------
    dict
        ``{'n': <count>, 'total': <sum>}``.
    """
    n = numel(x, dtype=dtype, **kwargs)
    total = sum(x, dtype=dtype, **kwargs)
    # Return a plain dict instead of packing both fields into a structured
    # array: this avoids allocating an ``empty`` array with a compound dtype
    # for whatever container type ``n`` happens to be.
    return {'n': n, 'total': total}


def mean_combine(pairs, sum=chunk.sum, numel=numel, dtype='f8', axis=None,
                 **kwargs):
    """Merge intermediate ``{'n', 'total'}`` results into a single one.

    Parameters
    ----------
    pairs : dict or (nested) list of dict
        Intermediate results, each with keys ``'n'`` and ``'total'``.  A
        single dict is treated as a one-element list.
    sum, numel, dtype
        Accepted so callers can pass the same keywords they pass to the
        chunk function (e.g. through ``functools.partial``); they are not
        used in this body.
    axis
        Passed to ``_concatenate2`` as ``axes`` and to ``.sum`` as ``axis``.
    **kwargs
        Forwarded to the ``.sum`` calls.

    Returns
    -------
    dict
        ``{'n': <combined count>, 'total': <combined sum>}``.
    """
    if not isinstance(pairs, list):
        pairs = [pairs]

    # Pull each field out of the nested structure, keeping the nesting so
    # the pieces can be joined along ``axis``.
    ns = deepmap(lambda pair: pair['n'], pairs)
    totals = deepmap(lambda pair: pair['total'], pairs)

    n = _concatenate2(ns, axes=axis).sum(axis=axis, **kwargs)
    total = _concatenate2(totals, axes=axis).sum(axis=axis, **kwargs)
    return {'n': n, 'total': total}


def mean_agg(pairs, dtype='f8', axis=None, **kwargs):
    """Final aggregation step of the mean: combined total / combined count.

    Parameters
    ----------
    pairs : dict or (nested) list of dict
        Intermediate results, each with keys ``'n'`` and ``'total'``.
    dtype : dtype-like
        Used for both reductions and for the final division.
    axis
        Passed to ``_concatenate2`` as ``axes`` and to ``.sum`` as ``axis``.
    **kwargs
        Forwarded to the ``.sum`` calls.

    Returns
    -------
    The result of ``divide(total, n, dtype=dtype)``.
    """
    ns = deepmap(lambda pair: pair['n'], pairs)
    totals = deepmap(lambda pair: pair['total'], pairs)

    # Reduce each field exactly once, over ``axis`` only.  A second
    # ``.sum()`` with no ``axis`` would collapse every remaining dimension
    # and return the wrong shape/value for partial-axis reductions.
    n = _concatenate2(ns, axes=axis).sum(axis=axis, dtype=dtype, **kwargs)
    total = _concatenate2(totals, axes=axis).sum(axis=axis, dtype=dtype,
                                                 **kwargs)

    return divide(total, n, dtype=dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found the mistake: you're calling `.sum()` twice for both `total` and `n`. Remove one of the two calls for each and you should be good to go!



@wraps(chunk.mean)
Expand All @@ -361,7 +362,7 @@ def mean(a, axis=None, dtype=None, keepdims=False, split_every=None, out=None):
dt = getattr(np.mean(np.empty(shape=(1,), dtype=a.dtype)), 'dtype', object)
return reduction(a, mean_chunk, mean_agg, axis=axis, keepdims=keepdims,
dtype=dt, split_every=split_every, combine=mean_combine,
out=out)
out=out, concatenate=False)


def nanmean(a, axis=None, dtype=None, keepdims=False, split_every=None,
Expand All @@ -373,6 +374,7 @@ def nanmean(a, axis=None, dtype=None, keepdims=False, split_every=None,
return reduction(a, partial(mean_chunk, sum=chunk.nansum, numel=nannumel),
mean_agg, axis=axis, keepdims=keepdims, dtype=dt,
split_every=split_every, out=out,
concatenate=False,
combine=partial(mean_combine, sum=chunk.nansum, numel=nannumel))


Expand Down