Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Give informative meta= warning #4637

Merged
merged 2 commits into from Mar 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 25 additions & 14 deletions dask/dataframe/core.py
Expand Up @@ -2285,16 +2285,10 @@ def apply(self, func, convert_dtype=True, meta=no_default, args=(), **kwds):
dask.Series.map_partitions
"""
if meta is no_default:
msg = ("`meta` is not specified, inferred from partial data. "
"Please provide `meta` if the result is unexpected.\n"
" Before: .apply(func)\n"
" After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n"
" or: .apply(func, meta=('x', 'f8')) for series result")
warnings.warn(msg)

meta = _emulate(M.apply, self._meta_nonempty, func,
convert_dtype=convert_dtype,
args=args, udf=True, **kwds)
warnings.warn(meta_warning(meta))

return map_partitions(M.apply, self, func,
convert_dtype, args, meta=meta, **kwds)
Expand Down Expand Up @@ -3145,15 +3139,9 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None,
raise NotImplementedError(msg)

if meta is no_default:
msg = ("`meta` is not specified, inferred from partial data. "
"Please provide `meta` if the result is unexpected.\n"
" Before: .apply(func)\n"
" After: .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n"
" or: .apply(func, meta=('x', 'f8')) for series result")
warnings.warn(msg)

meta = _emulate(M.apply, self._meta_nonempty, func,
args=args, udf=True, **kwds)
warnings.warn(meta_warning(meta))

return map_partitions(M.apply, self, func, args=args, meta=meta, **kwds)

Expand Down Expand Up @@ -4746,3 +4734,26 @@ def partitionwise_graph(func, name, *args, **kwargs):
else:
pairs.extend([arg, None])
return blockwise(func, name, 'i', *pairs, numblocks=numblocks, concatenate=True, **kwargs)


def meta_warning(df):
"""
Provide an informative message when the user is asked to provide metadata

Builds the text of the warning issued when ``meta=`` was not supplied and
had to be inferred. The callers visible in this diff pass the result
straight to ``warnings.warn``.

Parameters
----------
df : object
    The inferred metadata object. If ``is_dataframe_like(df)`` is true, a
    ``{column: dtype string}`` dict is suggested; if ``is_series_like(df)``
    is true, a ``(name, dtype string)`` tuple is suggested; any other
    object produces no suggestion.

Returns
-------
str
    The generic explanation, followed by a ``Before:`` / ``After:`` example
    showing the suggested ``meta=`` value when one could be built.
"""
# Build a value the user could paste after ``meta=``. dtypes are converted
# with ``str`` so the suggestion prints as e.g. 'int64' rather than as a
# dtype object.
if is_dataframe_like(df):
meta_str = {k: str(v) for k, v in df.dtypes.to_dict().items()}
elif is_series_like(df):
meta_str = (df.name, str(df.dtype))
else:
meta_str = None
msg = ("\nYou did not provide metadata, so Dask is running your "
"function on a small dataset to guess output types. "
"It is possible that Dask will guess incorrectly.\n"
"To provide an explicit output types or to silence this message, "
"please provide the `meta=` keyword, as described in the map or "
"apply function that you are using.")
# Truthiness check: ``None`` (neither dataframe- nor series-like) and an
# empty dict (no columns) both skip the Before/After example.
if meta_str:
msg += ("\n"
" Before: .apply(func)\n"
" After: .apply(func, meta=%s)\n" % str(meta_str))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we also know the method they are using, or is that trying to go too far here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into fixing this but it looks like we only use this message on apply currently.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK then - definitely an improvement.

return msg
6 changes: 6 additions & 0 deletions dask/dataframe/tests/test_dataframe.py
Expand Up @@ -2117,6 +2117,12 @@ def test_apply_warns():
ddf.apply(func, axis=1, meta=(None, int))
assert len(w) == 0

with pytest.warns(UserWarning) as w:
ddf.apply(lambda x: x, axis=1)
assert len(w) == 1
assert "'x'" in str(w[0].message)
assert "int64" in str(w[0].message)


def test_applymap():
df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [10, 20, 30, 40]})
Expand Down