From 3858302ba23d6bfb70614c1228ffca300d4e1f1c Mon Sep 17 00:00:00 2001
From: Julia Signell
Date: Mon, 15 Mar 2021 13:47:40 -0400
Subject: [PATCH] Just use kwargs in aca

---
 dask/dataframe/core.py    | 13 ++++---------
 dask/dataframe/methods.py | 10 ++++++----
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/dask/dataframe/core.py b/dask/dataframe/core.py
index 983d4f0a18c..39aaca5dd2b 100644
--- a/dask/dataframe/core.py
+++ b/dask/dataframe/core.py
@@ -3134,12 +3134,8 @@ def value_counts(
         Note: dropna is only supported in pandas >= 1.1.0, in which case it defaults to
         True.
         """
-        aggregate_kwargs = {
-            "sort": sort,
-            "ascending": ascending,
-            "normalize": normalize,
-        }
-        kwargs = {}
+        kwargs = {"sort": sort, "ascending": ascending, "normalize": normalize}
+
         if dropna is not None:
             if not PANDAS_GT_110:
                 raise NotImplementedError(
@@ -3148,8 +3144,8 @@ def value_counts(
                 )
             kwargs["dropna"] = dropna
 
-        if split_out > 1 and normalize:
-            aggregate_kwargs["length"] = (
+        if split_out > 1:
+            kwargs["total_length"] = (
                 len(self) if dropna is False else len(self.dropna())
             )
 
@@ -3163,7 +3159,6 @@ def value_counts(
             split_every=split_every,
             split_out=split_out,
             split_out_setup=split_out_on_index,
-            aggregate_kwargs=aggregate_kwargs,
             **kwargs,
         )
 
diff --git a/dask/dataframe/methods.py b/dask/dataframe/methods.py
index 9c70e14f670..02a22fec7b2 100644
--- a/dask/dataframe/methods.py
+++ b/dask/dataframe/methods.py
@@ -304,17 +304,19 @@ def unique(x, series_name=None):
     return out
 
 
-def value_counts_combine(x, **groupby_kwargs):
-    # sort, ascending, and normalize don't actually matter until the agg step
+def value_counts_combine(
+    x, sort=True, ascending=False, normalize=False, total_length=None, **groupby_kwargs
+):
+    # sort, ascending, normalize, and total_length don't actually matter until the agg step
     return x.groupby(level=0, **groupby_kwargs).sum()
 
 
 def value_counts_aggregate(
-    x, sort=True, ascending=False, normalize=False, length=None, **groupby_kwargs
+    x, sort=True, ascending=False, normalize=False, total_length=None, **groupby_kwargs
 ):
     out = value_counts_combine(x, **groupby_kwargs)
     if normalize:
-        out /= length if length is not None else out.sum()
+        out /= total_length if total_length is not None else out.sum()
     if sort:
         return out.sort_values(ascending=ascending)
     return out