Skip to content

Commit

Permalink
Rename shuffle to shuffle_method in remaining methods
Browse files Browse the repository at this point in the history
  • Loading branch information
milesgranger committed Jan 15, 2024
1 parent 91dd425 commit 8f6b61d
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 81 deletions.
20 changes: 10 additions & 10 deletions dask/dataframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,9 @@ def _concat(args, ignore_index=False):
)


-def _determine_split_out_shuffle(shuffle, split_out):
+def _determine_split_out_shuffle(shuffle_method, split_out):
     """Determine the default shuffle behavior based on split_out"""
-    if shuffle is None:
+    if shuffle_method is None:
         if split_out > 1:
             # FIXME: This is using a different default but it is not fully
             # understood why this is a better choice.
Expand All @@ -209,9 +209,9 @@ def _determine_split_out_shuffle(shuffle, split_out):
             return config.get("dataframe.shuffle.method", None) or "tasks"
         else:
             return False
-    if shuffle is True:
+    if shuffle_method is True:
         return config.get("dataframe.shuffle.method", None) or "tasks"
-    return shuffle
+    return shuffle_method


def finalize(results):
Expand Down Expand Up @@ -944,7 +944,7 @@ def drop_duplicates(
         subset=None,
         split_every=None,
         split_out=1,
-        shuffle=None,
+        shuffle_method=None,
         ignore_index=False,
         **kwargs,
     ):
Expand All @@ -964,12 +964,12 @@ def drop_duplicates(
         # Check if we should use a shuffle-based algorithm,
         # which is typically faster when we are not reducing
         # to a small number of partitions
-        shuffle = _determine_split_out_shuffle(shuffle, split_out)
-        if shuffle:
+        shuffle_method = _determine_split_out_shuffle(shuffle_method, split_out)
+        if shuffle_method:
             return self._drop_duplicates_shuffle(
                 split_out,
                 split_every,
-                shuffle,
+                shuffle_method,
                 ignore_index,
                 **kwargs,
             )
Expand Down Expand Up @@ -4891,15 +4891,15 @@ def drop_duplicates(
         self,
         split_every=None,
         split_out=1,
-        shuffle=None,
+        shuffle_method=None,
         **kwargs,
     ):
         if not self.known_divisions:
             # Use base class if we have unknown divisions
             return super().drop_duplicates(
                 split_every=split_every,
                 split_out=split_out,
-                shuffle=shuffle,
+                shuffle_method=shuffle_method,
                 **kwargs,
             )

Expand Down

0 comments on commit 8f6b61d

Please sign in to comment.