Skip to content

Commit

Permalink
Make repartition a no-op when divisions match (#9924)
Browse files Browse the repository at this point in the history
  • Loading branch information
jrbourbeau committed Feb 9, 2023
1 parent dcd008f commit 0058050
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
4 changes: 4 additions & 0 deletions dask/dataframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -7761,6 +7761,10 @@ def repartition(df, divisions=None, force=False):
>>> ddf = dd.repartition(df, [0, 5, 10, 20]) # doctest: +SKIP
"""

# no-op fastpath for when we already have matching divisions
if is_dask_collection(df) and df.divisions == divisions:
return df

token = tokenize(df, divisions)
if isinstance(df, _Frame):
tmp = "repartition-split-" + token
Expand Down
12 changes: 12 additions & 0 deletions dask/dataframe/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2369,6 +2369,18 @@ def test_repartition_freq_day():
)


def test_repartition_noop():
    """Repartitioning onto the divisions a collection already has is a no-op.

    Both the ``DataFrame.repartition`` method and the top-level
    ``dd.repartition`` function must return the very same object (checked
    with ``is``) when the requested divisions equal the current ones.
    """
    frame = pd.DataFrame(
        {"x": [1, 2, 4, 5], "y": [6, 7, 8, 9]},
        index=[-1, 0, 2, 7],
    )
    ddf = dd.from_pandas(frame, npartitions=2)
    current_divisions = ddf.divisions

    # Method form: identity, not mere equality, is the contract under test.
    via_method = ddf.repartition(divisions=current_divisions)
    assert via_method is ddf

    # Function form: the top-level entry point must short-circuit the same way.
    via_function = dd.repartition(ddf, divisions=current_divisions)
    assert via_function is ddf


@pytest.mark.parametrize(
"freq, expected_freq",
[
Expand Down

0 comments on commit 0058050

Please sign in to comment.