From b76bd7a7f6b8a3b2c97bfedc61f06f070ca56e2a Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Thu, 17 Aug 2023 12:42:17 +0200 Subject: [PATCH] Update docs --- distributed/shuffle/_rechunk.py | 11 ++++------- distributed/shuffle/_shuffle.py | 9 +++------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/distributed/shuffle/_rechunk.py b/distributed/shuffle/_rechunk.py index 0a28da5a4b..d9f3e17975 100644 --- a/distributed/shuffle/_rechunk.py +++ b/distributed/shuffle/_rechunk.py @@ -293,15 +293,14 @@ class ArrayRechunkRun(ShuffleRun[NDIndex, "np.ndarray"]): This object is responsible for splitting, sending, receiving and combining data shards. - It is entirely agnostic to the distributed system and can perform a shuffle - with other `Shuffle` instances using `rpc` and `broadcast`. + It is entirely agnostic to the distributed system and can perform a rechunk + with other run instances using `rpc``. - The user of this needs to guarantee that only `Shuffle`s of the same unique - `ShuffleID` interact. + The user of this needs to guarantee that only `ArrayRechunkRun`s of the same unique + `ShuffleID` and `run_id` interact. Parameters ---------- - # FIXME worker_for: A mapping partition_id -> worker_address. old: @@ -318,8 +317,6 @@ class ArrayRechunkRun(ShuffleRun[NDIndex, "np.ndarray"]): The scratch directory to buffer data in. executor: Thread pool to use for offloading compute. - loop: - The event loop. rpc: A callable returning a PooledRPCCall to contact other Shuffle instances. Typically a ConnectionPool. diff --git a/distributed/shuffle/_shuffle.py b/distributed/shuffle/_shuffle.py index 78cec1a740..e16340c639 100644 --- a/distributed/shuffle/_shuffle.py +++ b/distributed/shuffle/_shuffle.py @@ -342,14 +342,13 @@ class DataFrameShuffleRun(ShuffleRun[int, "pd.DataFrame"]): data shards. It is entirely agnostic to the distributed system and can perform a shuffle - with other `Shuffle` instances using `rpc` and `broadcast`. + with other run instances using `rpc`. - The user of this needs to guarantee that only `Shuffle`s of the same unique - `ShuffleID` interact. + The user of this needs to guarantee that only `DataFrameShuffleRun`s of the + same unique `ShuffleID` and `run_id` interact. Parameters ---------- - # FIXME worker_for: A mapping partition_id -> worker_address. column: @@ -364,8 +363,6 @@ class DataFrameShuffleRun(ShuffleRun[int, "pd.DataFrame"]): The scratch directory to buffer data in. executor: Thread pool to use for offloading compute. - loop: - The event loop. rpc: A callable returning a PooledRPCCall to contact other Shuffle instances. Typically a ConnectionPool.