Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CELEBORN-1555] Replace deprecated config celeborn.storage.activeTypes in docs and tests #2675

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ celeborn.worker.storage.dirs /mnt/disk1:disktype=SSD,/mnt/disk2:disktype=SSD

# If Celeborn workers don't have local disks. You can use HDFS.
# Do not set `celeborn.worker.storage.dirs` and use following configs.
celeborn.storage.activeTypes HDFS
celeborn.storage.availableTypes HDFS
celeborn.worker.sortPartition.threads 64
celeborn.worker.commitFiles.timeout 240s
celeborn.worker.commitFiles.threads 128
Expand Down Expand Up @@ -199,7 +199,7 @@ celeborn.worker.storage.dirs /mnt/disk1:disktype=SSD,/mnt/disk2:disktype=SSD

# If Celeborn workers don't have local disks. You can use HDFS.
# Do not set `celeborn.worker.storage.dirs` and use following configs.
celeborn.storage.activeTypes HDFS
celeborn.storage.availableTypes HDFS
celeborn.worker.sortPartition.threads 64
celeborn.worker.commitFiles.timeout 240s
celeborn.worker.commitFiles.threads 128
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1766,7 +1766,7 @@ object CelebornConf extends Logging {
.categories("network")
.version("0.2.0")
.doc("Timeout for RPC ask operations. " +
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`")
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("60s")

Expand Down Expand Up @@ -2627,7 +2627,7 @@ object CelebornConf extends Logging {
.categories("master")
.version("0.3.0")
.doc("Policy for master to assign slots, Celeborn supports two types of policy: roundrobin and loadaware. " +
"Loadaware policy will be ignored when `HDFS` is enabled in `celeborn.storage.activeTypes`")
"Loadaware policy will be ignored when `HDFS` is enabled in `celeborn.storage.availableTypes`")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(Set(
Expand Down Expand Up @@ -3167,7 +3167,7 @@ object CelebornConf extends Logging {
buildConf("celeborn.worker.replicate.fastFail.duration")
.categories("worker")
.doc("If a replicate request not replied during the duration, worker will mark the replicate data request as failed." +
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.version("0.2.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("60s")
Expand Down Expand Up @@ -3197,7 +3197,7 @@ object CelebornConf extends Logging {
.categories("worker")
.version("0.3.0")
.doc("Thread number of worker to commit shuffle data files asynchronously. " +
"It's recommended to set at least `128` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set at least `128` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.intConf
.createWithDefault(32)

Expand All @@ -3222,7 +3222,7 @@ object CelebornConf extends Logging {
.withAlternative("celeborn.worker.shuffle.commit.timeout")
.categories("worker")
.doc("Timeout for a Celeborn worker to commit files of a shuffle. " +
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.version("0.3.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("120s")
Expand All @@ -3241,7 +3241,7 @@ object CelebornConf extends Logging {
.withAlternative("celeborn.worker.partitionSorter.threads")
.categories("worker")
.doc("PartitionSorter's thread counts. " +
"It's recommended to set at least `64` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set at least `64` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.version("0.3.0")
.intConf
.createOptional
Expand Down Expand Up @@ -4010,7 +4010,7 @@ object CelebornConf extends Logging {
.categories("client")
.doc("When true, Celeborn worker will replicate shuffle data to another Celeborn worker " +
"asynchronously to ensure the pushed shuffle data won't be lost after the node failure. " +
"It's recommended to set `false` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set `false` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.version("0.3.0")
.booleanConf
.createWithDefault(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,25 +195,25 @@ class CelebornConfSuite extends CelebornFunSuite {

test("Test empty working dir") {
val conf = new CelebornConf()
conf.set("celeborn.storage.activeTypes", "HDFS")
conf.set("celeborn.storage.availableTypes", "HDFS")
conf.set("celeborn.storage.hdfs.dir", "hdfs:///xxx")
assert(conf.workerBaseDirs.isEmpty)

conf.set("celeborn.storage.activeTypes", "SSD,HDD,HDFS")
conf.set("celeborn.storage.availableTypes", "SSD,HDD,HDFS")
conf.set("celeborn.storage.hdfs.dir", "hdfs:///xxx")
assert(conf.workerBaseDirs.isEmpty)

conf.set("celeborn.storage.activeTypes", "SSD,HDD")
conf.set("celeborn.storage.availableTypes", "SSD,HDD")
assert(!conf.workerBaseDirs.isEmpty)
}

test("Test commit file threads") {
val conf = new CelebornConf()
conf.set("celeborn.storage.activeTypes", "HDFS")
conf.set("celeborn.storage.availableTypes", "HDFS")
conf.set("celeborn.storage.hdfs.dir", "hdfs:///xxx")
assert(conf.workerCommitThreads === 128)

conf.set("celeborn.storage.activeTypes", "SSD,HDD")
conf.set("celeborn.storage.availableTypes", "SSD,HDD")
assert(conf.workerCommitThreads === 32)
}

Expand Down
2 changes: 1 addition & 1 deletion docs/configuration/client.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ license: |
| celeborn.client.push.maxReqsInFlight.perWorker | 32 | false | Amount of Netty in-flight requests per worker. Default max memory of in flight requests per worker is `celeborn.client.push.maxReqsInFlight.perWorker` * `celeborn.client.push.buffer.max.size` * compression ratio(1 in worst case): 64KiB * 32 = 2MiB. The maximum memory will not exceed `celeborn.client.push.maxReqsInFlight.total`. | 0.3.0 | |
| celeborn.client.push.maxReqsInFlight.total | 256 | false | Amount of total Netty in-flight requests. The maximum memory is `celeborn.client.push.maxReqsInFlight.total` * `celeborn.client.push.buffer.max.size` * compression ratio(1 in worst case): 64KiB * 256 = 16MiB | 0.3.0 | celeborn.push.maxReqsInFlight |
| celeborn.client.push.queue.capacity | 512 | false | Push buffer queue size for a task. The maximum memory is `celeborn.client.push.buffer.max.size` * `celeborn.client.push.queue.capacity`, default: 64KiB * 512 = 32MiB | 0.3.0 | celeborn.push.queue.capacity |
| celeborn.client.push.replicate.enabled | false | false | When true, Celeborn worker will replicate shuffle data to another Celeborn worker asynchronously to ensure the pushed shuffle data won't be lost after the node failure. It's recommended to set `false` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.3.0 | celeborn.push.replicate.enabled |
| celeborn.client.push.replicate.enabled | false | false | When true, Celeborn worker will replicate shuffle data to another Celeborn worker asynchronously to ensure the pushed shuffle data won't be lost after the node failure. It's recommended to set `false` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.3.0 | celeborn.push.replicate.enabled |
| celeborn.client.push.retry.threads | 8 | false | Thread number to process shuffle re-send push data requests. | 0.3.0 | celeborn.push.retry.threads |
| celeborn.client.push.revive.batchSize | 2048 | false | Max number of partitions in one Revive request. | 0.3.0 | |
| celeborn.client.push.revive.interval | 100ms | false | Interval for client to trigger Revive to LifecycleManager. The number of partitions in one Revive request is `celeborn.client.push.revive.batchSize`. | 0.3.0 | |
Expand Down
2 changes: 1 addition & 1 deletion docs/configuration/master.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ license: |
| celeborn.master.slot.assign.loadAware.flushTimeWeight | 0.0 | false | Weight of average flush time when calculating ordering in load-aware assignment strategy | 0.3.0 | celeborn.slots.assign.loadAware.flushTimeWeight |
| celeborn.master.slot.assign.loadAware.numDiskGroups | 5 | false | This configuration is a guidance for the load-aware slot allocation algorithm. This value controls how many disk groups will be created. | 0.3.0 | celeborn.slots.assign.loadAware.numDiskGroups |
| celeborn.master.slot.assign.maxWorkers | 10000 | false | Max workers that slots of one shuffle can be allocated on. Will choose the smaller positive one from Master side and Client side, see `celeborn.client.slot.assign.maxWorkers`. | 0.3.1 | |
| celeborn.master.slot.assign.policy | ROUNDROBIN | false | Policy for master to assign slots, Celeborn supports two types of policy: roundrobin and loadaware. Loadaware policy will be ignored when `HDFS` is enabled in `celeborn.storage.activeTypes` | 0.3.0 | celeborn.slots.assign.policy |
| celeborn.master.slot.assign.policy | ROUNDROBIN | false | Policy for master to assign slots, Celeborn supports two types of policy: roundrobin and loadaware. Loadaware policy will be ignored when `HDFS` is enabled in `celeborn.storage.availableTypes` | 0.3.0 | celeborn.slots.assign.policy |
| celeborn.master.userResourceConsumption.update.interval | 30s | false | Time length for a window about compute user resource consumption. | 0.3.0 | |
| celeborn.master.workerUnavailableInfo.expireTimeout | 1800s | false | Worker unavailable info would be cleared when the retention period is expired. Set -1 to disable the expiration. | 0.3.1 | |
| celeborn.quota.enabled | true | false | When set to true on the Master side, the master will check the quota via QuotaManager. When set to true on the Client side, LifecycleManager will request the Master to check whether the current user has enough quota before registering a shuffle. Falls back to the default shuffle service of Spark when the Master finds that there is not enough quota for the current user. | 0.2.0 | |
Expand Down
2 changes: 1 addition & 1 deletion docs/configuration/network.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ license: |
| celeborn.network.memory.allocator.verbose.metric | false | false | Whether to enable verbose metric for pooled allocator. | 0.3.0 | |
| celeborn.network.timeout | 240s | false | Default timeout for network operations. | 0.2.0 | |
| celeborn.port.maxRetries | 1 | false | When port is occupied, we will retry for max retry times. | 0.2.0 | |
| celeborn.rpc.askTimeout | 60s | false | Timeout for RPC ask operations. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes` | 0.2.0 | |
| celeborn.rpc.askTimeout | 60s | false | Timeout for RPC ask operations. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes` | 0.2.0 | |
| celeborn.rpc.connect.threads | 64 | false | | 0.2.0 | |
| celeborn.rpc.dispatcher.threads | 0 | false | Threads number of message dispatcher event loop. Default to 0, which is availableCore. | 0.3.0 | celeborn.rpc.dispatcher.numThreads |
| celeborn.rpc.inbox.capacity | 0 | false | Specifies size of the in memory bounded capacity. | 0.5.0 | |
Expand Down
8 changes: 4 additions & 4 deletions docs/configuration/worker.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ license: |
| celeborn.worker.bufferStream.threadsPerMountpoint | 8 | false | Threads count for read buffer per mount point. | 0.3.0 | |
| celeborn.worker.clean.threads | 64 | false | Thread number of worker to clean up expired shuffle keys. | 0.3.2 | |
| celeborn.worker.closeIdleConnections | false | false | Whether worker will close idle connections. | 0.2.0 | |
| celeborn.worker.commitFiles.threads | 32 | false | Thread number of worker to commit shuffle data files asynchronously. It's recommended to set at least `128` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.3.0 | celeborn.worker.commit.threads |
| celeborn.worker.commitFiles.timeout | 120s | false | Timeout for a Celeborn worker to commit files of a shuffle. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.3.0 | celeborn.worker.shuffle.commit.timeout |
| celeborn.worker.commitFiles.threads | 32 | false | Thread number of worker to commit shuffle data files asynchronously. It's recommended to set at least `128` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.3.0 | celeborn.worker.commit.threads |
| celeborn.worker.commitFiles.timeout | 120s | false | Timeout for a Celeborn worker to commit files of a shuffle. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.3.0 | celeborn.worker.shuffle.commit.timeout |
| celeborn.worker.commitFiles.wait.threads | 32 | false | Thread number of worker to wait for commit shuffle data files to finish. | 0.5.0 | |
| celeborn.worker.congestionControl.check.interval | 10ms | false | Interval of worker checks congestion if celeborn.worker.congestionControl.enabled is true. | 0.3.2 | |
| celeborn.worker.congestionControl.enabled | false | false | Whether to enable congestion control or not. | 0.3.0 | |
Expand Down Expand Up @@ -143,7 +143,7 @@ license: |
| celeborn.worker.readBuffer.target.updateInterval | 100ms | false | The interval for memory manager to calculate new read buffer's target memory. | 0.3.0 | |
| celeborn.worker.readBuffer.toTriggerReadMin | 32 | false | Min buffers count for map data partition to trigger read. | 0.3.0 | |
| celeborn.worker.register.timeout | 180s | false | Worker register timeout. | 0.2.0 | |
| celeborn.worker.replicate.fastFail.duration | 60s | false | If a replicate request not replied during the duration, worker will mark the replicate data request as failed.It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.2.0 | |
| celeborn.worker.replicate.fastFail.duration | 60s | false | If a replicate request is not replied to within the duration, the worker will mark the replicate data request as failed. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.2.0 | |
| celeborn.worker.replicate.io.threads | <undefined> | false | Netty IO thread number of worker to replicate shuffle data. The default threads number is the number of flush thread. | 0.2.0 | |
| celeborn.worker.replicate.port | 0 | false | Server port for Worker to receive replicate data request from other Workers. | 0.2.0 | |
| celeborn.worker.replicate.randomConnection.enabled | true | false | Whether worker will create random connection to peer when replicate data. When false, worker tend to reuse the same cached TransportClient to a specific replicate worker; when true, worker tend to use different cached TransportClient. Netty will use the same thread to serve the same connection, so with more connections replicate server can leverage more netty threads | 0.2.1 | |
Expand All @@ -156,7 +156,7 @@ license: |
| celeborn.worker.sortPartition.indexCache.maxWeight | 100000 | false | PartitionSorter's cache max weight for index buffer. | 0.4.0 | |
| celeborn.worker.sortPartition.prefetch.enabled | true | false | When true, partition sorter will prefetch the original partition files to page cache and reserve memory configured by `celeborn.worker.sortPartition.reservedMemoryPerPartition` to allocate a block of memory for prefetching while sorting a shuffle file off-heap with page cache for non-hdfs files. Otherwise, partition sorter seeks to position of each block and does not prefetch for non-hdfs files. | 0.5.0 | |
| celeborn.worker.sortPartition.reservedMemoryPerPartition | 1mb | false | Reserved memory when sorting a shuffle file off-heap. | 0.3.0 | celeborn.worker.partitionSorter.reservedMemoryPerPartition |
| celeborn.worker.sortPartition.threads | <undefined> | false | PartitionSorter's thread counts. It's recommended to set at least `64` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.3.0 | celeborn.worker.partitionSorter.threads |
| celeborn.worker.sortPartition.threads | <undefined> | false | PartitionSorter's thread counts. It's recommended to set at least `64` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.3.0 | celeborn.worker.partitionSorter.threads |
| celeborn.worker.sortPartition.timeout | 220s | false | Timeout for a shuffle file to sort. | 0.3.0 | celeborn.worker.partitionSorter.sort.timeout |
| celeborn.worker.storage.checkDirsEmpty.maxRetries | 3 | false | The number of retries for a worker to check if the working directory is cleaned up before registering with the master. | 0.3.0 | celeborn.worker.disk.checkFileClean.maxRetries |
| celeborn.worker.storage.checkDirsEmpty.timeout | 1000ms | false | The wait time per retry for a worker to check if the working directory is cleaned up before registering with the master. | 0.3.0 | celeborn.worker.disk.checkFileClean.timeout |
Expand Down
4 changes: 2 additions & 2 deletions docs/deploy.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ celeborn.worker.storage.dirs /mnt/disk1:disktype=SSD,/mnt/disk2:disktype=SSD

# If Celeborn workers don't have local disks. You can use HDFS.
# Do not set `celeborn.worker.storage.dirs` and use following configs.
celeborn.storage.activeTypes HDFS
celeborn.storage.availableTypes HDFS
celeborn.worker.sortPartition.threads 64
celeborn.worker.commitFiles.timeout 240s
celeborn.worker.commitFiles.threads 128
Expand Down Expand Up @@ -98,7 +98,7 @@ celeborn.worker.storage.dirs /mnt/disk1:disktype=SSD,/mnt/disk2:disktype=SSD

# If Celeborn workers don't have local disks. You can use HDFS.
# Do not set `celeborn.worker.storage.dirs` and use following configs.
celeborn.storage.activeTypes HDFS
celeborn.storage.availableTypes HDFS
celeborn.worker.sortPartition.threads 64
celeborn.worker.commitFiles.timeout 240s
celeborn.worker.commitFiles.threads 128
Expand Down
Loading