diff --git a/docs/source/en/deepspeed.md b/docs/source/en/deepspeed.md index ae304404f615..069e3835321e 100644 --- a/docs/source/en/deepspeed.md +++ b/docs/source/en/deepspeed.md @@ -196,7 +196,7 @@ ZeRO-2 shards the optimizer and gradient states across GPUs. This stage is prima "overlap_comm": true, "reduce_scatter": true, "reduce_bucket_size": 5e8, - "contiguous_gradients": true + "contiguous_gradients": true, "round_robin_gradients": true } } diff --git a/docs/source/ko/deepspeed.md b/docs/source/ko/deepspeed.md index 0390f65197ec..8c14905329e5 100644 --- a/docs/source/ko/deepspeed.md +++ b/docs/source/ko/deepspeed.md @@ -211,7 +211,7 @@ ZeRO-2는 GPU에서 옵티마이저와 그레이디언트를 분할합니다. "overlap_comm": true, "reduce_scatter": true, "reduce_bucket_size": 5e8, - "contiguous_gradients": true + "contiguous_gradients": true, "round_robin_gradients": true } }