From 62eccd987c2b6ac776e0cc0f16c1b1bd03176d99 Mon Sep 17 00:00:00 2001 From: Edenzzzz Date: Tue, 2 Apr 2024 18:20:47 +0800 Subject: [PATCH] fix incorrect sharding without zero --- colossalai/shardformer/shard/shard_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/colossalai/shardformer/shard/shard_config.py b/colossalai/shardformer/shard/shard_config.py index 646b611932b7..ce78a7e945d1 100644 --- a/colossalai/shardformer/shard/shard_config.py +++ b/colossalai/shardformer/shard/shard_config.py @@ -74,8 +74,10 @@ def _turn_on_all_optimization(self): self.enable_fused_normalization = True self.enable_flash_attention = True self.enable_jit_fused = True - self.enable_sequence_parallelism = True - self.enable_sequence_overlap = True + # This can cause non-in-place param sharding when used without ZeRO. + # It may also slow down training when seq len is small. Please enable manually. + # self.enable_sequence_parallelism = True + # self.enable_sequence_overlap = True def _infer(self): """