From 2e54b933c7b8c075c4ff2ac6280d1c5c9c4c1911 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Thu, 24 Oct 2024 13:27:24 +0300 Subject: [PATCH 01/16] configurable layers --- .../dreambooth/train_dreambooth_lora_sd3.py | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 4b39dcfe41b0..5ca9da8470c8 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -571,6 +571,25 @@ def parse_args(input_args=None): "--adam_weight_decay_text_encoder", type=float, default=1e-03, help="Weight decay to use for text_encoder" ) + parser.add_argument( + "--lora_layers", + type=str, + default=None, + help=( + "The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. " + 'E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/README_flux.md' + ), + ) + parser.add_argument( + "--lora_blocks", + type=str, + default=None, + help=( + "The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. " + 'E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/README_flux.md' + ), + ) + parser.add_argument( "--adam_epsilon", type=float, @@ -1222,13 +1241,39 @@ def main(args): if args.train_text_encoder: text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() + if args.lora_layers is not None: + target_modules = [layer.strip() for layer in args.lora_layers.split(",")] + else: + target_modules = [ + "attn.add_k_proj", + "attn.add_k_proj", + "attn.add_q_proj", + "attn.add_q_proj", + "attn.add_v_proj", + "attn.add_v_proj", + "attn.to_add_out", + "attn.to_add_out", + "attn.to_k", + "attn.to_k", + "attn.to_out.0", + "attn.to_out.0", + "attn.to_q", + "attn.to_q", + "attn.to_v", + "attn.to_v", + ] + if args.lora_blocks is not None: + target_blocks = [int(block.strip()) for block in args.lora_blocks.split(",")] + target_modules = [f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules] + print(target_modules) + # now we will add new LoRA weights to the attention layers transformer_lora_config = LoraConfig( r=args.rank, lora_alpha=args.rank, init_lora_weights="gaussian", - target_modules=["to_k", "to_q", "to_v", "to_out.0"], + target_modules=target_modules, ) transformer.add_adapter(transformer_lora_config) From df919b84f9856599bf9b513e5d0395a6a9872200 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 11:55:57 +0300 Subject: [PATCH 02/16] configurable layers --- examples/dreambooth/train_dreambooth_lora_sd3.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 5ca9da8470c8..d2128682a029 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -1246,20 +1246,12 @@ def main(args): else: target_modules = [ "attn.add_k_proj", - "attn.add_k_proj", - "attn.add_q_proj", "attn.add_q_proj", "attn.add_v_proj", - "attn.add_v_proj", - "attn.to_add_out", "attn.to_add_out", "attn.to_k", - "attn.to_k", - 
"attn.to_out.0", "attn.to_out.0", "attn.to_q", - "attn.to_q", - "attn.to_v", "attn.to_v", ] if args.lora_blocks is not None: From dfd88978fb5012d5d87a6dc1278f9cfabc6cc5ba Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 16:14:26 +0300 Subject: [PATCH 03/16] update README --- examples/dreambooth/README_sd3.md | 35 +++++++++++++++++++ .../dreambooth/train_dreambooth_lora_sd3.py | 8 ++--- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index a340be350db8..957f69415921 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -147,6 +147,41 @@ accelerate launch train_dreambooth_lora_sd3.py \ --push_to_hub ``` +### Targeting Specific Blocks & Layers +As image generation models get bigger & more powerful, more fine-tuners come to find that training only part of the +transformer blocks (sometimes as little as two) can be enough to get great results. +In some cases, it can be even better to maintain some of the blocks/layers frozen. + +For **SD3.5-Large** specifically, you may find this information useful (taken from: [Stable Diffusion 3.5 Large Fine-tuning Tutorial](https://stabilityai.notion.site/Stable-Diffusion-3-5-Large-Fine-tuning-Tutorial-11a61cdcd1968027a15bdbd7c40be8c6#12461cdcd19680788a23c650dab26b93): +> [!NOTE] +> A commonly believed heuristic that we verified once again during the construction of the SD3.5 family of models is that later/higher layers (i.e. `30 - 37`)* impact tertiary details more heavily. Conversely, earlier layers (i.e. `12 - 24` )* influence the overall composition/primary form more. +> So, freezing other layers/targeting specific layers is a viable approach. +> `*`These suggested layers are speculative and not 100% guaranteed. The tips here are more or less a general idea for next steps. +> **Photorealism** +> In preliminary testing, we observed that freezing the last few layers of the architecture significantly improved model training when using a photorealistic dataset, preventing detail degradation introduced by small dataset from happening. +> **Anatomy preservation** +> To dampen any possible degradation of anatomy, training only the attention layers and **not** the adaptive linear layers could help. For reference, below is one of the transformer blocks. + + +we've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. +- with `--lora_blocks` you can specify the block numbers for training. E.g. passing - +```bash +--lora_blocks="12,13,14,15,16,17,18,19,20,21,22,23,24,30,31,32,33,34,35,36,37" +``` +will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, all blocks are trained. +- with `--lora_layers` you can specify the types of layers you wish to train. +By default, the trained layers are - +`"attn.add_k_proj","attn.add_q_proj","attn.add_v_proj", "attn.to_add_out","attn.to_k","attn.to_out.0","attn.to_q","attn.to_v"` +If you wish to have a leaner LoRA / train more blocks over layers you could pass - +```bash +--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0" +``` +This will reduce LoRA size by roughly 50% for the same rank compared to the default. +However, if you're after compact LoRAs, it's our impression that maintaining the default setting for `--lora_layers` and +freezing some of the early & blocks is usually better. 
+ +The following configuration + ### Text Encoder Training Alongside the transformer, LoRA fine-tuning of the CLIP text encoders is now also supported. To do so, just specify `--train_text_encoder` while launching training. Please keep the following points in mind: diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index d2128682a029..4ef1ee0d91ec 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -576,8 +576,8 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. " - 'E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/README_flux.md' + "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string. " + 'For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) parser.add_argument( @@ -585,8 +585,8 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. " - 'E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/README_flux.md' + "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma seperated manner. " + 'E.g. - "12,30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) From 62e152acf92b02246a6a458a114235bf0b0b7546 Mon Sep 17 00:00:00 2001 From: Linoy Date: Fri, 25 Oct 2024 13:25:22 +0000 Subject: [PATCH 04/16] style --- examples/dreambooth/train_dreambooth_lora_sd3.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 4ef1ee0d91ec..7c70762e4607 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -577,7 +577,7 @@ def parse_args(input_args=None): default=None, help=( "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string. 
" - 'For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' + "For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md" ), ) parser.add_argument( @@ -1256,10 +1256,11 @@ def main(args): ] if args.lora_blocks is not None: target_blocks = [int(block.strip()) for block in args.lora_blocks.split(",")] - target_modules = [f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules] + target_modules = [ + f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules + ] print(target_modules) - # now we will add new LoRA weights to the attention layers transformer_lora_config = LoraConfig( r=args.rank, From e285d694a5528a5eadb07a5538f40ee09062616b Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 16:31:51 +0300 Subject: [PATCH 05/16] add test --- .../dreambooth/test_dreambooth_lora_sd3.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index ec323be4143e..52a82927f3e3 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -136,6 +136,40 @@ def test_dreambooth_lora_latent_caching(self): starts_with_transformer = all(key.startswith("transformer") for key in lora_state_dict.keys()) self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_block(self): + with tempfile.TemporaryDirectory() as tmpdir: + test_args = f""" + {self.script_path} + --pretrained_model_name_or_path {self.pretrained_model_name_or_path} + --instance_data_dir {self.instance_data_dir} + --instance_prompt {self.instance_prompt} + --resolution 64 + --train_batch_size 1 + --gradient_accumulation_steps 1 + --max_train_steps 2 + --lora_blocks 0 + --learning_rate 5.0e-04 + --scale_lr + --lr_scheduler constant + --lr_warmup_steps 0 + --output_dir {tmpdir} + """.split() + + run_command(self._launch_args + test_args) + # save_pretrained smoke test + self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))) + + # make sure the state_dict has the correct naming in the parameters. + lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")) + is_lora = all("lora" in k for k in lora_state_dict.keys()) + self.assertTrue(is_lora) + + # when not training the text encoder, all the parameters in the state dict should start + # with `"transformer"` in their names. 
+ # In this test, only params of transformer block 0 should be in the state dict + starts_with_transformer = all(key.startswith("transformer.transformer_blocks.0") for key in lora_state_dict.keys()) + self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_sd3_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" From f0730149dcd3fc9fa3b3526b51a359619cf20266 Mon Sep 17 00:00:00 2001 From: Linoy Date: Fri, 25 Oct 2024 13:32:48 +0000 Subject: [PATCH 06/16] style --- examples/dreambooth/test_dreambooth_lora_sd3.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index 52a82927f3e3..abc9d97d8f1e 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -167,7 +167,9 @@ def test_dreambooth_lora_block(self): # when not training the text encoder, all the parameters in the state dict should start # with `"transformer"` in their names. # In this test, only params of transformer block 0 should be in the state dict - starts_with_transformer = all(key.startswith("transformer.transformer_blocks.0") for key in lora_state_dict.keys()) + starts_with_transformer = all( + key.startswith("transformer.transformer_blocks.0") for key in lora_state_dict.keys() + ) self.assertTrue(starts_with_transformer) def test_dreambooth_lora_sd3_checkpointing_checkpoints_total_limit(self): From 701bd35ce815e1801245d7e23cfa629b042be5eb Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 18:05:48 +0300 Subject: [PATCH 07/16] add layer test, update readme, add nargs --- examples/dreambooth/README_sd3.md | 9 +++-- .../dreambooth/test_dreambooth_lora_sd3.py | 36 +++++++++++++++++++ .../dreambooth/train_dreambooth_lora_sd3.py | 15 ++++---- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index 957f69415921..496841cbce7d 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -163,24 +163,23 @@ For **SD3.5-Large** specifically, you may find this information useful (taken fr > To dampen any possible degradation of anatomy, training only the attention layers and **not** the adaptive linear layers could help. For reference, below is one of the transformer blocks. -we've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. +We've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. - with `--lora_blocks` you can specify the block numbers for training. E.g. passing - ```bash ---lora_blocks="12,13,14,15,16,17,18,19,20,21,22,23,24,30,31,32,33,34,35,36,37" +--lora_blocks 12 13 14 15 16 17 18 19 20 21 22 23 24 30 31 32 33 34 35 36 37 ``` will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, all blocks are trained. - with `--lora_layers` you can specify the types of layers you wish to train. 
By default, the trained layers are - -`"attn.add_k_proj","attn.add_q_proj","attn.add_v_proj", "attn.to_add_out","attn.to_k","attn.to_out.0","attn.to_q","attn.to_v"` +`attn.add_k_proj attn.add_q_proj attn.add_v_proj attn.to_add_out attn.to_k attn.to_out.0 attn.to_q attn.to_v` If you wish to have a leaner LoRA / train more blocks over layers you could pass - ```bash ---lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0" +--lora_layers attn.to_k attn.to_q attn.to_v attn.to_out.0 ``` This will reduce LoRA size by roughly 50% for the same rank compared to the default. However, if you're after compact LoRAs, it's our impression that maintaining the default setting for `--lora_layers` and freezing some of the early & blocks is usually better. -The following configuration ### Text Encoder Training Alongside the transformer, LoRA fine-tuning of the CLIP text encoders is now also supported. diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index abc9d97d8f1e..2ba0b3d3586c 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -172,6 +172,42 @@ def test_dreambooth_lora_block(self): ) self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_layer(self): + with tempfile.TemporaryDirectory() as tmpdir: + test_args = f""" + {self.script_path} + --pretrained_model_name_or_path {self.pretrained_model_name_or_path} + --instance_data_dir {self.instance_data_dir} + --instance_prompt {self.instance_prompt} + --resolution 64 + --train_batch_size 1 + --gradient_accumulation_steps 1 + --max_train_steps 2 + --lora_layers attn.to_k + --learning_rate 5.0e-04 + --scale_lr + --lr_scheduler constant + --lr_warmup_steps 0 + --output_dir {tmpdir} + """.split() + + run_command(self._launch_args + test_args) + # save_pretrained smoke test + self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))) + + # make sure the state_dict has the correct naming in the parameters. + lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")) + is_lora = all("lora" in k for k in lora_state_dict.keys()) + self.assertTrue(is_lora) + + # when not training the text encoder, all the parameters in the state dict should start + # with `"transformer"` in their names. + # In this test, only params of transformer block 0 should be in the state dict + starts_with_transformer = all( + "attn.to_k" in key for key in lora_state_dict.keys() + ) + self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_sd3_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 7c70762e4607..cb4e6773ed26 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -573,20 +573,22 @@ def parse_args(input_args=None): parser.add_argument( "--lora_layers", + nargs="+", type=str, default=None, help=( - "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string. " + "The transformer block layers to apply LoRA training on. 
" "For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md" ), ) parser.add_argument( "--lora_blocks", - type=str, + nargs="+", + type=int, default=None, help=( - "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma seperated manner. " - 'E.g. - "12,30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' + "The transformer blocks to apply LoRA training on." + 'E.g. - "--lora_blocks 12 30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) @@ -1242,7 +1244,7 @@ def main(args): text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() if args.lora_layers is not None: - target_modules = [layer.strip() for layer in args.lora_layers.split(",")] + target_modules = args.lora_layers else: target_modules = [ "attn.add_k_proj", @@ -1255,9 +1257,8 @@ def main(args): "attn.to_v", ] if args.lora_blocks is not None: - target_blocks = [int(block.strip()) for block in args.lora_blocks.split(",")] target_modules = [ - f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules + f"transformer_blocks.{block}.{module}" for block in args.lora_blocks for module in target_modules ] print(target_modules) From 90550a81de34ef6cce788ec86e490c4a1da88986 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 18:15:10 +0300 Subject: [PATCH 08/16] readme --- examples/dreambooth/README_sd3.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index 496841cbce7d..6f0385010883 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -165,7 +165,7 @@ For **SD3.5-Large** specifically, you may find this information useful (taken fr We've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. - with `--lora_blocks` you can specify the block numbers for training. E.g. passing - -```bash +```diff --lora_blocks 12 13 14 15 16 17 18 19 20 21 22 23 24 30 31 32 33 34 35 36 37 ``` will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, all blocks are trained. @@ -173,7 +173,7 @@ will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, al By default, the trained layers are - `attn.add_k_proj attn.add_q_proj attn.add_v_proj attn.to_add_out attn.to_k attn.to_out.0 attn.to_q attn.to_v` If you wish to have a leaner LoRA / train more blocks over layers you could pass - -```bash +```diff --lora_layers attn.to_k attn.to_q attn.to_v attn.to_out.0 ``` This will reduce LoRA size by roughly 50% for the same rank compared to the default. 
From 2cba0c96de1b81032d81517ed60002331a515e74 Mon Sep 17 00:00:00 2001 From: Linoy Date: Fri, 25 Oct 2024 15:16:12 +0000 Subject: [PATCH 09/16] test style --- examples/dreambooth/test_dreambooth_lora_sd3.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index 2ba0b3d3586c..deaed0ef3eb9 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -203,9 +203,7 @@ def test_dreambooth_lora_layer(self): # when not training the text encoder, all the parameters in the state dict should start # with `"transformer"` in their names. # In this test, only params of transformer block 0 should be in the state dict - starts_with_transformer = all( - "attn.to_k" in key for key in lora_state_dict.keys() - ) + starts_with_transformer = all("attn.to_k" in key for key in lora_state_dict.keys()) self.assertTrue(starts_with_transformer) def test_dreambooth_lora_sd3_checkpointing_checkpoints_total_limit(self): From 128826b8a252f4b4af38cedb2db6eae8095f98f3 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 22:07:37 +0300 Subject: [PATCH 10/16] remove print, change nargs --- examples/dreambooth/train_dreambooth_lora_sd3.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index cb4e6773ed26..3fae643cd934 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -573,7 +573,7 @@ def parse_args(input_args=None): parser.add_argument( "--lora_layers", - nargs="+", + nargs="*", type=str, default=None, help=( @@ -583,7 +583,7 @@ def parse_args(input_args=None): ) parser.add_argument( "--lora_blocks", - nargs="+", + nargs="*", type=int, default=None, help=( @@ -1260,7 +1260,6 @@ def main(args): target_modules = [ f"transformer_blocks.{block}.{module}" for block in args.lora_blocks for module in target_modules ] - print(target_modules) # now we will add new LoRA weights to the attention layers transformer_lora_config = LoraConfig( From 0c7fa8b301d14546d5ce1e6cb115532574dd628b Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 12:41:21 +0200 Subject: [PATCH 11/16] test arg change --- examples/dreambooth/train_dreambooth_lora_sd3.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 3fae643cd934..126e2ef51282 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -573,7 +573,6 @@ def parse_args(input_args=None): parser.add_argument( "--lora_layers", - nargs="*", type=str, default=None, help=( @@ -583,8 +582,7 @@ def parse_args(input_args=None): ) parser.add_argument( "--lora_blocks", - nargs="*", - type=int, + type=str, default=None, help=( "The transformer blocks to apply LoRA training on." 
@@ -1244,7 +1242,8 @@ def main(args): text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() if args.lora_layers is not None: - target_modules = args.lora_layers + #target_modules = args.lora_layers + target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: target_modules = [ "attn.add_k_proj", @@ -1257,8 +1256,9 @@ def main(args): "attn.to_v", ] if args.lora_blocks is not None: + target_blocks = [int(block.strip()) for block in args.lora_blocks.split(",")] target_modules = [ - f"transformer_blocks.{block}.{module}" for block in args.lora_blocks for module in target_modules + f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules ] # now we will add new LoRA weights to the attention layers From 10a26595bf3adbfe1a2a9b91d98334405f1cd84e Mon Sep 17 00:00:00 2001 From: Linoy Date: Mon, 28 Oct 2024 10:45:37 +0000 Subject: [PATCH 12/16] style --- examples/dreambooth/train_dreambooth_lora_sd3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 126e2ef51282..0b250f85cbf5 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -1242,7 +1242,7 @@ def main(args): text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() if args.lora_layers is not None: - #target_modules = args.lora_layers + # target_modules = args.lora_layers target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: target_modules = [ From ad6c2f33f7b5328bac059c40d478f8f8b716e2cb Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 13:18:05 +0200 Subject: [PATCH 13/16] revert nargs 2/2 --- examples/dreambooth/README_sd3.md | 6 +++--- examples/dreambooth/train_dreambooth_lora_sd3.py | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index 6f0385010883..d60b9ae86466 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -166,15 +166,15 @@ For **SD3.5-Large** specifically, you may find this information useful (taken fr We've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. - with `--lora_blocks` you can specify the block numbers for training. E.g. passing - ```diff ---lora_blocks 12 13 14 15 16 17 18 19 20 21 22 23 24 30 31 32 33 34 35 36 37 +--lora_blocks "12,13,14,15,16,17,18,19,20,21,22,23,24,30,31,32,33,34,35,36,37" ``` will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, all blocks are trained. - with `--lora_layers` you can specify the types of layers you wish to train. By default, the trained layers are - -`attn.add_k_proj attn.add_q_proj attn.add_v_proj attn.to_add_out attn.to_k attn.to_out.0 attn.to_q attn.to_v` +`attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,attn.to_k,attn.to_out.0,attn.to_q,attn.to_v` If you wish to have a leaner LoRA / train more blocks over layers you could pass - ```diff ---lora_layers attn.to_k attn.to_q attn.to_v attn.to_out.0 +--lora_layers attn.to_k,attn.to_q,attn.to_v,attn.to_out.0 ``` This will reduce LoRA size by roughly 50% for the same rank compared to the default. 
However, if you're after compact LoRAs, it's our impression that maintaining the default setting for `--lora_layers` and diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 126e2ef51282..fc3c69b8901f 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -576,7 +576,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer block layers to apply LoRA training on. " + "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string." "For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md" ), ) @@ -585,8 +585,8 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer blocks to apply LoRA training on." - 'E.g. - "--lora_blocks 12 30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' + "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma seperated manner." + 'E.g. - "--lora_blocks 12,30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) @@ -1242,7 +1242,6 @@ def main(args): text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() if args.lora_layers is not None: - #target_modules = args.lora_layers target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: target_modules = [ From df018ddcb392818be4a9d017923f408954b0903f Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 15:47:13 +0200 Subject: [PATCH 14/16] address sayaks comments --- examples/dreambooth/README_sd3.md | 2 +- examples/dreambooth/test_dreambooth_lora_sd3.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index d60b9ae86466..89d87d65dd44 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -174,7 +174,7 @@ By default, the trained layers are - `attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,attn.to_k,attn.to_out.0,attn.to_q,attn.to_v` If you wish to have a leaner LoRA / train more blocks over layers you could pass - ```diff ---lora_layers attn.to_k,attn.to_q,attn.to_v,attn.to_out.0 ++ --lora_layers attn.to_k,attn.to_q,attn.to_v,attn.to_out.0 ``` This will reduce LoRA size by roughly 50% for the same rank compared to the default. 
However, if you're after compact LoRAs, it's our impression that maintaining the default setting for `--lora_layers` and diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index deaed0ef3eb9..bec890e55ffc 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -38,6 +38,8 @@ class DreamBoothLoRASD3(ExamplesTestsAccelerate): pretrained_model_name_or_path = "hf-internal-testing/tiny-sd3-pipe" script_path = "examples/dreambooth/train_dreambooth_lora_sd3.py" + LORA_BLOCK_TO_TEST = 0 + LORA_LAYER_TO_TEST = "attn.to_k" def test_dreambooth_lora_sd3(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" @@ -147,7 +149,7 @@ def test_dreambooth_lora_block(self): --train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 - --lora_blocks 0 + --lora_blocks {LORA_BLOCK_TO_TEST} --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -183,7 +185,7 @@ def test_dreambooth_lora_layer(self): --train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 - --lora_layers attn.to_k + --lora_layers {LORA_LAYER_TO_TEST} --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -200,9 +202,7 @@ def test_dreambooth_lora_layer(self): is_lora = all("lora" in k for k in lora_state_dict.keys()) self.assertTrue(is_lora) - # when not training the text encoder, all the parameters in the state dict should start - # with `"transformer"` in their names. - # In this test, only params of transformer block 0 should be in the state dict + # In this test, only transformer params of attention layers `attn.to_k` should be in the state dict starts_with_transformer = all("attn.to_k" in key for key in lora_state_dict.keys()) self.assertTrue(starts_with_transformer) From 65dd59d5e6d4583fa009782f8eab9e52b66fcc70 Mon Sep 17 00:00:00 2001 From: Linoy Date: Mon, 28 Oct 2024 13:48:02 +0000 Subject: [PATCH 15/16] style --- examples/dreambooth/test_dreambooth_lora_sd3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index bec890e55ffc..a052ea3b6497 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -40,6 +40,7 @@ class DreamBoothLoRASD3(ExamplesTestsAccelerate): LORA_BLOCK_TO_TEST = 0 LORA_LAYER_TO_TEST = "attn.to_k" + def test_dreambooth_lora_sd3(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" From 559b0bc860ea644a88097cb0986860e425bce6c6 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 15:54:12 +0200 Subject: [PATCH 16/16] address sayaks comments --- examples/dreambooth/test_dreambooth_lora_sd3.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index a052ea3b6497..5d6c8bb9938a 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -38,8 +38,8 @@ class DreamBoothLoRASD3(ExamplesTestsAccelerate): pretrained_model_name_or_path = "hf-internal-testing/tiny-sd3-pipe" script_path = "examples/dreambooth/train_dreambooth_lora_sd3.py" - LORA_BLOCK_TO_TEST = 0 - LORA_LAYER_TO_TEST = "attn.to_k" + transformer_block_idx = 0 + layer_type = "attn.to_k" def test_dreambooth_lora_sd3(self): with tempfile.TemporaryDirectory() as tmpdir: @@ -150,7 +150,7 @@ def test_dreambooth_lora_block(self): 
--train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 - --lora_blocks {LORA_BLOCK_TO_TEST} + --lora_blocks {self.transformer_block_idx} --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -186,7 +186,7 @@ def test_dreambooth_lora_layer(self): --train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 - --lora_layers {LORA_LAYER_TO_TEST} + --lora_layers {self.layer_type} --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant
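
---

Note: the module-selection logic that this series converges on (after the `nargs` revert in patches 13-16) can be summarized by the standalone sketch below. The helper name `build_target_modules` is illustrative and does not appear in the PR; the default layer list and the comma-separated parsing mirror the final state of `train_dreambooth_lora_sd3.py`, and the resulting list is what the script passes to `LoraConfig(target_modules=...)`.

```python
# Illustrative sketch (not part of the PR): shows how the final revision of
# train_dreambooth_lora_sd3.py turns --lora_layers / --lora_blocks into the
# target_modules list handed to the PEFT LoraConfig.

DEFAULT_LAYERS = [
    "attn.add_k_proj",
    "attn.add_q_proj",
    "attn.add_v_proj",
    "attn.to_add_out",
    "attn.to_k",
    "attn.to_out.0",
    "attn.to_q",
    "attn.to_v",
]


def build_target_modules(lora_layers=None, lora_blocks=None):
    """Compose LoRA target modules from comma-separated --lora_layers / --lora_blocks strings."""
    if lora_layers is not None:
        # Explicit layer types override the default attention-layer set.
        target_modules = [layer.strip() for layer in lora_layers.split(",")]
    else:
        target_modules = list(DEFAULT_LAYERS)

    if lora_blocks is not None:
        # Restrict training to the requested transformer blocks only by prefixing
        # each module with its block path; otherwise all blocks are targeted.
        target_blocks = [int(block.strip()) for block in lora_blocks.split(",")]
        target_modules = [
            f"transformer_blocks.{block}.{module}"
            for block in target_blocks
            for module in target_modules
        ]
    return target_modules


if __name__ == "__main__":
    # Attention-only layers on blocks 12 and 30, matching the README examples.
    print(build_target_modules(lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0", lora_blocks="12,30"))
```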