From 2e54b933c7b8c075c4ff2ac6280d1c5c9c4c1911 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Thu, 24 Oct 2024 13:27:24 +0300 Subject: [PATCH 01/16] configurable layers --- .../dreambooth/train_dreambooth_lora_sd3.py | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 4b39dcfe41b0..5ca9da8470c8 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -571,6 +571,25 @@ def parse_args(input_args=None): "--adam_weight_decay_text_encoder", type=float, default=1e-03, help="Weight decay to use for text_encoder" ) + parser.add_argument( + "--lora_layers", + type=str, + default=None, + help=( + "The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. " + 'E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/README_flux.md' + ), + ) + parser.add_argument( + "--lora_blocks", + type=str, + default=None, + help=( + "The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. " + 'E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/README_flux.md' + ), + ) + parser.add_argument( "--adam_epsilon", type=float, @@ -1222,13 +1241,39 @@ def main(args): if args.train_text_encoder: text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() + if args.lora_layers is not None: + target_modules = [layer.strip() for layer in args.lora_layers.split(",")] + else: + target_modules = [ + "attn.add_k_proj", + "attn.add_k_proj", + "attn.add_q_proj", + "attn.add_q_proj", + "attn.add_v_proj", + "attn.add_v_proj", + "attn.to_add_out", + "attn.to_add_out", + "attn.to_k", + "attn.to_k", + "attn.to_out.0", + "attn.to_out.0", + "attn.to_q", + "attn.to_q", + "attn.to_v", + "attn.to_v", + ] + if args.lora_blocks is not None: + target_blocks = [int(block.strip()) for block in args.lora_blocks.split(",")] + target_modules = [f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules] + print(target_modules) + # now we will add new LoRA weights to the attention layers transformer_lora_config = LoraConfig( r=args.rank, lora_alpha=args.rank, init_lora_weights="gaussian", - target_modules=["to_k", "to_q", "to_v", "to_out.0"], + target_modules=target_modules, ) transformer.add_adapter(transformer_lora_config) From df919b84f9856599bf9b513e5d0395a6a9872200 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 11:55:57 +0300 Subject: [PATCH 02/16] configurable layers --- examples/dreambooth/train_dreambooth_lora_sd3.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 5ca9da8470c8..d2128682a029 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -1246,20 +1246,12 @@ def main(args): else: target_modules = [ "attn.add_k_proj", - "attn.add_k_proj", - "attn.add_q_proj", "attn.add_q_proj", "attn.add_v_proj", - "attn.add_v_proj", - "attn.to_add_out", "attn.to_add_out", "attn.to_k", - "attn.to_k", - 
"attn.to_out.0", "attn.to_out.0", "attn.to_q", - "attn.to_q", - "attn.to_v", "attn.to_v", ] if args.lora_blocks is not None: From dfd88978fb5012d5d87a6dc1278f9cfabc6cc5ba Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 16:14:26 +0300 Subject: [PATCH 03/16] update README --- examples/dreambooth/README_sd3.md | 35 +++++++++++++++++++ .../dreambooth/train_dreambooth_lora_sd3.py | 8 ++--- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index a340be350db8..957f69415921 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -147,6 +147,41 @@ accelerate launch train_dreambooth_lora_sd3.py \ --push_to_hub ``` +### Targeting Specific Blocks & Layers +As image generation models get bigger & more powerful, more fine-tuners come to find that training only part of the +transformer blocks (sometimes as little as two) can be enough to get great results. +In some cases, it can be even better to maintain some of the blocks/layers frozen. + +For **SD3.5-Large** specifically, you may find this information useful (taken from: [Stable Diffusion 3.5 Large Fine-tuning Tutorial](https://stabilityai.notion.site/Stable-Diffusion-3-5-Large-Fine-tuning-Tutorial-11a61cdcd1968027a15bdbd7c40be8c6#12461cdcd19680788a23c650dab26b93): +> [!NOTE] +> A commonly believed heuristic that we verified once again during the construction of the SD3.5 family of models is that later/higher layers (i.e. `30 - 37`)* impact tertiary details more heavily. Conversely, earlier layers (i.e. `12 - 24` )* influence the overall composition/primary form more. +> So, freezing other layers/targeting specific layers is a viable approach. +> `*`These suggested layers are speculative and not 100% guaranteed. The tips here are more or less a general idea for next steps. +> **Photorealism** +> In preliminary testing, we observed that freezing the last few layers of the architecture significantly improved model training when using a photorealistic dataset, preventing detail degradation introduced by small dataset from happening. +> **Anatomy preservation** +> To dampen any possible degradation of anatomy, training only the attention layers and **not** the adaptive linear layers could help. For reference, below is one of the transformer blocks. + + +we've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. +- with `--lora_blocks` you can specify the block numbers for training. E.g. passing - +```bash +--lora_blocks="12,13,14,15,16,17,18,19,20,21,22,23,24,30,31,32,33,34,35,36,37" +``` +will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, all blocks are trained. +- with `--lora_layers` you can specify the types of layers you wish to train. +By default, the trained layers are - +`"attn.add_k_proj","attn.add_q_proj","attn.add_v_proj", "attn.to_add_out","attn.to_k","attn.to_out.0","attn.to_q","attn.to_v"` +If you wish to have a leaner LoRA / train more blocks over layers you could pass - +```bash +--lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0" +``` +This will reduce LoRA size by roughly 50% for the same rank compared to the default. +However, if you're after compact LoRAs, it's our impression that maintaining the default setting for `--lora_layers` and +freezing some of the early & blocks is usually better. 
+ +The following configuration + ### Text Encoder Training Alongside the transformer, LoRA fine-tuning of the CLIP text encoders is now also supported. To do so, just specify `--train_text_encoder` while launching training. Please keep the following points in mind: diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index d2128682a029..4ef1ee0d91ec 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -576,8 +576,8 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. " - 'E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/README_flux.md' + "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string. " + 'For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) parser.add_argument( @@ -585,8 +585,8 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer modules to apply LoRA training on. Please specify the layers in a comma seperated. " - 'E.g. - "to_k,to_q,to_v,to_out.0" will result in lora training of attention layers only. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/README_flux.md' + "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma seperated manner. " + 'E.g. - "12,30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) From 62e152acf92b02246a6a458a114235bf0b0b7546 Mon Sep 17 00:00:00 2001 From: Linoy Date: Fri, 25 Oct 2024 13:25:22 +0000 Subject: [PATCH 04/16] style --- examples/dreambooth/train_dreambooth_lora_sd3.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 4ef1ee0d91ec..7c70762e4607 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -577,7 +577,7 @@ def parse_args(input_args=None): default=None, help=( "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string. 
" - 'For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' + "For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md" ), ) parser.add_argument( @@ -1256,10 +1256,11 @@ def main(args): ] if args.lora_blocks is not None: target_blocks = [int(block.strip()) for block in args.lora_blocks.split(",")] - target_modules = [f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules] + target_modules = [ + f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules + ] print(target_modules) - # now we will add new LoRA weights to the attention layers transformer_lora_config = LoraConfig( r=args.rank, From e285d694a5528a5eadb07a5538f40ee09062616b Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 16:31:51 +0300 Subject: [PATCH 05/16] add test --- .../dreambooth/test_dreambooth_lora_sd3.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index ec323be4143e..52a82927f3e3 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -136,6 +136,40 @@ def test_dreambooth_lora_latent_caching(self): starts_with_transformer = all(key.startswith("transformer") for key in lora_state_dict.keys()) self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_block(self): + with tempfile.TemporaryDirectory() as tmpdir: + test_args = f""" + {self.script_path} + --pretrained_model_name_or_path {self.pretrained_model_name_or_path} + --instance_data_dir {self.instance_data_dir} + --instance_prompt {self.instance_prompt} + --resolution 64 + --train_batch_size 1 + --gradient_accumulation_steps 1 + --max_train_steps 2 + --lora_blocks 0 + --learning_rate 5.0e-04 + --scale_lr + --lr_scheduler constant + --lr_warmup_steps 0 + --output_dir {tmpdir} + """.split() + + run_command(self._launch_args + test_args) + # save_pretrained smoke test + self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))) + + # make sure the state_dict has the correct naming in the parameters. + lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")) + is_lora = all("lora" in k for k in lora_state_dict.keys()) + self.assertTrue(is_lora) + + # when not training the text encoder, all the parameters in the state dict should start + # with `"transformer"` in their names. 
+ # In this test, only params of transformer block 0 should be in the state dict + starts_with_transformer = all(key.startswith("transformer.transformer_blocks.0") for key in lora_state_dict.keys()) + self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_sd3_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" From f0730149dcd3fc9fa3b3526b51a359619cf20266 Mon Sep 17 00:00:00 2001 From: Linoy Date: Fri, 25 Oct 2024 13:32:48 +0000 Subject: [PATCH 06/16] style --- examples/dreambooth/test_dreambooth_lora_sd3.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index 52a82927f3e3..abc9d97d8f1e 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -167,7 +167,9 @@ def test_dreambooth_lora_block(self): # when not training the text encoder, all the parameters in the state dict should start # with `"transformer"` in their names. # In this test, only params of transformer block 0 should be in the state dict - starts_with_transformer = all(key.startswith("transformer.transformer_blocks.0") for key in lora_state_dict.keys()) + starts_with_transformer = all( + key.startswith("transformer.transformer_blocks.0") for key in lora_state_dict.keys() + ) self.assertTrue(starts_with_transformer) def test_dreambooth_lora_sd3_checkpointing_checkpoints_total_limit(self): From 701bd35ce815e1801245d7e23cfa629b042be5eb Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 18:05:48 +0300 Subject: [PATCH 07/16] add layer test, update readme, add nargs --- examples/dreambooth/README_sd3.md | 9 +++-- .../dreambooth/test_dreambooth_lora_sd3.py | 36 +++++++++++++++++++ .../dreambooth/train_dreambooth_lora_sd3.py | 15 ++++---- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index 957f69415921..496841cbce7d 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -163,24 +163,23 @@ For **SD3.5-Large** specifically, you may find this information useful (taken fr > To dampen any possible degradation of anatomy, training only the attention layers and **not** the adaptive linear layers could help. For reference, below is one of the transformer blocks. -we've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. +We've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. - with `--lora_blocks` you can specify the block numbers for training. E.g. passing - ```bash ---lora_blocks="12,13,14,15,16,17,18,19,20,21,22,23,24,30,31,32,33,34,35,36,37" +--lora_blocks 12 13 14 15 16 17 18 19 20 21 22 23 24 30 31 32 33 34 35 36 37 ``` will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, all blocks are trained. - with `--lora_layers` you can specify the types of layers you wish to train. 
By default, the trained layers are - -`"attn.add_k_proj","attn.add_q_proj","attn.add_v_proj", "attn.to_add_out","attn.to_k","attn.to_out.0","attn.to_q","attn.to_v"` +`attn.add_k_proj attn.add_q_proj attn.add_v_proj attn.to_add_out attn.to_k attn.to_out.0 attn.to_q attn.to_v` If you wish to have a leaner LoRA / train more blocks over layers you could pass - ```bash ---lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0" +--lora_layers attn.to_k attn.to_q attn.to_v attn.to_out.0 ``` This will reduce LoRA size by roughly 50% for the same rank compared to the default. However, if you're after compact LoRAs, it's our impression that maintaining the default setting for `--lora_layers` and freezing some of the early & blocks is usually better. -The following configuration ### Text Encoder Training Alongside the transformer, LoRA fine-tuning of the CLIP text encoders is now also supported. diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index abc9d97d8f1e..2ba0b3d3586c 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -172,6 +172,42 @@ def test_dreambooth_lora_block(self): ) self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_layer(self): + with tempfile.TemporaryDirectory() as tmpdir: + test_args = f""" + {self.script_path} + --pretrained_model_name_or_path {self.pretrained_model_name_or_path} + --instance_data_dir {self.instance_data_dir} + --instance_prompt {self.instance_prompt} + --resolution 64 + --train_batch_size 1 + --gradient_accumulation_steps 1 + --max_train_steps 2 + --lora_layers attn.to_k + --learning_rate 5.0e-04 + --scale_lr + --lr_scheduler constant + --lr_warmup_steps 0 + --output_dir {tmpdir} + """.split() + + run_command(self._launch_args + test_args) + # save_pretrained smoke test + self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))) + + # make sure the state_dict has the correct naming in the parameters. + lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")) + is_lora = all("lora" in k for k in lora_state_dict.keys()) + self.assertTrue(is_lora) + + # when not training the text encoder, all the parameters in the state dict should start + # with `"transformer"` in their names. + # In this test, only params of transformer block 0 should be in the state dict + starts_with_transformer = all( + "attn.to_k" in key for key in lora_state_dict.keys() + ) + self.assertTrue(starts_with_transformer) + def test_dreambooth_lora_sd3_checkpointing_checkpoints_total_limit(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 7c70762e4607..cb4e6773ed26 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -573,20 +573,22 @@ def parse_args(input_args=None): parser.add_argument( "--lora_layers", + nargs="+", type=str, default=None, help=( - "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string. " + "The transformer block layers to apply LoRA training on. 
" "For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md" ), ) parser.add_argument( "--lora_blocks", - type=str, + nargs="+", + type=int, default=None, help=( - "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma seperated manner. " - 'E.g. - "12,30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' + "The transformer blocks to apply LoRA training on." + 'E.g. - "--lora_blocks 12 30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) @@ -1242,7 +1244,7 @@ def main(args): text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() if args.lora_layers is not None: - target_modules = [layer.strip() for layer in args.lora_layers.split(",")] + target_modules = args.lora_layers else: target_modules = [ "attn.add_k_proj", @@ -1255,9 +1257,8 @@ def main(args): "attn.to_v", ] if args.lora_blocks is not None: - target_blocks = [int(block.strip()) for block in args.lora_blocks.split(",")] target_modules = [ - f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules + f"transformer_blocks.{block}.{module}" for block in args.lora_blocks for module in target_modules ] print(target_modules) From 90550a81de34ef6cce788ec86e490c4a1da88986 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 18:15:10 +0300 Subject: [PATCH 08/16] readme --- examples/dreambooth/README_sd3.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index 496841cbce7d..6f0385010883 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -165,7 +165,7 @@ For **SD3.5-Large** specifically, you may find this information useful (taken fr We've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. - with `--lora_blocks` you can specify the block numbers for training. E.g. passing - -```bash +```diff --lora_blocks 12 13 14 15 16 17 18 19 20 21 22 23 24 30 31 32 33 34 35 36 37 ``` will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, all blocks are trained. @@ -173,7 +173,7 @@ will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, al By default, the trained layers are - `attn.add_k_proj attn.add_q_proj attn.add_v_proj attn.to_add_out attn.to_k attn.to_out.0 attn.to_q attn.to_v` If you wish to have a leaner LoRA / train more blocks over layers you could pass - -```bash +```diff --lora_layers attn.to_k attn.to_q attn.to_v attn.to_out.0 ``` This will reduce LoRA size by roughly 50% for the same rank compared to the default. 
From 2cba0c96de1b81032d81517ed60002331a515e74 Mon Sep 17 00:00:00 2001 From: Linoy Date: Fri, 25 Oct 2024 15:16:12 +0000 Subject: [PATCH 09/16] test style --- examples/dreambooth/test_dreambooth_lora_sd3.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index 2ba0b3d3586c..deaed0ef3eb9 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -203,9 +203,7 @@ def test_dreambooth_lora_layer(self): # when not training the text encoder, all the parameters in the state dict should start # with `"transformer"` in their names. # In this test, only params of transformer block 0 should be in the state dict - starts_with_transformer = all( - "attn.to_k" in key for key in lora_state_dict.keys() - ) + starts_with_transformer = all("attn.to_k" in key for key in lora_state_dict.keys()) self.assertTrue(starts_with_transformer) def test_dreambooth_lora_sd3_checkpointing_checkpoints_total_limit(self): From 128826b8a252f4b4af38cedb2db6eae8095f98f3 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Fri, 25 Oct 2024 22:07:37 +0300 Subject: [PATCH 10/16] remove print, change nargs --- examples/dreambooth/train_dreambooth_lora_sd3.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index cb4e6773ed26..3fae643cd934 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -573,7 +573,7 @@ def parse_args(input_args=None): parser.add_argument( "--lora_layers", - nargs="+", + nargs="*", type=str, default=None, help=( @@ -583,7 +583,7 @@ def parse_args(input_args=None): ) parser.add_argument( "--lora_blocks", - nargs="+", + nargs="*", type=int, default=None, help=( @@ -1260,7 +1260,6 @@ def main(args): target_modules = [ f"transformer_blocks.{block}.{module}" for block in args.lora_blocks for module in target_modules ] - print(target_modules) # now we will add new LoRA weights to the attention layers transformer_lora_config = LoraConfig( From 0c7fa8b301d14546d5ce1e6cb115532574dd628b Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 12:41:21 +0200 Subject: [PATCH 11/16] test arg change --- examples/dreambooth/train_dreambooth_lora_sd3.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 3fae643cd934..126e2ef51282 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -573,7 +573,6 @@ def parse_args(input_args=None): parser.add_argument( "--lora_layers", - nargs="*", type=str, default=None, help=( @@ -583,8 +582,7 @@ def parse_args(input_args=None): ) parser.add_argument( "--lora_blocks", - nargs="*", - type=int, + type=str, default=None, help=( "The transformer blocks to apply LoRA training on." 
@@ -1244,7 +1242,8 @@ def main(args): text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() if args.lora_layers is not None: - target_modules = args.lora_layers + #target_modules = args.lora_layers + target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: target_modules = [ "attn.add_k_proj", @@ -1257,8 +1256,9 @@ def main(args): "attn.to_v", ] if args.lora_blocks is not None: + target_blocks = [int(block.strip()) for block in args.lora_blocks.split(",")] target_modules = [ - f"transformer_blocks.{block}.{module}" for block in args.lora_blocks for module in target_modules + f"transformer_blocks.{block}.{module}" for block in target_blocks for module in target_modules ] # now we will add new LoRA weights to the attention layers From 10a26595bf3adbfe1a2a9b91d98334405f1cd84e Mon Sep 17 00:00:00 2001 From: Linoy Date: Mon, 28 Oct 2024 10:45:37 +0000 Subject: [PATCH 12/16] style --- examples/dreambooth/train_dreambooth_lora_sd3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 126e2ef51282..0b250f85cbf5 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -1242,7 +1242,7 @@ def main(args): text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() if args.lora_layers is not None: - #target_modules = args.lora_layers + # target_modules = args.lora_layers target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: target_modules = [ From ad6c2f33f7b5328bac059c40d478f8f8b716e2cb Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 13:18:05 +0200 Subject: [PATCH 13/16] revert nargs 2/2 --- examples/dreambooth/README_sd3.md | 6 +++--- examples/dreambooth/train_dreambooth_lora_sd3.py | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index 6f0385010883..d60b9ae86466 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -166,15 +166,15 @@ For **SD3.5-Large** specifically, you may find this information useful (taken fr We've added `--lora_layers` and `--lora_blocks` to make LoRA training modules configurable. - with `--lora_blocks` you can specify the block numbers for training. E.g. passing - ```diff ---lora_blocks 12 13 14 15 16 17 18 19 20 21 22 23 24 30 31 32 33 34 35 36 37 +--lora_blocks "12,13,14,15,16,17,18,19,20,21,22,23,24,30,31,32,33,34,35,36,37" ``` will trigger LoRA training of transformer blocks 12-24 and 30-37. By default, all blocks are trained. - with `--lora_layers` you can specify the types of layers you wish to train. By default, the trained layers are - -`attn.add_k_proj attn.add_q_proj attn.add_v_proj attn.to_add_out attn.to_k attn.to_out.0 attn.to_q attn.to_v` +`attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,attn.to_k,attn.to_out.0,attn.to_q,attn.to_v` If you wish to have a leaner LoRA / train more blocks over layers you could pass - ```diff ---lora_layers attn.to_k attn.to_q attn.to_v attn.to_out.0 +--lora_layers attn.to_k,attn.to_q,attn.to_v,attn.to_out.0 ``` This will reduce LoRA size by roughly 50% for the same rank compared to the default. 
However, if you're after compact LoRAs, it's our impression that maintaining the default setting for `--lora_layers` and diff --git a/examples/dreambooth/train_dreambooth_lora_sd3.py b/examples/dreambooth/train_dreambooth_lora_sd3.py index 126e2ef51282..fc3c69b8901f 100644 --- a/examples/dreambooth/train_dreambooth_lora_sd3.py +++ b/examples/dreambooth/train_dreambooth_lora_sd3.py @@ -576,7 +576,7 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer block layers to apply LoRA training on. " + "The transformer block layers to apply LoRA training on. Please specify the layers in a comma seperated string." "For examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md" ), ) @@ -585,8 +585,8 @@ def parse_args(input_args=None): type=str, default=None, help=( - "The transformer blocks to apply LoRA training on." - 'E.g. - "--lora_blocks 12 30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' + "The transformer blocks to apply LoRA training on. Please specify the block numbers in a comma seperated manner." + 'E.g. - "--lora_blocks 12,30" will result in lora training of transformer blocks 12 and 30. For more examples refer to https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_SD3.md' ), ) @@ -1242,7 +1242,6 @@ def main(args): text_encoder_one.gradient_checkpointing_enable() text_encoder_two.gradient_checkpointing_enable() if args.lora_layers is not None: - #target_modules = args.lora_layers target_modules = [layer.strip() for layer in args.lora_layers.split(",")] else: target_modules = [ From df018ddcb392818be4a9d017923f408954b0903f Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 15:47:13 +0200 Subject: [PATCH 14/16] address sayaks comments --- examples/dreambooth/README_sd3.md | 2 +- examples/dreambooth/test_dreambooth_lora_sd3.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/dreambooth/README_sd3.md b/examples/dreambooth/README_sd3.md index d60b9ae86466..89d87d65dd44 100644 --- a/examples/dreambooth/README_sd3.md +++ b/examples/dreambooth/README_sd3.md @@ -174,7 +174,7 @@ By default, the trained layers are - `attn.add_k_proj,attn.add_q_proj,attn.add_v_proj,attn.to_add_out,attn.to_k,attn.to_out.0,attn.to_q,attn.to_v` If you wish to have a leaner LoRA / train more blocks over layers you could pass - ```diff ---lora_layers attn.to_k,attn.to_q,attn.to_v,attn.to_out.0 ++ --lora_layers attn.to_k,attn.to_q,attn.to_v,attn.to_out.0 ``` This will reduce LoRA size by roughly 50% for the same rank compared to the default. 
However, if you're after compact LoRAs, it's our impression that maintaining the default setting for `--lora_layers` and diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index deaed0ef3eb9..bec890e55ffc 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -38,6 +38,8 @@ class DreamBoothLoRASD3(ExamplesTestsAccelerate): pretrained_model_name_or_path = "hf-internal-testing/tiny-sd3-pipe" script_path = "examples/dreambooth/train_dreambooth_lora_sd3.py" + LORA_BLOCK_TO_TEST = 0 + LORA_LAYER_TO_TEST = "attn.to_k" def test_dreambooth_lora_sd3(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" @@ -147,7 +149,7 @@ def test_dreambooth_lora_block(self): --train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 - --lora_blocks 0 + --lora_blocks {LORA_BLOCK_TO_TEST} --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -183,7 +185,7 @@ def test_dreambooth_lora_layer(self): --train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 - --lora_layers attn.to_k + --lora_layers {LORA_LAYER_TO_TEST} --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -200,9 +202,7 @@ def test_dreambooth_lora_layer(self): is_lora = all("lora" in k for k in lora_state_dict.keys()) self.assertTrue(is_lora) - # when not training the text encoder, all the parameters in the state dict should start - # with `"transformer"` in their names. - # In this test, only params of transformer block 0 should be in the state dict + # In this test, only transformer params of attention layers `attn.to_k` should be in the state dict starts_with_transformer = all("attn.to_k" in key for key in lora_state_dict.keys()) self.assertTrue(starts_with_transformer) From 65dd59d5e6d4583fa009782f8eab9e52b66fcc70 Mon Sep 17 00:00:00 2001 From: Linoy Date: Mon, 28 Oct 2024 13:48:02 +0000 Subject: [PATCH 15/16] style --- examples/dreambooth/test_dreambooth_lora_sd3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index bec890e55ffc..a052ea3b6497 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -40,6 +40,7 @@ class DreamBoothLoRASD3(ExamplesTestsAccelerate): LORA_BLOCK_TO_TEST = 0 LORA_LAYER_TO_TEST = "attn.to_k" + def test_dreambooth_lora_sd3(self): with tempfile.TemporaryDirectory() as tmpdir: test_args = f""" From 559b0bc860ea644a88097cb0986860e425bce6c6 Mon Sep 17 00:00:00 2001 From: linoytsaban Date: Mon, 28 Oct 2024 15:54:12 +0200 Subject: [PATCH 16/16] address sayaks comments --- examples/dreambooth/test_dreambooth_lora_sd3.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/dreambooth/test_dreambooth_lora_sd3.py b/examples/dreambooth/test_dreambooth_lora_sd3.py index a052ea3b6497..5d6c8bb9938a 100644 --- a/examples/dreambooth/test_dreambooth_lora_sd3.py +++ b/examples/dreambooth/test_dreambooth_lora_sd3.py @@ -38,8 +38,8 @@ class DreamBoothLoRASD3(ExamplesTestsAccelerate): pretrained_model_name_or_path = "hf-internal-testing/tiny-sd3-pipe" script_path = "examples/dreambooth/train_dreambooth_lora_sd3.py" - LORA_BLOCK_TO_TEST = 0 - LORA_LAYER_TO_TEST = "attn.to_k" + transformer_block_idx = 0 + layer_type = "attn.to_k" def test_dreambooth_lora_sd3(self): with tempfile.TemporaryDirectory() as tmpdir: @@ -150,7 +150,7 @@ def test_dreambooth_lora_block(self): 
--train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 - --lora_blocks {LORA_BLOCK_TO_TEST} + --lora_blocks {self.transformer_block_idx} --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant @@ -186,7 +186,7 @@ def test_dreambooth_lora_layer(self): --train_batch_size 1 --gradient_accumulation_steps 1 --max_train_steps 2 - --lora_layers {LORA_LAYER_TO_TEST} + --lora_layers {self.layer_type} --learning_rate 5.0e-04 --scale_lr --lr_scheduler constant
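
---

Note: the module-selection logic that this series converges on (after the `nargs` revert in patches 13-16) can be summarized by the standalone sketch below. The helper name `build_target_modules` is illustrative and does not appear in the PR; the default layer list and the comma-separated parsing mirror the final state of `train_dreambooth_lora_sd3.py`, and the resulting list is what the script passes to `LoraConfig(target_modules=...)`.

```python
# Illustrative sketch (not part of the PR): shows how the final revision of
# train_dreambooth_lora_sd3.py turns --lora_layers / --lora_blocks into the
# target_modules list handed to the PEFT LoraConfig.

DEFAULT_LAYERS = [
    "attn.add_k_proj",
    "attn.add_q_proj",
    "attn.add_v_proj",
    "attn.to_add_out",
    "attn.to_k",
    "attn.to_out.0",
    "attn.to_q",
    "attn.to_v",
]


def build_target_modules(lora_layers=None, lora_blocks=None):
    """Compose LoRA target modules from comma-separated --lora_layers / --lora_blocks strings."""
    if lora_layers is not None:
        # Explicit layer types override the default attention-layer set.
        target_modules = [layer.strip() for layer in lora_layers.split(",")]
    else:
        target_modules = list(DEFAULT_LAYERS)

    if lora_blocks is not None:
        # Restrict training to the requested transformer blocks only by prefixing
        # each module with its block path; otherwise all blocks are targeted.
        target_blocks = [int(block.strip()) for block in lora_blocks.split(",")]
        target_modules = [
            f"transformer_blocks.{block}.{module}"
            for block in target_blocks
            for module in target_modules
        ]
    return target_modules


if __name__ == "__main__":
    # Attention-only layers on blocks 12 and 30, matching the README examples.
    print(build_target_modules(lora_layers="attn.to_k,attn.to_q,attn.to_v,attn.to_out.0", lora_blocks="12,30"))
```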