From 9dbed17846352ff7a8bffff435b60fc45d4ff106 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Thu, 2 Oct 2025 18:18:19 +0530
Subject: [PATCH 1/3] make flux ready for mellon

---
 src/diffusers/modular_pipelines/flux/encoders.py       | 2 ++
 src/diffusers/modular_pipelines/flux/modular_blocks.py | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/diffusers/modular_pipelines/flux/encoders.py b/src/diffusers/modular_pipelines/flux/encoders.py
index 8c49990280ac..37a570ae2c5e 100644
--- a/src/diffusers/modular_pipelines/flux/encoders.py
+++ b/src/diffusers/modular_pipelines/flux/encoders.py
@@ -181,6 +181,7 @@ def inputs(self) -> List[InputParam]:
         return [
             InputParam("prompt"),
             InputParam("prompt_2"),
+            InputParam("max_sequence_length", type_hint=int, default=512, required=False),
             InputParam("joint_attention_kwargs"),
         ]
 
@@ -404,6 +405,7 @@ def __call__(self, components: FluxModularPipeline, state: PipelineState) -> Pip
             pooled_prompt_embeds=None,
             device=block_state.device,
             num_images_per_prompt=1,  # TODO: hardcoded for now.
+            max_sequence_length=block_state.max_sequence_length,
             lora_scale=block_state.text_encoder_lora_scale,
         )
 
diff --git a/src/diffusers/modular_pipelines/flux/modular_blocks.py b/src/diffusers/modular_pipelines/flux/modular_blocks.py
index 37895bddbf07..dbb54e81b867 100644
--- a/src/diffusers/modular_pipelines/flux/modular_blocks.py
+++ b/src/diffusers/modular_pipelines/flux/modular_blocks.py
@@ -84,9 +84,9 @@ def description(self):
 
 # before_denoise: all task (text2img, img2img)
 class FluxAutoBeforeDenoiseStep(AutoPipelineBlocks):
-    block_classes = [FluxBeforeDenoiseStep, FluxImg2ImgBeforeDenoiseStep]
-    block_names = ["text2image", "img2img"]
-    block_trigger_inputs = [None, "image_latents"]
+    block_classes = [FluxImg2ImgBeforeDenoiseStep, FluxBeforeDenoiseStep]
+    block_names = ["img2img", "text2image"]
+    block_trigger_inputs = ["image_latents", None]
 
     @property
     def description(self):

From a1edf8ee976853ab0775bcc6834a6fc2a105e429 Mon Sep 17 00:00:00 2001
From: sayakpaul <spsayakpaul@gmail.com>
Date: Mon, 6 Oct 2025 10:21:39 +0530
Subject: [PATCH 2/3] add FluxCoreDenoiseStep and tag text-encoder outputs as denoiser_input_fields

---
 .../modular_pipelines/flux/before_denoise.py  |  4 +++
 .../modular_pipelines/flux/encoders.py        |  3 +++
 .../modular_pipelines/flux/modular_blocks.py  | 25 +++++++++++++++----
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/diffusers/modular_pipelines/flux/before_denoise.py b/src/diffusers/modular_pipelines/flux/before_denoise.py
index 4272066309a2..95858fbf6eb0 100644
--- a/src/diffusers/modular_pipelines/flux/before_denoise.py
+++ b/src/diffusers/modular_pipelines/flux/before_denoise.py
@@ -252,11 +252,13 @@ def inputs(self) -> List[InputParam]:
             InputParam(
                 "prompt_embeds",
                 required=True,
+                kwargs_type="denoiser_input_fields",
                 type_hint=torch.Tensor,
                 description="Pre-generated text embeddings. Can be generated from text_encoder step.",
             ),
             InputParam(
                 "pooled_prompt_embeds",
+                kwargs_type="denoiser_input_fields",
                 type_hint=torch.Tensor,
                 description="Pre-generated pooled text embeddings. Can be generated from text_encoder step.",
             ),
@@ -279,11 +281,13 @@ def intermediate_outputs(self) -> List[str]:
             OutputParam(
                 "prompt_embeds",
                 type_hint=torch.Tensor,
+                kwargs_type="denoiser_input_fields",
                 description="text embeddings used to guide the image generation",
             ),
             OutputParam(
                 "pooled_prompt_embeds",
                 type_hint=torch.Tensor,
+                kwargs_type="denoiser_input_fields",
                 description="pooled text embeddings used to guide the image generation",
             ),
             # TODO: support negative embeddings?
diff --git a/src/diffusers/modular_pipelines/flux/encoders.py b/src/diffusers/modular_pipelines/flux/encoders.py
index 37a570ae2c5e..16ddecbadb4f 100644
--- a/src/diffusers/modular_pipelines/flux/encoders.py
+++ b/src/diffusers/modular_pipelines/flux/encoders.py
@@ -190,16 +190,19 @@ def intermediate_outputs(self) -> List[OutputParam]:
         return [
             OutputParam(
                 "prompt_embeds",
+                kwargs_type="denoiser_input_fields",
                 type_hint=torch.Tensor,
                 description="text embeddings used to guide the image generation",
             ),
             OutputParam(
                 "pooled_prompt_embeds",
+                kwargs_type="denoiser_input_fields",
                 type_hint=torch.Tensor,
                 description="pooled text embeddings used to guide the image generation",
             ),
             OutputParam(
                 "text_ids",
+                kwargs_type="denoiser_input_fields",
                 type_hint=torch.Tensor,
                 description="ids from the text sequence for RoPE",
             ),
diff --git a/src/diffusers/modular_pipelines/flux/modular_blocks.py b/src/diffusers/modular_pipelines/flux/modular_blocks.py
index dbb54e81b867..a221b3a73613 100644
--- a/src/diffusers/modular_pipelines/flux/modular_blocks.py
+++ b/src/diffusers/modular_pipelines/flux/modular_blocks.py
@@ -124,16 +124,32 @@ def description(self):
         return "Decode step that decode the denoised latents into image outputs.\n - `FluxDecodeStep`"
 
 
+class FluxCoreDenoiseStep(SequentialPipelineBlocks):
+    block_classes = [FluxInputStep, FluxAutoBeforeDenoiseStep, FluxAutoDenoiseStep]
+    block_names = ["input", "before_denoise", "denoise"]
+
+    @property
+    def description(self):
+        return (
+            "Core step that performs the denoising process. \n"
+            + " - `FluxInputStep` (input) standardizes the inputs for the denoising step.\n"
+            + " - `FluxAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n"
+            + " - `FluxAutoDenoiseStep` (denoise) iteratively denoises the latents.\n"
+            + "This step support text-to-image and image-to-image tasks for Flux:\n"
+            + " - for image-to-image generation, you need to provide `image_latents`\n"
+            + " - for text-to-image generation, all you need to provide is prompt embeddings"
+        )
+
+
 # text2image
 class FluxAutoBlocks(SequentialPipelineBlocks):
     block_classes = [
         FluxTextEncoderStep,
         FluxAutoVaeEncoderStep,
-        FluxAutoBeforeDenoiseStep,
-        FluxAutoDenoiseStep,
+        FluxCoreDenoiseStep,
         FluxAutoDecodeStep,
     ]
-    block_names = ["text_encoder", "image_encoder", "before_denoise", "denoise", "decoder"]
+    block_names = ["text_encoder", "image_encoder", "denoise", "decoder"]
 
     @property
     def description(self):
@@ -171,8 +187,7 @@ def description(self):
     [
         ("text_encoder", FluxTextEncoderStep),
         ("image_encoder", FluxAutoVaeEncoderStep),
-        ("before_denoise", FluxAutoBeforeDenoiseStep),
-        ("denoise", FluxAutoDenoiseStep),
+        ("denoise", FluxCoreDenoiseStep),
         ("decode", FluxAutoDecodeStep),
     ]
 )

From 9861f5bef5f37c64fb6de3a9562feffb2d19bbb6 Mon Sep 17 00:00:00 2001
From: Sayak Paul <spsayakpaul@gmail.com>
Date: Mon, 6 Oct 2025 12:26:43 +0530
Subject: [PATCH 3/3] Apply suggestions from code review: rename FluxAutoBlocks block "decoder" to "decode"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Álvaro Somoza <asomoza@users.noreply.github.com>
---
 src/diffusers/modular_pipelines/flux/modular_blocks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/diffusers/modular_pipelines/flux/modular_blocks.py b/src/diffusers/modular_pipelines/flux/modular_blocks.py
index a221b3a73613..ca4f993a11fe 100644
--- a/src/diffusers/modular_pipelines/flux/modular_blocks.py
+++ b/src/diffusers/modular_pipelines/flux/modular_blocks.py
@@ -149,7 +149,7 @@ class FluxAutoBlocks(SequentialPipelineBlocks):
         FluxCoreDenoiseStep,
         FluxAutoDecodeStep,
     ]
-    block_names = ["text_encoder", "image_encoder", "denoise", "decoder"]
+    block_names = ["text_encoder", "image_encoder", "denoise", "decode"]
 
     @property
     def description(self):