From 9dbed17846352ff7a8bffff435b60fc45d4ff106 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Thu, 2 Oct 2025 18:18:19 +0530 Subject: [PATCH 1/3] make flux ready for mellon --- src/diffusers/modular_pipelines/flux/encoders.py | 2 ++ src/diffusers/modular_pipelines/flux/modular_blocks.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/diffusers/modular_pipelines/flux/encoders.py b/src/diffusers/modular_pipelines/flux/encoders.py index 8c49990280ac..37a570ae2c5e 100644 --- a/src/diffusers/modular_pipelines/flux/encoders.py +++ b/src/diffusers/modular_pipelines/flux/encoders.py @@ -181,6 +181,7 @@ def inputs(self) -> List[InputParam]: return [ InputParam("prompt"), InputParam("prompt_2"), + InputParam("max_sequence_length", type_hint=int, default=512, required=False), InputParam("joint_attention_kwargs"), ] @@ -404,6 +405,7 @@ def __call__(self, components: FluxModularPipeline, state: PipelineState) -> Pip pooled_prompt_embeds=None, device=block_state.device, num_images_per_prompt=1, # TODO: hardcoded for now. 
+ max_sequence_length=block_state.max_sequence_length, lora_scale=block_state.text_encoder_lora_scale, ) diff --git a/src/diffusers/modular_pipelines/flux/modular_blocks.py b/src/diffusers/modular_pipelines/flux/modular_blocks.py index 37895bddbf07..dbb54e81b867 100644 --- a/src/diffusers/modular_pipelines/flux/modular_blocks.py +++ b/src/diffusers/modular_pipelines/flux/modular_blocks.py @@ -84,9 +84,9 @@ def description(self): # before_denoise: all task (text2img, img2img) class FluxAutoBeforeDenoiseStep(AutoPipelineBlocks): - block_classes = [FluxBeforeDenoiseStep, FluxImg2ImgBeforeDenoiseStep] - block_names = ["text2image", "img2img"] - block_trigger_inputs = [None, "image_latents"] + block_classes = [FluxImg2ImgBeforeDenoiseStep, FluxBeforeDenoiseStep] + block_names = ["img2img", "text2image"] + block_trigger_inputs = ["image_latents", None] @property def description(self): From a1edf8ee976853ab0775bcc6834a6fc2a105e429 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Mon, 6 Oct 2025 10:21:39 +0530 Subject: [PATCH 2/3] add FluxCoreDenoiseStep and tag text-embedding params as denoiser_input_fields --- .../modular_pipelines/flux/before_denoise.py | 4 +++ .../modular_pipelines/flux/encoders.py | 3 +++ .../modular_pipelines/flux/modular_blocks.py | 25 +++++++++++++++---- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/diffusers/modular_pipelines/flux/before_denoise.py b/src/diffusers/modular_pipelines/flux/before_denoise.py index 4272066309a2..95858fbf6eb0 100644 --- a/src/diffusers/modular_pipelines/flux/before_denoise.py +++ b/src/diffusers/modular_pipelines/flux/before_denoise.py @@ -252,11 +252,13 @@ def inputs(self) -> List[InputParam]: InputParam( "prompt_embeds", required=True, + kwargs_type="denoiser_input_fields", type_hint=torch.Tensor, description="Pre-generated text embeddings. Can be generated from text_encoder step.", ), InputParam( "pooled_prompt_embeds", + kwargs_type="denoiser_input_fields", type_hint=torch.Tensor, description="Pre-generated pooled text embeddings. 
Can be generated from text_encoder step.", ), @@ -279,11 +281,13 @@ def intermediate_outputs(self) -> List[str]: OutputParam( "prompt_embeds", type_hint=torch.Tensor, + kwargs_type="denoiser_input_fields", description="text embeddings used to guide the image generation", ), OutputParam( "pooled_prompt_embeds", type_hint=torch.Tensor, + kwargs_type="denoiser_input_fields", description="pooled text embeddings used to guide the image generation", ), # TODO: support negative embeddings? diff --git a/src/diffusers/modular_pipelines/flux/encoders.py b/src/diffusers/modular_pipelines/flux/encoders.py index 37a570ae2c5e..16ddecbadb4f 100644 --- a/src/diffusers/modular_pipelines/flux/encoders.py +++ b/src/diffusers/modular_pipelines/flux/encoders.py @@ -190,16 +190,19 @@ def intermediate_outputs(self) -> List[OutputParam]: return [ OutputParam( "prompt_embeds", + kwargs_type="denoiser_input_fields", type_hint=torch.Tensor, description="text embeddings used to guide the image generation", ), OutputParam( "pooled_prompt_embeds", + kwargs_type="denoiser_input_fields", type_hint=torch.Tensor, description="pooled text embeddings used to guide the image generation", ), OutputParam( "text_ids", + kwargs_type="denoiser_input_fields", type_hint=torch.Tensor, description="ids from the text sequence for RoPE", ), diff --git a/src/diffusers/modular_pipelines/flux/modular_blocks.py b/src/diffusers/modular_pipelines/flux/modular_blocks.py index dbb54e81b867..a221b3a73613 100644 --- a/src/diffusers/modular_pipelines/flux/modular_blocks.py +++ b/src/diffusers/modular_pipelines/flux/modular_blocks.py @@ -124,16 +124,32 @@ def description(self): return "Decode step that decode the denoised latents into image outputs.\n - `FluxDecodeStep`" +class FluxCoreDenoiseStep(SequentialPipelineBlocks): + block_classes = [FluxInputStep, FluxAutoBeforeDenoiseStep, FluxAutoDenoiseStep] + block_names = ["input", "before_denoise", "denoise"] + + @property + def description(self): + return ( + "Core step 
that performs the denoising process. \n" + + " - `FluxInputStep` (input) standardizes the inputs for the denoising step.\n" + + " - `FluxAutoBeforeDenoiseStep` (before_denoise) prepares the inputs for the denoising step.\n" + + " - `FluxAutoDenoiseStep` (denoise) iteratively denoises the latents.\n" + + "This step support text-to-image and image-to-image tasks for Flux:\n" + + " - for image-to-image generation, you need to provide `image_latents`\n" + + " - for text-to-image generation, all you need to provide is prompt embeddings" + ) + + # text2image class FluxAutoBlocks(SequentialPipelineBlocks): block_classes = [ FluxTextEncoderStep, FluxAutoVaeEncoderStep, - FluxAutoBeforeDenoiseStep, - FluxAutoDenoiseStep, + FluxCoreDenoiseStep, FluxAutoDecodeStep, ] - block_names = ["text_encoder", "image_encoder", "before_denoise", "denoise", "decoder"] + block_names = ["text_encoder", "image_encoder", "denoise", "decoder"] @property def description(self): @@ -171,8 +187,7 @@ def description(self): [ ("text_encoder", FluxTextEncoderStep), ("image_encoder", FluxAutoVaeEncoderStep), - ("before_denoise", FluxAutoBeforeDenoiseStep), - ("denoise", FluxAutoDenoiseStep), + ("denoise", FluxCoreDenoiseStep), ("decode", FluxAutoDecodeStep), ] ) From 9861f5bef5f37c64fb6de3a9562feffb2d19bbb6 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Mon, 6 Oct 2025 12:26:43 +0530 Subject: [PATCH 3/3] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Álvaro Somoza --- src/diffusers/modular_pipelines/flux/modular_blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/modular_pipelines/flux/modular_blocks.py b/src/diffusers/modular_pipelines/flux/modular_blocks.py index a221b3a73613..ca4f993a11fe 100644 --- a/src/diffusers/modular_pipelines/flux/modular_blocks.py +++ b/src/diffusers/modular_pipelines/flux/modular_blocks.py @@ -149,7 +149,7 @@ class 
FluxAutoBlocks(SequentialPipelineBlocks): FluxCoreDenoiseStep, FluxAutoDecodeStep, ] - block_names = ["text_encoder", "image_encoder", "denoise", "decoder"] + block_names = ["text_encoder", "image_encoder", "denoise", "decode"] @property def description(self):