From 957241246ab43faa6ba452d54a192232d16cbb98 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 11 Jan 2025 09:31:11 +0530 Subject: [PATCH 1/6] add negative_prompt documentation. --- src/diffusers/pipelines/flux/pipeline_flux.py | 19 ++++++++++++++++++- .../pipeline_stable_diffusion_3.py | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index c23b660300db..33154db54c73 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -665,7 +665,16 @@ def __call__( instead. prompt_2 (`str` or `List[str]`, *optional*): The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is - will be used instead + will be used instead. + negative_prompt (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation. If not defined, one has to pass + `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is + not greater than `1`). + negative_prompt_2 (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and + `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. + true_cfg_scale (`float`, *optional*, defaults to 1.0): + When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance. height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): The height in pixels of the generated image. This is set to 1024 by default for the best results. width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): @@ -709,6 +718,14 @@ def __call__( Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. If not provided, embeddings are computed from the `ip_adapter_image` input argument. + negative_prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt + weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input + argument. + negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt + weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt` + input argument. output_type (`str`, *optional*, defaults to `"pil"`): The output format of the generate image. Choose between [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py index dc0d64144e12..892e0f6781f5 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py @@ -383,7 +383,7 @@ def encode_prompt( negative_prompt_2 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. - negative_prompt_2 (`str` or `List[str]`, *optional*): + negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders prompt_embeds (`torch.FloatTensor`, *optional*): From 4674e7362d5445b568d26414a8877479d0667eba Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sat, 11 Jan 2025 09:33:51 +0530 Subject: [PATCH 2/6] add proper docs for negative prompts --- src/diffusers/pipelines/flux/pipeline_flux.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index 33154db54c73..c554d39dd1fe 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -790,6 +790,9 @@ def __call__( lora_scale = ( self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None ) + # should this not be like the following? + # has_neg_prompt = negative_prompt is not None or (negative_prompt_embeds is not None and negative_pooled_prompt_embeds is not None) + # do_true_cfg = true_cfg_scale > 1 and has_neg_prompt do_true_cfg = true_cfg_scale > 1 and negative_prompt is not None ( prompt_embeds, From e9441188795fe9cfe541f5620e3eb4096601d301 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sun, 12 Jan 2025 16:38:23 +0530 Subject: [PATCH 3/6] fix-copies --- .../controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py | 2 +- .../pipeline_stable_diffusion_3_controlnet_inpainting.py | 2 +- src/diffusers/pipelines/pag/pipeline_pag_sd_3.py | 2 +- src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py | 2 +- .../stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py | 2 +- .../stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py index d2e3e0f34519..ed98d7dd4ab8 100644 --- a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +++ b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py @@ -404,7 +404,7 @@ def encode_prompt( negative_prompt_2 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. - negative_prompt_2 (`str` or `List[str]`, *optional*): + negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders prompt_embeds (`torch.FloatTensor`, *optional*): diff --git a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py index 1040ff265985..805166904795 100644 --- a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +++ b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py @@ -410,7 +410,7 @@ def encode_prompt( negative_prompt_2 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. - negative_prompt_2 (`str` or `List[str]`, *optional*): + negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders prompt_embeds (`torch.FloatTensor`, *optional*): diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py index 0285239aaa8d..61564707e31d 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py @@ -375,7 +375,7 @@ def encode_prompt( negative_prompt_2 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. - negative_prompt_2 (`str` or `List[str]`, *optional*): + negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders prompt_embeds (`torch.FloatTensor`, *optional*): diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py index 121be4ce2c07..664190bd8eee 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py @@ -391,7 +391,7 @@ def encode_prompt( negative_prompt_2 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. - negative_prompt_2 (`str` or `List[str]`, *optional*): + negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders prompt_embeds (`torch.FloatTensor`, *optional*): diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py index 6a3a4abe7696..7bee40cd0080 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py @@ -400,7 +400,7 @@ def encode_prompt( negative_prompt_2 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. - negative_prompt_2 (`str` or `List[str]`, *optional*): + negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders prompt_embeds (`torch.FloatTensor`, *optional*): diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py index 23cc4983d54f..e73e3a831d1c 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py @@ -406,7 +406,7 @@ def encode_prompt( negative_prompt_2 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. - negative_prompt_2 (`str` or `List[str]`, *optional*): + negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders prompt_embeds (`torch.FloatTensor`, *optional*): From 540e1753c2747fc37b259537ba43b1aaeaab3990 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sun, 12 Jan 2025 16:38:52 +0530 Subject: [PATCH 4/6] remove comment. --- src/diffusers/pipelines/flux/pipeline_flux.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/diffusers/pipelines/flux/pipeline_flux.py b/src/diffusers/pipelines/flux/pipeline_flux.py index c554d39dd1fe..33154db54c73 100644 --- a/src/diffusers/pipelines/flux/pipeline_flux.py +++ b/src/diffusers/pipelines/flux/pipeline_flux.py @@ -790,9 +790,6 @@ def __call__( lora_scale = ( self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None ) - # should this not be like the following? - # has_neg_prompt = negative_prompt is not None or (negative_prompt_embeds is not None and negative_pooled_prompt_embeds is not None) - # do_true_cfg = true_cfg_scale > 1 and has_neg_prompt do_true_cfg = true_cfg_scale > 1 and negative_prompt is not None ( prompt_embeds, From 7db0b700ca57e9975e229e68bc2bf189d7a4de86 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Sun, 12 Jan 2025 17:49:52 +0530 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: hlky --- .../pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py index 892e0f6781f5..23950f895aae 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py @@ -385,7 +385,7 @@ def encode_prompt( `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and - `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders + `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders. prompt_embeds (`torch.FloatTensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. From ce4b83695c9f87f7095e7d525762e5ee6d0b5051 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Sun, 12 Jan 2025 17:50:12 +0530 Subject: [PATCH 6/6] fix-copies --- .../controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py | 2 +- .../pipeline_stable_diffusion_3_controlnet_inpainting.py | 2 +- src/diffusers/pipelines/pag/pipeline_pag_sd_3.py | 2 +- src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py | 2 +- .../stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py | 2 +- .../stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py index ed98d7dd4ab8..7f85fcc1d90d 100644 --- a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +++ b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py @@ -406,7 +406,7 @@ def encode_prompt( `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and - `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders + `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders. prompt_embeds (`torch.FloatTensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. diff --git a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py index 805166904795..abefb844a8cc 100644 --- a/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +++ b/src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py @@ -412,7 +412,7 @@ def encode_prompt( `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and - `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders + `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders. prompt_embeds (`torch.FloatTensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py index 61564707e31d..fde3e500a573 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_3.py @@ -377,7 +377,7 @@ def encode_prompt( `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and - `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders + `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders. prompt_embeds (`torch.FloatTensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py index 664190bd8eee..d64582a26f7a 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py @@ -393,7 +393,7 @@ def encode_prompt( `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and - `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders + `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders. prompt_embeds (`torch.FloatTensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py index 7bee40cd0080..b6e95844b3bd 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py @@ -402,7 +402,7 @@ def encode_prompt( `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and - `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders + `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders. prompt_embeds (`torch.FloatTensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument. diff --git a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py index e73e3a831d1c..67791c17a74b 100644 --- a/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py @@ -408,7 +408,7 @@ def encode_prompt( `text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders. negative_prompt_3 (`str` or `List[str]`, *optional*): The prompt or prompts not to guide the image generation to be sent to `tokenizer_3` and - `text_encoder_3`. If not defined, `negative_prompt` is used in both text-encoders + `text_encoder_3`. If not defined, `negative_prompt` is used in all the text-encoders. prompt_embeds (`torch.FloatTensor`, *optional*): Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input argument.