From 5960184a92107d5862920954daae698c3af0bf0b Mon Sep 17 00:00:00 2001 From: Nilesh Kokane Date: Fri, 5 Apr 2024 11:31:11 +0530 Subject: [PATCH 1/3] Check for latents, before calling prepare_latents - sdxlImg2Img --- .../pipeline_stable_diffusion_xl_img2img.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index b72b19d5c1ef..b98ea279c1a2 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -1247,17 +1247,19 @@ def denoising_value_valid(dnv): latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) add_noise = True if self.denoising_start is None else False + # 6. Prepare latent variables - latents = self.prepare_latents( - image, - latent_timestep, - batch_size, - num_images_per_prompt, - prompt_embeds.dtype, - device, - generator, - add_noise, - ) + if latents is None: + latents = self.prepare_latents( + image, + latent_timestep, + batch_size, + num_images_per_prompt, + prompt_embeds.dtype, + device, + generator, + add_noise, + ) # 7. Prepare extra step kwargs. extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) From 8ef365b79f6dfae4c424a39cf23c23eb69a5d3eb Mon Sep 17 00:00:00 2001 From: Nilesh Kokane Date: Mon, 15 Apr 2024 10:54:00 +0530 Subject: [PATCH 2/3] Added latents check for all the img2img pipeline --- .../clip_guided_stable_diffusion_img2img.py | 13 ++++++++--- .../community/latent_consistency_img2img.py | 23 ++++++++++--------- .../stable_diffusion_controlnet_img2img.py | 19 +++++++-------- ...le_diffusion_controlnet_inpaint_img2img.py | 19 +++++++-------- .../controlnet/pipeline_controlnet_img2img.py | 19 +++++++-------- .../pipeline_controlnet_sd_xl_img2img.py | 21 +++++++++-------- .../pipeline_latent_consistency_img2img.py | 7 +++--- .../shap_e/pipeline_shap_e_img2img.py | 18 +++++++-------- .../pipeline_stable_unclip_img2img.py | 21 +++++++++-------- 9 files changed, 87 insertions(+), 73 deletions(-) diff --git a/examples/community/clip_guided_stable_diffusion_img2img.py b/examples/community/clip_guided_stable_diffusion_img2img.py index 434d5253679a..c8e0a9094f22 100644 --- a/examples/community/clip_guided_stable_diffusion_img2img.py +++ b/examples/community/clip_guided_stable_diffusion_img2img.py @@ -359,9 +359,16 @@ def __call__( # Preprocess image image = preprocess(image, width, height) - latents = self.prepare_latents( - image, latent_timestep, batch_size, num_images_per_prompt, text_embeddings.dtype, self.device, generator - ) + if latents is None: + latents = self.prepare_latents( + image, + latent_timestep, + batch_size, + num_images_per_prompt, + text_embeddings.dtype, + self.device, + generator, + ) if clip_guidance_scale > 0: if clip_prompt is not None: diff --git a/examples/community/latent_consistency_img2img.py b/examples/community/latent_consistency_img2img.py index 35cd74166c68..98078a2eef96 100644 --- a/examples/community/latent_consistency_img2img.py +++ b/examples/community/latent_consistency_img2img.py @@ -335,17 +335,18 @@ def __call__( # 5. Prepare latent variable num_channels_latents = self.unet.config.in_channels - latents = self.prepare_latents( - image, - latent_timestep, - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - prompt_embeds.dtype, - device, - latents, - ) + if latents is None: + latents = self.prepare_latents( + image, + latent_timestep, + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + device, + latents, + ) bs = batch_size * num_images_per_prompt # 6. Get Guidance Scale Embedding diff --git a/examples/community/stable_diffusion_controlnet_img2img.py b/examples/community/stable_diffusion_controlnet_img2img.py index 5f9083616a84..74674e65f0ef 100644 --- a/examples/community/stable_diffusion_controlnet_img2img.py +++ b/examples/community/stable_diffusion_controlnet_img2img.py @@ -802,15 +802,16 @@ def __call__( latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) # 6. Prepare latent variables - latents = self.prepare_latents( - image, - latent_timestep, - batch_size, - num_images_per_prompt, - prompt_embeds.dtype, - device, - generator, - ) + if latents is None: + latents = self.prepare_latents( + image, + latent_timestep, + batch_size, + num_images_per_prompt, + prompt_embeds.dtype, + device, + generator, + ) # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) diff --git a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py index d056eb112165..14c4e4aa6d4e 100644 --- a/examples/community/stable_diffusion_controlnet_inpaint_img2img.py +++ b/examples/community/stable_diffusion_controlnet_inpaint_img2img.py @@ -907,15 +907,16 @@ def __call__( latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) # 6. Prepare latent variables - latents = self.prepare_latents( - image, - latent_timestep, - batch_size, - num_images_per_prompt, - prompt_embeds.dtype, - device, - generator, - ) + if latents is None: + latents = self.prepare_latents( + image, + latent_timestep, + batch_size, + num_images_per_prompt, + prompt_embeds.dtype, + device, + generator, + ) mask_image_latents = self.prepare_mask_latents( mask_image, diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index a5a0aaed0f2e..022f30d819d8 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -1169,15 +1169,16 @@ def __call__( self._num_timesteps = len(timesteps) # 6. Prepare latent variables - latents = self.prepare_latents( - image, - latent_timestep, - batch_size, - num_images_per_prompt, - prompt_embeds.dtype, - device, - generator, - ) + if latents is None: + latents = self.prepare_latents( + image, + latent_timestep, + batch_size, + num_images_per_prompt, + prompt_embeds.dtype, + device, + generator, + ) # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index d32e7d81649d..ae2fbe05c427 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -1429,16 +1429,17 @@ def __call__( self._num_timesteps = len(timesteps) # 6. Prepare latent variables - latents = self.prepare_latents( - image, - latent_timestep, - batch_size, - num_images_per_prompt, - prompt_embeds.dtype, - device, - generator, - True, - ) + if latents in None: + latents = self.prepare_latents( + image, + latent_timestep, + batch_size, + num_images_per_prompt, + prompt_embeds.dtype, + device, + generator, + True, + ) # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index 8957d7140ef1..fce694d1d0bd 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -872,9 +872,10 @@ def __call__( else self.scheduler.config.original_inference_steps ) latent_timestep = timesteps[:1] - latents = self.prepare_latents( - image, latent_timestep, batch_size, num_images_per_prompt, prompt_embeds.dtype, device, generator - ) + if latents is None: + latents = self.prepare_latents( + image, latent_timestep, batch_size, num_images_per_prompt, prompt_embeds.dtype, device, generator + ) bs = batch_size * num_images_per_prompt # 6. Get Guidance Scale Embedding diff --git a/src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py b/src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py index 02e32633cedb..700ca5db6f07 100644 --- a/src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +++ b/src/diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py @@ -239,15 +239,15 @@ def __call__( num_embeddings = self.prior.config.num_embeddings embedding_dim = self.prior.config.embedding_dim - - latents = self.prepare_latents( - (batch_size, num_embeddings * embedding_dim), - image_embeds.dtype, - device, - generator, - latents, - self.scheduler, - ) + if latents is None: + latents = self.prepare_latents( + (batch_size, num_embeddings * embedding_dim), + image_embeds.dtype, + device, + generator, + latents, + self.scheduler, + ) # YiYi notes: for testing only to match ldm, we can directly create a latents with desired shape: batch_size, num_embeddings, embedding_dim latents = latents.reshape(latents.shape[0], num_embeddings, embedding_dim) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py index fe19b4de3127..134ec39effc5 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py @@ -786,16 +786,17 @@ def __call__( # 6. Prepare latent variables num_channels_latents = self.unet.config.in_channels - latents = self.prepare_latents( - batch_size=batch_size, - num_channels_latents=num_channels_latents, - height=height, - width=width, - dtype=prompt_embeds.dtype, - device=device, - generator=generator, - latents=latents, - ) + if latents is None: + latents = self.prepare_latents( + batch_size=batch_size, + num_channels_latents=num_channels_latents, + height=height, + width=width, + dtype=prompt_embeds.dtype, + device=device, + generator=generator, + latents=latents, + ) # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) From e1455a9c9469bf4d76a0eb344184ca0e5096276c Mon Sep 17 00:00:00 2001 From: Nilesh Kokane Date: Sun, 28 Apr 2024 10:25:22 +0530 Subject: [PATCH 3/3] Fixed silly mistake while checking latents as None --- .../pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index ae2fbe05c427..d7889a9efbb5 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -1429,7 +1429,7 @@ def __call__( self._num_timesteps = len(timesteps) # 6. Prepare latent variables - if latents in None: + if latents is None: latents = self.prepare_latents( image, latent_timestep,