diff --git a/docs/source/en/api/pipelines/latent_consistency_models.md b/docs/source/en/api/pipelines/latent_consistency_models.md index 927b28a5a038..1a7c14fb1a77 100644 --- a/docs/source/en/api/pipelines/latent_consistency_models.md +++ b/docs/source/en/api/pipelines/latent_consistency_models.md @@ -8,47 +8,8 @@ The abstract of the [paper](https://arxiv.org/pdf/2310.04378.pdf) is as follows: A demo for the [SimianLuo/LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) checkpoint can be found [here](https://huggingface.co/spaces/SimianLuo/Latent_Consistency_Model). -This pipeline was contributed by [luosiallen](https://luosiallen.github.io/) and [dg845](https://github.com/dg845). +The pipelines were contributed by [luosiallen](https://luosiallen.github.io/), [nagolinc](https://github.com/nagolinc), and [dg845](https://github.com/dg845). -## text-to-image - -```python -import torch -from diffusers import DiffusionPipeline - -pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch.float32) - -# To save GPU memory, torch.float16 can be used, but it may compromise image quality. -pipe.to(torch_device="cuda", torch_dtype=torch.float32) - -prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k" - -# Can be set to 1~50 steps. LCM support fast inference even <= 4 steps. Recommend: 1~8 steps. -num_inference_steps = 4 - -images = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0).images -``` - -## image-to-image - -```python -import torch -from diffusers import AutoPipelineForImage2Image -import PIL - -pipe = AutoPipelineForImage2Image.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch.float32) - -# To save GPU memory, torch.float16 can be used, but it may compromise image quality. 
-pipe.to(torch_device="cuda", torch_dtype=torch.float32) - -prompt = "High altitude snowy mountains" -image = PIL.Image.open("./snowy_mountains.png") - -# Can be set to 1~50 steps. LCM support fast inference even <= 4 steps. Recommend: 1~8 steps. -num_inference_steps = 4 - -images = pipe(prompt=prompt, image=image, num_inference_steps=num_inference_steps, guidance_scale=8.0).images -``` ## LatentConsistencyModelPipeline @@ -62,6 +23,8 @@ images = pipe(prompt=prompt, image=image, num_inference_steps=num_inference_step - enable_vae_tiling - disable_vae_tiling +## LatentConsistencyModelImg2ImgPipeline + [[autodoc]] LatentConsistencyModelImg2ImgPipeline - all - __call__ diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index 99d2d2e5c4d7..8d0a2fe1b5b5 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -27,7 +27,14 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import LCMScheduler -from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers +from ...utils import ( + USE_PEFT_BACKEND, + deprecate, + logging, + replace_example_docstring, + scale_lora_layers, + unscale_lora_layers, +) from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -36,6 +43,32 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name +EXAMPLE_DOC_STRING = """ + Examples: + ```py + >>> from diffusers import AutoPipelineForImage2Image + >>> import torch + >>> import PIL + + >>> pipe = 
AutoPipelineForImage2Image.from_pretrained("SimianLuo/LCM_Dreamshaper_v7") + >>> # To save GPU memory, torch.float16 can be used, but it may compromise image quality. + >>> pipe.to(torch_device="cuda", torch_dtype=torch.float32) + + >>> prompt = "High altitude snowy mountains" + >>> image = PIL.Image.open("./snowy_mountains.png") + + >>> # Can be set to 1~50 steps. LCM supports fast inference even <= 4 steps. Recommended: 1~8 steps. + >>> num_inference_steps = 4 + >>> images = pipe( + ... prompt=prompt, image=image, num_inference_steps=num_inference_steps, guidance_scale=8.0 + ... ).images + + >>> images[0].save("image.png") + ``` + +""" + + class LatentConsistencyModelImg2ImgPipeline( DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin ): @@ -486,6 +519,7 @@ def get_timesteps(self, num_inference_steps, strength, device): return timesteps, num_inference_steps - t_start @torch.no_grad() + @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( self, prompt: Union[str, List[str]] = None, @@ -559,6 +593,8 @@ def __call__( Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that the output of the pre-final layer will be used for computing the prompt embeddings. 
+ Examples: + Returns: [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned, diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index 04dcef4152d4..dca3f7e21e8a 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -26,12 +26,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel from ...models.lora import adjust_lora_scale_text_encoder from ...schedulers import LCMScheduler -from ...utils import ( - USE_PEFT_BACKEND, - logging, - scale_lora_layers, - unscale_lora_layers, -) +from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers from ...utils.torch_utils import randn_tensor from ..pipeline_utils import DiffusionPipeline from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker @@ -39,6 +34,25 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name +EXAMPLE_DOC_STRING = """ + Examples: + ```py + >>> from diffusers import DiffusionPipeline + >>> import torch + + >>> pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7") + >>> # To save GPU memory, torch.float16 can be used, but it may compromise image quality. + >>> pipe.to(torch_device="cuda", torch_dtype=torch.float32) + + >>> prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k" + + >>> # Can be set to 1~50 steps. LCM supports fast inference even <= 4 steps. Recommended: 1~8 steps. 
+ >>> num_inference_steps = 4 + >>> images = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0).images + >>> images[0].save("image.png") + ``` +""" + class LatentConsistencyModelPipeline( DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin @@ -477,6 +491,7 @@ def check_inputs( raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") @torch.no_grad() + @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( self, prompt: Union[str, List[str]] = None, @@ -550,6 +565,8 @@ def __call__( Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that the output of the pre-final layer will be used for computing the prompt embeddings. + Examples: + Returns: [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,