Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 3 additions & 40 deletions docs/source/en/api/pipelines/latent_consistency_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,47 +8,8 @@ The abstract of the [paper](https://arxiv.org/pdf/2310.04378.pdf) is as follows:

A demo for the [SimianLuo/LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7) checkpoint can be found [here](https://huggingface.co/spaces/SimianLuo/Latent_Consistency_Model).

This pipeline was contributed by [luosiallen](https://luosiallen.github.io/) and [dg845](https://github.com/dg845).
The pipelines were contributed by [luosiallen](https://luosiallen.github.io/), [nagolinc](https://github.com/nagolinc), and [dg845](https://github.com/dg845).

## text-to-image

```python
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch.float32)

# To save GPU memory, torch.float16 can be used, but it may compromise image quality.
pipe.to(torch_device="cuda", torch_dtype=torch.float32)

prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"

# Can be set to 1~50 steps. LCM supports fast inference even with <= 4 steps. Recommended: 1~8 steps.
num_inference_steps = 4

images = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0).images
```

## image-to-image

```python
import torch
from diffusers import AutoPipelineForImage2Image
import PIL

pipe = AutoPipelineForImage2Image.from_pretrained("SimianLuo/LCM_Dreamshaper_v7", torch_dtype=torch.float32)

# To save GPU memory, torch.float16 can be used, but it may compromise image quality.
pipe.to(torch_device="cuda", torch_dtype=torch.float32)

prompt = "High altitude snowy mountains"
image = PIL.Image.open("./snowy_mountains.png")

# Can be set to 1~50 steps. LCM supports fast inference even with <= 4 steps. Recommended: 1~8 steps.
num_inference_steps = 4

images = pipe(prompt=prompt, image=image, num_inference_steps=num_inference_steps, guidance_scale=8.0).images
```

## LatentConsistencyModelPipeline

Expand All @@ -62,6 +23,8 @@ images = pipe(prompt=prompt, image=image, num_inference_steps=num_inference_step
- enable_vae_tiling
- disable_vae_tiling

## LatentConsistencyModelImg2ImgPipeline

[[autodoc]] LatentConsistencyModelImg2ImgPipeline
- all
- __call__
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,14 @@
from ...models import AutoencoderKL, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import LCMScheduler
from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
from ...utils import (
USE_PEFT_BACKEND,
deprecate,
logging,
replace_example_docstring,
scale_lora_layers,
unscale_lora_layers,
)
from ...utils.torch_utils import randn_tensor
from ..pipeline_utils import DiffusionPipeline
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
Expand All @@ -36,6 +43,32 @@
logger = logging.get_logger(__name__) # pylint: disable=invalid-name


# Usage example that `@replace_example_docstring(EXAMPLE_DOC_STRING)` attaches to
# `__call__`. The pipeline is loaded through `AutoPipelineForImage2Image`, the
# class this snippet actually imports, so the example runs as written instead of
# failing with a NameError on an un-imported `DiffusionPipeline`.
EXAMPLE_DOC_STRING = """
Examples:
```py
>>> from diffusers import AutoPipelineForImage2Image
>>> import torch
>>> import PIL

>>> pipe = AutoPipelineForImage2Image.from_pretrained("SimianLuo/LCM_Dreamshaper_v7")
>>> # To save GPU memory, torch.float16 can be used, but it may compromise image quality.
>>> pipe.to(torch_device="cuda", torch_dtype=torch.float32)

>>> prompt = "High altitude snowy mountains"
>>> image = PIL.Image.open("./snowy_mountains.png")

>>> # Can be set to 1~50 steps. LCM support fast inference even <= 4 steps. Recommend: 1~8 steps.
>>> num_inference_steps = 4
>>> images = pipe(
... prompt=prompt, image=image, num_inference_steps=num_inference_steps, guidance_scale=8.0
... ).images

>>> images[0].save("image.png")
```

"""


class LatentConsistencyModelImg2ImgPipeline(
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
):
Expand Down Expand Up @@ -486,6 +519,7 @@ def get_timesteps(self, num_inference_steps, strength, device):
return timesteps, num_inference_steps - t_start

@torch.no_grad()
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
self,
prompt: Union[str, List[str]] = None,
Expand Down Expand Up @@ -559,6 +593,8 @@ def __call__(
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.

Examples:

Returns:
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,33 @@
from ...models import AutoencoderKL, UNet2DConditionModel
from ...models.lora import adjust_lora_scale_text_encoder
from ...schedulers import LCMScheduler
from ...utils import (
USE_PEFT_BACKEND,
logging,
scale_lora_layers,
unscale_lora_layers,
)
from ...utils import USE_PEFT_BACKEND, logging, replace_example_docstring, scale_lora_layers, unscale_lora_layers
from ...utils.torch_utils import randn_tensor
from ..pipeline_utils import DiffusionPipeline
from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker


logger = logging.get_logger(__name__) # pylint: disable=invalid-name

# Text-to-image usage example; passed to `@replace_example_docstring(...)` on
# `__call__` below, whose docstring carries the matching "Examples:" section.
EXAMPLE_DOC_STRING = """
Examples:
```py
>>> from diffusers import DiffusionPipeline
>>> import torch

>>> pipe = DiffusionPipeline.from_pretrained("SimianLuo/LCM_Dreamshaper_v7")
>>> # To save GPU memory, torch.float16 can be used, but it may compromise image quality.
>>> pipe.to(torch_device="cuda", torch_dtype=torch.float32)

>>> prompt = "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"

>>> # Can be set to 1~50 steps. LCM support fast inference even <= 4 steps. Recommend: 1~8 steps.
>>> num_inference_steps = 4
>>> images = pipe(prompt=prompt, num_inference_steps=num_inference_steps, guidance_scale=8.0).images
>>> images[0].save("image.png")
```
"""


class LatentConsistencyModelPipeline(
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
Expand Down Expand Up @@ -477,6 +491,7 @@ def check_inputs(
raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}")

@torch.no_grad()
@replace_example_docstring(EXAMPLE_DOC_STRING)
def __call__(
self,
prompt: Union[str, List[str]] = None,
Expand Down Expand Up @@ -550,6 +565,8 @@ def __call__(
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
the output of the pre-final layer will be used for computing the prompt embeddings.

Examples:

Returns:
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,
Expand Down