30 | 30 | XFormersAttnProcessor,
31 | 31 | )
32 | 32 | from ...schedulers import KarrasDiffusionSchedulers
33 |    | -from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor
   | 33 | +from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
34 | 34 | from ..pipeline_utils import DiffusionPipeline
35 | 35 | from . import StableDiffusionXLPipelineOutput
36 | 36 | from .watermark import StableDiffusionXLWatermarker
39 | 39 | logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
40 | 40 |
41 | 41 |
   | 42 | +EXAMPLE_DOC_STRING = """
   | 43 | +    Examples:
   | 44 | +        ```py
   | 45 | +        >>> import torch
   | 46 | +        >>> from diffusers import StableDiffusionXLInpaintPipeline
   | 47 | +        >>> from diffusers.utils import load_image
   | 48 | +
   | 49 | +        >>> pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
   | 50 | +        ...     "stabilityai/stable-diffusion-xl-base-0.9",
   | 51 | +        ...     torch_dtype=torch.float16,
   | 52 | +        ...     variant="fp16",
   | 53 | +        ...     use_safetensors=True,
   | 54 | +        ... )
   | 55 | +        >>> pipe.to("cuda")
   | 56 | +
   | 57 | +        >>> img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
   | 58 | +        >>> mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
   | 59 | +
   | 60 | +        >>> init_image = load_image(img_url).convert("RGB")
   | 61 | +        >>> mask_image = load_image(mask_url).convert("RGB")
   | 62 | +
   | 63 | +        >>> prompt = "A majestic tiger sitting on a bench"
   | 64 | +        >>> image = pipe(
   | 65 | +        ...     prompt=prompt, image=init_image, mask_image=mask_image, num_inference_steps=50, strength=0.80
   | 66 | +        ... ).images[0]
   | 67 | +        ```
   | 68 | +"""
   | 69 | +
   | 70 | +
42 | 71 | # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
43 | 72 | def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
44 | 73 |     """
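The hunk above cuts off at the docstring, so the body of `rescale_noise_cfg` is not shown here. For orientation, this is a minimal sketch of what the guidance-rescaling step computes, paraphrased from the upstream `pipeline_stable_diffusion` function it is copied from (details may differ from the exact source):

```py
import torch


def rescale_noise_cfg_sketch(noise_cfg, noise_pred_text, guidance_rescale=0.0):
    # Per-sample std over all non-batch dims of the text-conditioned prediction.
    std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
    # Per-sample std of the classifier-free-guidance-combined prediction.
    std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
    # Rescale the combined prediction so its std matches the text-conditioned one,
    # countering "overexposure" (section 3.4 of https://huggingface.co/papers/2305.08891).
    rescaled = noise_cfg * (std_text / std_cfg)
    # Blend rescaled and original predictions by the guidance_rescale factor.
    return guidance_rescale * rescaled + (1.0 - guidance_rescale) * noise_cfg
```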
@@ -810,6 +839,7 @@ def upcast_vae(self):
810 | 839 |         self.vae.decoder.mid_block.to(dtype)
811 | 840 |
812 | 841 |     @torch.no_grad()
    | 842 | +    @replace_example_docstring(EXAMPLE_DOC_STRING)
813 | 843 |     def __call__(
814 | 844 |         self,
815 | 845 |         prompt: Union[str, List[str]] = None,
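The `@replace_example_docstring` decorator added here is what ties the new `EXAMPLE_DOC_STRING` to `__call__`: it swaps the bare `Examples:` placeholder in the method's docstring (added further down in this diff) for the full example text. A minimal sketch of the idea, with a simplified signature rather than the exact `diffusers.utils` implementation:

```py
def replace_example_docstring_sketch(example_docstring):
    # Decorator factory: replace the line containing only "Examples:"
    # in the wrapped function's docstring with the full example block.
    def decorator(fn):
        lines = (fn.__doc__ or "").split("\n")
        for i, line in enumerate(lines):
            if line.strip() == "Examples:":
                lines[i] = example_docstring
                break
        fn.__doc__ = "\n".join(lines)
        return fn

    return decorator
```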
@@ -948,43 +978,35 @@ def __call__(
948 | 978  |                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
949 | 979  |                 `self.processor` in
950 | 980  |                 [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
951 |      | -        Examples:
    | 981  | +            original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
    | 982  | +                If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
    | 983  | +                `original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
    | 984  | +                explained in section 2.2 of
    | 985  | +                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    | 986  | +            crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
    | 987  | +                `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
    | 988  | +                `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
    | 989  | +                `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
    | 990  | +                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    | 991  | +            target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
    | 992  | +                For most cases, `target_size` should be set to the desired height and width of the generated image. If
    | 993  | +                not specified it will default to `(width, height)`. Part of SDXL's micro-conditioning as explained in
    | 994  | +                section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    | 995  | +            aesthetic_score (`float`, *optional*, defaults to 6.0):
    | 996  | +                Used to simulate an aesthetic score of the generated image by influencing the positive text condition.
    | 997  | +                Part of SDXL's micro-conditioning as explained in section 2.2 of
    | 998  | +                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
    | 999  | +            negative_aesthetic_score (`float`, *optional*, defaults to 2.5):
    | 1000 | +                Part of SDXL's micro-conditioning as explained in section 2.2 of
    | 1001 | +                [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). Can be used to
    | 1002 | +                simulate an aesthetic score of the generated image by influencing the negative text condition.
952 | 1003 |
953 |      | -        ```py
954 |      | -        >>> import PIL
955 |      | -        >>> import requests
956 |      | -        >>> import torch
957 |      | -        >>> from io import BytesIO
958 |      | -
959 |      | -        >>> from diffusers import StableDiffusionInpaintPipeline
960 |      | -
961 |      | -
962 |      | -        >>> def download_image(url):
963 |      | -        ...     response = requests.get(url)
964 |      | -        ...     return PIL.Image.open(BytesIO(response.content)).convert("RGB")
965 |      | -
966 |      | -
967 |      | -        >>> img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
968 |      | -        >>> mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
969 |      | -
970 |      | -        >>> init_image = download_image(img_url).resize((512, 512))
971 |      | -        >>> mask_image = download_image(mask_url).resize((512, 512))
972 |      | -
973 |      | -        >>> pipe = StableDiffusionInpaintPipeline.from_pretrained(
974 |      | -        ...     "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
975 |      | -        ... )
976 |      | -        >>> pipe = pipe.to("cuda")
977 |      | -
978 |      | -        >>> prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
979 |      | -        >>> image = pipe(prompt=prompt, image=init_image, mask_image=mask_image).images[0]
980 |      | -        ```
    | 1004 | +        Examples:
981 | 1005 |
982 | 1006 |         Returns:
983 |      | -            [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
984 |      | -            [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple.
985 |      | -            When returning a tuple, the first element is a list with the generated images, and the second element is a
986 |      | -            list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
987 |      | -            (nsfw) content, according to the `safety_checker`.
    | 1007 | +            [`~pipelines.stable_diffusion.StableDiffusionXLPipelineOutput`] or `tuple`:
    | 1008 | +            [`~pipelines.stable_diffusion.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
    | 1009 | +            `tuple`. When returning a tuple, the first element is a list with the generated images.
988 | 1010 |         """
989 | 1011 |         # 0. Default height and width to unet
990 | 1012 |         height = height or self.unet.config.sample_size * self.vae_scale_factor
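The micro-conditioning arguments documented in the new docstring above can be passed straight to the pipeline call. A hedged sketch that reuses `pipe`, `init_image`, and `mask_image` from `EXAMPLE_DOC_STRING` — the parameter names come from this diff, but the specific values are illustrative only:

```py
# Illustrative values only: pretend the source photo came from a larger
# 2048x2048 original, ask for a 1024x1024 result, and keep the default
# simulated aesthetic scores explicit for clarity.
image = pipe(
    prompt="A majestic tiger sitting on a bench",
    image=init_image,
    mask_image=mask_image,
    num_inference_steps=50,
    strength=0.80,
    original_size=(2048, 2048),
    crops_coords_top_left=(0, 0),
    target_size=(1024, 1024),
    aesthetic_score=6.0,
    negative_aesthetic_score=2.5,
).images[0]
```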