From 98de29d156f252e394eaa6b76f4052e772572daa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Tolga=20Cang=C3=B6z?= Date: Wed, 28 Feb 2024 12:53:26 +0300 Subject: [PATCH 1/4] Add copyright notice to relevant files and fix typos --- .../en/api/models/consistency_decoder_vae.md | 14 +++++++++++++- .../api/pipelines/stable_diffusion/sdxl_turbo.md | 2 +- .../en/api/schedulers/consistency_decoder.md | 16 ++++++++++++++-- docs/source/en/using-diffusers/sdxl_turbo.md | 12 ++++++------ 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/docs/source/en/api/models/consistency_decoder_vae.md b/docs/source/en/api/models/consistency_decoder_vae.md index b45f7fa059dc..94a64820ebb1 100644 --- a/docs/source/en/api/models/consistency_decoder_vae.md +++ b/docs/source/en/api/models/consistency_decoder_vae.md @@ -1,6 +1,18 @@ + + # Consistency Decoder -Consistency decoder can be used to decode the latents from the denoising UNet in the [`StableDiffusionPipeline`]. This decoder was introduced in the [DALL-E 3 technical report](https://openai.com/dall-e-3). +Consistency decoder can be used to decode the latents from the denoising UNet in the [`StableDiffusionPipeline`]. This decoder was introduced in the [DALL-E 3 technical report](https://openai.com/dall-e-3). The original codebase can be found at [openai/consistencydecoder](https://github.com/openai/consistencydecoder). diff --git a/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md b/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md index 935de29c9619..764685a73cfb 100644 --- a/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md +++ b/docs/source/en/api/pipelines/stable_diffusion/sdxl_turbo.md @@ -21,7 +21,7 @@ The abstract from the paper is: ## Tips - SDXL Turbo uses the exact same architecture as [SDXL](./stable_diffusion_xl), which means it also has the same API. Please refer to the [SDXL](./stable_diffusion_xl) API reference for more details. 
-- SDXL Turbo should disable guidance scale by setting `guidance_scale=0.0` +- SDXL Turbo should disable guidance scale by setting `guidance_scale=0.0`. - SDXL Turbo should use `timestep_spacing='trailing'` for the scheduler and use between 1 and 4 steps. - SDXL Turbo has been trained to generate images of size 512x512. - SDXL Turbo is open-access, but not open-source meaning that one might have to buy a model license in order to use it for commercial applications. Make sure to read the [official model card](https://huggingface.co/stabilityai/sdxl-turbo) to learn more. diff --git a/docs/source/en/api/schedulers/consistency_decoder.md b/docs/source/en/api/schedulers/consistency_decoder.md index 6c937b913279..a9eaa5336dcd 100644 --- a/docs/source/en/api/schedulers/consistency_decoder.md +++ b/docs/source/en/api/schedulers/consistency_decoder.md @@ -1,9 +1,21 @@ + + # ConsistencyDecoderScheduler -This scheduler is a part of the [`ConsistencyDecoderPipeline`] and was introduced in [DALL-E 3](https://openai.com/dall-e-3). +This scheduler is a part of the [`ConsistencyDecoderPipeline`] and was introduced in [DALL-E 3](https://openai.com/dall-e-3). The original codebase can be found at [openai/consistency_models](https://github.com/openai/consistency_models). 
## ConsistencyDecoderScheduler -[[autodoc]] schedulers.scheduling_consistency_decoder.ConsistencyDecoderScheduler \ No newline at end of file +[[autodoc]] schedulers.scheduling_consistency_decoder.ConsistencyDecoderScheduler diff --git a/docs/source/en/using-diffusers/sdxl_turbo.md b/docs/source/en/using-diffusers/sdxl_turbo.md index 4e960c320c97..7ef76e7aa6dc 100644 --- a/docs/source/en/using-diffusers/sdxl_turbo.md +++ b/docs/source/en/using-diffusers/sdxl_turbo.md @@ -31,7 +31,7 @@ Before you begin, make sure you have the following libraries installed: Model weights may be stored in separate subfolders on the Hub or locally, in which case, you should use the [`~StableDiffusionXLPipeline.from_pretrained`] method: ```py -from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image +from diffusers import AutoPipelineForText2Image import torch pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16") @@ -53,7 +53,7 @@ pipeline = pipeline.to("cuda") For text-to-image, pass a text prompt. By default, SDXL Turbo generates a 512x512 image, and that resolution gives the best results. You can try setting the `height` and `width` parameters to 768x768 or 1024x1024, but you should expect quality degradations when doing so. -Make sure to set `guidance_scale` to 0.0 to disable, as the model was trained without it. A single inference step is enough to generate high quality images. +Make sure to set `guidance_scale` to 0.0 to disable it, as the model was trained without it. A single inference step is enough to generate high quality images. Increasing the number of steps to 2, 3 or 4 should improve image quality. ```py @@ -75,7 +75,7 @@ image ## Image-to-image -For image-to-image generation, make sure that `num_inference_steps * strength` is larger or equal to 1. +For image-to-image generation, make sure that `num_inference_steps * strength` is greater than or equal to 1. 
The image-to-image pipeline will run for `int(num_inference_steps * strength)` steps, e.g. `0.5 * 2.0 = 1` step in our example below. @@ -84,14 +84,14 @@ from diffusers import AutoPipelineForImage2Image from diffusers.utils import load_image, make_image_grid # use from_pipe to avoid consuming additional memory when loading a checkpoint -pipeline = AutoPipelineForImage2Image.from_pipe(pipeline_text2image).to("cuda") +pipeline_image2image = AutoPipelineForImage2Image.from_pipe(pipeline_text2image).to("cuda") init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png") init_image = init_image.resize((512, 512)) prompt = "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k" -image = pipeline(prompt, image=init_image, strength=0.5, guidance_scale=0.0, num_inference_steps=2).images[0] +image = pipeline_image2image(prompt, image=init_image, strength=0.5, guidance_scale=0.0, num_inference_steps=2).images[0] make_image_grid([init_image, image], rows=1, cols=2) ``` @@ -101,7 +101,7 @@ make_image_grid([init_image, image], rows=1, cols=2) ## Speed-up SDXL Turbo even more -- Compile the UNet if you are using PyTorch version 2 or better. The first inference run will be very slow, but subsequent ones will be much faster. +- Compile the UNet if you are using PyTorch version 2.0 or higher. The first inference run will be very slow, but subsequent ones will be much faster. ```py pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) From f734457e6d526093edbf1af1e9d86d9dbc7ab4db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Tolga=20Cang=C3=B6z?= Date: Wed, 28 Feb 2024 14:04:51 +0300 Subject: [PATCH 2/4] Set `timestep_spacing` parameter of `StableDiffusionXLPipeline`'s scheduler to `'trailing'`. 
--- docs/source/en/using-diffusers/sdxl_turbo.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/en/using-diffusers/sdxl_turbo.md b/docs/source/en/using-diffusers/sdxl_turbo.md index 7ef76e7aa6dc..14002d251eff 100644 --- a/docs/source/en/using-diffusers/sdxl_turbo.md +++ b/docs/source/en/using-diffusers/sdxl_turbo.md @@ -49,6 +49,8 @@ pipeline = StableDiffusionXLPipeline.from_single_file( pipeline = pipeline.to("cuda") ``` +In this method, you need to set `timestep_spacing` parameter of the `StableDiffusionXLPipeline`'s scheduler to `'trailing'`. + ## Text-to-image For text-to-image, pass a text prompt. By default, SDXL Turbo generates a 512x512 image, and that resolution gives the best results. You can try setting the `height` and `width` parameters to 768x768 or 1024x1024, but you should expect quality degradations when doing so. From cf351ee785509ae12c31626dea9b531ed1a35d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Tolga=20Cang=C3=B6z?= Date: Thu, 29 Feb 2024 20:34:43 +0300 Subject: [PATCH 3/4] Update `StableDiffusionXLPipeline.from_single_file` by including EulerAncestralDiscreteScheduler with `timestep_spacing="trailing"` param. 
--- docs/source/en/using-diffusers/sdxl_turbo.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/en/using-diffusers/sdxl_turbo.md b/docs/source/en/using-diffusers/sdxl_turbo.md index 14002d251eff..0e7ee4db27f8 100644 --- a/docs/source/en/using-diffusers/sdxl_turbo.md +++ b/docs/source/en/using-diffusers/sdxl_turbo.md @@ -41,15 +41,17 @@ pipeline = pipeline.to("cuda") You can also use the [`~StableDiffusionXLPipeline.from_single_file`] method to load a model checkpoint stored in a single file format (`.ckpt` or `.safetensors`) from the Hub or locally: ```py -from diffusers import StableDiffusionXLPipeline +from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler import torch pipeline = StableDiffusionXLPipeline.from_single_file( - "https://huggingface.co/stabilityai/sdxl-turbo/blob/main/sd_xl_turbo_1.0_fp16.safetensors", torch_dtype=torch.float16) + "https://huggingface.co/stabilityai/sdxl-turbo/blob/main/sd_xl_turbo_1.0_fp16.safetensors", + torch_dtype=torch.float16, variant="fp16") pipeline = pipeline.to("cuda") +pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config, timestep_spacing="trailing") ``` -In this method, you need to set `timestep_spacing` parameter of the `StableDiffusionXLPipeline`'s scheduler to `'trailing'`. +When using [`~StableDiffusionXLPipeline.from_single_file`], users can play around with the scheduler config values, but the main one is to set `timestep_spacing="trailing"`. 
## Text-to-image From 3b481dd4db3377bf06955bc429daa8756fde2a9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Tolga=20Cang=C3=B6z?= Date: Thu, 29 Feb 2024 23:06:14 +0300 Subject: [PATCH 4/4] Update model loading method in SDXL Turbo documentation --- docs/source/en/using-diffusers/sdxl_turbo.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/en/using-diffusers/sdxl_turbo.md b/docs/source/en/using-diffusers/sdxl_turbo.md index 0e7ee4db27f8..9ec0e94ec1f6 100644 --- a/docs/source/en/using-diffusers/sdxl_turbo.md +++ b/docs/source/en/using-diffusers/sdxl_turbo.md @@ -38,7 +38,7 @@ pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/sdxl-turbo", t pipeline = pipeline.to("cuda") ``` -You can also use the [`~StableDiffusionXLPipeline.from_single_file`] method to load a model checkpoint stored in a single file format (`.ckpt` or `.safetensors`) from the Hub or locally: +You can also use the [`~StableDiffusionXLPipeline.from_single_file`] method to load a model checkpoint stored in a single file format (`.ckpt` or `.safetensors`) from the Hub or locally. For this loading method, you need to set `timestep_spacing="trailing"` (feel free to experiment with the other scheduler config values to get better results): ```py from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler @@ -51,8 +51,6 @@ pipeline = pipeline.to("cuda") pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config, timestep_spacing="trailing") ``` -When using [`~StableDiffusionXLPipeline.from_single_file`], users can play around with the scheduler config values, but the main one is to set `timestep_spacing="trailing"`. - ## Text-to-image For text-to-image, pass a text prompt. By default, SDXL Turbo generates a 512x512 image, and that resolution gives the best results. You can try setting the `height` and `width` parameters to 768x768 or 1024x1024, but you should expect quality degradations when doing so.