diff --git a/docs/source/en/training/distributed_inference.md b/docs/source/en/training/distributed_inference.md
index 99c6acfe8d96..72bb5f5fd7fe 100644
--- a/docs/source/en/training/distributed_inference.md
+++ b/docs/source/en/training/distributed_inference.md
@@ -1,3 +1,15 @@
+
+
# Distributed inference with multiple GPUs
On distributed setups, you can run inference across multiple GPUs with 🤗 [Accelerate](https://huggingface.co/docs/accelerate/index) or [PyTorch Distributed](https://pytorch.org/tutorials/beginner/dist_overview.html), which is useful for generating with multiple prompts in parallel.
@@ -13,6 +25,7 @@ To begin, create a Python file and initialize an [`accelerate.PartialState`] to
Now use the [`~accelerate.PartialState.split_between_processes`] utility as a context manager to automatically distribute the prompts between the number of processes.
```py
+import torch
from accelerate import PartialState
from diffusers import DiffusionPipeline
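# a minimal illustrative sketch of how this block typically continues; the
# checkpoint and prompts are placeholders, not part of the patch
pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
)
distributed_state = PartialState()
pipeline.to(distributed_state.device)

# each process receives its own slice of the prompt list
with distributed_state.split_between_processes(["a dog", "a cat"]) as prompt:
    result = pipeline(prompt).images[0]
    result.save(f"result_{distributed_state.process_index}.png")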
@@ -92,4 +105,4 @@ Once you've completed the inference script, use the `--nproc_per_node` argument
```bash
torchrun --nproc_per_node=2 run_distributed.py
-```
\ No newline at end of file
+```
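The `run_distributed.py` script this command launches is expected to look roughly like the sketch below; the checkpoint, prompts, and filenames are placeholders, and `torchrun` provides the rank and world size through environment variables that `init_process_group` reads.
```py
import torch
import torch.distributed as dist

from diffusers import DiffusionPipeline

def main():
    # torchrun sets RANK, WORLD_SIZE, and LOCAL_RANK for every process it spawns
    dist.init_process_group("nccl")
    rank = dist.get_rank()

    pipeline = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
    )
    pipeline.to(rank)

    # give each rank a different prompt
    prompt = ["a dog", "a cat"][rank % 2]
    image = pipeline(prompt).images[0]
    image.save(f"image_rank{rank}.png")

if __name__ == "__main__":
    main()
```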
diff --git a/docs/source/en/using-diffusers/control_brightness.md b/docs/source/en/using-diffusers/control_brightness.md
index 17c107ba57b8..c5f9870776dc 100644
--- a/docs/source/en/using-diffusers/control_brightness.md
+++ b/docs/source/en/using-diffusers/control_brightness.md
@@ -34,15 +34,15 @@ Next, configure the following parameters in the [`DDIMScheduler`]:
2. `timestep_spacing="trailing"`, starts sampling from the last timestep
```py
->>> from diffusers import DiffusionPipeline, DDIMScheduler
+from diffusers import DiffusionPipeline, DDIMScheduler
->>> pipeline = DiffusionPipeline.from_pretrained("ptx0/pseudo-journey-v2", use_safetensors=True)
-# switch the scheduler in the pipeline to use the DDIMScheduler
+pipeline = DiffusionPipeline.from_pretrained("ptx0/pseudo-journey-v2", use_safetensors=True)
->>> pipeline.scheduler = DDIMScheduler.from_config(
-... pipeline.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
-... )
->>> pipeline.to("cuda")
+# switch the scheduler in the pipeline to use the DDIMScheduler
+pipeline.scheduler = DDIMScheduler.from_config(
+ pipeline.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
+)
+pipeline.to("cuda")
```
Finally, in your call to the pipeline, set `guidance_rescale` to prevent overexposure:
@@ -50,6 +50,7 @@ Finally, in your call to the pipeline, set `guidance_rescale` to prevent overexp
```py
prompt = "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k"
image = pipeline(prompt, guidance_rescale=0.7).images[0]
+image
```
diff --git a/docs/source/en/using-diffusers/freeu.md b/docs/source/en/using-diffusers/freeu.md
index 4f3c64096705..c5f3577ae3aa 100644
--- a/docs/source/en/using-diffusers/freeu.md
+++ b/docs/source/en/using-diffusers/freeu.md
@@ -23,7 +23,7 @@ However, the skip connection can sometimes introduce unnatural image details. [F
FreeU is applied during inference and it does not require any additional training. The technique works for different tasks such as text-to-image, image-to-image, and text-to-video.
-In this guide, you will apply FreeU to the [`StableDiffusionPipeline`], [`StableDiffusionXLPipeline`], and [`TextToVideoSDPipeline`].
+In this guide, you will apply FreeU to the [`StableDiffusionPipeline`], [`StableDiffusionXLPipeline`], and [`TextToVideoSDPipeline`]. You need to install Diffusers from source to run the examples below.
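Conceptually, FreeU is toggled with a single call on a loaded pipeline. Below is a minimal sketch; the scaling factors are placeholders, and each section in this guide uses values tuned for its model:
```py
# b1/b2 scale the backbone feature maps, s1/s2 scale the skip-connection features
pipeline.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)

# run inference as usual, then turn FreeU off again if you no longer want it
pipeline.disable_freeu()
```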
## StableDiffusionPipeline
@@ -58,6 +58,7 @@ And then run inference:
prompt = "A squirrel eating a burger"
seed = 2023
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
+image
```
The figure below compares non-FreeU and FreeU results respectively for the same hyperparameters used above (`prompt` and `seed`):
@@ -80,9 +81,9 @@ seed = 2023
pipeline.enable_freeu(s1=0.9, s2=0.2, b1=1.1, b2=1.2)
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
+image
```
-

## Stable Diffusion XL
@@ -100,13 +101,13 @@ pipeline = DiffusionPipeline.from_pretrained(
prompt = "A squirrel eating a burger"
seed = 2023
-# Comes from
+# Comes from
# https://wandb.ai/nasirk24/UNET-FreeU-SDXL/reports/FreeU-SDXL-Optimal-Parameters--Vmlldzo1NDg4NTUw
pipeline.enable_freeu(s1=0.6, s2=0.4, b1=1.1, b2=1.2)
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
+image
```
-

## Text-to-video generation
@@ -119,8 +120,7 @@ from diffusers.utils import export_to_video
import torch
model_id = "cerspense/zeroscope_v2_576w"
-pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float16).to("cuda")
-pipe = pipe.to("cuda")
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
prompt = "an astronaut riding a horse on mars"
seed = 2023
@@ -132,4 +132,4 @@ video_frames = pipe(prompt, height=320, width=576, num_frames=30, generator=torc
export_to_video(video_frames, "astronaut_rides_horse.mp4")
```
-Thanks to [kadirnar](https://github.com/kadirnar/) for helping to integrate the feature, and to [justindujardin](https://github.com/justindujardin) for the helpful discussions.
\ No newline at end of file
+Thanks to [kadirnar](https://github.com/kadirnar/) for helping to integrate the feature, and to [justindujardin](https://github.com/justindujardin) for the helpful discussions.
diff --git a/docs/source/en/using-diffusers/reusing_seeds.md b/docs/source/en/using-diffusers/reusing_seeds.md
index 7cbaf2643202..d2638b469e30 100644
--- a/docs/source/en/using-diffusers/reusing_seeds.md
+++ b/docs/source/en/using-diffusers/reusing_seeds.md
@@ -16,7 +16,7 @@ specific language governing permissions and limitations under the License.
A common way to improve the quality of generated images is with *deterministic batch generation*: generate a batch of images and select one image to improve with a more detailed prompt in a second round of inference. The key is to pass a list of [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html#generator)s to the pipeline for batched image generation, and tie each `Generator` to a seed so you can reuse it for an image.
-Let's use [`runwayml/stable-diffusion-v1-5`](runwayml/stable-diffusion-v1-5) for example, and generate several versions of the following prompt:
+Let's use [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5) for example, and generate several versions of the following prompt:
```py
prompt = "Labrador in the style of Vermeer"
@@ -25,27 +25,27 @@ prompt = "Labrador in the style of Vermeer"
Instantiate a pipeline with [`DiffusionPipeline.from_pretrained`] and place it on a GPU (if available):
```python
->>> from diffusers import DiffusionPipeline
-
->>> pipe = DiffusionPipeline.from_pretrained(
-... "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
-... )
->>> pipe = pipe.to("cuda")
+import torch
+from diffusers import DiffusionPipeline
+from diffusers.utils import make_image_grid
+
+pipe = DiffusionPipeline.from_pretrained(
+ "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
+)
+pipe = pipe.to("cuda")
```
-Now, define four different `Generator`'s and assign each `Generator` a seed (`0` to `3`) so you can reuse a `Generator` later for a specific image:
+Now, define four different `Generator`s and assign each `Generator` a seed (`0` to `3`) so you can reuse a `Generator` later for a specific image:
```python
->>> import torch
-
->>> generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(4)]
+generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(4)]
```
Generate the images and have a look:
```python
->>> images = pipe(prompt, generator=generator, num_images_per_prompt=4).images
->>> images
+images = pipe(prompt, generator=generator, num_images_per_prompt=4).images
+make_image_grid(images, rows=2, cols=2)
```

@@ -60,8 +60,8 @@ generator = [torch.Generator(device="cuda").manual_seed(0) for i in range(4)]
Create four generators with seed `0`, and generate another batch of images, all of which should look like the first image from the previous round!
```python
->>> images = pipe(prompt, generator=generator).images
->>> images
+images = pipe(prompt, generator=generator).images
+make_image_grid(images, rows=2, cols=2)
```

diff --git a/docs/source/en/using-diffusers/textual_inversion_inference.md b/docs/source/en/using-diffusers/textual_inversion_inference.md
index 6e690c62f76a..7583dee63e3b 100644
--- a/docs/source/en/using-diffusers/textual_inversion_inference.md
+++ b/docs/source/en/using-diffusers/textual_inversion_inference.md
@@ -18,26 +18,12 @@ The [`StableDiffusionPipeline`] supports textual inversion, a technique that ena
This guide will show you how to run inference with textual inversion using a pre-learned concept from the Stable Diffusion Conceptualizer. If you're interested in teaching a model new concepts with textual inversion, take a look at the [Textual Inversion](../training/text_inversion) training guide.
-Login to your Hugging Face account:
-
-```py
-from huggingface_hub import notebook_login
-
-notebook_login()
-```
-
Import the necessary libraries:
```py
-import os
import torch
-
-import PIL
-from PIL import Image
-
from diffusers import StableDiffusionPipeline
from diffusers.utils import make_image_grid
-from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
```
## Stable Diffusion 1 and 2
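Loading a checkpoint and a pre-learned concept typically looks like the sketch below; the checkpoint and the `sd-concepts-library/cat-toy` concept are illustrative choices, picked because the prompt in the next step uses a `<cat-toy>` placeholder token:
```py
pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
).to("cuda")

# pull in a pre-learned concept; its placeholder token becomes usable in prompts
pipeline.load_textual_inversion("sd-concepts-library/cat-toy")
```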
@@ -64,7 +50,7 @@ Create a prompt with the pre-learned concept by using the special placeholder to
```py
prompt = "a grafitti in a favela wall with a
on it"
-num_samples = 2
+num_samples_per_row = 2
num_rows = 2
```
@@ -73,10 +59,10 @@ Then run the pipeline (feel free to adjust the parameters like `num_inference_st
```py
all_images = []
for _ in range(num_rows):
- images = pipe(prompt, num_images_per_prompt=num_samples, num_inference_steps=50, guidance_scale=7.5).images
+ images = pipeline(prompt, num_images_per_prompt=num_samples_per_row, num_inference_steps=50, guidance_scale=7.5).images
all_images.extend(images)
-grid = make_image_grid(all_images, num_samples, num_rows)
+grid = make_image_grid(all_images, num_rows, num_samples_per_row)
grid
```
@@ -84,7 +70,6 @@ grid
-
## Stable Diffusion XL
Stable Diffusion XL (SDXL) can also use textual inversion vectors for inference. In contrast to Stable Diffusion 1 and 2, SDXL has two text encoders so you'll need two textual inversion embeddings - one for each text encoder model.
@@ -109,9 +94,9 @@ state_dict
[ 0.0475, -0.0508, -0.0145, ..., 0.0070, -0.0089, -0.0163]],
```
-There are two tensors, `"clip-g"` and `"clip-l"`.
-`"clip-g"` corresponds to the bigger text encoder in SDXL and refers to
-`pipe.text_encoder_2` and `"clip-l"` refers to `pipe.text_encoder`.
+There are two tensors, `"clip_g"` and `"clip_l"`.
+`"clip_g"` corresponds to the bigger text encoder in SDXL and refers to
+`pipe.text_encoder_2` and `"clip_l"` refers to `pipe.text_encoder`.
Now you can load each tensor separately by passing them along with the correct text encoder and tokenizer
to [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`]:
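A sketch of what loading both embeddings looks like, assuming the standard `load_textual_inversion` keyword arguments and the `unaestheticXLv31` token used in the surrounding example:
```py
# "clip_g" pairs with the second, larger text encoder and its tokenizer
pipe.load_textual_inversion(
    state_dict["clip_g"], token="unaestheticXLv31",
    text_encoder=pipe.text_encoder_2, tokenizer=pipe.tokenizer_2
)
# "clip_l" pairs with the first text encoder and tokenizer
pipe.load_textual_inversion(
    state_dict["clip_l"], token="unaestheticXLv31",
    text_encoder=pipe.text_encoder, tokenizer=pipe.tokenizer
)
```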
@@ -129,4 +114,5 @@ pipe.load_textual_inversion(state_dict["clip_l"], token="unaestheticXLv31", text
# the embedding should be used as a negative embedding, so we pass it as a negative prompt
generator = torch.Generator().manual_seed(33)
image = pipe("a woman standing in front of a mountain", negative_prompt="unaestheticXLv31", generator=generator).images[0]
+image
```
diff --git a/docs/source/en/using-diffusers/weighted_prompts.md b/docs/source/en/using-diffusers/weighted_prompts.md
index ede2c7f35169..5007d235ae99 100644
--- a/docs/source/en/using-diffusers/weighted_prompts.md
+++ b/docs/source/en/using-diffusers/weighted_prompts.md
@@ -41,6 +41,7 @@ import torch
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_safetensors=True)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.to("cuda")
prompt = "a red cat playing with a ball"
@@ -165,7 +166,9 @@ import torch
from diffusers import StableDiffusionPipeline
from compel import Compel, DiffusersTextualInversionManager
-pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True, variant="fp16").to("cuda")
+pipe = StableDiffusionPipeline.from_pretrained(
+ "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16,
+ use_safetensors=True, variant="fp16").to("cuda")
pipe.load_textual_inversion("sd-concepts-library/midjourney-style")
```
@@ -173,7 +176,7 @@ Compel provides a `DiffusersTextualInversionManager` class to simplify prompt we
```py
textual_inversion_manager = DiffusersTextualInversionManager(pipe)
-compel = Compel(
+compel_proc = Compel(
tokenizer=pipe.tokenizer,
text_encoder=pipe.text_encoder,
textual_inversion_manager=textual_inversion_manager)
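# a minimal usage sketch for the processor defined above: the <midjourney-style>
# token comes from the concept loaded earlier, and the exact prompt plus the ++
# up-weighting are illustrative
prompt_embeds = compel_proc("a red cat++ playing with a ball <midjourney-style>")
image = pipe(prompt_embeds=prompt_embeds, num_inference_steps=30).images[0]
image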
@@ -225,6 +228,8 @@ Stable Diffusion XL (SDXL) has two tokenizers and text encoders so its usage is
```py
from compel import Compel, ReturnedEmbeddingsType
from diffusers import DiffusionPipeline
+from diffusers.utils import make_image_grid
+import torch
pipeline = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
@@ -251,6 +256,7 @@ conditioning, pooled = compel(prompt)
# generate image
generator = [torch.Generator().manual_seed(33) for _ in range(len(prompt))]
images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, generator=generator, num_inference_steps=30).images
+make_image_grid(images, rows=1, cols=2)
```
@@ -262,4 +268,4 @@ images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, gener
"a red cat playing with a (ball)0.6"
-
\ No newline at end of file
+