2 changes: 1 addition & 1 deletion examples/cogvideo/train_cogvideox_image_to_video_lora.py
@@ -555,7 +555,7 @@ def _load_dataset_from_local_path(self):

if any(not path.is_file() for path in instance_videos):
raise ValueError(
"Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found atleast one path that is not a valid file."
"Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found at least one path that is not a valid file."
)

return instance_prompts, instance_videos
2 changes: 1 addition & 1 deletion examples/cogvideo/train_cogvideox_lora.py
@@ -539,7 +539,7 @@ def _load_dataset_from_local_path(self):

if any(not path.is_file() for path in instance_videos):
raise ValueError(
"Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found atleast one path that is not a valid file."
"Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found at least one path that is not a valid file."
)

return instance_prompts, instance_videos
@@ -73,7 +73,7 @@ accelerate launch train_multi_subject_dreambooth_inpaint.py \

## 3. Results

A [![Weights & Biases](https://img.shields.io/badge/Weights%20&%20Biases-Report-blue)](https://wandb.ai/gzguevara/uncategorized/reports/Multi-Subject-Dreambooth-for-Inpainting--Vmlldzo2MzY5NDQ4?accessToken=y0nya2d7baguhbryxaikbfr1203amvn1jsmyl07vk122mrs7tnph037u1nqgse8t) is provided showing the training progress by every 50 steps. Note, the reported weights & baises run was performed on a A100 GPU with the following stetting:
A [![Weights & Biases](https://img.shields.io/badge/Weights%20&%20Biases-Report-blue)](https://wandb.ai/gzguevara/uncategorized/reports/Multi-Subject-Dreambooth-for-Inpainting--Vmlldzo2MzY5NDQ4?accessToken=y0nya2d7baguhbryxaikbfr1203amvn1jsmyl07vk122mrs7tnph037u1nqgse8t) is provided showing the training progress by every 50 steps. Note, the reported weights & biases run was performed on a A100 GPU with the following stetting:

```bash
accelerate launch train_multi_subject_dreambooth_inpaint.py \
2 changes: 1 addition & 1 deletion src/diffusers/hooks/faster_cache.py
@@ -146,7 +146,7 @@ class FasterCacheConfig:
alpha_low_frequency: float = 1.1
alpha_high_frequency: float = 1.1

# n as described in CFG-Cache explanation in the paper - dependant on the model
# n as described in CFG-Cache explanation in the paper - dependent on the model
unconditional_batch_skip_range: int = 5
unconditional_batch_timestep_skip_range: Tuple[int, int] = (-1, 641)

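Context for the `FasterCacheConfig` hunk above: the `unconditional_batch_*` fields implement the paper's CFG-Cache idea of re-using the unconditional (negative-prompt) branch for up to *n* denoising steps inside a given timestep window instead of recomputing it every step, with *n* chosen per model. A minimal sketch of building such a config, using only the fields visible in this hunk; the top-level import and the way the config is attached to a denoiser are assumptions and may differ by diffusers version:

```python
from diffusers import FasterCacheConfig  # assumed export; the dataclass lives in diffusers.hooks.faster_cache

# Sketch only: field names and values mirror the dataclass shown above.
config = FasterCacheConfig(
    alpha_low_frequency=1.1,
    alpha_high_frequency=1.1,
    unconditional_batch_skip_range=5,                   # "n": re-use the unconditional branch for up to 5 steps
    unconditional_batch_timestep_skip_range=(-1, 641),  # only skip inside this timestep window
)
# Attaching it to a model (e.g. something like pipe.transformer.enable_cache(config))
# is assumed from the caching docs and is not shown in this diff.
print(config.unconditional_batch_skip_range)
```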
2 changes: 1 addition & 1 deletion src/diffusers/hooks/hooks.py
@@ -45,7 +45,7 @@ def initialize_hook(self, module: torch.nn.Module) -> torch.nn.Module:

def deinitalize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
r"""
Hook that is executed when a model is deinitalized.
Hook that is executed when a model is deinitialized.

Args:
module (`torch.nn.Module`):
2 changes: 1 addition & 1 deletion src/diffusers/hooks/layerwise_casting.py
@@ -62,7 +62,7 @@ def initialize_hook(self, module: torch.nn.Module):

def deinitalize_hook(self, module: torch.nn.Module):
raise NotImplementedError(
"LayerwiseCastingHook does not support deinitalization. A model once enabled with layerwise casting will "
"LayerwiseCastingHook does not support deinitialization. A model once enabled with layerwise casting will "
"have casted its weights to a lower precision dtype for storage. Casting this back to the original dtype "
"will lead to precision loss, which might have an impact on the model's generation quality. The model should "
"be re-initialized and loaded in the original dtype."
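Context for the error message above: layerwise casting stores weights in a lower-precision dtype, and that cast is lossy, so casting back to the original dtype cannot recover the original values. A tiny, self-contained illustration of the precision loss in plain PyTorch (independent of the hook itself; `torch.float8_e4m3fn` needs a recent PyTorch):

```python
import torch

w = torch.randn(8, dtype=torch.float32)

# Down-cast for storage (what layerwise casting does), then cast back up.
roundtrip = w.to(torch.float8_e4m3fn).to(torch.float32)

# The round-trip error is nonzero: the original fp32 weights cannot be restored,
# which is why the hook refuses to "deinitialize" and asks for a fresh reload instead.
print((w - roundtrip).abs().max())  # small but nonzero
```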
2 changes: 1 addition & 1 deletion src/diffusers/loaders/peft.py
@@ -251,7 +251,7 @@ def load_lora_adapter(

rank = {}
for key, val in state_dict.items():
# Cannot figure out rank from lora layers that don't have atleast 2 dimensions.
# Cannot figure out rank from lora layers that don't have at least 2 dimensions.
# Bias layers in LoRA only have a single dimension
if "lora_B" in key and val.ndim > 1:
# Check out https://github.com/huggingface/peft/pull/2419 for the `^` symbol.
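Context for the comment above: for a LoRA pair, `lora_A.weight` has shape `(rank, in_features)` and `lora_B.weight` has shape `(out_features, rank)`, so the rank can be read off the second dimension of `lora_B` — which only exists for 2-D weights, not for an optional 1-D `lora_B` bias. A minimal sketch with a made-up state dict (the key names and shapes below are illustrative, not taken from this diff):

```python
import torch

# Hypothetical LoRA state dict: weights are 2-D, the optional bias is 1-D.
state_dict = {
    "transformer.blocks.0.attn.to_q.lora_A.weight": torch.zeros(4, 320),  # (rank, in_features)
    "transformer.blocks.0.attn.to_q.lora_B.weight": torch.zeros(320, 4),  # (out_features, rank)
    "transformer.blocks.0.attn.to_q.lora_B.bias": torch.zeros(320),       # 1-D, carries no rank info
}

rank = {}
for key, val in state_dict.items():
    # Mirrors the guard in the hunk above: only 2-D lora_B weights reveal the rank.
    if "lora_B" in key and val.ndim > 1:
        rank[key] = val.shape[1]

print(rank)  # {'transformer.blocks.0.attn.to_q.lora_B.weight': 4}
```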
4 changes: 2 additions & 2 deletions src/diffusers/models/autoencoders/autoencoder_kl.py
@@ -63,8 +63,8 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapter
Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
force_upcast (`bool`, *optional*, default to `True`):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
can be fine-tuned / trained to a lower range without loosing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
mid_block_add_attention (`bool`, *optional*, default to `True`):
If enabled, the mid_block of the Encoder and Decoder will have attention blocks. If set to false, the
mid_block will only have resnet blocks
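Context for the `force_upcast` docstring above: the stock SDXL VAE overflows in float16, so pipelines upcast it to float32 by default; the linked `sdxl-vae-fp16-fix` checkpoint is the case where that upcast (and `force_upcast`) can be turned off. A minimal sketch of swapping it in (the model IDs are the usual public checkpoints, not part of this diff):

```python
import torch
from diffusers import AutoencoderKL, DiffusionPipeline

# VAE fine-tuned to stay numerically stable in float16, so the float32 upcast
# described by `force_upcast` is unnecessary for it and can be disabled.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", vae=vae, torch_dtype=torch.float16
)
```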
4 changes: 2 additions & 2 deletions src/diffusers/models/autoencoders/autoencoder_kl_allegro.py
@@ -715,8 +715,8 @@ class AutoencoderKLAllegro(ModelMixin, ConfigMixin):
Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
force_upcast (`bool`, default to `True`):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
can be fine-tuned / trained to a lower range without loosing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
"""

_supports_gradient_checkpointing = True
4 changes: 2 additions & 2 deletions src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py
@@ -983,8 +983,8 @@ class AutoencoderKLCogVideoX(ModelMixin, ConfigMixin, FromOriginalModelMixin):
Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
force_upcast (`bool`, *optional*, default to `True`):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
can be fine-tuned / trained to a lower range without loosing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
"""

_supports_gradient_checkpointing = True
@@ -161,8 +161,8 @@ class AutoencoderKLTemporalDecoder(ModelMixin, ConfigMixin):
Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper.
force_upcast (`bool`, *optional*, default to `True`):
If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE
can be fine-tuned / trained to a lower range without loosing too much precision in which case
`force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast`
can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
"""

_supports_gradient_checkpointing = True
2 changes: 1 addition & 1 deletion src/diffusers/pipelines/consisid/consisid_utils.py
@@ -166,7 +166,7 @@ def process_face_embeddings(
raise RuntimeError("facexlib align face fail")
align_face = face_helper_1.cropped_faces[0] # (512, 512, 3) # RGB

# incase insightface didn't detect face
# in case insightface didn't detect face
if id_ante_embedding is None:
logger.warning("Failed to detect face using insightface. Extracting embedding with align face")
id_ante_embedding = face_helper_2.get_feat(align_face)
4 changes: 2 additions & 2 deletions tests/lora/utils.py
@@ -1092,7 +1092,7 @@ def test_simple_inference_with_text_denoiser_block_scale(self):
def test_simple_inference_with_text_denoiser_multi_adapter_block_lora(self):
"""
Tests a simple inference with lora attached to text encoder and unet, attaches
multiple adapters and set differnt weights for different blocks (i.e. block lora)
multiple adapters and set different weights for different blocks (i.e. block lora)
"""
for scheduler_cls in self.scheduler_classes:
components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls)
@@ -1636,7 +1636,7 @@ def test_simple_inference_with_text_lora_denoiser_fused_multi(
pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, adapter_names=["adapter-1"])
self.assertTrue(pipe.num_fused_loras == 1, f"{pipe.num_fused_loras=}, {pipe.fused_loras=}")

# Fusing should still keep the LoRA layers so outpout should remain the same
# Fusing should still keep the LoRA layers so output should remain the same
outputs_lora_1_fused = pipe(**inputs, generator=torch.manual_seed(0))[0]

self.assertTrue(
2 changes: 1 addition & 1 deletion tests/pipelines/cogvideo/test_cogvideox_image2video.py
@@ -270,7 +270,7 @@ def test_vae_tiling(self, expected_diff_max: float = 0.3):
generator_device = "cpu"
components = self.get_dummy_components()

# The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initalization.
# The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initialization.
# This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings.
# See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
components["transformer"] = CogVideoXTransformer3DModel.from_config(
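Context for the comment above: a learned positional embedding is a fixed-size table created at initialization, so positions (and hence resolutions) beyond that size simply do not exist, whereas sincos or RoPE embeddings are computed analytically for any length. A small standalone illustration in plain PyTorch (unrelated to the CogVideoX classes themselves):

```python
import torch
import torch.nn as nn

# A learned table only supports the positions it was initialized with.
learned = nn.Embedding(num_embeddings=16, embedding_dim=8)
# learned(torch.arange(32))  # IndexError: positions 16..31 were never created

# Sinusoidal embeddings are generated on the fly for any sequence length,
# which is why sincos/RoPE models are not tied to the initialization resolution.
def sincos_embedding(positions: torch.Tensor, dim: int = 8) -> torch.Tensor:
    freqs = torch.exp(-torch.arange(0, dim, 2).float() * torch.log(torch.tensor(10000.0)) / dim)
    angles = positions.float()[:, None] * freqs[None, :]
    return torch.cat([angles.sin(), angles.cos()], dim=-1)

print(sincos_embedding(torch.arange(32)).shape)  # torch.Size([32, 8])
```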
2 changes: 1 addition & 1 deletion tests/pipelines/consisid/test_consisid.py
@@ -280,7 +280,7 @@ def test_vae_tiling(self, expected_diff_max: float = 0.4):
generator_device = "cpu"
components = self.get_dummy_components()

# The reason to modify it this way is because ConsisID Transformer limits the generation to resolutions used during initalization.
# The reason to modify it this way is because ConsisID Transformer limits the generation to resolutions used during initialization.
# This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings.
# See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py
components["transformer"] = ConsisIDTransformer3DModel.from_config(
2 changes: 1 addition & 1 deletion tests/pipelines/kolors/test_kolors_img2img.py
@@ -155,6 +155,6 @@ def test_inference_batch_single_identical(self):
def test_float16_inference(self):
super().test_float16_inference(expected_max_diff=7e-2)

@unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unline kolors t2i.")
@unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unlike kolors t2i.")
def test_encode_prompt_works_in_isolation(self):
pass
2 changes: 1 addition & 1 deletion tests/pipelines/pag/test_pag_pixart_sigma.py
@@ -254,7 +254,7 @@ def test_attention_slicing_forward_pass(
assert_mean_pixel_difference(to_np(output_with_slicing1[0]), to_np(output_without_slicing[0]))
assert_mean_pixel_difference(to_np(output_with_slicing2[0]), to_np(output_without_slicing[0]))

# Because we have `pag_applied_layers` we cannot direcly apply
# Because we have `pag_applied_layers` we cannot directly apply
# `set_default_attn_processor`
def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4):
components = self.get_dummy_components()
2 changes: 1 addition & 1 deletion tests/pipelines/stable_unclip/test_stable_unclip.py
@@ -227,7 +227,7 @@ def test_stable_unclip(self):
pipe.enable_sequential_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe("anime turle", generator=generator, output_type="np")
output = pipe("anime turtle", generator=generator, output_type="np")

image = output.images[0]

4 changes: 2 additions & 2 deletions tests/pipelines/stable_unclip/test_stable_unclip_img2img.py
@@ -250,7 +250,7 @@ def test_stable_unclip_l_img2img(self):
pipe.enable_sequential_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe(input_image, "anime turle", generator=generator, output_type="np")
output = pipe(input_image, "anime turtle", generator=generator, output_type="np")

image = output.images[0]

@@ -277,7 +277,7 @@ def test_stable_unclip_h_img2img(self):
pipe.enable_sequential_cpu_offload()

generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe(input_image, "anime turle", generator=generator, output_type="np")
output = pipe(input_image, "anime turtle", generator=generator, output_type="np")

image = output.images[0]

10 changes: 5 additions & 5 deletions tests/pipelines/test_pipelines_common.py
@@ -2096,11 +2096,11 @@ def test_encode_prompt_works_in_isolation(self, extra_required_param_value_dict=
with torch.no_grad():
encoded_prompt_outputs = pipe_with_just_text_encoder.encode_prompt(**encode_prompt_inputs)

# Programatically determine the reutrn names of `encode_prompt.`
ast_vistor = ReturnNameVisitor()
encode_prompt_tree = ast_vistor.get_ast_tree(cls=self.pipeline_class)
ast_vistor.visit(encode_prompt_tree)
prompt_embed_kwargs = ast_vistor.return_names
# Programmatically determine the return names of `encode_prompt.`
ast_visitor = ReturnNameVisitor()
encode_prompt_tree = ast_visitor.get_ast_tree(cls=self.pipeline_class)
ast_visitor.visit(encode_prompt_tree)
prompt_embed_kwargs = ast_visitor.return_names
prompt_embeds_kwargs = dict(zip(prompt_embed_kwargs, encoded_prompt_outputs))

# Pack the outputs of `encode_prompt`.
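Context for the `ReturnNameVisitor` usage above: it parses the source of `encode_prompt`, records the variable names in its `return` statement, and those names are then zipped with the actual outputs to build the `prompt_embeds_kwargs` dict. A simplified, self-contained sketch of the same idea; the class below is a hypothetical stand-in, not the test suite's actual visitor:

```python
import ast
import inspect
import textwrap


class ReturnNames(ast.NodeVisitor):
    """Hypothetical stand-in: collect the names a function returns."""

    def __init__(self):
        self.return_names = []

    def visit_Return(self, node):
        elts = node.value.elts if isinstance(node.value, ast.Tuple) else [node.value]
        self.return_names = [e.id for e in elts if isinstance(e, ast.Name)]


def encode_prompt(prompt: str):
    prompt_embeds = prompt.upper()
    negative_prompt_embeds = prompt.lower()
    return prompt_embeds, negative_prompt_embeds


visitor = ReturnNames()
visitor.visit(ast.parse(textwrap.dedent(inspect.getsource(encode_prompt))))
print(visitor.return_names)  # ['prompt_embeds', 'negative_prompt_embeds']
# Zipping these names with encode_prompt's outputs gives kwargs like
# {"prompt_embeds": ..., "negative_prompt_embeds": ...} for the denoising call.
```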
2 changes: 1 addition & 1 deletion tests/quantization/bnb/test_4bit.py
@@ -205,7 +205,7 @@ def test_model_memory_usage(self):

def test_original_dtype(self):
r"""
A simple test to check if the model succesfully stores the original dtype
A simple test to check if the model successfully stores the original dtype
"""
self.assertTrue("_pre_quantization_dtype" in self.model_4bit.config)
self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config)
2 changes: 1 addition & 1 deletion tests/quantization/bnb/test_mixed_int8.py
@@ -195,7 +195,7 @@ def test_model_memory_usage(self):

def test_original_dtype(self):
r"""
A simple test to check if the model succesfully stores the original dtype
A simple test to check if the model successfully stores the original dtype
"""
self.assertTrue("_pre_quantization_dtype" in self.model_8bit.config)
self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config)
2 changes: 1 addition & 1 deletion tests/single_file/test_model_autoencoder_dc_single_file.py
@@ -95,7 +95,7 @@ def test_single_file_in_type_variant_components(self):
# `in` variant checkpoints require passing in a `config` parameter
# in order to set the scaling factor correctly.
# `in` and `mix` variants have the same keys and we cannot automatically infer a scaling factor.
# We default to using teh `mix` config
# We default to using the `mix` config
repo_id = "mit-han-lab/dc-ae-f128c512-in-1.0-diffusers"
ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0/blob/main/model.safetensors"

2 changes: 1 addition & 1 deletion utils/custom_init_isort.py
@@ -252,7 +252,7 @@ def sort_imports(file: str, check_only: bool = True):
code, start_prompt="_import_structure = {", end_prompt="if TYPE_CHECKING:"
)

# We ignore block 0 (everything untils start_prompt) and the last block (everything after end_prompt).
# We ignore block 0 (everything until start_prompt) and the last block (everything after end_prompt).
for block_idx in range(1, len(main_blocks) - 1):
# Check if the block contains some `_import_structure`s thingy to sort.
block = main_blocks[block_idx]