From cca6a08cbaa42078e1fa8ee1df8a79e5486fd17a Mon Sep 17 00:00:00 2001 From: co63oc Date: Thu, 1 May 2025 15:27:17 +0800 Subject: [PATCH 1/6] Fix typos in strings and comments Signed-off-by: co63oc --- .../cogvideo/train_cogvideox_image_to_video_lora.py | 2 +- examples/cogvideo/train_cogvideox_lora.py | 2 +- .../multi_subject_dreambooth_inpainting/README.md | 4 ++-- src/diffusers/hooks/faster_cache.py | 2 +- src/diffusers/hooks/hooks.py | 6 +++--- src/diffusers/hooks/layerwise_casting.py | 4 ++-- src/diffusers/loaders/peft.py | 2 +- src/diffusers/models/autoencoders/autoencoder_kl.py | 2 +- .../models/autoencoders/autoencoder_kl_allegro.py | 2 +- .../models/autoencoders/autoencoder_kl_cogvideox.py | 2 +- .../autoencoders/autoencoder_kl_temporal_decoder.py | 2 +- src/diffusers/pipelines/consisid/consisid_utils.py | 2 +- tests/lora/utils.py | 4 ++-- tests/pipelines/cogvideo/test_cogvideox_image2video.py | 2 +- tests/pipelines/consisid/test_consisid.py | 2 +- tests/pipelines/kolors/test_kolors_img2img.py | 2 +- tests/pipelines/pag/test_pag_pixart_sigma.py | 2 +- tests/pipelines/stable_unclip/test_stable_unclip.py | 2 +- .../stable_unclip/test_stable_unclip_img2img.py | 4 ++-- tests/pipelines/test_pipelines_common.py | 10 +++++----- tests/quantization/bnb/test_4bit.py | 2 +- tests/quantization/bnb/test_mixed_int8.py | 2 +- .../test_model_autoencoder_dc_single_file.py | 2 +- utils/custom_init_isort.py | 2 +- 24 files changed, 34 insertions(+), 34 deletions(-) diff --git a/examples/cogvideo/train_cogvideox_image_to_video_lora.py b/examples/cogvideo/train_cogvideox_image_to_video_lora.py index 642aecabf74f..809911c279fd 100644 --- a/examples/cogvideo/train_cogvideox_image_to_video_lora.py +++ b/examples/cogvideo/train_cogvideox_image_to_video_lora.py @@ -555,7 +555,7 @@ def _load_dataset_from_local_path(self): if any(not path.is_file() for path in instance_videos): raise ValueError( - "Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found atleast one path that is not a valid file." + "Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found at least one path that is not a valid file." ) return instance_prompts, instance_videos diff --git a/examples/cogvideo/train_cogvideox_lora.py b/examples/cogvideo/train_cogvideox_lora.py index e737ce76241f..acd593c2afa7 100644 --- a/examples/cogvideo/train_cogvideox_lora.py +++ b/examples/cogvideo/train_cogvideox_lora.py @@ -539,7 +539,7 @@ def _load_dataset_from_local_path(self): if any(not path.is_file() for path in instance_videos): raise ValueError( - "Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found atleast one path that is not a valid file." + "Expected '--video_column' to be a path to a file in `--instance_data_root` containing line-separated paths to video data but found at least one path that is not a valid file." ) return instance_prompts, instance_videos diff --git a/examples/research_projects/multi_subject_dreambooth_inpainting/README.md b/examples/research_projects/multi_subject_dreambooth_inpainting/README.md index ffd8e304efce..a3e999e681e3 100644 --- a/examples/research_projects/multi_subject_dreambooth_inpainting/README.md +++ b/examples/research_projects/multi_subject_dreambooth_inpainting/README.md @@ -2,7 +2,7 @@ Please note that this project is not actively maintained. 
However, you can open an issue and tag @gzguevara.
 
-[DreamBooth](https://arxiv.org/abs/2208.12242) is a method to personalize text2image models like stable diffusion given just a few(3~5) images of a subject. This project consists of **two parts**. Training Stable Diffusion for inpainting requieres prompt-image-mask pairs. The Unet of inpainiting models have 5 additional input channels (4 for the encoded masked-image and 1 for the mask itself).
+[DreamBooth](https://arxiv.org/abs/2208.12242) is a method to personalize text2image models like stable diffusion given just a few (3~5) images of a subject. This project consists of **two parts**. Training Stable Diffusion for inpainting requires prompt-image-mask pairs. The Unet of inpainting models has 5 additional input channels (4 for the encoded masked-image and 1 for the mask itself).
 
 **The first part**, the `multi_inpaint_dataset.ipynb` notebook, demonstrates how make a 🤗 dataset of prompt-image-mask pairs. You can, however, skip the first part and move straight to the second part with the example datasets in this project. ([cat toy dataset masked](https://huggingface.co/datasets/gzguevara/cat_toy_masked), [mr. potato head dataset masked](https://huggingface.co/datasets/gzguevara/mr_potato_head_masked))
 
@@ -73,7 +73,7 @@ accelerate launch train_multi_subject_dreambooth_inpaint.py \
 
 ## 3. Results
 
-A [![Weights & Biases](https://img.shields.io/badge/Weights%20&%20Biases-Report-blue)](https://wandb.ai/gzguevara/uncategorized/reports/Multi-Subject-Dreambooth-for-Inpainting--Vmlldzo2MzY5NDQ4?accessToken=y0nya2d7baguhbryxaikbfr1203amvn1jsmyl07vk122mrs7tnph037u1nqgse8t) is provided showing the training progress by every 50 steps. Note, the reported weights & baises run was performed on a A100 GPU with the following stetting:
+A [![Weights & Biases](https://img.shields.io/badge/Weights%20&%20Biases-Report-blue)](https://wandb.ai/gzguevara/uncategorized/reports/Multi-Subject-Dreambooth-for-Inpainting--Vmlldzo2MzY5NDQ4?accessToken=y0nya2d7baguhbryxaikbfr1203amvn1jsmyl07vk122mrs7tnph037u1nqgse8t) is provided showing the training progress every 50 steps. Note: the reported Weights & Biases run was performed on an A100 GPU with the following settings:
 
 ```bash
 accelerate launch train_multi_subject_dreambooth_inpaint.py \
diff --git a/src/diffusers/hooks/faster_cache.py b/src/diffusers/hooks/faster_cache.py
index 634635346474..35b176e930c9 100644
--- a/src/diffusers/hooks/faster_cache.py
+++ b/src/diffusers/hooks/faster_cache.py
@@ -146,7 +146,7 @@ class FasterCacheConfig:
     alpha_low_frequency: float = 1.1
     alpha_high_frequency: float = 1.1
 
-    # n as described in CFG-Cache explanation in the paper - dependant on the model
+    # n as described in CFG-Cache explanation in the paper - dependent on the model
     unconditional_batch_skip_range: int = 5
     unconditional_batch_timestep_skip_range: Tuple[int, int] = (-1, 641)
 
diff --git a/src/diffusers/hooks/hooks.py b/src/diffusers/hooks/hooks.py
index 3b2e4ed91c2f..72f9c7c57ba3 100644
--- a/src/diffusers/hooks/hooks.py
+++ b/src/diffusers/hooks/hooks.py
@@ -43,9 +43,9 @@ def initialize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
         """
         return module
 
-    def deinitalize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
+    def deinitialize_hook(self, module: torch.nn.Module) -> torch.nn.Module:
         r"""
-        Hook that is executed when a model is deinitalized.
+        Hook that is executed when a model is deinitialized.
 
Args: module (`torch.nn.Module`): @@ -192,7 +192,7 @@ def remove_hook(self, name: str, recurse: bool = True) -> None: else: self._fn_refs[index + 1].forward = old_forward - self._module_ref = hook.deinitalize_hook(self._module_ref) + self._module_ref = hook.deinitialize_hook(self._module_ref) del self.hooks[name] self._hook_order.pop(index) self._fn_refs.pop(index) diff --git a/src/diffusers/hooks/layerwise_casting.py b/src/diffusers/hooks/layerwise_casting.py index c0105ab93483..45023bbb1bf9 100644 --- a/src/diffusers/hooks/layerwise_casting.py +++ b/src/diffusers/hooks/layerwise_casting.py @@ -60,9 +60,9 @@ def initialize_hook(self, module: torch.nn.Module): module.to(dtype=self.storage_dtype, non_blocking=self.non_blocking) return module - def deinitalize_hook(self, module: torch.nn.Module): + def deinitialize_hook(self, module: torch.nn.Module): raise NotImplementedError( - "LayerwiseCastingHook does not support deinitalization. A model once enabled with layerwise casting will " + "LayerwiseCastingHook does not support deinitialization. A model once enabled with layerwise casting will " "have casted its weights to a lower precision dtype for storage. Casting this back to the original dtype " "will lead to precision loss, which might have an impact on the model's generation quality. The model should " "be re-initialized and loaded in the original dtype." diff --git a/src/diffusers/loaders/peft.py b/src/diffusers/loaders/peft.py index bbef5b1628cb..e8fc35c97acf 100644 --- a/src/diffusers/loaders/peft.py +++ b/src/diffusers/loaders/peft.py @@ -250,7 +250,7 @@ def load_lora_adapter( rank = {} for key, val in state_dict.items(): - # Cannot figure out rank from lora layers that don't have atleast 2 dimensions. + # Cannot figure out rank from lora layers that don't have at least 2 dimensions. # Bias layers in LoRA only have a single dimension if "lora_B" in key and val.ndim > 1: # Check out https://github.com/huggingface/peft/pull/2419 for the `^` symbol. diff --git a/src/diffusers/models/autoencoders/autoencoder_kl.py b/src/diffusers/models/autoencoders/autoencoder_kl.py index 357df0c31087..76b3294edb0b 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl.py @@ -63,7 +63,7 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapter Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper. force_upcast (`bool`, *optional*, default to `True`): If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE - can be fine-tuned / trained to a lower range without loosing too much precision in which case + can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix mid_block_add_attention (`bool`, *optional*, default to `True`): If enabled, the mid_block of the Encoder and Decoder will have attention blocks. If set to false, the diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_allegro.py b/src/diffusers/models/autoencoders/autoencoder_kl_allegro.py index a76277366c09..6234a851b2b1 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_allegro.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_allegro.py @@ -715,7 +715,7 @@ class AutoencoderKLAllegro(ModelMixin, ConfigMixin): Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper. 
force_upcast (`bool`, default to `True`): If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE - can be fine-tuned / trained to a lower range without loosing too much precision in which case + can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix """ diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py b/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py index e2b26396899f..5de4c296035b 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py @@ -983,7 +983,7 @@ class AutoencoderKLCogVideoX(ModelMixin, ConfigMixin, FromOriginalModelMixin): Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper. force_upcast (`bool`, *optional*, default to `True`): If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE - can be fine-tuned / trained to a lower range without loosing too much precision in which case + can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix """ diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py b/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py index 5a72cd395196..8945ffae3d75 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py @@ -161,7 +161,7 @@ class AutoencoderKLTemporalDecoder(ModelMixin, ConfigMixin): Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper. force_upcast (`bool`, *optional*, default to `True`): If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE - can be fine-tuned / trained to a lower range without loosing too much precision in which case + can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix """ diff --git a/src/diffusers/pipelines/consisid/consisid_utils.py b/src/diffusers/pipelines/consisid/consisid_utils.py index 874b3d76149b..23811a4986e3 100644 --- a/src/diffusers/pipelines/consisid/consisid_utils.py +++ b/src/diffusers/pipelines/consisid/consisid_utils.py @@ -166,7 +166,7 @@ def process_face_embeddings( raise RuntimeError("facexlib align face fail") align_face = face_helper_1.cropped_faces[0] # (512, 512, 3) # RGB - # incase insightface didn't detect face + # in case insightface didn't detect face if id_ante_embedding is None: logger.warning("Failed to detect face using insightface. Extracting embedding with align face") id_ante_embedding = face_helper_2.get_feat(align_face) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index 87a8fddfa583..cc0cd66ad3e8 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -1090,7 +1090,7 @@ def test_simple_inference_with_text_denoiser_block_scale(self): def test_simple_inference_with_text_denoiser_multi_adapter_block_lora(self): """ Tests a simple inference with lora attached to text encoder and unet, attaches - multiple adapters and set differnt weights for different blocks (i.e. 
block lora) + multiple adapters and set different weights for different blocks (i.e. block lora) """ for scheduler_cls in self.scheduler_classes: components, text_lora_config, denoiser_lora_config = self.get_dummy_components(scheduler_cls) @@ -1638,7 +1638,7 @@ def test_simple_inference_with_text_lora_denoiser_fused_multi( pipe.fuse_lora(components=self.pipeline_class._lora_loadable_modules, adapter_names=["adapter-1"]) - # Fusing should still keep the LoRA layers so outpout should remain the same + # Fusing should still keep the LoRA layers so output should remain the same outputs_lora_1_fused = pipe(**inputs, generator=torch.manual_seed(0))[0] self.assertTrue( diff --git a/tests/pipelines/cogvideo/test_cogvideox_image2video.py b/tests/pipelines/cogvideo/test_cogvideox_image2video.py index ec4e51bd1bad..c2e7d0cdbf87 100644 --- a/tests/pipelines/cogvideo/test_cogvideox_image2video.py +++ b/tests/pipelines/cogvideo/test_cogvideox_image2video.py @@ -270,7 +270,7 @@ def test_vae_tiling(self, expected_diff_max: float = 0.3): generator_device = "cpu" components = self.get_dummy_components() - # The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initalization. + # The reason to modify it this way is because I2V Transformer limits the generation to resolutions used during initialization. # This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings. # See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py components["transformer"] = CogVideoXTransformer3DModel.from_config( diff --git a/tests/pipelines/consisid/test_consisid.py b/tests/pipelines/consisid/test_consisid.py index a39c17bb4f79..5921b91d6bc4 100644 --- a/tests/pipelines/consisid/test_consisid.py +++ b/tests/pipelines/consisid/test_consisid.py @@ -279,7 +279,7 @@ def test_vae_tiling(self, expected_diff_max: float = 0.4): generator_device = "cpu" components = self.get_dummy_components() - # The reason to modify it this way is because ConsisID Transformer limits the generation to resolutions used during initalization. + # The reason to modify it this way is because ConsisID Transformer limits the generation to resolutions used during initialization. # This limitation comes from using learned positional embeddings which cannot be generated on-the-fly like sincos or RoPE embeddings. 
# See the if-statement on "self.use_learned_positional_embeddings" in diffusers/models/embeddings.py components["transformer"] = ConsisIDTransformer3DModel.from_config( diff --git a/tests/pipelines/kolors/test_kolors_img2img.py b/tests/pipelines/kolors/test_kolors_img2img.py index 89da95753a14..f3caed704af4 100644 --- a/tests/pipelines/kolors/test_kolors_img2img.py +++ b/tests/pipelines/kolors/test_kolors_img2img.py @@ -155,6 +155,6 @@ def test_inference_batch_single_identical(self): def test_float16_inference(self): super().test_float16_inference(expected_max_diff=7e-2) - @unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unline kolors t2i.") + @unittest.skip("Test not supported because kolors img2img doesn't take pooled embeds as inputs unlike kolors t2i.") def test_encode_prompt_works_in_isolation(self): pass diff --git a/tests/pipelines/pag/test_pag_pixart_sigma.py b/tests/pipelines/pag/test_pag_pixart_sigma.py index 624b57844390..c79f5ee82106 100644 --- a/tests/pipelines/pag/test_pag_pixart_sigma.py +++ b/tests/pipelines/pag/test_pag_pixart_sigma.py @@ -254,7 +254,7 @@ def test_attention_slicing_forward_pass( assert_mean_pixel_difference(to_np(output_with_slicing1[0]), to_np(output_without_slicing[0])) assert_mean_pixel_difference(to_np(output_with_slicing2[0]), to_np(output_without_slicing[0])) - # Because we have `pag_applied_layers` we cannot direcly apply + # Because we have `pag_applied_layers` we cannot directly apply # `set_default_attn_processor` def test_dict_tuple_outputs_equivalent(self, expected_slice=None, expected_max_difference=1e-4): components = self.get_dummy_components() diff --git a/tests/pipelines/stable_unclip/test_stable_unclip.py b/tests/pipelines/stable_unclip/test_stable_unclip.py index 8cf103dffd56..f89133dba9ed 100644 --- a/tests/pipelines/stable_unclip/test_stable_unclip.py +++ b/tests/pipelines/stable_unclip/test_stable_unclip.py @@ -217,7 +217,7 @@ def test_stable_unclip(self): pipe.enable_sequential_cpu_offload() generator = torch.Generator(device="cpu").manual_seed(0) - output = pipe("anime turle", generator=generator, output_type="np") + output = pipe("anime turtle", generator=generator, output_type="np") image = output.images[0] diff --git a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py index 176b6954d616..7add9770b4ef 100644 --- a/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py +++ b/tests/pipelines/stable_unclip/test_stable_unclip_img2img.py @@ -246,7 +246,7 @@ def test_stable_unclip_l_img2img(self): pipe.enable_sequential_cpu_offload() generator = torch.Generator(device="cpu").manual_seed(0) - output = pipe(input_image, "anime turle", generator=generator, output_type="np") + output = pipe(input_image, "anime turtle", generator=generator, output_type="np") image = output.images[0] @@ -273,7 +273,7 @@ def test_stable_unclip_h_img2img(self): pipe.enable_sequential_cpu_offload() generator = torch.Generator(device="cpu").manual_seed(0) - output = pipe(input_image, "anime turle", generator=generator, output_type="np") + output = pipe(input_image, "anime turtle", generator=generator, output_type="np") image = output.images[0] diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 7478898644b4..63e318c96a1b 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -2096,11 +2096,11 @@ def test_encode_prompt_works_in_isolation(self, 
extra_required_param_value_dict= with torch.no_grad(): encoded_prompt_outputs = pipe_with_just_text_encoder.encode_prompt(**encode_prompt_inputs) - # Programatically determine the reutrn names of `encode_prompt.` - ast_vistor = ReturnNameVisitor() - encode_prompt_tree = ast_vistor.get_ast_tree(cls=self.pipeline_class) - ast_vistor.visit(encode_prompt_tree) - prompt_embed_kwargs = ast_vistor.return_names + # Programmatically determine the return names of `encode_prompt.` + ast_visitor = ReturnNameVisitor() + encode_prompt_tree = ast_visitor.get_ast_tree(cls=self.pipeline_class) + ast_visitor.visit(encode_prompt_tree) + prompt_embed_kwargs = ast_visitor.return_names prompt_embeds_kwargs = dict(zip(prompt_embed_kwargs, encoded_prompt_outputs)) # Pack the outputs of `encode_prompt`. diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py index 096ee4c34448..4396974f0799 100644 --- a/tests/quantization/bnb/test_4bit.py +++ b/tests/quantization/bnb/test_4bit.py @@ -205,7 +205,7 @@ def test_model_memory_usage(self): def test_original_dtype(self): r""" - A simple test to check if the model succesfully stores the original dtype + A simple test to check if the model successfully stores the original dtype """ self.assertTrue("_pre_quantization_dtype" in self.model_4bit.config) self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config) diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py index a8aff679b5b6..f9870977999a 100644 --- a/tests/quantization/bnb/test_mixed_int8.py +++ b/tests/quantization/bnb/test_mixed_int8.py @@ -195,7 +195,7 @@ def test_model_memory_usage(self): def test_original_dtype(self): r""" - A simple test to check if the model succesfully stores the original dtype + A simple test to check if the model successfully stores the original dtype """ self.assertTrue("_pre_quantization_dtype" in self.model_8bit.config) self.assertFalse("_pre_quantization_dtype" in self.model_fp16.config) diff --git a/tests/single_file/test_model_autoencoder_dc_single_file.py b/tests/single_file/test_model_autoencoder_dc_single_file.py index 31b2eb6e36b0..27348fa7b29d 100644 --- a/tests/single_file/test_model_autoencoder_dc_single_file.py +++ b/tests/single_file/test_model_autoencoder_dc_single_file.py @@ -95,7 +95,7 @@ def test_single_file_in_type_variant_components(self): # `in` variant checkpoints require passing in a `config` parameter # in order to set the scaling factor correctly. # `in` and `mix` variants have the same keys and we cannot automatically infer a scaling factor. - # We default to using teh `mix` config + # We default to using the `mix` config repo_id = "mit-han-lab/dc-ae-f128c512-in-1.0-diffusers" ckpt_path = "https://huggingface.co/mit-han-lab/dc-ae-f128c512-in-1.0/blob/main/model.safetensors" diff --git a/utils/custom_init_isort.py b/utils/custom_init_isort.py index 791df0e78694..cc3bccb9bd63 100644 --- a/utils/custom_init_isort.py +++ b/utils/custom_init_isort.py @@ -252,7 +252,7 @@ def sort_imports(file: str, check_only: bool = True): code, start_prompt="_import_structure = {", end_prompt="if TYPE_CHECKING:" ) - # We ignore block 0 (everything untils start_prompt) and the last block (everything after end_prompt). + # We ignore block 0 (everything until start_prompt) and the last block (everything after end_prompt). for block_idx in range(1, len(main_blocks) - 1): # Check if the block contains some `_import_structure`s thingy to sort. 
block = main_blocks[block_idx] From d88f72753bd9ee2be8889b1e808a9e83b6338647 Mon Sep 17 00:00:00 2001 From: co63oc Date: Fri, 30 May 2025 14:14:55 +0800 Subject: [PATCH 2/6] Update src/diffusers/hooks/hooks.py Co-authored-by: Aryan --- src/diffusers/hooks/hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/hooks/hooks.py b/src/diffusers/hooks/hooks.py index 72f9c7c57ba3..3a99c62eabf3 100644 --- a/src/diffusers/hooks/hooks.py +++ b/src/diffusers/hooks/hooks.py @@ -43,7 +43,7 @@ def initialize_hook(self, module: torch.nn.Module) -> torch.nn.Module: """ return module - def deinitialize_hook(self, module: torch.nn.Module) -> torch.nn.Module: + def deinitalize_hook(self, module: torch.nn.Module) -> torch.nn.Module: r""" Hook that is executed when a model is deinitialized. From 8bcd65da9b21377d852789b9d28660e5ff4caf2a Mon Sep 17 00:00:00 2001 From: co63oc Date: Fri, 30 May 2025 14:15:06 +0800 Subject: [PATCH 3/6] Update src/diffusers/hooks/hooks.py Co-authored-by: Aryan --- src/diffusers/hooks/hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/hooks/hooks.py b/src/diffusers/hooks/hooks.py index 3a99c62eabf3..2f3f61357325 100644 --- a/src/diffusers/hooks/hooks.py +++ b/src/diffusers/hooks/hooks.py @@ -192,7 +192,7 @@ def remove_hook(self, name: str, recurse: bool = True) -> None: else: self._fn_refs[index + 1].forward = old_forward - self._module_ref = hook.deinitialize_hook(self._module_ref) + self._module_ref = hook.deinitalize_hook(self._module_ref) del self.hooks[name] self._hook_order.pop(index) self._fn_refs.pop(index) From 5d614c733ec584416b7ce325d2e951462098b6ef Mon Sep 17 00:00:00 2001 From: co63oc Date: Fri, 30 May 2025 14:19:19 +0800 Subject: [PATCH 4/6] Update layerwise_casting.py --- src/diffusers/hooks/layerwise_casting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/hooks/layerwise_casting.py b/src/diffusers/hooks/layerwise_casting.py index 45023bbb1bf9..5c99cb74480d 100644 --- a/src/diffusers/hooks/layerwise_casting.py +++ b/src/diffusers/hooks/layerwise_casting.py @@ -60,7 +60,7 @@ def initialize_hook(self, module: torch.nn.Module): module.to(dtype=self.storage_dtype, non_blocking=self.non_blocking) return module - def deinitialize_hook(self, module: torch.nn.Module): + def deinitalize_hook(self, module: torch.nn.Module): raise NotImplementedError( "LayerwiseCastingHook does not support deinitialization. A model once enabled with layerwise casting will " "have casted its weights to a lower precision dtype for storage. 
Casting this back to the original dtype " From d5809ed63a301f755a2c85fe0562b95a0f00c2da Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 30 May 2025 10:43:42 +0000 Subject: [PATCH 5/6] Apply style fixes --- src/diffusers/models/autoencoders/autoencoder_kl.py | 4 ++-- src/diffusers/models/autoencoders/autoencoder_kl_allegro.py | 4 ++-- src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py | 4 ++-- .../models/autoencoders/autoencoder_kl_temporal_decoder.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/diffusers/models/autoencoders/autoencoder_kl.py b/src/diffusers/models/autoencoders/autoencoder_kl.py index af0287997920..8c86908c76ab 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl.py @@ -63,8 +63,8 @@ class AutoencoderKL(ModelMixin, ConfigMixin, FromOriginalModelMixin, PeftAdapter Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper. force_upcast (`bool`, *optional*, default to `True`): If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE - can be fine-tuned / trained to a lower range without losing too much precision in which case - `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix + can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast` + can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix mid_block_add_attention (`bool`, *optional*, default to `True`): If enabled, the mid_block of the Encoder and Decoder will have attention blocks. If set to false, the mid_block will only have resnet blocks diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_allegro.py b/src/diffusers/models/autoencoders/autoencoder_kl_allegro.py index 090430206b51..c25b158cfc83 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_allegro.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_allegro.py @@ -715,8 +715,8 @@ class AutoencoderKLAllegro(ModelMixin, ConfigMixin): Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper. force_upcast (`bool`, default to `True`): If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE - can be fine-tuned / trained to a lower range without losing too much precision in which case - `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix + can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast` + can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix """ _supports_gradient_checkpointing = True diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py b/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py index 6b604599dc6e..f76d9e91d4a5 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_cogvideox.py @@ -983,8 +983,8 @@ class AutoencoderKLCogVideoX(ModelMixin, ConfigMixin, FromOriginalModelMixin): Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper. force_upcast (`bool`, *optional*, default to `True`): If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. 
VAE - can be fine-tuned / trained to a lower range without losing too much precision in which case - `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix + can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast` + can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix """ _supports_gradient_checkpointing = True diff --git a/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py b/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py index f4606604b161..fb6a7de19dfe 100644 --- a/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +++ b/src/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py @@ -161,8 +161,8 @@ class AutoencoderKLTemporalDecoder(ModelMixin, ConfigMixin): Synthesis with Latent Diffusion Models](https://huggingface.co/papers/2112.10752) paper. force_upcast (`bool`, *optional*, default to `True`): If enabled it will force the VAE to run in float32 for high image resolution pipelines, such as SD-XL. VAE - can be fine-tuned / trained to a lower range without losing too much precision in which case - `force_upcast` can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix + can be fine-tuned / trained to a lower range without losing too much precision in which case `force_upcast` + can be set to `False` - see: https://huggingface.co/madebyollin/sdxl-vae-fp16-fix """ _supports_gradient_checkpointing = True From b153510e7ff9fd183032b95c8bea655120dd7dbb Mon Sep 17 00:00:00 2001 From: co63oc Date: Fri, 30 May 2025 18:56:58 +0800 Subject: [PATCH 6/6] update
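
---

A note for reviewers: patches 2-4 revert the `deinitalize_hook` rename on the hook classes themselves, keeping the corrected wording only in the surrounding docstrings and error messages, so the method names in code are left as they were. The `NotImplementedError` message touched in `layerwise_casting.py` describes genuinely one-way behavior; below is a minimal sketch of why, assuming the `enable_layerwise_casting` convenience method exposed on `ModelMixin` (the checkpoint id and dtypes are illustrative only):

```python
import torch
from diffusers import AutoencoderKL

# Load the model in its original ("full") precision first.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.bfloat16)

# Re-store the weights in float8; each layer is upcast to the compute dtype
# only for the duration of its own forward pass.
vae.enable_layerwise_casting(
    storage_dtype=torch.float8_e4m3fn,
    compute_dtype=torch.bfloat16,
)

# The bfloat16 weights no longer exist at this point. Upcasting the float8
# storage cannot recover the rounded-off bits, which is why the hook's
# deinitalize_hook raises NotImplementedError instead of undoing the cast:
# as the message says, the model should be re-initialized and reloaded in
# the original dtype.
```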
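
One more note: the `force_upcast` docstring that patch 5 reflows across the four autoencoder classes is runtime-relevant, since pipelines read it to decide whether the VAE must run in float32. A short sketch of the opt-out the docstring describes, using the fp16-fixed VAE it links to (passing `force_upcast` as a load-time config override is the assumed mechanism here):

```python
import torch
from diffusers import AutoencoderKL

# This community VAE was fine-tuned to a numerically safer range, so the
# float32 upcast described in the docstring can be skipped by overriding
# the config flag when loading.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
    force_upcast=False,
)
```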