Skip to content

Commit cc4f805

Browse files
committed
rebase on main branch
1 parent 4a7fc38 commit cc4f805

12 files changed

+142
-500
lines changed

examples/community/ip_adapter_face_id.py

Lines changed: 7 additions & 119 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,8 @@
2626
from diffusers.image_processor import VaeImageProcessor
2727
from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
2828
from diffusers.models import AutoencoderKL, UNet2DConditionModel
29-
from diffusers.models.attention_processor import FusedAttnProcessor2_0
3029
from diffusers.models.lora import LoRALinearLayer, adjust_lora_scale_text_encoder
31-
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
30+
from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
3231
from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
3332
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
3433
from diffusers.schedulers import KarrasDiffusionSchedulers
@@ -415,7 +414,12 @@ def retrieve_timesteps(
415414

416415

417416
class IPAdapterFaceIDStableDiffusionPipeline(
418-
DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
417+
DiffusionPipeline,
418+
EfficiencyMixin,
419+
TextualInversionLoaderMixin,
420+
LoraLoaderMixin,
421+
IPAdapterMixin,
422+
FromSingleFileMixin,
419423
):
420424
r"""
421425
Pipeline for text-to-image generation using Stable Diffusion.
@@ -727,35 +731,6 @@ def set_ip_adapter_scale(self, scale):
727731
if isinstance(attn_processor, (LoRAIPAdapterAttnProcessor, LoRAIPAdapterAttnProcessor2_0)):
728732
attn_processor.scale = scale
729733

730-
def enable_vae_slicing(self):
731-
r"""
732-
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
733-
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
734-
"""
735-
self.vae.enable_slicing()
736-
737-
def disable_vae_slicing(self):
738-
r"""
739-
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
740-
computing decoding in one step.
741-
"""
742-
self.vae.disable_slicing()
743-
744-
def enable_vae_tiling(self):
745-
r"""
746-
Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
747-
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
748-
processing larger images.
749-
"""
750-
self.vae.enable_tiling()
751-
752-
def disable_vae_tiling(self):
753-
r"""
754-
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
755-
computing decoding in one step.
756-
"""
757-
self.vae.disable_tiling()
758-
759734
def _encode_prompt(
760735
self,
761736
prompt,
@@ -1080,93 +1055,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype
10801055
latents = latents * self.scheduler.init_noise_sigma
10811056
return latents
10821057

1083-
def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
1084-
r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
1085-
1086-
The suffixes after the scaling factors represent the stages where they are being applied.
1087-
1088-
Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
1089-
that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
1090-
1091-
Args:
1092-
s1 (`float`):
1093-
Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
1094-
mitigate "oversmoothing effect" in the enhanced denoising process.
1095-
s2 (`float`):
1096-
Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
1097-
mitigate "oversmoothing effect" in the enhanced denoising process.
1098-
b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
1099-
b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
1100-
"""
1101-
if not hasattr(self, "unet"):
1102-
raise ValueError("The pipeline must have `unet` for using FreeU.")
1103-
self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
1104-
1105-
def disable_freeu(self):
1106-
"""Disables the FreeU mechanism if enabled."""
1107-
self.unet.disable_freeu()
1108-
1109-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
1110-
def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
1111-
"""
1112-
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
1113-
key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
1114-
1115-
<Tip warning={true}>
1116-
1117-
This API is 🧪 experimental.
1118-
1119-
</Tip>
1120-
1121-
Args:
1122-
unet (`bool`, defaults to `True`): To apply fusion on the UNet.
1123-
vae (`bool`, defaults to `True`): To apply fusion on the VAE.
1124-
"""
1125-
self.fusing_unet = False
1126-
self.fusing_vae = False
1127-
1128-
if unet:
1129-
self.fusing_unet = True
1130-
self.unet.fuse_qkv_projections()
1131-
self.unet.set_attn_processor(FusedAttnProcessor2_0())
1132-
1133-
if vae:
1134-
if not isinstance(self.vae, AutoencoderKL):
1135-
raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
1136-
1137-
self.fusing_vae = True
1138-
self.vae.fuse_qkv_projections()
1139-
self.vae.set_attn_processor(FusedAttnProcessor2_0())
1140-
1141-
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
1142-
def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
1143-
"""Disable QKV projection fusion if enabled.
1144-
1145-
<Tip warning={true}>
1146-
1147-
This API is 🧪 experimental.
1148-
1149-
</Tip>
1150-
1151-
Args:
1152-
unet (`bool`, defaults to `True`): To apply fusion on the UNet.
1153-
vae (`bool`, defaults to `True`): To apply fusion on the VAE.
1154-
1155-
"""
1156-
if unet:
1157-
if not self.fusing_unet:
1158-
logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
1159-
else:
1160-
self.unet.unfuse_qkv_projections()
1161-
self.fusing_unet = False
1162-
1163-
if vae:
1164-
if not self.fusing_vae:
1165-
logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
1166-
else:
1167-
self.vae.unfuse_qkv_projections()
1168-
self.fusing_vae = False
1169-
11701058
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
11711059
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
11721060
"""

examples/community/pipeline_animatediff_img2video.py

Lines changed: 4 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
2727
from diffusers.models.lora import adjust_lora_scale_text_encoder
2828
from diffusers.models.unet_motion_model import MotionAdapter
29-
from diffusers.pipelines.pipeline_utils import DiffusionPipeline
29+
from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin
3030
from diffusers.schedulers import (
3131
DDIMScheduler,
3232
DPMSolverMultistepScheduler,
@@ -230,7 +230,9 @@ class AnimateDiffImgToVideoPipelineOutput(BaseOutput):
230230
frames: Union[torch.Tensor, np.ndarray]
231231

232232

233-
class AnimateDiffImgToVideoPipeline(DiffusionPipeline, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin):
233+
class AnimateDiffImgToVideoPipeline(
234+
DiffusionPipeline, EfficiencyMixin, TextualInversionLoaderMixin, IPAdapterMixin, LoraLoaderMixin
235+
):
234236
r"""
235237
Pipeline for text-to-video generation.
236238
@@ -527,67 +529,6 @@ def decode_latents(self, latents):
527529
video = video.float()
528530
return video
529531

530-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
531-
def enable_vae_slicing(self):
532-
r"""
533-
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
534-
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
535-
"""
536-
self.vae.enable_slicing()
537-
538-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
539-
def disable_vae_slicing(self):
540-
r"""
541-
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
542-
computing decoding in one step.
543-
"""
544-
self.vae.disable_slicing()
545-
546-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
547-
def enable_vae_tiling(self):
548-
r"""
549-
Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
550-
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
551-
processing larger images.
552-
"""
553-
self.vae.enable_tiling()
554-
555-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
556-
def disable_vae_tiling(self):
557-
r"""
558-
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
559-
computing decoding in one step.
560-
"""
561-
self.vae.disable_tiling()
562-
563-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
564-
def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
565-
r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
566-
567-
The suffixes after the scaling factors represent the stages where they are being applied.
568-
569-
Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
570-
that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
571-
572-
Args:
573-
s1 (`float`):
574-
Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
575-
mitigate "oversmoothing effect" in the enhanced denoising process.
576-
s2 (`float`):
577-
Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
578-
mitigate "oversmoothing effect" in the enhanced denoising process.
579-
b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
580-
b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
581-
"""
582-
if not hasattr(self, "unet"):
583-
raise ValueError("The pipeline must have `unet` for using FreeU.")
584-
self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
585-
586-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
587-
def disable_freeu(self):
588-
"""Disables the FreeU mechanism if enabled."""
589-
self.unet.disable_freeu()
590-
591532
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
592533
def prepare_extra_step_kwargs(self, generator, eta):
593534
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature

examples/community/pipeline_stable_diffusion_xl_ipex.py

Lines changed: 0 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -267,39 +267,6 @@ def __init__(
267267
else:
268268
self.watermark = None
269269

270-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
271-
def enable_vae_slicing(self):
272-
r"""
273-
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
274-
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
275-
"""
276-
self.vae.enable_slicing()
277-
278-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
279-
def disable_vae_slicing(self):
280-
r"""
281-
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
282-
computing decoding in one step.
283-
"""
284-
self.vae.disable_slicing()
285-
286-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
287-
def enable_vae_tiling(self):
288-
r"""
289-
Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
290-
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
291-
processing larger images.
292-
"""
293-
self.vae.enable_tiling()
294-
295-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
296-
def disable_vae_tiling(self):
297-
r"""
298-
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
299-
computing decoding in one step.
300-
"""
301-
self.vae.disable_tiling()
302-
303270
def encode_prompt(
304271
self,
305272
prompt: str,
@@ -701,34 +668,6 @@ def upcast_vae(self):
701668
self.vae.decoder.conv_in.to(dtype)
702669
self.vae.decoder.mid_block.to(dtype)
703670

704-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_freeu
705-
def enable_freeu(self, s1: float, s2: float, b1: float, b2: float):
706-
r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497.
707-
708-
The suffixes after the scaling factors represent the stages where they are being applied.
709-
710-
Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values
711-
that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL.
712-
713-
Args:
714-
s1 (`float`):
715-
Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to
716-
mitigate "oversmoothing effect" in the enhanced denoising process.
717-
s2 (`float`):
718-
Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to
719-
mitigate "oversmoothing effect" in the enhanced denoising process.
720-
b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features.
721-
b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features.
722-
"""
723-
if not hasattr(self, "unet"):
724-
raise ValueError("The pipeline must have `unet` for using FreeU.")
725-
self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2)
726-
727-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_freeu
728-
def disable_freeu(self):
729-
"""Disables the FreeU mechanism if enabled."""
730-
self.unet.disable_freeu()
731-
732671
# Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
733672
def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
734673
"""

src/diffusers/models/unets/unet_3d_condition.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
from ..attention_processor import (
2828
ADDED_KV_ATTENTION_PROCESSORS,
2929
CROSS_ATTENTION_PROCESSORS,
30+
Attention,
3031
AttentionProcessor,
3132
AttnAddedKVProcessor,
3233
AttnProcessor,
@@ -503,6 +504,44 @@ def disable_freeu(self):
503504
if hasattr(upsample_block, k) or getattr(upsample_block, k, None) is not None:
504505
setattr(upsample_block, k, None)
505506

507+
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.fuse_qkv_projections
508+
def fuse_qkv_projections(self):
509+
"""
510+
Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
511+
key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
512+
513+
<Tip warning={true}>
514+
515+
This API is 🧪 experimental.
516+
517+
</Tip>
518+
"""
519+
self.original_attn_processors = None
520+
521+
for _, attn_processor in self.attn_processors.items():
522+
if "Added" in str(attn_processor.__class__.__name__):
523+
raise ValueError("`fuse_qkv_projections()` is not supported for models having added KV projections.")
524+
525+
self.original_attn_processors = self.attn_processors
526+
527+
for module in self.modules():
528+
if isinstance(module, Attention):
529+
module.fuse_projections(fuse=True)
530+
531+
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unfuse_qkv_projections
532+
def unfuse_qkv_projections(self):
533+
"""Disables the fused QKV projection if enabled.
534+
535+
<Tip warning={true}>
536+
537+
This API is 🧪 experimental.
538+
539+
</Tip>
540+
541+
"""
542+
if self.original_attn_processors is not None:
543+
self.set_attn_processor(self.original_attn_processors)
544+
506545
# Copied from diffusers.models.unets.unet_2d_condition.UNet2DConditionModel.unload_lora
507546
def unload_lora(self):
508547
"""Unloads LoRA weights."""

0 commit comments

Comments
 (0)