|
26 | 26 | from diffusers.image_processor import VaeImageProcessor |
27 | 27 | from diffusers.loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin |
28 | 28 | from diffusers.models import AutoencoderKL, UNet2DConditionModel |
29 | | -from diffusers.models.attention_processor import FusedAttnProcessor2_0 |
30 | 29 | from diffusers.models.lora import LoRALinearLayer, adjust_lora_scale_text_encoder |
31 | | -from diffusers.pipelines.pipeline_utils import DiffusionPipeline |
| 30 | +from diffusers.pipelines.pipeline_utils import DiffusionPipeline, EfficiencyMixin |
32 | 31 | from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput |
33 | 32 | from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker |
34 | 33 | from diffusers.schedulers import KarrasDiffusionSchedulers |
@@ -415,7 +414,12 @@ def retrieve_timesteps( |
415 | 414 |
|
416 | 415 |
|
417 | 416 | class IPAdapterFaceIDStableDiffusionPipeline( |
418 | | - DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin |
| 417 | + DiffusionPipeline, |
| 418 | + EfficiencyMixin, |
| 419 | + TextualInversionLoaderMixin, |
| 420 | + LoraLoaderMixin, |
| 421 | + IPAdapterMixin, |
| 422 | + FromSingleFileMixin, |
419 | 423 | ): |
420 | 424 | r""" |
421 | 425 | Pipeline for text-to-image generation using Stable Diffusion. |
@@ -727,35 +731,6 @@ def set_ip_adapter_scale(self, scale): |
727 | 731 | if isinstance(attn_processor, (LoRAIPAdapterAttnProcessor, LoRAIPAdapterAttnProcessor2_0)): |
728 | 732 | attn_processor.scale = scale |
729 | 733 |
|
730 | | - def enable_vae_slicing(self): |
731 | | - r""" |
732 | | - Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to |
733 | | - compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. |
734 | | - """ |
735 | | - self.vae.enable_slicing() |
736 | | - |
737 | | - def disable_vae_slicing(self): |
738 | | - r""" |
739 | | - Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to |
740 | | - computing decoding in one step. |
741 | | - """ |
742 | | - self.vae.disable_slicing() |
743 | | - |
744 | | - def enable_vae_tiling(self): |
745 | | - r""" |
746 | | - Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to |
747 | | - compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow |
748 | | - processing larger images. |
749 | | - """ |
750 | | - self.vae.enable_tiling() |
751 | | - |
752 | | - def disable_vae_tiling(self): |
753 | | - r""" |
754 | | - Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to |
755 | | - computing decoding in one step. |
756 | | - """ |
757 | | - self.vae.disable_tiling() |
758 | | - |
759 | 734 | def _encode_prompt( |
760 | 735 | self, |
761 | 736 | prompt, |
@@ -1080,93 +1055,6 @@ def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype |
1080 | 1055 | latents = latents * self.scheduler.init_noise_sigma |
1081 | 1056 | return latents |
1082 | 1057 |
|
1083 | | - def enable_freeu(self, s1: float, s2: float, b1: float, b2: float): |
1084 | | - r"""Enables the FreeU mechanism as in https://arxiv.org/abs/2309.11497. |
1085 | | -
|
1086 | | - The suffixes after the scaling factors represent the stages where they are being applied. |
1087 | | -
|
1088 | | - Please refer to the [official repository](https://github.com/ChenyangSi/FreeU) for combinations of the values |
1089 | | - that are known to work well for different pipelines such as Stable Diffusion v1, v2, and Stable Diffusion XL. |
1090 | | -
|
1091 | | - Args: |
1092 | | - s1 (`float`): |
1093 | | - Scaling factor for stage 1 to attenuate the contributions of the skip features. This is done to |
1094 | | - mitigate "oversmoothing effect" in the enhanced denoising process. |
1095 | | - s2 (`float`): |
1096 | | - Scaling factor for stage 2 to attenuate the contributions of the skip features. This is done to |
1097 | | - mitigate "oversmoothing effect" in the enhanced denoising process. |
1098 | | - b1 (`float`): Scaling factor for stage 1 to amplify the contributions of backbone features. |
1099 | | - b2 (`float`): Scaling factor for stage 2 to amplify the contributions of backbone features. |
1100 | | - """ |
1101 | | - if not hasattr(self, "unet"): |
1102 | | - raise ValueError("The pipeline must have `unet` for using FreeU.") |
1103 | | - self.unet.enable_freeu(s1=s1, s2=s2, b1=b1, b2=b2) |
1104 | | - |
1105 | | - def disable_freeu(self): |
1106 | | - """Disables the FreeU mechanism if enabled.""" |
1107 | | - self.unet.disable_freeu() |
1108 | | - |
1109 | | - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections |
1110 | | - def fuse_qkv_projections(self, unet: bool = True, vae: bool = True): |
1111 | | - """ |
1112 | | - Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, |
1113 | | - key, value) are fused. For cross-attention modules, key and value projection matrices are fused. |
1114 | | -
|
1115 | | - <Tip warning={true}> |
1116 | | -
|
1117 | | - This API is 🧪 experimental. |
1118 | | -
|
1119 | | - </Tip> |
1120 | | -
|
1121 | | - Args: |
1122 | | - unet (`bool`, defaults to `True`): To apply fusion on the UNet. |
1123 | | - vae (`bool`, defaults to `True`): To apply fusion on the VAE. |
1124 | | - """ |
1125 | | - self.fusing_unet = False |
1126 | | - self.fusing_vae = False |
1127 | | - |
1128 | | - if unet: |
1129 | | - self.fusing_unet = True |
1130 | | - self.unet.fuse_qkv_projections() |
1131 | | - self.unet.set_attn_processor(FusedAttnProcessor2_0()) |
1132 | | - |
1133 | | - if vae: |
1134 | | - if not isinstance(self.vae, AutoencoderKL): |
1135 | | - raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.") |
1136 | | - |
1137 | | - self.fusing_vae = True |
1138 | | - self.vae.fuse_qkv_projections() |
1139 | | - self.vae.set_attn_processor(FusedAttnProcessor2_0()) |
1140 | | - |
1141 | | - # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections |
1142 | | - def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True): |
1143 | | - """Disable QKV projection fusion if enabled. |
1144 | | -
|
1145 | | - <Tip warning={true}> |
1146 | | -
|
1147 | | - This API is 🧪 experimental. |
1148 | | -
|
1149 | | - </Tip> |
1150 | | -
|
1151 | | - Args: |
1152 | | - unet (`bool`, defaults to `True`): To apply fusion on the UNet. |
1153 | | - vae (`bool`, defaults to `True`): To apply fusion on the VAE. |
1154 | | -
|
1155 | | - """ |
1156 | | - if unet: |
1157 | | - if not self.fusing_unet: |
1158 | | - logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.") |
1159 | | - else: |
1160 | | - self.unet.unfuse_qkv_projections() |
1161 | | - self.fusing_unet = False |
1162 | | - |
1163 | | - if vae: |
1164 | | - if not self.fusing_vae: |
1165 | | - logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.") |
1166 | | - else: |
1167 | | - self.vae.unfuse_qkv_projections() |
1168 | | - self.fusing_vae = False |
1169 | | - |
1170 | 1058 | # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding |
1171 | 1059 | def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32): |
1172 | 1060 | """ |
|
0 commit comments