diff --git a/src/diffusers/loaders/textual_inversion.py b/src/diffusers/loaders/textual_inversion.py index f20ad6fba667..e5aeea488407 100644 --- a/src/diffusers/loaders/textual_inversion.py +++ b/src/diffusers/loaders/textual_inversion.py @@ -457,6 +457,8 @@ def load_textual_inversion( def unload_textual_inversion( self, tokens: Optional[Union[str, List[str]]] = None, + tokenizer: Optional["PreTrainedTokenizer"] = None, + text_encoder: Optional["PreTrainedModel"] = None, ): r""" Unload Textual Inversion embeddings from the text encoder of [`StableDiffusionPipeline`] @@ -481,11 +483,28 @@ def unload_textual_inversion( # Remove just one token pipeline.unload_textual_inversion("") + + # Example 3: unload from SDXL + pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0") + embedding_path = hf_hub_download(repo_id="linoyts/web_y2k", filename="web_y2k_emb.safetensors", repo_type="model") + + # load embeddings to the text encoders + state_dict = load_file(embedding_path) + + # load embeddings of text_encoder 1 (CLIP ViT-L/14) + pipeline.load_textual_inversion(state_dict["clip_l"], token=["", ""], text_encoder=pipeline.text_encoder, tokenizer=pipeline.tokenizer) + # load embeddings of text_encoder 2 (CLIP ViT-G/14) + pipeline.load_textual_inversion(state_dict["clip_g"], token=["", ""], text_encoder=pipeline.text_encoder_2, tokenizer=pipeline.tokenizer_2) + + # Unload explicitly from both text encoders and tokenizers + pipeline.unload_textual_inversion(tokens=["", ""], text_encoder=pipeline.text_encoder, tokenizer=pipeline.tokenizer) + pipeline.unload_textual_inversion(tokens=["", ""], text_encoder=pipeline.text_encoder_2, tokenizer=pipeline.tokenizer_2) + ``` """ - tokenizer = getattr(self, "tokenizer", None) - text_encoder = getattr(self, "text_encoder", None) + tokenizer = tokenizer or getattr(self, "tokenizer", None) + text_encoder = text_encoder or getattr(self, "text_encoder", None) # Get textual inversion tokens and ids 
token_ids = []