In [None]:
# Interactive Hugging Face Hub login for this notebook session.
# NOTE(review): presumably only needed when the checkpoint requires
# authentication — confirm whether this cell is required for "camenduru/sd15".
from huggingface_hub import notebook_login
notebook_login()

In [45]:
!pip install -q diffusers transformers ftfy

import os, torch
from PIL import Image
from diffusers import StableDiffusionPipeline
from transformers import CLIPTextModel, CLIPTokenizer

# Checkpoint identifier shared by the tokenizer, the text encoder and the pipeline.
model_path = "camenduru/sd15"

# The tokenizer and the half-precision text encoder are loaded on their own so
# that learned embeddings can be registered on them before the pipeline is built.
tokenizer = CLIPTokenizer.from_pretrained(
    model_path,
    subfolder="tokenizer",
)
text_encoder = CLIPTextModel.from_pretrained(
    model_path,
    subfolder="text_encoder",
    torch_dtype=torch.float16,
)

def load_learned_embed_in_clip(learned_embeds_path, text_encoder, tokenizer, token=None):
  """Register one learned embedding as a new token on a tokenizer / text encoder pair.

  The file is read with ``torch.load``. The layout is chosen from the file
  extension:

  * ``.pt``: a mapping with ``'string_to_token'`` and ``'string_to_param'``
    entries. The first key of ``string_to_token`` is the trained token and is
    used to index ``string_to_param``.
  * any other extension: a mapping whose first key is the trained token and
    whose value is the embedding tensor.

  Args:
    learned_embeds_path: Path of the embedding file to load.
    text_encoder: Object providing ``get_input_embeddings()`` and
      ``resize_token_embeddings(n)``. Its embedding matrix is modified in place.
    tokenizer: Object providing ``add_tokens(token)``, ``__len__`` and
      ``convert_tokens_to_ids(token)``. It is modified in place.
    token: Name under which the embedding is registered. When ``None`` the
      token name stored in the file is used.

  Raises:
    ValueError: If the tokenizer reports that ``token`` was not added
      (i.e. it already contains it).
  """
  _, file_extension = os.path.splitext(learned_embeds_path)
  # NOTE(security): torch.load unpickles the file; only load embedding files
  # from sources you trust.
  loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu")
  if file_extension == ".pt":
    string_to_token = loaded_learned_embeds['string_to_token']
    string_to_param = loaded_learned_embeds['string_to_param']
    trained_token = list(string_to_token.keys())[0]
    embeds = string_to_param[trained_token].detach()
    if embeds.dim() > 1:
      # A single token can only hold one vector. For a multi-vector file the
      # vector at index 1 is kept (the remaining vectors are dropped);
      # a single-vector file falls back to index 0 instead of raising IndexError.
      # NOTE(review): confirm that index 1 is the intended vector for
      # multi-vector embeddings.
      embeds = embeds[1] if embeds.shape[0] > 1 else embeds[0]
  else:
    trained_token = list(loaded_learned_embeds.keys())[0]
    embeds = loaded_learned_embeds[trained_token].detach()

  # Tensor.to returns a new tensor rather than converting in place, so the
  # result has to be rebound for the cast to take effect.
  target_dtype = text_encoder.get_input_embeddings().weight.dtype
  embeds = embeds.to(target_dtype)

  token = token if token is not None else trained_token
  num_added_tokens = tokenizer.add_tokens(token)
  if num_added_tokens == 0:
    raise ValueError(f"The tokenizer already contains the token {token}.")

  # Grow the embedding matrix to cover the new token id, then write the
  # learned vector into that row.
  text_encoder.resize_token_embeddings(len(tokenizer))
  token_id = tokenizer.convert_tokens_to_ids(token)
  text_encoder.get_input_embeddings().weight.data[token_id] = embeds

# Each entry is (embedding file, token name to register it under); None keeps
# the token name stored inside the file.
for embeds_path, token_override in (
  ("/content/learned_embeds.bin", None),
  ("/content/bad_prompt_version2.pt", "bad_prompt"),
  ("/content/4tnght.bin", "4tNGHT"),
):
  load_learned_embed_in_clip(embeds_path, text_encoder, tokenizer, token_override)

In [None]:
# Build the pipeline around the tokenizer and text encoder prepared above
# instead of letting it load its own copies, then move it to the GPU.
pipe = StableDiffusionPipeline.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
)
pipe = pipe.to("cuda")

In [None]:
# Alternative prompts, kept disabled:

# with torch.autocast("cuda"):
#   image = pipe("<midjourney-style> house").images[0]
# display(image)

# with torch.autocast("cuda"):
#   image = pipe("hand", negative_prompt="bad_prompt").images[0]
# display(image)

# with torch.autocast("cuda"):
#   image = pipe("hand").images[0]
# display(image)

# Run the pipeline under CUDA autocast and show the first generated image.
# NOTE(review): the prompt uses "<4tNGHT>" while the embedding was registered
# under the token "4tNGHT" (no angle brackets) — confirm the prompt is intended.
with torch.autocast("cuda"):
  generation = pipe("<4tNGHT>")
image = generation.images[0]
display(image)

In [None]:
def test_learned_embed_in_clip(learned_embeds_path, text_encoder, tokenizer, token=None):
  filename, file_extension = os.path.splitext(learned_embeds_path)
  loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu")
  print(loaded_learned_embeds)

# Dump the contents of each embedding file; arguments mirror the loading cell.
# NOTE(review): the last path ends in ".pt" whereas the loading cell reads
# "/content/4tnght.bin" — confirm which file actually exists.
for embeds_path, token_override in (
  ("/content/learned_embeds.bin", None),
  ("/content/bad_prompt_version2.pt", "bad_prompt"),
  ("/content/4tnght.pt", "4tNGHT"),
):
  test_learned_embed_in_clip(embeds_path, text_encoder, tokenizer, token_override)