[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/notebooks/blob/main/camenduru's_webui_to_diffusers_embeddings.ipynb)

In [None]:
# Install pinned CUDA 11.6 builds of torch 1.13.1 and its companion packages,
# using download.pytorch.org as an extra package index.
!pip install -q torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 torchtext==0.14.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu116 -U
# diffusers and transformers are imported below; ftfy and accelerate are installed alongside them.
!pip install -q diffusers transformers ftfy accelerate
# Prebuilt xformers 0.0.15 wheel; its filename tags it for CPython 3.8 on linux x86_64.
# xformers is needed for pipe.enable_xformers_memory_efficient_attention() later in the notebook.
!pip install -q https://github.com/camenduru/stable-diffusion-webui-colab/releases/download/0.0.15/xformers-0.0.15.dev0+189828c.d20221207-cp38-cp38-linux_x86_64.whl

import os, torch
from PIL import Image
from diffusers import StableDiffusionPipeline
from transformers import CLIPTextModel, CLIPTokenizer

# Model identifier passed to every from_pretrained() call in this notebook.
model_path = "lilpotat/a3"

# Load the tokenizer and the text encoder on their own (outside the pipeline)
# so that a learned embedding can be added to them before the pipeline is built.
tokenizer = CLIPTokenizer.from_pretrained(
  model_path,
  subfolder="tokenizer",
)
text_encoder = CLIPTextModel.from_pretrained(
  model_path,
  subfolder="text_encoder",
)

def load_learned_embed_in_clip(learned_embeds_path, text_encoder, tokenizer, token=None, type=0):
  """Register a learned embedding vector with a tokenizer / text encoder pair.

  The file is read with ``torch.load``; the vector it holds is written into the
  text encoder's input-embedding matrix at the id of a newly added tokenizer
  token. Both ``text_encoder`` and ``tokenizer`` are modified in place and
  nothing is returned.

  Args:
    learned_embeds_path: Path of the embedding file to load.
    text_encoder: Object providing ``get_input_embeddings()`` and
      ``resize_token_embeddings()``.
    tokenizer: Object providing ``add_tokens()``, ``convert_tokens_to_ids()``
      and ``len()``.
    token: Token string to register. When ``None``, the token name stored in
      the file is used (only available for ``type`` 0 and 1).
    type: File layout selector (the name shadows the builtin but is kept
      because callers pass it by keyword):
        0 - mapping ``{token: tensor}``; the first entry is used.
        1 - mapping with ``'string_to_token'`` and ``'string_to_param'``;
            row 1 of the first parameter is used.
        2 - indexable container; element 0 is used and ``token`` is required.

  Raises:
    ValueError: If ``type`` is not 0, 1 or 2, if ``type`` is 2 and no ``token``
      is given, or if the tokenizer already contains the token.
  """
  # Validate up front: previously these cases fell through the branches below
  # and surfaced as an UnboundLocalError on `embeds` / `trained_token`.
  if type not in (0, 1, 2):
    raise ValueError(f"Unsupported embedding file type {type!r}; expected 0, 1 or 2.")
  if type == 2 and token is None:
    raise ValueError("A token must be given for type 2 files because they do not store a token name.")

  # NOTE(security): torch.load unpickles the file, which can execute arbitrary
  # code. Only load embedding files that come from a trusted source.
  loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu")

  trained_token = None
  if type == 0:
    trained_token = list(loaded_learned_embeds.keys())[0]
    embeds = loaded_learned_embeds[trained_token]
  elif type == 1:
    string_to_token = loaded_learned_embeds['string_to_token']
    string_to_param = loaded_learned_embeds['string_to_param']
    trained_token = list(string_to_token.keys())[0]
    # NOTE(review): index 1 picks the second row of the stored parameter; a
    # parameter with a single row would raise IndexError here - confirm that
    # this is the intended vector.
    embeds = string_to_param[trained_token].detach()[1]
  else:
    embeds = loaded_learned_embeds[0]

  # Tensor.to() returns a new tensor, so the result has to be rebound.
  dtype = text_encoder.get_input_embeddings().weight.dtype
  embeds = embeds.to(dtype)

  token = token if token is not None else trained_token
  num_added_tokens = tokenizer.add_tokens(token)
  if num_added_tokens == 0:
    raise ValueError(f"The tokenizer already contains the token {token}.")

  # Grow the embedding matrix to cover the new token, then store the vector
  # at the new token's id.
  text_encoder.resize_token_embeddings(len(tokenizer))
  token_id = tokenizer.convert_tokens_to_ids(token)
  text_encoder.get_input_embeddings().weight.data[token_id] = embeds

In [None]:
# Download sample embedding files with wget (saved under their remote file names).
# Later cells read them from /content/<name> - assumes the notebook's working
# directory is /content; TODO confirm when running outside Colab.
!wget https://huggingface.co/datasets/Nerfgun3/bad_prompt/resolve/main/bad_prompt_version2.pt
!wget https://github.com/MushroomFleet/djz-Aesthetic-Embeddings/raw/main/djz-CyberCityV0.pt
!wget https://gitgud.io/viper1/stable-diffusion-embeddings/-/raw/master/embeddings/botan-50000.pt
!wget https://raw.githubusercontent.com/hlky/sd-embeddings/main/anya/anya.pt
!wget https://huggingface.co/datasets/Nerfgun3/bad_prompt/resolve/main/bad_prompt.pt
# learned_embeds.bin is the file the following cells actually load.
!wget https://huggingface.co/sd-concepts-library/midjourney-style/resolve/main/learned_embeds.bin

In [None]:
# Register one embedding with the shared tokenizer / text encoder.
# `type` selects the file layout load_learned_embed_in_clip should parse; the
# commented-out calls are alternatives for the other downloaded files.
# load_learned_embed_in_clip("/content/bad_prompt.pt", text_encoder, tokenizer, "bad_prompt", type=1)
# load_learned_embed_in_clip("/content/djz-CyberCityV0.pt", text_encoder, tokenizer, "CyberCityV0", type=2)
# load_learned_embed_in_clip("/content/botan-50000.pt", text_encoder, tokenizer, "botan-50000", type=2)
load_learned_embed_in_clip(
  learned_embeds_path="/content/learned_embeds.bin",
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  type=0,
)
# load_learned_embed_in_clip("/content/4tnght.pt", text_encoder, tokenizer, "<4tNGHT>", type=0)

In [None]:
# Build the pipeline around the already-loaded tokenizer and text encoder so it
# uses the same objects the embedding was registered with; no safety checker
# is passed (safety_checker=None).
pipe = StableDiffusionPipeline.from_pretrained(
  model_path,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  safety_checker=None,
)
pipe = pipe.to("cuda")

# Relies on the xformers package being installed.
pipe.enable_xformers_memory_efficient_attention()

In [None]:
# with torch.autocast("cuda"):
#   image = pipe("<midjourney-style> house").images[0]
# display(image)

# with torch.autocast("cuda"):
#   image = pipe("<4tNGHT>").images[0]
# display(image)

# with torch.autocast("cuda"):
#   image = pipe("girl waving", negative_prompt="bad_prompt").images[0]
# display(image)
 
# Seed a dedicated CUDA generator so the sample is reproducible.
# torch.cuda.manual_seed() returns None, so its result cannot serve as the
# pipeline's `generator`; torch.Generator.manual_seed() returns the generator.
generator = torch.Generator(device="cuda").manual_seed(10)
with torch.autocast("cuda"):
  image = pipe("cute girl waving to camera <midjourney-style>", negative_prompt="", generator=generator).images[0]
# Read the seed back from the generator that was actually handed to the pipeline.
real_seed = generator.initial_seed()
display(image)
print(real_seed)

In [None]:
def test_learned_embed_in_clip(learned_embeds_path):
  """Print the shape of the first entry stored in an embedding file.

  The file is read onto the CPU with ``torch.load``. The loaded object must
  provide ``keys()``, and the value under its first key must expose ``.shape``.
  """
  state = torch.load(learned_embeds_path, map_location="cpu")
  keys = list(state.keys())
  first_entry = state[keys[0]]
  print(first_entry.shape)

# Inspect one downloaded file; the commented-out calls probe the other files.
# test_learned_embed_in_clip("/content/anya.pt")
# test_learned_embed_in_clip("/content/botan-50000.pt")
# test_learned_embed_in_clip("/content/djz-CyberCityV0.pt")
test_learned_embed_in_clip(learned_embeds_path="/content/learned_embeds.bin")
# test_learned_embed_in_clip("/content/bad_prompt_version2.pt")
# test_learned_embed_in_clip("/content/4tnght.pt")

torch.Size([768])
