From a009f1d1fe03fe622b57de5e53cbe283257f91ec Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Sat, 25 Mar 2023 09:37:05 +0530 Subject: [PATCH 1/3] improve stable unclip doc. --- .../source/en/api/pipelines/stable_unclip.mdx | 58 +++++++++++++++---- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/docs/source/en/api/pipelines/stable_unclip.mdx b/docs/source/en/api/pipelines/stable_unclip.mdx index c8b5d58705ba..372242ae2dff 100644 --- a/docs/source/en/api/pipelines/stable_unclip.mdx +++ b/docs/source/en/api/pipelines/stable_unclip.mdx @@ -42,12 +42,9 @@ Coming soon! ### Text guided Image-to-Image Variation ```python -import requests -import torch -from PIL import Image -from io import BytesIO - from diffusers import StableUnCLIPImg2ImgPipeline +from diffusers.utils import load_image +import torch pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16, variation="fp16" @@ -55,12 +52,10 @@ pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( pipe = pipe.to("cuda") url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/tarsila_do_amaral.png" - -response = requests.get(url) -init_image = Image.open(BytesIO(response.content)).convert("RGB") +init_image = load_image(url) images = pipe(init_image).images -images[0].save("fantasy_landscape.png") +images[0].save("variation_image.png") ``` Optionally, you can also pass a prompt to `pipe` such as: @@ -69,7 +64,50 @@ Optionally, you can also pass a prompt to `pipe` such as: prompt = "A fantasy landscape, trending on artstation" images = pipe(init_image, prompt=prompt).images -images[0].save("fantasy_landscape.png") +images[0].save("variation_image_two.png") +``` + +### Memory optimization + +If you are short on GPU memory, you can enable smart CPU offloading so that models that are not needed +immediately for a computation can be offloaded to CPU: + +```python +from diffusers import StableUnCLIPImg2ImgPipeline +from diffusers.utils import load_image +import torch + +pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( + "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16, variation="fp16" +) +# Offload to CPU. +pipe.enable_model_cpu_offload() + +url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/tarsila_do_amaral.png" +init_image = load_image(url) + +images = pipe(init_image).images +images[0] +``` + +Further memory optimizations are possible by enabling VAE slicing on the pipeline: + +```python +from diffusers import StableUnCLIPImg2ImgPipeline +from diffusers.utils import load_image +import torch + +pipe = StableUnCLIPImg2ImgPipeline.from_pretrained( + "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16, variation="fp16" +) +pipe.enable_model_cpu_offload() +pipe.enable_vae_slicing() + +url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/tarsila_do_amaral.png" +init_image = load_image(url) + +images = pipe(init_image).images +images[0] ``` ### StableUnCLIPPipeline From 43558e34400a3d4a1b70c27971f3d543484773f4 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 30 Mar 2023 08:14:22 +0530 Subject: [PATCH 2/3] add: entry of StableUnCLIPPipeline to the docs --- .../source/en/api/pipelines/stable_unclip.mdx | 41 ++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/docs/source/en/api/pipelines/stable_unclip.mdx b/docs/source/en/api/pipelines/stable_unclip.mdx index 372242ae2dff..e79514ee7310 100644 --- a/docs/source/en/api/pipelines/stable_unclip.mdx +++ b/docs/source/en/api/pipelines/stable_unclip.mdx @@ -32,12 +32,49 @@ we do not add any additional noise to the image embeddings i.e. `noise_level = 0 * [stabilityai/stable-diffusion-2-1-unclip](https://hf.co/stabilityai/stable-diffusion-2-1-unclip) * [stabilityai/stable-diffusion-2-1-unclip-small](https://hf.co/stabilityai/stable-diffusion-2-1-unclip-small) * Text-to-image - * Coming soon! + * [stabilityai/stable-diffusion-2-1-unclip-small](https://hf.co/stabilityai/stable-diffusion-2-1-unclip-small) ### Text-to-Image Generation -Coming soon! +```python +import torch +from diffusers import UnCLIPScheduler, DDPMScheduler, StableUnCLIPPipeline +from diffusers.models import PriorTransformer +from transformers import CLIPTokenizer, CLIPTextModelWithProjection + +prior_model_id = "kakaobrain/karlo-v1-alpha" +data_type = torch.float16 +prior = PriorTransformer.from_pretrained(prior_model_id, subfolder="prior", torch_dtype=data_type) + +prior_text_model_id = "openai/clip-vit-large-patch14" +prior_tokenizer = CLIPTokenizer.from_pretrained(prior_text_model_id) +prior_text_model = CLIPTextModelWithProjection.from_pretrained(prior_text_model_id, torch_dtype=data_type) +prior_scheduler = UnCLIPScheduler.from_pretrained(prior_model_id, subfolder="prior_scheduler") +prior_scheduler = DDPMScheduler.from_config(prior_scheduler.config) + +stable_unclip_model_id = "stabilityai/stable-diffusion-2-1-unclip-small" + +pipe = StableUnCLIPPipeline.from_pretrained( + stable_unclip_model_id, + torch_dtype=data_type, + variant="fp16", + prior_tokenizer=prior_tokenizer, + prior_text_encoder=prior_text_model, + prior=prior, + prior_scheduler=prior_scheduler, +) + +pipe = pipe.to("cuda") +wave_prompt = "dramatic wave, the Oceans roar, Strong wave spiral across the oceans as the waves unfurl into roaring crests; perfect wave form; perfect wave shape; dramatic wave shape; wave shape unbelievable; wave; wave shape spectacular" + +images = pipe(prompt=wave_prompt).images +images[0].save("waves.png") +``` + + +Note that the details of using [stabilityai/stable-diffusion-2-1-unclip](https://hf.co/stabilityai/stable-diffusion-2-1-unclip) is unclear to us as of now, so we don't recommend its use. + ### Text guided Image-to-Image Variation From 82413dcb44403b8c6b3aa941e603d2769c7684de Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Thu, 30 Mar 2023 14:27:50 +0530 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: apolinario --- docs/source/en/api/pipelines/stable_unclip.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/en/api/pipelines/stable_unclip.mdx b/docs/source/en/api/pipelines/stable_unclip.mdx index e79514ee7310..ee359d0ba486 100644 --- a/docs/source/en/api/pipelines/stable_unclip.mdx +++ b/docs/source/en/api/pipelines/stable_unclip.mdx @@ -35,6 +35,7 @@ we do not add any additional noise to the image embeddings i.e. `noise_level = 0 * [stabilityai/stable-diffusion-2-1-unclip-small](https://hf.co/stabilityai/stable-diffusion-2-1-unclip-small) ### Text-to-Image Generation +Stable unCLIP can be leveraged for text-to-image generation by pipelining it with the prior model of KakaoBrain's open source DALL-E 2 replication [Karlo](https://huggingface.co/kakaobrain/karlo-v1-alpha) ```python import torch @@ -72,7 +73,7 @@ images[0].save("waves.png") ``` -Note that the details of using [stabilityai/stable-diffusion-2-1-unclip](https://hf.co/stabilityai/stable-diffusion-2-1-unclip) is unclear to us as of now, so we don't recommend its use. +For text-to-image we use `stabilityai/stable-diffusion-2-1-unclip-small` as it was trained on CLIP ViT-L/14 embedding, the same as the Karlo model prior. [stabilityai/stable-diffusion-2-1-unclip](https://hf.co/stabilityai/stable-diffusion-2-1-unclip) was trained on OpenCLIP ViT-H, so we don't recommend its use.