From a009f1d1fe03fe622b57de5e53cbe283257f91ec Mon Sep 17 00:00:00 2001
From: Sayak Paul <spsayakpaul@gmail.com>
Date: Sat, 25 Mar 2023 09:37:05 +0530
Subject: [PATCH 1/3] improve stable unclip doc.

---
 .../source/en/api/pipelines/stable_unclip.mdx | 58 +++++++++++++++----
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/docs/source/en/api/pipelines/stable_unclip.mdx b/docs/source/en/api/pipelines/stable_unclip.mdx
index c8b5d58705ba..372242ae2dff 100644
--- a/docs/source/en/api/pipelines/stable_unclip.mdx
+++ b/docs/source/en/api/pipelines/stable_unclip.mdx
@@ -42,12 +42,9 @@ Coming soon!
 ### Text guided Image-to-Image Variation
 
 ```python
-import requests
-import torch
-from PIL import Image
-from io import BytesIO
-
 from diffusers import StableUnCLIPImg2ImgPipeline
+from diffusers.utils import load_image
+import torch
 
 pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
     "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16, variation="fp16"
@@ -55,12 +52,10 @@ pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
 pipe = pipe.to("cuda")
 
 url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/tarsila_do_amaral.png"
-
-response = requests.get(url)
-init_image = Image.open(BytesIO(response.content)).convert("RGB")
+init_image = load_image(url)
 
 images = pipe(init_image).images
-images[0].save("fantasy_landscape.png")
+images[0].save("variation_image.png")
 ```
 
 Optionally, you can also pass a prompt to `pipe` such as:
@@ -69,7 +64,50 @@ Optionally, you can also pass a prompt to `pipe` such as:
 prompt = "A fantasy landscape, trending on artstation"
 
 images = pipe(init_image, prompt=prompt).images
-images[0].save("fantasy_landscape.png")
+images[0].save("variation_image_two.png")
+```
+
+### Memory optimization
+
+If you are short on GPU memory, you can enable smart CPU offloading so that models that are not needed
+immediately for a computation can be offloaded to CPU:
+
+```python 
+from diffusers import StableUnCLIPImg2ImgPipeline
+from diffusers.utils import load_image
+import torch
+
+pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16, variant="fp16"
+)
+# Offload to CPU.
+pipe.enable_model_cpu_offload()
+
+url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/tarsila_do_amaral.png"
+init_image = load_image(url)
+
+images = pipe(init_image).images
+images[0]
+```
+
+Further memory optimizations are possible by enabling VAE slicing on the pipeline: 
+
+```python 
+from diffusers import StableUnCLIPImg2ImgPipeline
+from diffusers.utils import load_image
+import torch
+
+pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-2-1-unclip", torch_dtype=torch.float16, variant="fp16"
+)
+pipe.enable_model_cpu_offload()
+pipe.enable_vae_slicing()
+
+url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/tarsila_do_amaral.png"
+init_image = load_image(url)
+
+images = pipe(init_image).images
+images[0]
 ```
 
 ### StableUnCLIPPipeline

From 43558e34400a3d4a1b70c27971f3d543484773f4 Mon Sep 17 00:00:00 2001
From: Sayak Paul <spsayakpaul@gmail.com>
Date: Thu, 30 Mar 2023 08:14:22 +0530
Subject: [PATCH 2/3] add: entry of StableUnCLIPPipeline to the docs

---
 .../source/en/api/pipelines/stable_unclip.mdx | 41 ++++++++++++++++++-
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/docs/source/en/api/pipelines/stable_unclip.mdx b/docs/source/en/api/pipelines/stable_unclip.mdx
index 372242ae2dff..e79514ee7310 100644
--- a/docs/source/en/api/pipelines/stable_unclip.mdx
+++ b/docs/source/en/api/pipelines/stable_unclip.mdx
@@ -32,12 +32,49 @@ we do not add any additional noise to the image embeddings i.e. `noise_level = 0
 	* [stabilityai/stable-diffusion-2-1-unclip](https://hf.co/stabilityai/stable-diffusion-2-1-unclip)
 	* [stabilityai/stable-diffusion-2-1-unclip-small](https://hf.co/stabilityai/stable-diffusion-2-1-unclip-small)
 * Text-to-image 
-	* Coming soon!
+	* [stabilityai/stable-diffusion-2-1-unclip-small](https://hf.co/stabilityai/stable-diffusion-2-1-unclip-small)
 
 ### Text-to-Image Generation
 
-Coming soon!
+```python
+import torch
+from diffusers import UnCLIPScheduler, DDPMScheduler, StableUnCLIPPipeline
+from diffusers.models import PriorTransformer
+from transformers import CLIPTokenizer, CLIPTextModelWithProjection
+
+prior_model_id = "kakaobrain/karlo-v1-alpha"
+data_type = torch.float16
+prior = PriorTransformer.from_pretrained(prior_model_id, subfolder="prior", torch_dtype=data_type)
+
+prior_text_model_id = "openai/clip-vit-large-patch14"
+prior_tokenizer = CLIPTokenizer.from_pretrained(prior_text_model_id)
+prior_text_model = CLIPTextModelWithProjection.from_pretrained(prior_text_model_id, torch_dtype=data_type)
+prior_scheduler = UnCLIPScheduler.from_pretrained(prior_model_id, subfolder="prior_scheduler")
+prior_scheduler = DDPMScheduler.from_config(prior_scheduler.config)
+
+stable_unclip_model_id = "stabilityai/stable-diffusion-2-1-unclip-small"
+
+pipe = StableUnCLIPPipeline.from_pretrained(
+    stable_unclip_model_id,
+    torch_dtype=data_type,
+    variant="fp16",
+    prior_tokenizer=prior_tokenizer,
+    prior_text_encoder=prior_text_model,
+    prior=prior,
+    prior_scheduler=prior_scheduler,
+)
+
+pipe = pipe.to("cuda")
+wave_prompt = "dramatic wave, the Oceans roar, Strong wave spiral across the oceans as the waves unfurl into roaring crests; perfect wave form; perfect wave shape; dramatic wave shape; wave shape unbelievable; wave; wave shape spectacular"
+
+images = pipe(prompt=wave_prompt).images
+images[0].save("waves.png")
+```
+<Tip warning={true}>
+
+Note that the details of using [stabilityai/stable-diffusion-2-1-unclip](https://hf.co/stabilityai/stable-diffusion-2-1-unclip) is unclear to us as of now, so we don't recommend its use. 
 
+</Tip>
 
 ### Text guided Image-to-Image Variation
 

From 82413dcb44403b8c6b3aa941e603d2769c7684de Mon Sep 17 00:00:00 2001
From: Sayak Paul <spsayakpaul@gmail.com>
Date: Thu, 30 Mar 2023 14:27:50 +0530
Subject: [PATCH 3/3] Apply suggestions from code review

Co-authored-by: apolinario <joaopaulo.passos@gmail.com>
---
 docs/source/en/api/pipelines/stable_unclip.mdx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/en/api/pipelines/stable_unclip.mdx b/docs/source/en/api/pipelines/stable_unclip.mdx
index e79514ee7310..ee359d0ba486 100644
--- a/docs/source/en/api/pipelines/stable_unclip.mdx
+++ b/docs/source/en/api/pipelines/stable_unclip.mdx
@@ -35,6 +35,7 @@ we do not add any additional noise to the image embeddings i.e. `noise_level = 0
 	* [stabilityai/stable-diffusion-2-1-unclip-small](https://hf.co/stabilityai/stable-diffusion-2-1-unclip-small)
 
 ### Text-to-Image Generation
+Stable unCLIP can be leveraged for text-to-image generation by pipelining it with the prior model of KakaoBrain's open-source DALL-E 2 replication, [Karlo](https://huggingface.co/kakaobrain/karlo-v1-alpha).
 
 ```python
 import torch
@@ -72,7 +73,7 @@ images[0].save("waves.png")
 ```
 <Tip warning={true}>
 
-Note that the details of using [stabilityai/stable-diffusion-2-1-unclip](https://hf.co/stabilityai/stable-diffusion-2-1-unclip) is unclear to us as of now, so we don't recommend its use. 
+For text-to-image we use `stabilityai/stable-diffusion-2-1-unclip-small` as it was trained on CLIP ViT-L/14 embeddings, the same as the Karlo model prior. [stabilityai/stable-diffusion-2-1-unclip](https://hf.co/stabilityai/stable-diffusion-2-1-unclip) was trained on OpenCLIP ViT-H embeddings, so we don't recommend its use for text-to-image with the Karlo prior.
 
 </Tip>