In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from diffusers import DiffusionPipeline
import torch

# Pick the best available accelerator instead of assuming Apple Silicon:
# CUDA first, then Metal (MPS), and finally plain CPU as a last resort.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Half precision is kept on accelerators (same as before); the CPU fallback
# computes in float32 while still loading the fp16 weight files.
torch_dtype = torch.float16 if device != "cpu" else torch.float32

# Load the SDXL base checkpoint (fp16 safetensors variant) and move every
# pipeline component to the selected device.
pipeline = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch_dtype,
).to(device)

## Directly using the pipeline

In [None]:
# Baseline run: plain-text prompts are handed straight to the pipeline,
# without going through Compel.
generation_args = dict(
    prompt="A cat playing with a ball in the forest",
    negative_prompt="deformed, ugly",
    num_inference_steps=10,
    width=512,
    height=512,
)
image = pipeline(**generation_args).images[0]
image

## Using Compel, no weighting

In [None]:
from compel import CompelForSDXL

# Wrap the SDXL pipeline so prompts can be turned into embeddings by Compel.
compel = CompelForSDXL(pipeline)

# Encode the positive and negative prompts separately (no weighting syntax here).
conditioning = compel("A cat playing with a ball in the forest")
negative_conditioning = compel("deformed, ugly")
# Alternative: encode both prompts in a single batched call,
#   conditioning = compel(["A cat playing with a ball in the forest", "deformed, ugly"])
# then take conditioning.embeds[0:1] as the positive and conditioning.embeds[1:2] as the negative.

# Feed the precomputed embeddings (regular + pooled) to the pipeline instead of raw text.
embedding_args = dict(
    prompt_embeds=conditioning.embeds,
    pooled_prompt_embeds=conditioning.pooled_embeds,
    negative_prompt_embeds=negative_conditioning.embeds,
    negative_pooled_prompt_embeds=negative_conditioning.pooled_embeds,
)
image = pipeline(**embedding_args, num_inference_steps=10, width=512, height=512).images[0]
image

## Using Compel, with weighting

In [None]:
from compel import CompelForSDXL

# A single Compel wrapper is enough: it is reused for both the positive and
# the negative prompt (the original cell rebuilt it needlessly in between).
compel = CompelForSDXL(pipeline)

# Same prompts as the unweighted example, except that "ball" carries Compel's
# `++` suffix to increase its weight.
conditioning = compel("A cat playing with a ball++ in the forest")
negative_conditioning = compel("deformed, ugly")
# you could also use batched input:
# conditioning = compel(["A cat playing with a ball++ in the forest", "deformed, ugly"])
# and then use conditioning.embeds[0:1] for positive and conditioning.embeds[1:2] for negative

# Generate from the precomputed (regular + pooled) embeddings.
image = pipeline(
    prompt_embeds=conditioning.embeds,
    pooled_prompt_embeds=conditioning.pooled_embeds,
    negative_prompt_embeds=negative_conditioning.embeds,
    negative_pooled_prompt_embeds=negative_conditioning.pooled_embeds,
    num_inference_steps=10,
    width=512,
    height=512,
).images[0]
image

## Long prompts

In [None]:
from compel import CompelForSDXL
# Build a Compel wrapper around the SDXL pipeline for the long-prompt example.
compel = CompelForSDXL(pipeline)

# Short weighted positive prompt; the much longer negative prompt is defined on the next line.
prompt = "a cat playing with a ball++ in the forest"
negative_prompt = "a long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long long negative prompt"

# Batched input: Compel automatically pads the shorter main prompt to the length
# of the longer negative prompt (or vice versa).
# Otherwise you'll have to use `pad_conditioning_tensors_to_same_length` from `compel.utils`.
conditioning = compel([prompt, negative_prompt])
print(conditioning.embeds.shape, conditioning.pooled_embeds.shape)

# Row 0 of the batch belongs to the positive prompt, row 1 to the negative prompt.
positive_rows = slice(0, 1)
negative_rows = slice(1, 2)
image = pipeline(
    prompt_embeds=conditioning.embeds[positive_rows],
    pooled_prompt_embeds=conditioning.pooled_embeds[positive_rows],
    negative_prompt_embeds=conditioning.embeds[negative_rows],
    negative_pooled_prompt_embeds=conditioning.pooled_embeds[negative_rows],
    num_inference_steps=24,
    width=768,
    height=768,
).images[0]
image

## Sequential CPU offload

In [None]:
from compel import Compel, ReturnedEmbeddingsType

# Both SDXL tokenizer/text-encoder pairs are handed to one Compel instance;
# only the second encoder is asked for pooled output.
tokenizers = [pipeline.tokenizer, pipeline.tokenizer_2]
text_encoders = [pipeline.text_encoder, pipeline.text_encoder_2]

# NOTE(review): the device is hard-coded to "cuda" here, whereas the setup cell
# moves the pipeline to `device` ('mps') — confirm which device this section targets.
compel = Compel(
    tokenizer=tokenizers,
    text_encoder=text_encoders,
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
    device="cuda",
)

# Offload is enabled after Compel has been constructed and before the prompts are encoded.
pipeline.enable_sequential_cpu_offload()

prompt = "a cat playing with a ball++ in the forest"
negative_prompt = "deformed, ugly"

# With pooled output requested, the call yields (embeddings, pooled embeddings);
# batch row 0 is the positive prompt, row 1 the negative prompt.
conditioning, pooled = compel([prompt, negative_prompt])

image = pipeline(
    prompt_embeds=conditioning[0:1],
    pooled_prompt_embeds=pooled[0:1],
    negative_prompt_embeds=conditioning[1:2],
    negative_pooled_prompt_embeds=pooled[1:2],
    num_inference_steps=24,
    width=768,
    height=768,
).images[0]
image

## Different prompts for different encoders

In [None]:
from compel import CompelForSDXL

compel = CompelForSDXL(pipeline)

# Two separate prompts: one for the subject, one for the overall style.
# These names are reused by the "Full manual control" cell further down.
main_prompt = "a cat playing with a ball++ in the forest"
style_prompt = "forest ambience, high quality, detailed, intricate, artstation, 8k"

# Presumably each prompt is routed to a different SDXL text encoder (per the
# section title) — confirm against the Compel documentation.
conditioning = compel(main_prompt=main_prompt, style_prompt=style_prompt)

# No negative prompt and no explicit size here; only the step count is set.
image = pipeline(
    prompt_embeds=conditioning.embeds,
    pooled_prompt_embeds=conditioning.pooled_embeds,
    num_inference_steps=30,
).images[0]
image


## Full manual control

In [None]:
# Import what this cell uses so it does not depend on the (CUDA-specific)
# sequential-offload cell having been run first.
from compel import Compel, ReturnedEmbeddingsType

# One Compel instance per SDXL text encoder. Only the second one is asked for
# pooled output, so only it returns an (embeddings, pooled) pair.
compel1 = Compel(
    tokenizer=pipeline.tokenizer,
    text_encoder=pipeline.text_encoder,
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=False,
)

compel2 = Compel(
    tokenizer=pipeline.tokenizer_2,
    text_encoder=pipeline.text_encoder_2,
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=True,
)

# `main_prompt` and `style_prompt` come from the preceding
# "Different prompts for different encoders" cell.
conditioning1 = compel1(main_prompt)
conditioning2, pooled = compel2(style_prompt)

# Join the two encoders' embeddings along the last (feature) dimension to form
# the single prompt embedding passed to the pipeline.
conditioning = torch.cat((conditioning1, conditioning2), dim=-1)

image = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, num_inference_steps=30).images[0]
image