Notebook for running Stable Diffusion image generation.

Models are loaded from:
* https://huggingface.co/madebyollin/sdxl-vae-fp16-fix
* https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
* https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0

In [None]:
import torch
from diffusers import DiffusionPipeline, AutoencoderKL

from dreambooth.src import utils

In [None]:
# Use the GPU when torch can see one; otherwise fall back to the CPU so the
# notebook does not fail outright on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# fp16 cannot be run on CPU, so half precision is only selected on the GPU.
dtype = torch.float16 if device == "cuda" else torch.float32

## Generate images with the stable diffusion xl base model

In [None]:
# Load the "sdxl-vae-fp16-fix" autoencoder in the dtype selected earlier in the
# notebook; it replaces the VAE bundled with the base checkpoint below.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=dtype)

# Build the SDXL base pipeline from the fp16 safetensors weights, plugging in
# the VAE loaded above.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=dtype,
    use_safetensors=True,
    variant="fp16",
    vae=vae,
)
# Move the pipeline onto the target device (last expression, so the notebook
# also echoes the pipeline object).
pipe.to(device)

In [None]:
# Text description of the image to generate.
prompt = "A woman wearing cowboy boots is running on the ocean."

# Run the base pipeline for 25 inference steps, requesting a single image.
sd_output = pipe(prompt=prompt, num_images_per_prompt=1, num_inference_steps=25)

In [None]:
# Hand the base pipeline's generated images to the project's `show_images`
# helper (presumably renders them inline -- see dreambooth.src.utils).
utils.show_images(sd_output.images)

## Add a refiner to improve image generation quality

In [None]:
# Build the SDXL refiner pipeline from its fp16 safetensors weights, reusing
# the `vae` and `dtype` defined in earlier cells.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    torch_dtype=dtype,
    use_safetensors=True,
    variant="fp16",
    vae=vae,
)
# Move the refiner onto the target device (last expression, so the notebook
# also echoes the pipeline object).
refiner.to(device)

In [None]:
# Shared schedule for the base -> refiner hand-off. Both pipelines must use the
# same step count and split point, so they are defined once here instead of
# being repeated as literals in each call.
n_steps = 40  # total number of inference steps in the schedule
high_noise_frac = 0.7  # fraction of the schedule handled by the base model

# Base pass: denoise only the first `high_noise_frac` of the schedule and
# return latents rather than decoded images, so the refiner can continue.
sd_output_latent = pipe(
    prompt=prompt,
    num_inference_steps=n_steps,
    num_images_per_prompt=1,
    denoising_end=high_noise_frac,
    output_type="latent",
)

# Refiner pass: resume from the same point in the schedule, starting from the
# latents produced by the base pipeline.
refiner_output = refiner(
    prompt=prompt,
    num_inference_steps=n_steps,
    denoising_start=high_noise_frac,
    image=sd_output_latent.images,
)

In [None]:
# Last expression of the cell: evaluates to the first refined image so the
# notebook displays it as the cell output.
refiner_output.images[0]