## Setup

In [None]:
# Use the %pip magic (rather than !pip) so the packages are installed into the
# environment of the running kernel.
%pip install diffusers transformers accelerate safetensors torchvision --upgrade
# Install the PyTorch packages from the CUDA 12.4 (cu124) wheel index.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --upgrade

In [None]:
import torch
from PIL import Image, ImageFilter

from diffusers import AutoPipelineForImage2Image, AutoPipelineForText2Image
from diffusers import AutoPipelineForInpainting, ControlNetModel
from diffusers import DPMSolverMultistepScheduler, EulerDiscreteScheduler, UniPCMultistepScheduler
from diffusers import StableDiffusionPipeline as SDPipeline
from diffusers import StableDiffusionControlNetPipeline as SDCNPipeline
from diffusers import StableDiffusionImageVariationPipeline as SDIVPipeline

from WK06_utils import StableDiffusionImageVariationProcessor as SDIVProcessor

## [Stable Diffusion](https://huggingface.co/stabilityai/stable-diffusion-2)

Open-source image generation architecture.

In [None]:
# Load the Stable Diffusion v1.5 text-to-image pipeline with float16 weights,
# without a safety checker, and move it to the GPU.
# NOTE: the "runwayml/stable-diffusion-v1-5" repository used previously has been
# taken down from the Hugging Face Hub; "stable-diffusion-v1-5/stable-diffusion-v1-5"
# is the mirror that hosts the same checkpoint.
pipe = SDPipeline.from_pretrained(
  "stable-diffusion-v1-5/stable-diffusion-v1-5",
  #"CompVis/stable-diffusion-v1-4",
  #"stabilityai/stable-diffusion-2-1",
  safety_checker=None,
  torch_dtype=torch.float16
).to("cuda")

De-noising is the process of turning random pixels into images:

<img src="./imgs/denoise.jpg" width="450px">

The pipeline scheduler determines the rate of de-noising:

<img src="./imgs/scheduler.jpg" width="450px">

In [None]:
# Replace the pipeline's scheduler with a DPM-Solver multistep scheduler built
# from the current scheduler's config. The commented lines are alternative
# schedulers to experiment with.
scheduler_config = pipe.scheduler.config
pipe.scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config)
# pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
# pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

In [None]:
# Generate from the text prompt using 32 inference steps and show the first image.
out = pipe(
  "an egg walking around on the moon",
  num_inference_steps=32,
)
display(out.images[0])

To generate multiple images and save them:

In [None]:
# Ask for four images of the same prompt in a single call (16 inference steps).
out = pipe(
  "an egg walking around on the moon",
  num_inference_steps=16,
  num_images_per_prompt=4,
)

# Show each image and save it as "<index>.png" relative to the working directory.
for idx, image in enumerate(out.images):
  display(image)
  image.save(f"{idx}.png")

We can reproduce results by passing the pipeline a seeded random number generator:

In [None]:
# Seed a torch random number generator and hand it to the pipeline so the same
# seed can be reused on later runs.
generator = torch.Generator()
generator.manual_seed(1010)

out = pipe(
  "an egg walking around on the moon",
  num_inference_steps=32,
  generator=generator,
)
display(out.images[0])

## Control Net

ControlNets work almost like add-ons/plugins for the Stable Diffusion architecture.

Expands our options for guiding the model.

#### [Scribble](https://huggingface.co/lllyasviel/sd-controlnet-scribble)

<img src="./imgs/scribble_00.jpg" width="200px">
<img src="./imgs/scribble_01.jpg" width="200px">

#### [Depth](https://huggingface.co/lllyasviel/sd-controlnet-depth)

<img src="./imgs/depth_00.jpg" width="200px">
<img src="./imgs/depth_01.jpg" width="200px">

In [None]:
# Load the ControlNet weights (scribble by default; swap in the commented id to
# use depth conditioning instead).
controlnet = ControlNetModel.from_pretrained(
  "lllyasviel/sd-controlnet-scribble",
  # "lllyasviel/sd-controlnet-depth",
  torch_dtype=torch.float16
)

# Build a Stable Diffusion v1.5 pipeline that uses the ControlNet above, with
# float16 weights and no safety checker, and move it to the GPU.
# NOTE: the "runwayml/stable-diffusion-v1-5" repository used previously has been
# taken down from the Hugging Face Hub; "stable-diffusion-v1-5/stable-diffusion-v1-5"
# is the mirror that hosts the same checkpoint.
pipe = SDCNPipeline.from_pretrained(
  "stable-diffusion-v1-5/stable-diffusion-v1-5",
  controlnet=controlnet,
  safety_checker=None,
  torch_dtype=torch.float16
).to("cuda")

In [None]:
# Give the ControlNet pipeline a DPM-Solver multistep scheduler derived from its
# current scheduler config. Alternatives are left commented out below.
scheduler_config = pipe.scheduler.config
pipe.scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config)
# pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
# pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

In [None]:
# Read the scribble drawing as an RGB image; it is passed to the pipeline as the
# conditioning image alongside the text prompt.
scribble = Image.open("./imgs/scribble_00.jpg")
im = scribble.convert("RGB")

out = pipe(prompt="person", image=im, num_inference_steps=20)
display(out.images[0])

## [SD Turbo](https://huggingface.co/stabilityai/sd-turbo)

Distilled version of Stable-Diffusion.

https://www.reddit.com/r/StableDiffusion/comments/1e6cq09/physical_interfaces_realtime_img2img_diffusion/

In [None]:
# Load the SD Turbo checkpoint (fp16 variant) as a text-to-image pipeline.
pipe = AutoPipelineForText2Image.from_pretrained(
  "stabilityai/sd-turbo",
  # "stabilityai/sdxl-turbo",
  torch_dtype=torch.float16,
  variant="fp16",
)
# Move the pipeline to the GPU.
pipe = pipe.to("cuda")

In [None]:
# Generate four images in a single inference step with guidance disabled
# (guidance_scale=0.0).
prompt = "A cinematic shot of a baby racoon wearing an intricate italian priest robe."
out = pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0, num_images_per_prompt=4)

# Show every generated image; previously only the first of the four was
# displayed and the other three were silently discarded.
display(*out["images"])

In [None]:
# Load the SD Turbo checkpoint (fp16 variant) as an image-to-image pipeline.
pipe = AutoPipelineForImage2Image.from_pretrained(
  "stabilityai/sd-turbo",
  # "stabilityai/sdxl-turbo",
  torch_dtype=torch.float16,
  variant="fp16",
)
# Move the pipeline to the GPU.
pipe = pipe.to("cuda")

In [None]:
# Use the scribble (as RGB) as the starting image for image-to-image generation.
scribble = Image.open("./imgs/scribble_01.jpg")
im = scribble.convert("RGB")

prompt = "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k"
out = pipe(
  prompt,
  image=im,
  num_inference_steps=2,
  strength=0.5,
  guidance_scale=0.0,
)
display(out.images[0])

## [Inpainting](https://huggingface.co/docs/diffusers/en/using-diffusers/inpaint)

Only generate parts of an image.

In [None]:
# Load an inpainting pipeline (fp16 variant). The commented ids are alternative
# inpainting checkpoints.
pipe = AutoPipelineForInpainting.from_pretrained(
  #"runwayml/stable-diffusion-inpainting",
  #"stable-diffusion-v1-5/stable-diffusion-inpainting",
  "stabilityai/stable-diffusion-2-inpainting",
  torch_dtype=torch.float16,
  variant="fp16",
)
# Move the pipeline to the GPU.
pipe = pipe.to("cuda")

In [None]:
# Source image to be partially regenerated.
im = Image.open("./imgs/landscape.jpg")

# Mask image, softened with a Gaussian blur.
# NOTE(review): the (16, 0) radius is presumably (x, y), i.e. horizontal-only
# blur — confirm this is intended.
mask_blur = ImageFilter.GaussianBlur((16,0))
mask = Image.open("./imgs/landscape_mask.jpg").filter(mask_blur)

# Remember the source image size so it can be reused when running the pipeline.
iw, ih = im.size

display(im, mask)

In [None]:
# Inpaint using the image and mask, requesting an output of the same width and
# height as the source image.
prompt = "bob ross oil painting of nature landscape with trees, detailed brushstrokes, mountains, lake, rocks"
output = pipe(
  prompt=prompt,
  image=im,
  mask_image=mask,
  num_inference_steps=24,
  width=iw,
  height=ih,
)
display(output["images"][0])

## [Image Variation](https://huggingface.co/lambdalabs/sd-image-variations-diffusers)

Get variations of an image.

In [None]:
# Load the image-variation pipeline at revision "v2.0" without a safety checker.
pipe = SDIVPipeline.from_pretrained(
  "lambdalabs/sd-image-variations-diffusers",
  revision="v2.0",
  safety_checker=None,
)
# Move the pipeline to the GPU.
pipe = pipe.to("cuda")

In [None]:
# Use a DPM-Solver multistep scheduler for the image-variation pipeline, built
# from its current scheduler config. Alternatives are left commented out below.
scheduler_config = pipe.scheduler.config
pipe.scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config)
# pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
# pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

In [None]:
# Open the reference image and run it through the processor imported from
# WK06_utils, then move the result to the GPU and add a leading dimension.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because the following cell reads it.
im = Image.open("./imgs/scribble_01.jpg")
processed = SDIVProcessor(im)
input = processed.to("cuda").unsqueeze(0)

In [None]:
# Generate a variation of the prepared input and show the first result.
out = pipe(
  input,
  guidance_scale=3,
  num_inference_steps=32,
)
display(out.images[0])