Following the hugging face quicktour at https://huggingface.co/docs/diffusers/quicktour.

Before starting, install the requirements via pip: `pip3 install -r requirements.txt`

# Load the model...

In [17]:
from diffusers import DiffusionPipeline
from pprint import pprint

pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", use_safetensor=True)
# pprint(pipeline)

Keyword arguments {'use_safetensor': True} are not expected by StableDiffusionPipeline and will be ignored.
Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00, 12.93it/s]


# Setup for CPU

In [18]:
pipe = pipeline.to("cpu")

# Generate from prompt

In [4]:
prompt = "skinny black and white cat in ralph steadman style"
image = pipe(prompt).images[0]
image.save("generated.png")

  0%|          | 0/50 [00:00<?, ?it/s]

# Trying different schedulers

## EulerDiscreteScheduler

In [3]:
from diffusers import DiffusionPipeline, EulerDiscreteScheduler

pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", use_safetensors=True)
pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config)
pipe = pipeline.to("cpu")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

# Models

See https://huggingface.co/docs/diffusers/quicktour#models

## UNet2Model

In [12]:
from diffusers import UNet2DModel
from pprint import pprint

repo_id = "google/ddpm-cat-256"

model = UNet2DModel.from_pretrained(repo_id, use_safetensors=True)
#pprint(model.config)

In [13]:
import torch

torch.manual_seed(0)

noisy_sample = torch.randn(1, model.config.in_channels, model.config.sample_size, model.config.sample_size)
pprint(noisy_sample.shape)

torch.Size([1, 3, 256, 256])


In [14]:
with torch.no_grad():
    noisy_residual = model(sample=noisy_sample, timestep=2).sample

In [None]:
from diffusers import DDPMScheduler

scheduler = DDPMScheduler.from_pretrained(repo_id)
#pprint(scheduler)

In [17]:
less_noisy_sample = scheduler.step(model_output=noisy_residual, timestep=2, sample=noisy_sample).prev_sample
pprint(less_noisy_sample.shape)

torch.Size([1, 3, 256, 256])


In [18]:
import PIL.Image
import numpy as np

def display_sample(sample, i):
    image_processed = sample.cpu().permute(0, 2, 3, 1)
    image_processed = (image_processed + 1.0) * 127.5
    image_processed = image_processed.numpy().astype(np.uint8)

    image_pil = PIL.Image.fromarray(image_processed[0])
    display(f"Image at step {i}")
    display(image_pil)

In [19]:
model.to("cpu") # be nice to use cuda but ho hum
noisy_sample = noisy_sample.to("cpu") # ditto

In [None]:
import tqdm

sample = noisy_sample

for i, t in enumerate(tqdm.tqdm(scheduler.timesteps)):
    #1. predict noise residual
    with torch.no_grad():
        residual = model(sample, t).sample

    #2. compute less noisy image and set x_t -> x_t - 1
    sample = scheduler.step(residual, t, sample).prev_sample

    #3. optionally look at image
    if (i + 1) % 50 == 0:
        display_sample(sample, i + 1)