# Denoising Diffusion Models

<p>
CAS on Advanced Machine Learning <br>
Data Science Lab, University of Bern, 2024<br>
Prepared by Dr. Mykhailo Vladymyrov.

</p>

This work is licensed under a <a href="https://creativecommons.org/licenses/by-sa/4.0/">Creative Commons Attribution-ShareAlike 4.0 International License</a>.
Based on the HuggingFace tutorials and reference manual.

# Libs installation

In [None]:
pip install transformers diffusers accelerate einops datasets

# 1. Cats generation model inspection

The aim of this tutorial is to understand the principle and components of sample generation using diffusion models

In [None]:
from diffusers import DDPMPipeline
from diffusers import DDPMScheduler, UNet2DModel
import torch
import numpy as np
import matplotlib.pyplot as plt
import einops as eo
import tqdm

In [None]:
def is_iterable(obj):
    """Return True if `obj` can be iterated over, treating strings as scalars.

    Strings (including instances of `str` subclasses) are reported as
    non-iterable, so a single label or title is handled as one value rather
    than as a sequence of characters.
    """
    # isinstance (not an exact type comparison) so str subclasses count too.
    if isinstance(obj, str):
        return False
    try:
        iter(obj)
    except Exception:
        # Any failure to obtain an iterator means "not iterable" here.
        return False
    return True

def plot_many(ys, xs=None, labels=None, xlabels=None, ylabels=None, titles=None, legend_loc='best', single_plot_sz=6):
    """Draw one row of subplots, one subplot per entry of `ys`.

    Args:
        ys: sequence with one entry per subplot. An entry is either a single
            series, a sequence of series (drawn as several lines on the same
            axes), or None to leave that subplot empty for the caller to fill.
        xs: x values. None plots against the index; otherwise a sequence with
            exactly one entry per subplot. For a multi-line subplot the entry
            is again None or a sequence with one entry per line.
        labels: legend labels. None, a single non-iterable value / string
            repeated for every subplot, or one entry per subplot. For a
            multi-line subplot the entry is broadcast over the lines the
            same way.
        xlabels, ylabels, titles: None, a single string used for every
            subplot, or a sequence with one entry per subplot.
        legend_loc: legend location passed to `Axes.legend`; a single value
            or one per subplot.
        single_plot_sz: size of one subplot, either a number (square) or a
            (width, height) pair.

    Returns:
        (fig, ax): the figure and its axes. `ax` is a one-element list when
        there is a single subplot, otherwise the value `plt.subplots` returns.
    """
    n_plots = len(ys)

    def prep_for_n_plots(var, n_plots):
        # Broadcast `var` so there is exactly one entry per plot / line.
        if var is None:
            return [None] * n_plots
        elif is_iterable(var):
            assert len(var) == n_plots, f'len({var}) != {n_plots}'
            return var
        else:
            return [var] * n_plots

    xs = prep_for_n_plots(xs, n_plots)
    labels = prep_for_n_plots(labels, n_plots)
    xlabels = prep_for_n_plots(xlabels, n_plots)
    ylabels = prep_for_n_plots(ylabels, n_plots)
    titles = prep_for_n_plots(titles, n_plots)
    legend_loc = prep_for_n_plots(legend_loc, n_plots)

    if not is_iterable(single_plot_sz):
        single_plot_sz = [single_plot_sz, single_plot_sz]

    # Subplots sit side by side, so only the width scales with their number.
    figsize = [single_plot_sz[0] * n_plots, single_plot_sz[1]]

    fig, ax = plt.subplots(nrows=1, ncols=n_plots, figsize=figsize)

    # With a single column plt.subplots returns a bare Axes; wrap it so the
    # loop below (and the caller) can always index/iterate `ax`.
    if n_plots == 1:
        ax = [ax]

    for axi, x, y, label, xlabel, ylabel, title, loc in zip(ax, xs, ys, labels, xlabels, ylabels, titles, legend_loc):
        if y is None:
            # placeholder for empty plot - to be filled by the caller
            continue

        # An empty series has no first element to inspect; treat it as a
        # single (empty) line instead of failing on y[0].
        if len(y) > 0 and is_iterable(y[0]):
            # several lines on the same axes
            n = len(y)
            x = prep_for_n_plots(x, n)
            label = prep_for_n_plots(label, n)

            for xi, yi, labeli in zip(x, y, label):
                if xi is None:
                    axi.plot(yi, label=labeli)
                else:
                    axi.plot(xi, yi, label=labeli)
            has_labels = any(labeli is not None for labeli in label)
        else:
            # a single line
            if x is None:
                axi.plot(y, label=label)
            else:
                axi.plot(x, y, label=label)
            has_labels = label is not None

        # Show a legend whenever at least one line is labelled (single-line
        # plots included), and skip it when there is nothing to show.
        if has_labels:
            axi.legend(loc=loc)

        axi.set_xlabel(xlabel)
        axi.set_ylabel(ylabel)
        axi.set_title(title)

    return fig, ax

def get_device():
    """Return the torch device to compute on: 'cuda:0' if CUDA is available, else 'cpu'."""
    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

Create a generation pipeline based on a pretrained model on the cat images dataset.

In [None]:
# Select the compute device (first CUDA GPU if available, otherwise CPU).
device = get_device()

# Load the pretrained "google/ddpm-cat-256" pipeline and move it to that device.
ddpm = DDPMPipeline.from_pretrained("google/ddpm-cat-256", use_safetensors=True).to(device)

The pipeline contains 2 parts: the denoising model and the scheduler.
The scheduler takes care of the noise values - beta(t) - and the corresponding updates to the sample.

See the docstring for more info.

A call to the pipeline generates a sample and returns an `ImagePipelineOutput` - a dictionary-like object.

In [None]:
# Run the pipeline with only 5 inference steps; as the last expression of the
# cell, the returned output object is what the notebook displays.
ddpm(num_inference_steps=5)

In [None]:
# Run the pipeline again with 5 inference steps and keep the first image of the output.
image = ddpm(num_inference_steps=5).images[0]
# Display the image as the cell output.
image

5 steps are not enough to generate a meaningful sample. Try more.

# 2. Components of denoising pipeline

In [None]:
# Load the two pipeline components separately from the same
# "google/ddpm-cat-256" checkpoint: the noise scheduler ...
scheduler = DDPMScheduler.from_pretrained("google/ddpm-cat-256")
# ... and the UNet denoising model, which is moved to the compute device.
model = UNet2DModel.from_pretrained("google/ddpm-cat-256", use_safetensors=True).to(device)

In [None]:
scheduler.set_timesteps(20)  # number of diffusion (denoising) steps to run at sampling time

In [None]:
# Display the scheduler object as the cell output.
scheduler


In [None]:
# Display the timesteps the scheduler will iterate over (set by set_timesteps).
scheduler.timesteps

Beta is not constant over the denoising steps:

In [None]:
# Plot the noise schedule, one panel per quantity.
# The last panel is beta / sigma, where sigma = sqrt(1 - alphas_cumprod) is the
# standard deviation of the noise in a sample at each timestep (alphas_cumprod
# is already the cumulative product, so it must not be squared).
plot_many(ys=[
            scheduler.alphas.cpu().numpy(),
            scheduler.alphas_cumprod.cpu().numpy(),
            scheduler.betas.cpu().numpy(),
            scheduler.timesteps.cpu().numpy(),
            scheduler.betas.cpu().numpy() / np.sqrt(1 - scheduler.alphas_cumprod.cpu().numpy())
                ],
          titles=['alphas', 'alphas_cumprod', 'betas', 'timesteps', 'beta/sigma'],
          single_plot_sz=5);

In [None]:
# Image side length taken from the model configuration; used for both spatial dimensions below.
sample_size = model.config.sample_size
# Starting point for generation: a batch of one 3-channel image of standard-normal
# noise, moved to the compute device.
noise = torch.randn((1, 3, sample_size, sample_size)).to(device)

In [None]:
# Histogram (1000 bins) of all values of the noise tensor, in a wide, short figure.
plt.figure(figsize=(5, 1))
plt.hist(noise.cpu().numpy().flatten(), 1000);

In [None]:
def to_np_showable(pt_img):
    """Convert the first image of a torch batch into a displayable NumPy array.

    The first batch element is rearranged from channels-first to
    channels-last, then divided by 3, shifted by 0.5 and clipped to [0, 1].
    """
    first_image = pt_img.detach().cpu().numpy()[0]
    channels_last = eo.rearrange(first_image, 'c w h -> w h c')
    rescaled = channels_last / 3 + .5
    return rescaled.clip(0., 1.)

Here we will visualize how the sample and the predicted noise evolve over the course of denoising:

In [None]:
# `import tqdm` alone does not load the `tqdm.notebook` submodule; import it
# explicitly so the progress bar does not depend on another library having
# imported it first.
import tqdm.notebook

# Start the reverse (denoising) process from the pure-noise sample.
x = noise

for t in tqdm.notebook.tqdm(scheduler.timesteps):
    with torch.no_grad():
        mod_out = model(x, t)
        noisy_residual = mod_out.sample  # model predicts noise step

        # scheduler step outputs a dictionary-like object with 2 things:
        # 1-step sample update
        # and extrapolation to fully denoised sample
        ddpm_sched_out_dict = scheduler.step(noisy_residual, t, x)
        previous_noisy_sample = ddpm_sched_out_dict.prev_sample
        pred_orig_sample = ddpm_sched_out_dict.pred_original_sample

        # Convert all four tensors to displayable images.
        x_np = to_np_showable(x)
        nr_np = to_np_showable(noisy_residual)
        pns_np = to_np_showable(previous_noisy_sample)
        pos_np = to_np_showable(pred_orig_sample)

        # One row per timestep: input, predicted noise, updated sample, final estimate.
        fig, ax = plt.subplots(1, 4, figsize=(16, 4))
        for axi, im, ttl in zip(ax,
                                [x_np, nr_np, pns_np, pos_np],
                                ['input image', 'noise step', 'updated image', 'expected final denoised']
                                ):
            axi.imshow(im)
            axi.set_title(ttl)
        # .item() yields a plain Python number regardless of the tensor's device.
        plt.suptitle(f't={t.item()}')
        plt.show()

        # The updated sample becomes the input of the next step.
        x = previous_noisy_sample

# 3. Exercise:

* Increase the number of steps
* Visualize the distribution of sample pixels and noise at each step
* Try finding a more efficient scheduler