# Loading the data

The LSUN bedroom images used in this notebook are available on Kaggle: https://www.kaggle.com/datasets/jhoward/lsun_bedroom/data

Citation:

```bibtex
@misc{yu2016lsun,
      title={LSUN: Construction of a Large-scale Image Dataset using Deep Learning with Humans in the Loop},
      author={Fisher Yu and Ari Seff and Yinda Zhang and Shuran Song and Thomas Funkhouser and Jianxiong Xiao},
      year={2016},
      eprint={1506.03365},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}
```

In [None]:
import os
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import models
from torchvision.transforms import v2
from torchvision.datasets import ImageFolder

from tqdm import tqdm

In [2]:
# Pick the compute device once; later cells move the model and batches onto it.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [3]:
# Alternative location, currently disabled (presumably a smaller subset for quick runs - confirm).
# data_path = './data/subset'
# Directory handed to `load_dataset` below; relative to the notebook's working directory.
data_path = './data/data0/lsun/bedroom'

In [4]:
# Side length (pixels) of the square images produced by the preprocessing pipeline.
image_size = 64

# Preprocessing applied to every image before it reaches the model:
#   1. resize to image_size x image_size,
#   2. convert the PIL image to a uint8 tensor,
#   3. convert to float32 AND rescale 0..255 -> 0..1,
#   4. shift/scale with mean 0.5 and std 0.5, giving values in [-1, 1].
simple_load = v2.Compose([
    v2.Resize((image_size, image_size)),
    v2.PILToTensor(),
    # `scale=True` is required: ToDtype defaults to scale=False, which only casts
    # the dtype and would leave pixel values in 0..255, so the Normalize step
    # below would produce values up to ~509 instead of the intended [-1, 1].
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize([0.5], [0.5]),
])

# Training the models

### DDPM

The training code below is based on the Hugging Face Diffusers basic training tutorial: https://huggingface.co/docs/diffusers/en/tutorials/basic_training

In [5]:
from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel
from diffusers.utils import make_image_grid
from datasets import load_dataset

In [6]:
# Load the 'train' split from the directory configured in `data_path`.
dataset = load_dataset(path=data_path, split='train')
# Attach the preprocessing pipeline to the dataset (modifies `dataset` in place).
dataset.set_transform(transform=simple_load)

Resolving data files:   0%|          | 0/303125 [00:00<?, ?it/s]

In [7]:
# UNet used as the noise predictor. It has six blocks on the way down and six on
# the way up; exactly one block in each direction uses spatial self-attention,
# all others are regular ResNet down-/up-sampling blocks.
unet_model = UNet2DModel(
    # target image resolution
    sample_size=image_size,
    # 3 input and 3 output channels (RGB images)
    in_channels=3,
    out_channels=3,
    # number of ResNet layers per UNet block
    layers_per_block=2,
    # output channels per block: (128, 128, 256, 256, 512, 512)
    block_out_channels=(128,) * 2 + (256,) * 2 + (512,) * 2,
    # four plain blocks, then the attention block, then one more plain block
    down_block_types=("DownBlock2D",) * 4 + ("AttnDownBlock2D", "DownBlock2D"),
    # one plain block, then the attention block, then four plain blocks
    up_block_types=("UpBlock2D", "AttnUpBlock2D") + ("UpBlock2D",) * 4,
)
# Move the parameters to the selected device.
unet_model = unet_model.to(device)

In [8]:
def evaluate(epoch, pipeline, save_name: str = 'ddpm_training', random_state: int | None = None):
    # Sample some images from random noise (this is the backward diffusion process).
    # The default pipeline output type is `List[PIL.Image]`
    images = pipeline(
        batch_size=16,
        generator=torch.manual_seed(random_state),
    ).images

    # Make a grid out of the images
    image_grid = make_image_grid(images, rows=4, cols=4)

    # Save the images
    test_dir = os.path.join('saved', save_name)
    os.makedirs(test_dir, exist_ok=True)
    image_grid.save(f"{test_dir}/{epoch:04d}.png")

In [9]:
def train_unet(model, n_epochs: int, noise_scheduler, optimizer, dataset, batch_size: int = 16, start_epoch: int = 0,
               save_every: int = 1, save_name: str = 'ddpm_training', shuffle: bool = True,
               cooldown_seconds: float = 60 * 5):
    """Train ``model`` to predict the noise added to images by ``noise_scheduler``.

    For every batch, random noise and a random timestep per image are drawn,
    the images are noised with ``noise_scheduler.add_noise`` and the model is
    optimised with an MSE loss between its prediction and the true noise.

    Parameters
    ----------
    model
        Called as ``model(noisy_images, timesteps, return_dict=False)``; the
        first element of the result is used as the noise prediction. Must also
        provide ``save_pretrained``.
    n_epochs : int
        Number of epochs to run in this call.
    noise_scheduler
        Provides ``config.num_train_timesteps`` and ``add_noise``; also used to
        build the ``DDPMPipeline`` for evaluation.
    optimizer
        Optimizer over the model parameters (``step`` / ``zero_grad`` are called).
    dataset
        Dataset whose batches expose the image tensors under the key ``"image"``.
    batch_size : int
        Batch size of the data loader.
    start_epoch : int
        Number of epochs already completed; epochs are numbered from
        ``start_epoch + 1``.
    save_every : int
        Save the model and an evaluation image grid every ``save_every`` epochs.
    save_name : str
        Sub-directory of ``saved/`` used for checkpoints and image grids.
    shuffle : bool
        Whether the data loader reshuffles the dataset every epoch. Defaults to
        ``True`` so that consecutive epochs do not see identical batches in
        identical order.
    cooldown_seconds : float
        Pause between two consecutive epochs. No pause is made after the final
        epoch; ``0`` disables the pause entirely.
    """
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    last_epoch = start_epoch + n_epochs

    for i in range(start_epoch, last_epoch):
        epoch_no = i + 1
        print(f'Starting epoch {epoch_no}...')

        for batch in tqdm(data_loader):

            # move to cuda/cpu
            clean_images = batch["image"].to(device)

            # Sample noise to add to the images
            noise = torch.randn(clean_images.shape, device=device)
            bs = clean_images.shape[0]

            # Sample a random timestep for each image
            timesteps = torch.randint(
                0, noise_scheduler.config.num_train_timesteps, (bs,), device=device,
                dtype=torch.int64
            )

            # Add noise to the clean images according to the noise magnitude at each timestep
            # (this is the forward diffusion process)
            noisy_images = noise_scheduler.add_noise(clean_images, noise, timesteps)

            # Predict the noise residual
            noise_pred = model(noisy_images, timesteps, return_dict=False)[0]
            loss = F.mse_loss(noise_pred, noise)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # evaluate every nth epoch
        if epoch_no % save_every == 0:
            print(f'Evaluating after epoch {epoch_no}...')
            # save model
            save_dir = f'saved/{save_name}'
            os.makedirs(save_dir, exist_ok=True)
            model.save_pretrained(f'{save_dir}/{epoch_no:04d}_model')
            # evaluate and save images
            pipeline = DDPMPipeline(unet=model, scheduler=noise_scheduler)
            evaluate(epoch_no, pipeline, save_name, random_state=epoch_no)

        # cooldown between epochs; pointless after the last one, so skip it there
        if cooldown_seconds > 0 and epoch_no < last_epoch:
            time.sleep(cooldown_seconds)

In [10]:
# Scheduler configured with 1000 training timesteps. `train_unet` reads
# `config.num_train_timesteps` from it, calls its `add_noise`, and passes it to
# the `DDPMPipeline` used for evaluation.
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

Testing learning rate $10^{-3}$ 

In [2]:
# Train a freshly initialised network with the same architecture as `unet_model`.
# Reusing `unet_model` itself in every learning-rate experiment would make each
# later experiment continue from weights already trained by the earlier ones,
# so the runs would not be comparable.
# Note: this keeps an additional model on `device`.
unet_lr1e3 = UNet2DModel.from_config(unet_model.config).to(device)

train_unet(
    unet_lr1e3,
    n_epochs=5,
    noise_scheduler=noise_scheduler,
    optimizer=optim.Adam(unet_lr1e3.parameters(), lr=1e-3),  # same as Adam's default lr
    dataset=dataset,
    batch_size=16,
    save_every=1,
    save_name='ddpm_training_lr1e-3',
)

Testing learning rate $10^{-4}$ 

In [11]:
# Train a freshly initialised network with the same architecture as `unet_model`.
# Reusing `unet_model` itself in every learning-rate experiment would make each
# later experiment continue from weights already trained by the earlier ones,
# so the runs would not be comparable.
# Note: this keeps an additional model on `device`.
unet_lr1e4 = UNet2DModel.from_config(unet_model.config).to(device)

train_unet(
    unet_lr1e4,
    n_epochs=20,
    noise_scheduler=noise_scheduler,
    optimizer=optim.Adam(unet_lr1e4.parameters(), lr=0.0001),
    dataset=dataset,
    batch_size=16,
    save_every=1,
    save_name='ddpm_training_lr1e-4',
)

Starting epoch 1...


  0%|          | 84/18946 [00:44<2:46:44,  1.89it/s]

Testing learning rate $10^{-5}$ 

In [None]:
# Train a freshly initialised network with the same architecture as `unet_model`.
# Reusing `unet_model` itself in every learning-rate experiment would make each
# later experiment continue from weights already trained by the earlier ones,
# so the runs would not be comparable.
# Note: this keeps an additional model on `device`.
unet_lr1e5 = UNet2DModel.from_config(unet_model.config).to(device)

train_unet(
    unet_lr1e5,
    n_epochs=10,
    noise_scheduler=noise_scheduler,
    optimizer=optim.Adam(unet_lr1e5.parameters(), lr=1e-5),
    dataset=dataset,
    batch_size=16,
    save_every=1,
    save_name='ddpm_training_lr1e-5',
)

### Gaussian Diffusion

In [None]:
# from denoising_diffusion_pytorch import Unet, GaussianDiffusion, Trainer

In [None]:
# model = Unet(
#     dim = 64,
#     dim_mults = (1, 2, 4, 8),
# ).cuda()

In [None]:
# diffusion = GaussianDiffusion(
#     model,
#     image_size = 128,
#     timesteps = 1000,   # number of steps
# ).cuda()

In [None]:
# trainer = Trainer(
#     diffusion,
#     data_path,
#     train_batch_size=16,
#     train_lr=2e-5,
#     train_num_steps=1,         # total training steps
#     gradient_accumulate_every=2,    # gradient accumulation steps
#     ema_decay=0.995,                # exponential moving average decay
#     amp=True                        # turn on mixed precision
# ).cuda()

In [None]:
# trainer.train()

In [None]:
### GAN