# Downsample images

In [3]:
# Cell 1: Imports
from PIL import Image
import os
from tqdm import tqdm

# Cell 2: Function Definition
def downsample_images(input_dir, output_dir, target_size=(128, 128)):
    """
    Downsamples all images in input_dir (e sue sottocartelle) a target_size
    e ricrea la struttura di directory corrispondente in output_dir.
    """
    os.makedirs(output_dir, exist_ok=True)
    
    for root, dirs, files in os.walk(input_dir):
        rel_path = os.path.relpath(root, input_dir)
        target_dir = os.path.join(output_dir, rel_path)
        os.makedirs(target_dir, exist_ok=True)
        
        for filename in tqdm(files, desc=f"Processing {rel_path}"):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                input_path = os.path.join(root, filename)
                output_path = os.path.join(target_dir, filename)
                with Image.open(input_path) as img:
                    img_resized = img.resize(target_size, Image.LANCZOS)
                    img_resized.save(output_path)
    
    print(f"All images from '{input_dir}' have been downsampled to {target_size} and saved under '{output_dir}'.")


# Cell 3: Batch Processing per Split
target_size = (128, 128)
raw_data_root = "./raw_data"
output_root   = "./resized_data/" + f"{target_size[0]}x{target_size[1]}"
os.makedirs(output_root, exist_ok=True)
splits = ["train", "test"]

for split in splits:
    input_dir  = os.path.join(raw_data_root, split)
    output_dir = os.path.join(output_root, split)
    print(f"\n=== Downsampling split '{split}' ===")
    downsample_images(input_dir, output_dir, target_size=(128, 128),)



=== Downsampling split 'train' ===


Processing .: 0it [00:00, ?it/s]
Processing C002: 100%|██████████| 278/278 [00:03<00:00, 81.27it/s]
Processing C004: 100%|██████████| 359/359 [00:04<00:00, 85.60it/s]
Processing C012:   4%|▍         | 14/349 [00:00<00:03, 88.33it/s]


KeyboardInterrupt: 

# Model and Training

## Load the model

In [1]:
import os
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from diffusers import UNet2DModel, DDPMScheduler, DDIMScheduler

ON_COLAB = False  # Set to True if running on Google Colab

# Configuration
target_size_for_training = (128, 128)
data_root = f"./resized_data/{target_size_for_training[0]}x{target_size_for_training[1]}"  # root directory containing 'train' and 'test' subfolders
if ON_COLAB:
    from google.colab import drive

    drive.mount("/content/drive")
    data_root = "/content/drive/MyDrive/Colab Notebooks/128x128_images"  # Adjust this path as needed

train_dir = os.path.join(data_root, "train")
test_dir = os.path.join(data_root, "test")
model_save_dir = "checkpoints"
os.makedirs(model_save_dir, exist_ok=True)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LEARNING_RATE = 1e-4
IMAGE_SIZE = target_size_for_training[0]


# 2. Define the UNet diffusion model
from model_enrico import get_unet_model

model = get_unet_model(
    sample_size=IMAGE_SIZE,
    in_channels=1,
    out_channels=1,
    layers_per_block=2,
    block_out_channels=(64, 128, 256, 512),
    down_block_types=("DownBlock2D", "DownBlock2D", "DownBlock2D", "AttnDownBlock2D"),
    up_block_types=("AttnUpBlock2D", "UpBlock2D", "UpBlock2D", "UpBlock2D"),
).to(DEVICE)

# 4. Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)


# 5. Load the model checkpoint if available
def load_checkpoint(ckpt_path, model, optimizer=None, device=torch.device("cpu")):
    loaded = False
    if not os.path.exists(ckpt_path):
        print(f"No checkpoint found at {ckpt_path}, starting fresh.")
        return loaded, model, optimizer, 0

    checkpoint = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(checkpoint["model"])
    model.to(device)
    loaded = True
    start_epoch = checkpoint.get("epoch", 0)
    if optimizer is not None and "optimizer" in checkpoint:
        optimizer.load_state_dict(checkpoint["optimizer"])
    print(f"Loaded checkpoint '{ckpt_path}' (epoch {start_epoch})")
    return loaded, model, optimizer, start_epoch


# Load the model checkpoint
ckpt = "checkpoints/ddim_unet_epoch5.pth"
isModelLoadedFromCheckpoint, model, optimizer, start_epoch = load_checkpoint(
    ckpt, model, optimizer, device=DEVICE
)
model.eval()
if isModelLoadedFromCheckpoint:
    print(f"Model {ckpt.split('/')[-1]} loaded and moved to {DEVICE}.")
else:
    print(f"Model {ckpt.split('/')[-1]} not found. Starting from scratch, loaded on {DEVICE}.")

No checkpoint found at checkpoints/ddim_unet_epoch5.pth, starting fresh.
Model ddim_unet_epoch5.pth not found. Starting from scratch, loaded on cuda.


## Training and validation

In [None]:
BATCH_SIZE = 16
NUM_EPOCHS = 5
NUM_TRAIN_TIMESTEPS = 1000

# 1. Data transforms and datasets
def get_transforms(image_size):
    return transforms.Compose(
        [
            transforms.Grayscale(num_output_channels=1),
            transforms.Resize((image_size, image_size)),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),  # scale to [-1,1]
        ]
    )


train_dataset = datasets.ImageFolder(
    root=train_dir, transform=get_transforms(IMAGE_SIZE)
)
test_dataset = datasets.ImageFolder(root=test_dir, transform=get_transforms(IMAGE_SIZE))

train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True
)
test_loader = DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True
)

# 3. Schedulers
noise_scheduler = DDPMScheduler(num_train_timesteps=NUM_TRAIN_TIMESTEPS)
ddim_scheduler = DDIMScheduler(
    beta_start=noise_scheduler.config.beta_start,
    beta_end=noise_scheduler.config.beta_end,
    beta_schedule=noise_scheduler.config.beta_schedule,
    clip_sample=False
)

# 5. Training + Validation Loop
print(f"Training on {DEVICE} with batch size {BATCH_SIZE} for {NUM_EPOCHS} epochs.")
for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    train_loss = 0.0
    for step, (images, _) in enumerate(train_loader, 1):
        images = images.to(DEVICE)
        batch_size = images.size(0)

        # sample random noise and timesteps
        noise = torch.randn_like(images)
        timesteps = torch.randint(0, NUM_TRAIN_TIMESTEPS, (batch_size,), device=DEVICE)

        # add noise
        noisy_images = noise_scheduler.add_noise(images, noise, timesteps)

        # predict noise
        noise_pred = model(noisy_images, timesteps).sample

        # loss
        loss = F.mse_loss(noise_pred, noise)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 50 == 0 or step == len(train_loader):
            print(f"[Epoch {epoch}/{NUM_EPOCHS} | Step {step}/{len(train_loader)}] Train Loss: {loss.item():.6f}")

    avg_train_loss = train_loss / len(train_loader)
    print(f"==> Epoch {epoch} Done. Avg Train Loss: {avg_train_loss:.6f}")

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, _ in test_loader:
            images = images.to(DEVICE)
            batch_size = images.size(0)
            noise = torch.randn_like(images)
            timesteps = torch.randint(0, NUM_TRAIN_TIMESTEPS, (batch_size,), device=DEVICE)
            noisy_images = noise_scheduler.add_noise(images, noise, timesteps)
            noise_pred = model(noisy_images, timesteps).sample
            val_loss += F.mse_loss(noise_pred, noise).item()

    avg_val_loss = val_loss / len(test_loader)
    print(f"==> Epoch {epoch} Done. Avg Validation Loss: {avg_val_loss:.6f}")

    # Save checkpoint
    ckpt_path = os.path.join(model_save_dir, f"ddim_unet_epoch{epoch}.pth")
    torch.save({
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch,
    }, ckpt_path)

    print(f"Saved checkpoint: {ckpt_path}\n")


Training on cuda with batch size 16 for 5 epochs.


Consider using tensor.detach() first. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\native\Scalar.cpp:23.)
  train_loss += loss.item()


[Epoch 1/5 | Step 50/207] Train Loss: 0.042086


In [4]:

from diffusers import DDPMScheduler, DDIMScheduler

# Imposta il numero di step di inferenza per DDIM (uguale a NUM_TRAIN_TIMESTEPS di default)
# 6. Sampling example with DDIM
noise_scheduler = DDPMScheduler(num_train_timesteps=NUM_TRAIN_TIMESTEPS)

ddim_scheduler = DDIMScheduler(
    beta_start=noise_scheduler.beta_start,
    beta_end=noise_scheduler.beta_end,
    beta_schedule=noise_scheduler.beta_schedule,
    clip_sample=False
)

ddim_scheduler.set_timesteps(NUM_TRAIN_TIMESTEPS)
def sample_images(output_path="result/ddim_sample.png", num_steps=NUM_TRAIN_TIMESTEPS):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    model.eval()
    with torch.no_grad():
        sample = torch.randn((1, 3, IMAGE_SIZE, IMAGE_SIZE), device=DEVICE)
        for t in reversed(range(0, num_steps)):
            noise_pred = model(sample, torch.tensor([t], device=DEVICE)).sample
            sample = ddim_scheduler.step(noise_pred, t, sample).prev_sample

        # denormalize and save
        final = (sample.clamp(-1, 1) + 1) / 2
        transforms.ToPILImage()(final.squeeze(0).cpu()).save(output_path)
        print(f"Sample saved to {output_path}")

sample_images()

NameError: name 'NUM_TRAIN_TIMESTEPS' is not defined