In [7]:
# Cell 1: Imports
from PIL import Image
import os
from tqdm import tqdm

# Cell 2: Function Definition
def downsample_images(input_dir, output_dir, target_size=(128, 128)):
    """
    Downsamples all images in input_dir (e sue sottocartelle) a target_size
    e ricrea la struttura di directory corrispondente in output_dir.
    """
    os.makedirs(output_dir, exist_ok=True)
    
    for root, dirs, files in os.walk(input_dir):
        rel_path = os.path.relpath(root, input_dir)
        target_dir = os.path.join(output_dir, rel_path)
        os.makedirs(target_dir, exist_ok=True)
        
        for filename in tqdm(files, desc=f"Processing {rel_path}"):
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                input_path = os.path.join(root, filename)
                output_path = os.path.join(target_dir, filename)
                with Image.open(input_path) as img:
                    img_resized = img.resize(target_size, Image.LANCZOS)
                    img_resized.save(output_path)
    
    print(f"All images from '{input_dir}' have been downsampled to {target_size} and saved under '{output_dir}'.")


# Cell 3: Batch Processing per Split
target_size = (128, 128)
raw_data_root = "./raw_data"
output_root   = "./resized_data/" + f"{target_size[0]}x{target_size[1]}"
os.makedirs(output_root, exist_ok=True)
splits = ["train", "test"]

for split in splits:
    input_dir  = os.path.join(raw_data_root, split)
    output_dir = os.path.join(output_root, split)
    print(f"\n=== Downsampling split '{split}' ===")
    downsample_images(input_dir, output_dir, target_size=(128, 128),)



=== Downsampling split 'train' ===


Processing .: 0it [00:00, ?it/s]
Processing C002: 100%|██████████| 278/278 [00:03<00:00, 78.16it/s]
Processing C004: 100%|██████████| 359/359 [00:04<00:00, 76.91it/s]
Processing C012: 100%|██████████| 349/349 [00:04<00:00, 78.33it/s]
Processing C016: 100%|██████████| 317/317 [00:06<00:00, 50.99it/s]
Processing C027: 100%|██████████| 295/295 [00:07<00:00, 40.95it/s]
Processing C030: 100%|██████████| 303/303 [00:06<00:00, 44.35it/s]
Processing C050: 100%|██████████| 344/344 [00:13<00:00, 26.43it/s]
Processing C052: 100%|██████████| 340/340 [00:09<00:00, 36.13it/s]
Processing C067: 100%|██████████| 363/363 [00:12<00:00, 29.42it/s]
Processing C077: 100%|██████████| 358/358 [00:12<00:00, 28.28it/s]


All images from './raw_data\train' have been downsampled to (128, 128) and saved under './resized_data/128x128\train'.

=== Downsampling split 'test' ===


Processing .: 0it [00:00, ?it/s]
Processing C081: 100%|██████████| 327/327 [00:09<00:00, 35.01it/s]

All images from './raw_data\test' have been downsampled to (128, 128) and saved under './resized_data/128x128\test'.





In [None]:
import os
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from diffusers import UNet2DModel, DDPMScheduler, DDIMScheduler

ON_COLAB = False  # Set to True if running on Google Colab

# Configuration
target_size_for_training = (128, 128)
data_root = f"./resized_data/{target_size_for_training[0]}x{target_size_for_training[1]}"  # root directory containing 'train' and 'test' subfolders
if ON_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    data_root = "/content/drive/MyDrive/Colab Notebooks/128x128_images"  # Adjust this path as needed

train_dir = os.path.join(data_root, "train")
test_dir = os.path.join(data_root, "test")
model_save_dir = "checkpoints"
os.makedirs(model_save_dir, exist_ok=True)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 16
NUM_EPOCHS = 5
LEARNING_RATE = 1e-4
IMAGE_SIZE = 128
NUM_TRAIN_TIMESTEPS = 1000

# 1. Data transforms and datasets
def get_transforms(image_size):
    return transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])  # scale to [-1,1]
    ])

train_dataset = datasets.ImageFolder(root=train_dir, transform=get_transforms(IMAGE_SIZE))
test_dataset  = datasets.ImageFolder(root=test_dir,  transform=get_transforms(IMAGE_SIZE))

train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=4, pin_memory=True
)
test_loader = DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=4, pin_memory=True
)

# 2. Define the UNet diffusion model
model = UNet2DModel(
    sample_size=IMAGE_SIZE,
    in_channels=3,
    out_channels=3,
    layers_per_block=2,
    block_out_channels=(64, 128, 256, 512),
    down_block_types=("DownBlock2D", "DownBlock2D", "DownBlock2D", "AttnDownBlock2D"),
    up_block_types=("AttnUpBlock2D", "UpBlock2D", "UpBlock2D", "UpBlock2D"),
).to(DEVICE)

# 3. Schedulers
noise_scheduler = DDPMScheduler(num_train_timesteps=NUM_TRAIN_TIMESTEPS)
ddim_scheduler = DDIMScheduler(
    beta_start=noise_scheduler.beta_start,
    beta_end=noise_scheduler.beta_end,
    beta_schedule=noise_scheduler.beta_schedule,
    clip_sample=False
)

# 4. Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# 5. Training + Validation Loop
for epoch in range(1, NUM_EPOCHS + 1):
    model.train()
    train_loss = 0.0
    for step, (images, _) in enumerate(train_loader, 1):
        images = images.to(DEVICE)
        batch_size = images.size(0)

        # sample random noise and timesteps
        noise = torch.randn_like(images)
        timesteps = torch.randint(0, NUM_TRAIN_TIMESTEPS, (batch_size,), device=DEVICE)

        # add noise
        noisy_images = noise_scheduler.add_noise(images, noise, timesteps)

        # predict noise
        noise_pred = model(noisy_images, timesteps).sample

        # loss
        loss = F.mse_loss(noise_pred, noise)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 50 == 0 or step == len(train_loader):
            print(f"[Epoch {epoch}/{NUM_EPOCHS} | Step {step}/{len(train_loader)}] Train Loss: {loss.item():.6f}")

    avg_train_loss = train_loss / len(train_loader)
    print(f"==> Epoch {epoch} Done. Avg Train Loss: {avg_train_loss:.6f}")

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, _ in test_loader:
            images = images.to(DEVICE)
            batch_size = images.size(0)
            noise = torch.randn_like(images)
            timesteps = torch.randint(0, NUM_TRAIN_TIMESTEPS, (batch_size,), device=DEVICE)
            noisy_images = noise_scheduler.add_noise(images, noise, timesteps)
            noise_pred = model(noisy_images, timesteps).sample
            val_loss += F.mse_loss(noise_pred, noise).item()

    avg_val_loss = val_loss / len(test_loader)
    print(f"==> Epoch {epoch} Done. Avg Validation Loss: {avg_val_loss:.6f}")

    # Save checkpoint
    ckpt_path = os.path.join(model_save_dir, f"ddim_unet_epoch{epoch}.pth")
    torch.save(model.state_dict(), ckpt_path)
    print(f"Saved checkpoint: {ckpt_path}\n")


  from .autonotebook import tqdm as notebook_tqdm
  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)
  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)
  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)


KeyboardInterrupt: 

In [None]:
# 6. Sampling example with DDIM
def sample_images(output_path="ddim_sample.png", num_steps=NUM_TRAIN_TIMESTEPS):
    model.eval()
    with torch.no_grad():
        sample = torch.randn((1, 3, IMAGE_SIZE, IMAGE_SIZE), device=DEVICE)
        for t in reversed(range(0, num_steps)):
            noise_pred = model(sample, torch.tensor([t], device=DEVICE)).sample
            sample = ddim_scheduler.step(noise_pred, t, sample).prev_sample

        # denormalize and save
        final = (sample.clamp(-1, 1) + 1) / 2
        transforms.ToPILImage()(final.squeeze(0).cpu()).save(output_path)
        print(f"Sample saved to {output_path}")

sample_images()