In [10]:
import torch
from torch import nn
from torch.utils.data import Dataset
import os
from PIL import Image
from torchvision import transforms

In [11]:
class Generator(nn.Module):
    """DCGAN-style generator built from transposed convolutions.

    A latent vector is treated as a 1x1 feature map, projected to 4x4 and then
    doubled in spatial size four times, producing a ``channels x 64 x 64``
    output squashed into [-1, 1] by ``Tanh``.

    Args:
        latent_dim: Number of channels of the latent input.
        ngf: Base feature-map width; the hidden stages use ``ngf * 8``,
            ``ngf * 4``, ``ngf * 2`` and ``ngf`` channels in that order.
        channels: Number of channels of the generated image.
    """

    def __init__(self, latent_dim: int = 100, ngf: int = 64, channels: int = 3):
        super().__init__()
        # Channel width after each hidden stage, widest first.
        widths = [ngf * 8, ngf * 4, ngf * 2, ngf]

        # Stem: (N, latent_dim, 1, 1) -> (N, ngf*8, 4, 4)
        layers = [
            nn.ConvTranspose2d(latent_dim, widths[0], kernel_size=4, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(widths[0]),
            nn.ReLU(True),
        ]

        # Three upsampling stages: 4x4 -> 8x8 -> 16x16 -> 32x32
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers += [
                nn.ConvTranspose2d(in_ch, out_ch, 4, 2, 1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(True),
            ]

        # Head: 32x32 -> 64x64, no normalisation, Tanh keeps values in [-1, 1]
        layers += [
            nn.ConvTranspose2d(widths[-1], channels, 4, 2, 1, bias=False),
            nn.Tanh(),
        ]

        self.net = nn.Sequential(*layers)

    def forward(self, z):
        """Generate images from latent codes.

        Args:
            z: Tensor whose first two dimensions are ``(N, latent_dim)``; it is
                viewed as ``(N, latent_dim, 1, 1)`` before entering the network.

        Returns:
            Tensor of shape ``(N, channels, 64, 64)``.
        """
        batch, dim = z.size(0), z.size(1)
        return self.net(z.view(batch, dim, 1, 1))

In [12]:
class Discriminator(nn.Module):
    """DCGAN-style discriminator built from strided convolutions.

    Four stride-2 convolutions halve the spatial size each time (64 -> 4 for a
    64x64 input); a final 4x4 convolution collapses the map to a single value
    per image, which ``Sigmoid`` turns into a score in [0, 1].

    Args:
        ndf: Base feature-map width; stages use ``ndf``, ``ndf * 2``,
            ``ndf * 4`` and ``ndf * 8`` channels in that order.
        channels: Number of channels of the input image.
    """

    def __init__(self, ndf: int = 64, channels: int = 3):
        super().__init__()

        # First stage has no BatchNorm: channels x 64 x 64 -> ndf x 32 x 32
        stages = [
            nn.Conv2d(channels, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Three conv/BN/LeakyReLU stages, doubling the width each time.
        width = ndf
        for _ in range(3):
            stages.extend([
                nn.Conv2d(width, width * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(width * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ])
            width *= 2

        # Head: (ndf*8) x 4 x 4 -> 1 x 1 x 1, then squash to a probability.
        stages.extend([
            nn.Conv2d(width, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ])

        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Score a batch of images.

        Args:
            x: Image batch; a ``(N, channels, 64, 64)`` input yields one score
                per image.

        Returns:
            1-D tensor of sigmoid scores, shape ``(N,)`` for 64x64 inputs.
        """
        # The network output is (N, 1, 1, 1) for 64x64 inputs; flatten it to 1-D.
        return self.net(x).view(-1)

In [13]:
# Define the Custom Dataset
class ImageDataset(Dataset):
    """Unlabelled dataset of the image files found directly inside a directory.

    Files are selected by extension (``.png``, ``.jpg``, ``.jpeg``,
    case-insensitive) and ordered by their sorted path.

    Args:
        root_dir: Directory to scan (non-recursive).
        transform: Optional callable applied to each loaded RGB image.
        image_num: Maximum number of images to keep, counted from the start of
            the sorted list. ``None`` or any negative value (the default is
            ``-1``) means "no limit".
    """

    def __init__(self, root_dir, transform=None, image_num=-1):
        image_paths = sorted(
            os.path.join(root_dir, fname)
            for fname in os.listdir(root_dir)
            if fname.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        # Only truncate for a real, non-negative limit. Slicing with the
        # default -1 (`[:-1]`) would silently drop the last image.
        if image_num is not None and image_num >= 0:
            image_paths = image_paths[:image_num]
        self.image_paths = image_paths
        self.transform = transform  # Transformation to apply to images

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(image, 0)``.

        The second element is a dummy label, since the data has no real labels.
        """
        # `convert` returns a fully loaded copy, so the file handle can be
        # released right away instead of waiting for garbage collection.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert('RGB')
        # Compare against None: a callable with a zero length would be falsy.
        if self.transform is not None:
            image = self.transform(image)
        # Since no actual labels, return 0 as dummy labels
        return image, 0

In [14]:
# Transformation pipeline for data augmentation and normalization
transform = transforms.Compose([
    # Bring every image to the 64x64 resolution the networks are built for.
    transforms.Resize(size=(64, 64)),
    # Horizontal flip with 40% probability. The probability is set on the flip
    # itself: wrapping a default RandomHorizontalFlip (p=0.5) in
    # RandomApply(p=0.4) would flip only 0.4 * 0.5 = 20% of the images.
    transforms.RandomHorizontalFlip(p=0.4),
    transforms.ToTensor(),  # Converts image to a PyTorch tensor.
    # (x - 0.5) / 0.5 per channel, i.e. [0, 1] -> [-1, 1], which matches the
    # Tanh output range of the generator.
    transforms.Normalize(mean=(0.5, 0.5, 0.5),
                         std=(0.5, 0.5, 0.5)),
])

In [15]:
# Build the training set from the image files in the data directory, applying
# the `transform` pipeline to each image. `image_num` is left at its default.
# NOTE(review): the absolute path is machine-specific; consider moving it to a
# configurable constant so the notebook runs elsewhere.
dataset = ImageDataset(root_dir="/datasets/delkon/dm_data", transform=transform)

In [16]:
# Instantiate both networks with their constructor defaults.
# NOTE(review): no random seed is set in the visible cells, so the initial
# weights differ between runs.
generator = Generator()
discriminator = Discriminator()

In [17]:
from d2lightrainer.UnsupervisedLearning.DCGAN.trainer_config import DCGANTrainerConfig
from d2lightrainer.UnsupervisedLearning.DCGAN.trainer import DCGANTrainer

In [18]:
# Start from the trainer's default configuration and override selected fields.
dcgan_cfg = DCGANTrainerConfig()
new_param_dict = dict(
    device=3,  # the recorded run logs "Using GPU: 3" for this value
    save_dir="runs_test",
    batch_size=16,
    # NOTE(review): presumably a reference batch size used by the trainer for
    # scaling/accumulation - confirm against DCGANTrainerConfig.
    nominal_batch_size=64,
)
dcgan_cfg.update(**new_param_dict)

In [19]:
# Hand the two networks (generator first, discriminator second), the dataset
# and the configuration to the trainer.
# NOTE(review): the expected order of the model list is assumed from this call;
# confirm against DCGANTrainer.
dcgan_trainer = DCGANTrainer([generator, discriminator], dataset, dcgan_cfg)
# Start training. The recorded output of this cell ends in a KeyboardInterrupt,
# i.e. the saved run was stopped by hand rather than finishing.
dcgan_trainer.train()

2025-09-08 23:54:22,818 - INFO - Using GPU: 3
2025-09-08 23:54:22,823 - INFO - 'generator optimizer:' Adam(lr=0.0003, momentum=0.5) with parameter groups 4 weight(decay=0.0), 0 weight(decay=0.0), 5 weight(decay=0.0), 4 bias(decay=0.0)
2025-09-08 23:54:22,825 - INFO - 'discriminator optimizer:' Adam(lr=0.0003, momentum=0.5) with parameter groups 3 weight(decay=0.0), 0 weight(decay=1.0000000000000002e-06), 5 weight(decay=0.0001), 3 bias(decay=0.0)
2025-09-08 23:54:22,939 - INFO - --------------------

0/200: 100%|██████████| 269/269 [00:07<00:00, 33.99it/s]
2025-09-08 23:54:30,976 - INFO - Generator: all types `lr` of epoch 0: {'lr/param_group0': np.float64(0.0005402230483271375), 'lr/param_group1': np.float64(5.977695167286245e-05), 'lr/param_group2': np.float64(5.977695167286245e-05), 'lr/param_group3': np.float64(5.977695167286245e-05)}
- lr/param_group0: regular weights (full weight decay applied)
- lr/param_group1: batchnorm and logit_scale parameters (no weight decay)
- lr/param_gr

KeyboardInterrupt: 