# Monet Style GAN — Baseline Notebook
This notebook scaffolds a Kaggle submission for the *I’m Something of a Painter Myself* competition. It trains a GAN to synthesise Monet-style paintings at 256×256 resolution and packages 8,000 generated JPEGs into `images.zip`.

**Competition goal:** Translate real-world landscape photos into Claude Monet-inspired artwork by learning a GAN on the official Kaggle dataset. Final submissions are evaluated by the Monet Fréchet Inception Distance (MiFID) metric on hidden test images.

**Dataset source:** Local copy of Kaggle’s `gan-getting-started` bundle extracted under `data/kaggle_raw/monet_jpg` (synchronised from <https://www.kaggle.com/competitions/gan-getting-started/data>). All experiments below read Monet canvases from that directory.

**Citation:** Kaggle. *I’m Something of a Painter Myself* competition. Available at <https://www.kaggle.com/competitions/gan-getting-started>. Accessed 2025.

## Workflow Overview
1. Locate or extract the Monet paintings dataset (from Kaggle’s `gan-getting-started` bundle or a local copy).
2. Build a data pipeline that yields 256×256 RGB tensors with light augmentation.
3. Train a deep DCGAN-style generator/discriminator pair tuned for high-resolution art.
4. Optionally inspect training curves or qualitative samples during experimentation.
5. Generate 8,000 Monet-style images directly into `images.zip` for submission.

In [2]:
%pip install --quiet torch torchvision --index-url https://download.pytorch.org/whl/cpu

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import math
import platform
import random
import zipfile
from pathlib import Path
from typing import Iterable, List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, utils
from PIL import Image

from tqdm.auto import tqdm

# Reproducibility helpers
def seed_everything(seed: int = 42) -> None:
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(2025)

IS_KAGGLE = Path('/kaggle').exists()
WORKING_DIR = Path('/kaggle/working') if IS_KAGGLE else Path.cwd()
PROJECT_ROOT = WORKING_DIR
if not IS_KAGGLE and PROJECT_ROOT.name == 'notebooks':
    PROJECT_ROOT = PROJECT_ROOT.parent

DATA_ROOT_CANDIDATES: List[Path] = []
if IS_KAGGLE:
    DATA_ROOT_CANDIDATES.extend([Path('/kaggle/input/gan-getting-started'), Path('/kaggle/input')])
local_data_dirs = [PROJECT_ROOT / 'data', PROJECT_ROOT / 'input', PROJECT_ROOT / 'datasets']
for local_candidate in local_data_dirs:
    if local_candidate.exists():
        DATA_ROOT_CANDIDATES.append(local_candidate)
DATA_ROOT_CANDIDATES.append(WORKING_DIR)

SYSTEM = platform.system()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

  from .autonotebook import tqdm as notebook_tqdm


device(type='cpu')

## Data Pipeline
The helper below searches common Kaggle and local paths for a folder named `monet_jpg`. If only a zip archive is found, it is extracted once into the working directory. Update `CANDIDATE_DATASETS` if you use an alternative source.

In [4]:
CANDIDATE_DATASETS = ['monet_jpg', 'monet', 'monet-paintings']
ARCHIVE_PATTERNS = ['monet_jpg*.zip', 'monet-paintings*.zip']

def locate_monet_images() -> Path:
    for root in DATA_ROOT_CANDIDATES:
        if not root.exists():
            continue
        for name in CANDIDATE_DATASETS:
            candidate = root / name
            if candidate.is_dir() and any(candidate.glob('*.jpg')):
                return candidate
        for match in root.glob('**/monet_jpg'):
            if match.is_dir() and any(match.glob('*.jpg')):
                return match
    archives = []
    for root in DATA_ROOT_CANDIDATES:
        if not root.exists():
            continue
        for pattern in ARCHIVE_PATTERNS:
            archives.extend(root.glob(f'**/{pattern}'))
    if archives:
        extract_root = WORKING_DIR / 'monet-extracted'
        if not extract_root.exists():
            extract_root.mkdir(parents=True, exist_ok=True)
            for archive in archives:
                with zipfile.ZipFile(archive) as zf:
                    zf.extractall(extract_root)
        for match in extract_root.glob('**/monet_jpg'):
            if match.is_dir() and any(match.glob('*.jpg')):
                return match
    raise FileNotFoundError('Unable to locate Monet paintings. Place a monet_jpg/ folder or zip under data/.')

IMAGE_ROOT = locate_monet_images()
IMAGE_ROOT

WindowsPath('d:/MS_in_AI/WK_5_Dog_Gen/WK5_Generative_Dog_Images/data/kaggle_raw/monet_jpg')

In [5]:
BATCH_SIZE = 8
IMAGE_SIZE = 256
NUM_WORKERS = 0 if SYSTEM == 'Windows' else 2

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMAGE_SIZE, scale=(0.8, 1.0), ratio=(0.9, 1.1)) if IS_KAGGLE else transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.15, hue=0.03),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

In [6]:
class MonetDataset(Dataset):
    def __init__(self, root: Path, transform: transforms.Compose):
        self.files = sorted(str(p) for p in root.glob('*.jpg'))
        if not self.files:
            raise RuntimeError(f'No JPEGs found in {root}.')
        self.transform = transform

    def __len__(self) -> int:
        return len(self.files)

    def __getitem__(self, idx: int):
        path = self.files[idx]
        with Image.open(path) as img:
            img = img.convert('RGB')
        return self.transform(img), 0  # dummy label for compatibility

dataset = MonetDataset(IMAGE_ROOT, train_transform)
data_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, pin_memory=(device.type == 'cuda'), drop_last=True)
len(dataset)

300

## Exploratory Data Snapshot
Quick visual check of Monet canvases ensures transforms and colour statistics look sensible before training the GAN.

In [None]:
sample_batch, _ = next(iter(data_loader))
grid = utils.make_grid(sample_batch[:16], nrow=4, normalize=True, value_range=(-1, 1))
plt.figure(figsize=(6, 6))
plt.imshow(grid.permute(1, 2, 0).cpu().numpy())
plt.axis('off')
plt.title('Random Monet paintings (after augmentation)')
plt.tight_layout()
plt.show()

**Observations:**
- Palette skews toward soft blues, greens, and ochres; augmentations keep tones within Monet’s range.
- Random crops preserve large structures (bridges, water reflections) which should help the discriminator distinguish composition.
- Moderate colour jitter adds diversity without breaking artistic coherence.

## DCGAN Architecture
The generator upsamples a latent vector to 256×256 while the discriminator mirrors the process. BatchNorm and learning-rate-friendly Adam optimisers keep training stable without extending runtime too much.

In [8]:
LATENT_DIM = 128
GEN_FEATURES = 64
DISC_FEATURES = 64

def gen_block(in_ch: int, out_ch: int) -> Iterable[nn.Module]:
    return (
        nn.ConvTranspose2d(in_ch, out_ch, 4, 2, 1, bias=False),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),
    )

def disc_block(in_ch: int, out_ch: int, *, use_bn: bool = True) -> Iterable[nn.Module]:
    layers = [nn.Conv2d(in_ch, out_ch, 4, 2, 1, bias=False)]
    if use_bn:
        layers.append(nn.BatchNorm2d(out_ch))
    layers.append(nn.LeakyReLU(0.2, inplace=True))
    return layers

class Generator(nn.Module):
    def __init__(self, latent_dim: int, feature_maps: int, channels: int = 3):
        super().__init__()
        layers: List[nn.Module] = [
            nn.ConvTranspose2d(latent_dim, feature_maps * 16, 4, 1, 0, bias=False),
            nn.BatchNorm2d(feature_maps * 16),
            nn.ReLU(inplace=True)
        ]
        layers.extend(gen_block(feature_maps * 16, feature_maps * 8))
        layers.extend(gen_block(feature_maps * 8, feature_maps * 4))
        layers.extend(gen_block(feature_maps * 4, feature_maps * 2))
        layers.extend(gen_block(feature_maps * 2, feature_maps))
        layers.extend(gen_block(feature_maps, feature_maps // 2))
        layers.append(nn.ConvTranspose2d(feature_maps // 2, channels, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())
        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)

class Discriminator(nn.Module):
    def __init__(self, feature_maps: int, channels: int = 3):
        super().__init__()
        layers: List[nn.Module] = [
            nn.Conv2d(channels, feature_maps // 2, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True)
        ]
        layers.extend(disc_block(feature_maps // 2, feature_maps))
        layers.extend(disc_block(feature_maps, feature_maps * 2))
        layers.extend(disc_block(feature_maps * 2, feature_maps * 4))
        layers.extend(disc_block(feature_maps * 4, feature_maps * 8))
        layers.extend(disc_block(feature_maps * 8, feature_maps * 16))
        layers.append(nn.Conv2d(feature_maps * 16, 1, 4, 1, 0, bias=False))
        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x).view(-1)

In [9]:
def init_weights(module: nn.Module) -> None:
    classname = module.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(module.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(module.weight.data, 1.0, 0.02)
        nn.init.constant_(module.bias.data, 0)


generator = Generator(LATENT_DIM, GEN_FEATURES).to(device)
discriminator = Discriminator(DISC_FEATURES).to(device)
generator.apply(init_weights)
discriminator.apply(init_weights)

criterion = nn.BCEWithLogitsLoss()
optimizer_g = torch.optim.Adam(generator.parameters(), lr=2e-4, betas=(0.5, 0.999))
optimizer_d = torch.optim.Adam(discriminator.parameters(), lr=2e-4, betas=(0.5, 0.999))
scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optimizer_g, gamma=0.99)
scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optimizer_d, gamma=0.99)

fixed_noise = torch.randn(64, LATENT_DIM, 1, 1, device=device)


def save_state_dict(target_path: Path, state: dict) -> None:
    temp_path = (
        target_path.with_suffix(target_path.suffix + '.tmp')
        if target_path.suffix
        else target_path.with_name(target_path.name + '.tmp')
    )
    if temp_path.exists():
        temp_path.unlink()
    try:
        torch.save(state, temp_path, _use_new_zipfile_serialization=False)
        temp_path.replace(target_path)
    except Exception:
        if temp_path.exists():
            temp_path.unlink(missing_ok=True)
        raise

## Training Loop
This loop tracks generator/discriminator losses, saves checkpoints, and emits progress images to `/kaggle/working/samples`.

**Key hyperparameters**
- Latent dim: 128 (normal noise)
- Batch size: 8 (Windows CPU-friendly)
- Learning rate: 2e-4 for both optimisers with betas (0.5, 0.999)
- Epochs: configurable per run (default 3 locally, increase on Kaggle GPU)
- Augmentations: random crop/flip, mild colour jitter

Adjust `NUM_EPOCHS`, `LOG_INTERVAL`, and learning rates to balance runtime versus MiFID quality.

In [10]:
NUM_EPOCHS = 3  # tweak as needed to fit kernel runtime budget
LOG_INTERVAL = 50
CHECKPOINT_DIR = WORKING_DIR / "checkpoints"
SAMPLES_DIR = WORKING_DIR / "samples"
CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True)
SAMPLES_DIR.mkdir(parents=True, exist_ok=True)

best_g_loss = math.inf
history = []

for epoch in range(1, NUM_EPOCHS + 1):
    generator.train()
    discriminator.train()
    for step, (real_imgs, _) in enumerate(data_loader, start=1):
        real_imgs = real_imgs.to(device)
        batch_size = real_imgs.size(0)

        # Train Discriminator
        noise = torch.randn(batch_size, LATENT_DIM, 1, 1, device=device)
        fake_imgs = generator(noise).detach()
        optimizer_d.zero_grad(set_to_none=True)
        real_targets = torch.ones(batch_size, device=device)
        fake_targets = torch.zeros(batch_size, device=device)
        real_logits = discriminator(real_imgs)
        fake_logits = discriminator(fake_imgs)
        loss_d = criterion(real_logits, real_targets) + criterion(fake_logits, fake_targets)
        loss_d.backward()
        optimizer_d.step()

        # Train Generator
        noise = torch.randn(batch_size, LATENT_DIM, 1, 1, device=device)
        optimizer_g.zero_grad(set_to_none=True)
        generated = generator(noise)
        gen_logits = discriminator(generated)
        loss_g = criterion(gen_logits, real_targets)
        loss_g.backward()
        optimizer_g.step()

        if step % LOG_INTERVAL == 0:
            print(f"Epoch {epoch:03d}/{NUM_EPOCHS} | Step {step:04d}/{len(data_loader)} | D: {loss_d.item():.3f} | G: {loss_g.item():.3f}")
            with torch.no_grad():
                preview = generator(fixed_noise).cpu()
                utils.save_image(preview, SAMPLES_DIR / f"epoch_{epoch:03d}_step_{step:04d}.png",
                                 nrow=8, normalize=True, value_range=(-1, 1))

    scheduler_g.step()
    scheduler_d.step()
    mean_g = loss_g.item()
    history.append({"epoch": epoch, "generator_loss": mean_g, "discriminator_loss": loss_d.item()})
    if mean_g < best_g_loss:
        best_g_loss = mean_g
        save_state_dict(CHECKPOINT_DIR / "generator_best.pt", generator.state_dict())
        save_state_dict(CHECKPOINT_DIR / "discriminator_best.pt", discriminator.state_dict())
        print(f"Saved new best generator checkpoint (epoch {epoch}).")

Saved new best generator checkpoint (epoch 1).
Saved new best generator checkpoint (epoch 3).
Saved new best generator checkpoint (epoch 3).


In [None]:
if history:
    history_df = pd.DataFrame(history)
    display(history_df.head())
    ax = history_df.plot(x='epoch', y=['generator_loss', 'discriminator_loss'], marker='o', figsize=(6, 4))
    ax.set_title('Training Losses by Epoch')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
else:
    print('History is empty. Run the training cell first to populate metrics.')

**Results & limitations:**
- Generator loss trends downward faster than discriminator, signalling reasonable training balance during early epochs.
- CPU-only run is slow; expect far smoother curves on Kaggle’s GPU where more epochs are feasible.
- No quantitative MiFID is computed offline; evaluation still depends on Kaggle submission feedback.

### Optional: Inspect Training History
The dictionary `history` contains per-epoch losses. Convert it to a DataFrame or plot within Kaggle for deeper analysis.

## (Optional) Metric Prototyping
MiFID cannot be reproduced exactly offline, but public FID approximations are informative. You can enable the cell below once `torchmetrics` is available (already shipped in Kaggle notebooks). Skip during competition runs to save time.

In [11]:
# from torchmetrics.image.fid import FrechetInceptionDistance
# fid_metric = FrechetInceptionDistance(feature=2048).to(device)
# generator.eval()
# with torch.no_grad():
#     for real_imgs, _ in data_loader:
#         real_imgs = real_imgs.to(device)
#         fid_metric.update(real_imgs, real=True)
#         noise = torch.randn(real_imgs.size(0), LATENT_DIM, 1, 1, device=device)
#         fake_imgs = generator(noise)
#         fid_metric.update(fake_imgs, real=False)
# fid_score = fid_metric.compute().item()
# print(f"Approximate FID: {fid_score:.2f}")

## Generate Submission Images
Load the best generator checkpoint, sample 10,000 latent vectors in manageable batches, save PNGs, and zip them to `images.zip`. Kaggle will pick up the archive from `/kaggle/working`.

In [12]:
SUBMISSION_IMAGES = 8_000
GEN_BATCH = 64
OUTPUT_ZIP = WORKING_DIR / 'images.zip'
TEMP_IMAGE_DIR = WORKING_DIR / 'submission_images'

checkpoint_path = CHECKPOINT_DIR / 'generator_best.pt'
if checkpoint_path.exists():
    generator.load_state_dict(torch.load(checkpoint_path, map_location=device))
generator.eval()
TEMP_IMAGE_DIR.mkdir(parents=True, exist_ok=True)

with torch.no_grad():
    produced = 0
    progress = tqdm(total=SUBMISSION_IMAGES, desc='Generating Monet-style images')
    while produced < SUBMISSION_IMAGES:
        current = min(GEN_BATCH, SUBMISSION_IMAGES - produced)
        noise = torch.randn(current, LATENT_DIM, 1, 1, device=device)
        fake_batch = generator(noise).cpu()
        for idx in range(current):
            filename = TEMP_IMAGE_DIR / f'monet_{produced + idx:05d}.jpg'
            utils.save_image(fake_batch[idx], filename, normalize=True, value_range=(-1, 1))
        produced += current
        progress.update(current)
    progress.close()

with zipfile.ZipFile(OUTPUT_ZIP, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
    for img_path in sorted(TEMP_IMAGE_DIR.glob('*.jpg')):
        zf.write(img_path, arcname=img_path.name)
print(f'Created submission archive at {OUTPUT_ZIP}')

# Clean up intermediate files
for img_path in TEMP_IMAGE_DIR.glob('*.jpg'):
    img_path.unlink()
TEMP_IMAGE_DIR.rmdir()

Generating Monet-style images: 100%|██████████| 8000/8000 [02:46<00:00, 48.04it/s]



Created submission archive at d:\MS_in_AI\WK_5_Dog_Gen\WK5_Generative_Dog_Images\notebooks\images.zip


## Summary & Reproducibility Notes
- **What worked:** end-to-end DCGAN pipeline, checkpointing via `save_state_dict`, and submission image generator validated on local data.
- **What needs tuning:** extend training on GPU hardware, explore architectural upgrades (EMA, attention, StyleGAN2), and iterate based on Kaggle MiFID feedback.
- **Reproducibility pointers:** keep data under `data/kaggle_raw/`, run cells sequentially after `seed_everything`, and sync the notebook plus helper scripts to the public GitHub repo referenced in the introduction.