### Imports & Paths (no data is downloaded here)

In [1]:
from pathlib import Path
from PIL import Image
from tqdm import tqdm

# All paths are relative to the notebook's working directory.
# Build the path from components instead of embedding "\\": a backslash is only
# a separator on Windows, so "..\\data" would be one oddly named entry elsewhere.
DATA_ROOT = Path("..") / "data"
HR_VALID = DATA_ROOT / "HR" / "valid"
HR_TRAIN = DATA_ROOT / "HR" / "train"

In [2]:
def lr_dir(scale: int, split: str) -> Path:
    """Return the low-resolution image directory for a scale factor and split.

    The result is ``DATA_ROOT / "LR_bicubic" / "X<scale>" / <split>``; nothing
    is created on disk by this call.
    """
    scale_folder = f"X{scale}"
    return DATA_ROOT / "LR_bicubic" / scale_folder / split

Sanity check: do we see the HR images?

In [3]:
def count_images(p: Path):
    """Count the direct children of ``p`` that have an image extension.

    An entry counts when its suffix, compared case-insensitively, is
    ``.png``, ``.jpg`` or ``.jpeg``. Sub-directories are not searched.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    total = 0
    for entry in p.glob("*"):
        if entry.suffix.lower() in image_suffixes:
            total += 1
    return total

# Report where each HR split resolves to, then how many images each one holds.
for label, folder in (("HR train dir:", HR_TRAIN), ("HR valid dir:", HR_VALID)):
    print(label, folder.resolve())
for label, folder in (("HR train count:", HR_TRAIN), ("HR valid count:", HR_VALID)):
    print(label, count_images(folder))

HR train dir: C:\Users\Caleb\Documents\GitHub Repos\PixelForge\data\HR\train
HR valid dir: C:\Users\Caleb\Documents\GitHub Repos\PixelForge\data\HR\valid
HR train count: 800
HR valid count: 100


### Helper to Generate an LR Image (Idempotent)

In [4]:
def ensure_dir(p: Path):
    """Create directory ``p``, including any missing parents, if needed.

    Calling this on a directory that already exists is a no-op.

    Raises:
        FileExistsError: if ``p`` exists but is not a directory.
    """
    # exist_ok=True already covers the "directory is there" case, so a separate
    # exists() pre-check would only add a check-then-act race and would hide
    # the case where a regular file occupies the path.
    p.mkdir(parents=True, exist_ok=True)

def hr_to_lr(hr_path: Path, out_dir: Path, scale: int):
    """Write a bicubic-downscaled copy of one high-resolution image.

    The output is saved as ``out_dir / hr_path.name``. If that file already
    exists the call returns without opening the source, so re-running over the
    same inputs is cheap.

    Args:
        hr_path: Path of the high-resolution source image.
        out_dir: Directory that receives the low-resolution image; it is
            created on demand just before saving.
        scale: Integer downscale factor (must be >= 1).

    Raises:
        ValueError: if ``scale`` is less than 1, or if the image is smaller
            than ``scale`` in either dimension (nothing would be left after
            cropping to a multiple of ``scale``).
    """
    if scale < 1:
        raise ValueError(f"scale must be a positive integer, got {scale!r}")

    out_path = out_dir / hr_path.name
    if out_path.exists():
        return  # Skip existing files

    # The context manager releases the source file handle as soon as the
    # pixels have been converted; convert() returns an independent image.
    with Image.open(hr_path) as src:
        img = src.convert("RGB")

    w, h = img.size
    # Trim the right/bottom edges so both sides divide evenly by the scale.
    w2, h2 = w - (w % scale), h - (h % scale)
    if w2 == 0 or h2 == 0:
        raise ValueError(
            f"{hr_path} is {w}x{h}, too small to downscale by a factor of {scale}"
        )
    if (w2, h2) != (w, h):
        img = img.crop((0, 0, w2, h2))

    lr = img.resize((w2 // scale, h2 // scale), resample=Image.BICUBIC)
    ensure_dir(out_dir)
    lr.save(out_path, quality=95)

Tiny smoke test (valid split, X4, first 5 images)

In [5]:
# Smoke-test settings: X4 downscale of the validation split.
SCALE, SPLIT = 4, "valid"
OUT_DIR = lr_dir(SCALE, SPLIT)

# Gather the HR validation images in name order and keep only the first five.
valid_imgs = sorted(
    candidate
    for candidate in HR_VALID.glob("*")
    if candidate.suffix.lower() in (".png", ".jpg", ".jpeg")
)
subset = valid_imgs[0:5]

for p in tqdm(subset, desc=f"Generating X{SCALE} ({SPLIT})"):
    hr_to_lr(p, OUT_DIR, SCALE)

print("Wrote to:", OUT_DIR.resolve())

Generating X4 (valid): 100%|██████████| 5/5 [00:00<00:00,  8.97it/s]

Wrote to: C:\Users\Caleb\Documents\GitHub Repos\PixelForge\data\LR_bicubic\X4\valid





### Small Batch Function (Choose Split + Scale + Limit)

In [6]:
# Flexible wrapper to perform low-level image operations
def generate_lr_split(scale: int, split : str, limit: int | None = None, verbose: bool = True):
    hr_dir = HR_TRAIN if split == "train" else HR_VALID
    out_dir = lr_dir(scale, split)
    imgs = sorted([p for p in hr_dir.glob("*") if p.suffix.lower() in [".png", ".jpg", ".jpeg"]])
    if limit is not None:
        imgs = imgs[:limit]
    for p in tqdm(imgs, desc=f"X{scale} {split} ({len(imgs)} imgs)"):
        hr_to_lr(p, out_dir, scale)
    if verbose:
        print(f"Done: {out_dir.resolve()} ({count_images(out_dir)} images)")

Try a slightly larger test to confirm functionality

In [9]:
# Run the first 20 training images at X2 as a slightly larger check.
generate_lr_split(split="train", scale=2, limit=20)

X2 train (20 imgs): 100%|██████████| 20/20 [00:00<00:00, 9995.96it/s]

Done: PixelForge\data\LR_bicubic\X2\train (20 images)



