In [1]:
from pathlib import Path
import math
from typing import Tuple

import numpy as np
from PIL import Image
from tqdm import tqdm

In [2]:
# Путь к исходному DeepRockSR-2D (как у тебя сейчас в train.py)
DATA_ROOT = Path("../../DeepRockSR-2D")  # подставь свой путь

# Путь, куда складывать патчи
OUT_ROOT = Path("../../DeepRockSR-2D_patches")

# Размер HR-патча
PATCH_SIZE = 100  # например, 100x100

# Масштаб суперразрешения (X2 или X4 и т.п.)
UPSCALE = 4  # или 2, в зависимости от твоего эксперимента

# Сплиты: исходное имя в DeepRock -> желаемый суффикс в папках патчей
SPLITS = [
    ("train", "train"),  # shuffled2D_train_HR -> HR_train/LR_train
    ("valid", "val"),    # shuffled2D_valid_HR -> HR_val/LR_val
]

In [3]:
def pad_to_multiple_np(img: np.ndarray, patch_size: int) -> Tuple[np.ndarray, Tuple[int,int,int,int]]:
    """
    img: np.ndarray (H, W) или (H, W, C)
    Возвращает:
      img_padded: np.ndarray
      pads: (pad_left, pad_right, pad_top, pad_bottom)
    """
    if img.ndim == 2:
        h, w = img.shape
        channels = None
    elif img.ndim == 3:
        h, w, channels = img.shape
    else:
        raise ValueError(f"unexpected img ndim={img.ndim}, shape={img.shape}")

    n_h = math.ceil(h / patch_size)
    n_w = math.ceil(w / patch_size)
    H_pad = n_h * patch_size
    W_pad = n_w * patch_size

    pad_h = H_pad - h
    pad_w = W_pad - w

    pad_top = pad_h // 2
    pad_bottom = pad_h - pad_top
    pad_left = pad_w // 2
    pad_right = pad_w - pad_left

    if img.ndim == 2:
        pads = ((pad_top, pad_bottom), (pad_left, pad_right))
    else:
        pads = ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0))

    img_padded = np.pad(img, pads, mode="reflect")
    return img_padded, (pad_left, pad_right, pad_top, pad_bottom)


In [4]:
def extract_patches_from_hr_image(
    hr_path: Path,
    patch_size: int,
    upscale: int,
) -> Tuple[list[Image.Image], list[Image.Image]]:
    """
    Читает один HR-файл, добавляет паддинг до кратности patch_size,
    режет на патчи и для каждого HR-патча строит LR-патч через bicubic downscale.

    Возвращает:
      hr_patches: список PIL.Image (patch_size x patch_size)
      lr_patches: список PIL.Image (patch_size//upscale x patch_size//upscale)
    """
    # Читаем как grayscale (если хочешь, можешь сделать "RGB")
    hr_img = Image.open(hr_path).convert("L")
    hr_np = np.array(hr_img).astype(np.float32)  # (H,W)

    hr_np_padded, pads = pad_to_multiple_np(hr_np, patch_size)
    H_pad, W_pad = hr_np_padded.shape

    n_h = H_pad // patch_size
    n_w = W_pad // patch_size

    hr_patches = []
    lr_patches = []

    for i in range(n_h):
        for j in range(n_w):
            y0 = i * patch_size
            x0 = j * patch_size
            y1 = y0 + patch_size
            x1 = x0 + patch_size

            patch_np = hr_np_padded[y0:y1, x0:x1]  # (patch_size, patch_size)
            patch_hr = Image.fromarray(patch_np.astype(np.float32), mode="F")
            # Если хочешь 8-bit для сохранения:
            patch_hr_8 = patch_hr.convert("L")

            # Строим LR: downscale
            lr_size = (patch_size // upscale, patch_size // upscale)
            patch_lr = patch_hr_8.resize(lr_size, resample=Image.BICUBIC)

            hr_patches.append(patch_hr_8)
            lr_patches.append(patch_lr)

    return hr_patches, lr_patches


In [5]:
def process_split(
    data_root: Path,
    out_root: Path,
    split_src: str,   # "train" или "valid" (как в DeepRock)
    split_out: str,   # "train" или "val" (для имён папок HR_*/LR_*)
    patch_size: int,
    upscale: int,
):
    # Исходная директория HR (как в _get_dirs_deeprock)
    hr_dir = data_root / "shuffled2D" / f"shuffled2D_{split_src}_HR"
    if not hr_dir.is_dir():
        raise FileNotFoundError(f"HR dir not found: {hr_dir}")

    # Куда писаем патчи
    out_hr_dir = out_root / f"HR_{split_out}"
    out_lr_dir = out_root / f"LR_{split_out}"
    out_hr_dir.mkdir(parents=True, exist_ok=True)
    out_lr_dir.mkdir(parents=True, exist_ok=True)

    hr_files = sorted([p for p in hr_dir.iterdir() if p.is_file() and p.suffix.lower() in (".png", ".tif", ".tiff", ".jpg", ".jpeg")])
    print(f"[{split_src}] HR files: {len(hr_files)}")

    patch_counter = 0

    for hr_path in tqdm(hr_files, desc=f"Processing {split_src}"):
        hr_patches, lr_patches = extract_patches_from_hr_image(
            hr_path,
            patch_size=patch_size,
            upscale=upscale,
        )

        stem = hr_path.stem  # основа имени файла
        for idx, (hr_patch, lr_patch) in enumerate(zip(hr_patches, lr_patches)):
            # можно кодировать индекс патча как rXX_cYY, но для простоты возьмём просто _pXXXX
            patch_name = f"{stem}_p{idx:04d}.png"

            hr_out_path = out_hr_dir / patch_name
            lr_out_path = out_lr_dir / patch_name

            hr_patch.save(hr_out_path)
            lr_patch.save(lr_out_path)

            patch_counter += 1

    print(f"[{split_src}] total patches saved: {patch_counter}")

In [6]:
OUT_ROOT.mkdir(parents=True, exist_ok=True)

for split_src, split_out in SPLITS:
    process_split(
        data_root=DATA_ROOT,
        out_root=OUT_ROOT,
        split_src=split_src,
        split_out=split_out,
        patch_size=PATCH_SIZE,
        upscale=UPSCALE,
    )

[train] HR files: 9600


  patch_hr = Image.fromarray(patch_np.astype(np.float32), mode="F")
Processing train: 100%|████████████████████████████████████████████████████████████| 9600/9600 [04:03<00:00, 39.43it/s]


[train] total patches saved: 240000
[valid] HR files: 1200


Processing valid: 100%|████████████████████████████████████████████████████████████| 1200/1200 [00:34<00:00, 34.87it/s]

[valid] total patches saved: 30000



