Create dataset from Swiss card images

In [7]:
%pip install opencv-python numpy matplotlib

Collecting matplotlib
  Using cached matplotlib-3.10.1-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.3.1-cp312-cp312-win_amd64.whl.metadata (5.4 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Using cached fonttools-4.56.0-cp312-cp312-win_amd64.whl.metadata (103 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.8-cp312-cp312-win_amd64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib)
  Using cached pillow-11.1.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.2.1-py3-none-any.whl.metadata (5.0 kB)
Using cached matplotlib-3.10.1-cp312-cp312-win_amd64.whl (8.1 MB)
Using cached contourpy-1.3.1-cp312-cp312-win_amd64.whl (220 kB)
Using cached cycler-0.12.1-py3-none-any.whl (8.3 kB)
Using cached fonttools


[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [46]:
import numpy as np
import cv2 as cv
from pathlib import Path
import random
from itertools import cycle

from jassair.utils import get_dataset_path, Datasets

## Create background images

In [51]:
# Directory that receives the cropped and resized background images written below
dest_path = get_dataset_path(Datasets.BACKGROUNDS)

In [52]:
# Side length (in pixels) of the square background images that are written out
target_x = 640

# Sort the paths: glob order is filesystem-dependent, and the index `i` ends up
# in the output file name, so sorting keeps the numbering stable between runs.
for i, image_path in enumerate(sorted(Path("data/raw_backgrounds").glob("*.jpg")), 1):
    # cv.imread returns None (it does not raise) when a file cannot be decoded.
    # str() keeps this working on OpenCV builds that do not accept Path objects.
    image = cv.imread(str(image_path))
    if image is None:
        raise ValueError(f"Image {image_path} could not be read!")

    h, w = image.shape[:2]
    x = min(h, w)  # side length of the largest centred square

    if x < target_x:
        raise ValueError(f"Image {image_path} is too small!, {image.shape[:2]}")

    # Centre-crop to a square. The crop side is 2 * (x // 2), i.e. one pixel
    # less than x when x is odd.
    h_2, w_2 = h // 2, w // 2
    x_2 = x // 2
    image = image[h_2 - x_2:h_2 + x_2, w_2 - x_2:w_2 + x_2]

    image = cv.resize(image, (target_x, target_x))

    # cv.imwrite signals failure through its return value rather than an exception
    if not cv.imwrite(f"{dest_path}/background_{i}.png", image):
        raise OSError(f"Could not write {dest_path}/background_{i}.png")

## Read images

In [53]:
# Root of the source card dataset; its "test/images" and "test/labels" folders are read below
dataset_path = get_dataset_path(Datasets.SWISS)

In [54]:
# Foreground objects to paste onto backgrounds.
# Each entry is (image, name, label): the loaded image, the part of the file name
# before the first "_", and the integer class id read from the label file.
FOREGROUND_IMAGES: list[tuple[np.ndarray, str, int]] = []

In [55]:
# Start from an empty list so re-running this cell does not append duplicates
FOREGROUND_IMAGES.clear()

# Sort the paths: the synthetic generator cycles through this list in order, so a
# filesystem-dependent glob order would defeat the fixed random seed.
for image_path in sorted((dataset_path / "test" / "images").glob("*.jpg")):
    # cv.imread returns None (it does not raise) when a file cannot be decoded
    image = cv.imread(str(image_path))
    if image is None:
        raise ValueError(f"Image {image_path} could not be read!")

    # The text before the first underscore of the file name is used as the name
    name = image_path.name.split("_")[0]

    # Only the first token of the first label line (the class id) is used
    label_path = dataset_path / "test" / "labels" / f"{image_path.stem}.txt"
    with label_path.open("r", encoding="utf-8") as f:
        fields = f.readline().split()
    if not fields:
        raise ValueError(f"Label file {label_path} is empty!")
    label = int(fields[0])

    FOREGROUND_IMAGES.append((image, name, label))

In [56]:
# Background images loaded from the backgrounds dataset; one is picked at random per synthetic image
BACKGROUND_IMAGES : list[np.ndarray] = []

In [57]:
# Start from an empty list so re-running this cell does not append duplicates
BACKGROUND_IMAGES.clear()

# Sort the paths: random.choice indexes into this list, so a filesystem-dependent
# glob order would make the seeded output differ between machines.
for image_path in sorted(get_dataset_path(Datasets.BACKGROUNDS).glob("*.png")):
    # cv.imread returns None (it does not raise) when a file cannot be decoded
    image = cv.imread(str(image_path))
    if image is None:
        raise ValueError(f"Image {image_path} could not be read!")
    BACKGROUND_IMAGES.append(image)

## Create synthetic images

In [58]:
def rotate_image(fg, angle):
    """Rotate ``fg`` by ``angle`` degrees on a canvas enlarged to fit the result.

    The output canvas is sized to the bounding box of the rotated image, and
    the regions not covered by the source are filled with zeros (black).

    Args:
        fg: Image array; its first two dimensions are read as (height, width).
        angle: Rotation angle in degrees, passed to ``cv.getRotationMatrix2D``.

    Returns:
        The rotated image on the enlarged canvas.
    """
    src_h, src_w = fg.shape[:2]
    pivot = (src_w // 2, src_h // 2)

    # 2x3 affine matrix for a pure rotation (scale 1.0) around the image centre
    affine = cv.getRotationMatrix2D(pivot, angle, 1.0)
    abs_cos = np.abs(affine[0, 0])
    abs_sin = np.abs(affine[0, 1])

    # Width and height of the axis-aligned box enclosing the rotated image
    out_w = int((src_h * abs_sin) + (src_w * abs_cos))
    out_h = int((src_h * abs_cos) + (src_w * abs_sin))

    # Shift the translation so the rotated content is centred on the new canvas
    affine[0, 2] += (out_w / 2) - pivot[0]
    affine[1, 2] += (out_h / 2) - pivot[1]

    return cv.warpAffine(
        fg,
        affine,
        (out_w, out_h),
        borderMode=cv.BORDER_CONSTANT,
        borderValue=(0, 0, 0, 0),
    )

In [59]:
def overlay_image(bg: np.ndarray, fg: np.ndarray, scale: float, angle: float):
    """
    Rotates and scales a foreground object and pastes it at a random position on a background.

    The foreground is rotated by ``angle``, resized so that at ``scale == 1.0`` it
    just fits inside the background, and placed at a position drawn with
    ``random.randint`` (x first, then y).

    Foreground pixels whose channels are all <= 10 are treated as transparent.
    This removes the black corners introduced by the rotation, but it also
    applies to equally dark pixels inside the object itself.

    Args:
        bg: Background image. It is modified in place; pass a copy to keep the original.
        fg: Foreground image to paste.
        scale: Fraction of the largest size that still fits inside the background.
        angle: Rotation angle in degrees.

    Returns:
        A tuple ``(bg, box)``: the modified background and the string
        ``"<center_x> <center_y> <width> <height>"`` describing the pasted
        rectangle, with every value divided by the background width or height.

    Raises:
        ValueError: If the scaled foreground is empty or larger than the background.
    """
    # Rotate first so the fit ratio below is based on the rotated bounding box
    fg = rotate_image(fg, angle)
    h_fg, w_fg = fg.shape[:2]
    h_bg, w_bg = bg.shape[:2]

    # Largest uniform factor at which the rotated foreground still fits the background
    ratio = min(h_bg / h_fg, w_bg / w_fg)
    new_h, new_w = int(h_fg * ratio * scale), int(w_fg * ratio * scale)

    # Fail early with a clear message instead of an opaque OpenCV / randint error
    if new_h < 1 or new_w < 1:
        raise ValueError(f"scale {scale} shrinks the foreground to an empty image")
    if new_h > h_bg or new_w > w_bg:
        raise ValueError(f"scale {scale} makes the foreground larger than the background")

    fg = cv.resize(fg, (new_w, new_h))

    # Random top-left corner such that the foreground lies fully inside the background
    x = random.randint(0, w_bg - new_w)
    y = random.randint(0, h_bg - new_h)

    # Region of the background that will receive the foreground
    roi = bg[y:y + new_h, x:x + new_w]

    # True where the foreground has content (any channel above the near-black threshold)
    mask = np.any(fg > 10, axis=-1)

    # Take the foreground pixel where the mask is set and the background pixel elsewhere
    bg[y:y + new_h, x:x + new_w] = np.where(mask[..., np.newaxis], fg, roi)

    # Bounding box of the pasted rectangle, normalised by the background size
    box_center_x = (x + new_w / 2) / w_bg
    box_center_y = (y + new_h / 2) / h_bg
    box_height = new_h / h_bg
    box_width = new_w / w_bg

    return bg, f"{box_center_x} {box_center_y} {box_width} {box_height}"

In [60]:
def create_synthetic_images(num_images: int, image_dest: Path, label_dest: Path):
    """Generate ``num_images`` synthetic images with matching label files.

    Each image is a randomly chosen background with one foreground object
    pasted on top. Foregrounds are taken from ``FOREGROUND_IMAGES`` in order,
    wrapping around when the list is exhausted; the cycle restarts on every
    call, so each call begins with the first foreground image.

    Args:
        num_images: Number of image/label pairs to write.
        image_dest: Existing directory that receives ``<name>_<i>.png`` files.
        label_dest: Existing directory that receives ``<name>_<i>.txt`` files,
            each holding one line ``"<label> <box>"``.

    Raises:
        ValueError: If images are requested but no foreground or background
            images have been loaded.
        OSError: If an image file cannot be written.
    """
    if num_images <= 0:
        return

    # Fail with a clear message instead of StopIteration / IndexError further down
    if not FOREGROUND_IMAGES:
        raise ValueError("No foreground images loaded")
    if not BACKGROUND_IMAGES:
        raise ValueError("No background images loaded")

    images = cycle(FOREGROUND_IMAGES)
    for i in range(num_images):
        # Copy the background because overlay_image draws into it
        bg = random.choice(BACKGROUND_IMAGES).copy()
        fg, name, label = next(images)
        # Defensive copy so the shared foreground list is never modified
        fg = fg.copy()

        # Random transformations
        scale = random.uniform(0.5, 1.0)  # 50% to 100% of the largest size that fits the background
        angle = random.gauss(0, 45.0)  # degrees; normally distributed around 0 with sigma 45

        # Overlay object onto background
        synthetic_image, box = overlay_image(bg, fg, scale, angle)

        # Save image; cv.imwrite reports failure through its return value
        image_file = f"{image_dest}/{name}_{i}.png"
        if not cv.imwrite(image_file, synthetic_image):
            raise OSError(f"Could not write {image_file}")

        # Save label next to it under the same base name
        with (label_dest / f"{name}_{i}.txt").open("w", encoding="utf-8") as f:
            f.write(f"{label} {box}")

In [61]:
def create_synthetic_dataset(dataset_dest: Path, num_train: int, num_val: int, num_test: int):
    """Create the train / valid / test splits of a synthetic dataset.

    For each split, ``<dataset_dest>/<split>/images`` and
    ``<dataset_dest>/<split>/labels`` are created if missing and then filled by
    ``create_synthetic_images``. Splits are processed in the order
    train, valid, test.

    Args:
        dataset_dest: Root directory of the dataset.
        num_train: Number of images for the "train" split.
        num_val: Number of images for the "valid" split.
        num_test: Number of images for the "test" split.
    """
    splits = (("train", num_train), ("valid", num_val), ("test", num_test))

    for split_name, num_images in splits:
        image_target = dataset_dest / split_name / "images"
        image_target.mkdir(parents=True, exist_ok=True)
        label_target = dataset_dest / split_name / "labels"
        label_target.mkdir(parents=True, exist_ok=True)
        create_synthetic_images(num_images, image_target, label_target)

In [62]:
# Seed Python's `random` module before generating so the random draws are repeatable
random.seed(42)

synthetic_root = get_dataset_path(Datasets.SYNTHETIC_SINGLE)
create_synthetic_dataset(synthetic_root, num_train=10, num_val=1, num_test=1)

print("Synthetic dataset created successfully!")

Synthetic dataset created successfully!
