In [1]:
%pip install -q numpy opencv-python

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Create dataset from swiss card images

In [23]:
import math
import numpy as np
import cv2 as cv
from pathlib import Path
import random
from itertools import cycle, accumulate

from jassair.synthetic_data.image_augmentation import Augmentation
from jassair.utils import get_dataset_path, Datasets, get_class_for_name, CardClass
from jassair.synthetic_data import rotate_image, add_drop_shadow, get_yolo_boxes, augment_image, Vignette, LightSpots, RotateMult90, LightingGradient, ColorJitter, NoiseNormal, GaussianBlur, PerspectiveWarp, WhiteBalanceShift, SynthConfig

## Create background images

In [24]:
dest_path = get_dataset_path(Datasets.BACKGROUNDS)

In [25]:
transform_backgrounds = False

target_x = 640

source_path = Path("data/raw_backgrounds")

if transform_backgrounds:
    for i, image_path in enumerate(source_path.glob("*"), 1):
        image = cv.imread(image_path)

        h, w = image.shape[:2]
        h_2, w_2 = h // 2, w // 2
        x = min(h, w)
        x_2 = x // 2

        if x < target_x:
            raise ValueError(f"Image {image_path} is too small!, {image.shape[:2]}")

        h_start = h_2 - x_2
        h_stop = h_2 + x_2
        w_start = w_2 - x_2
        w_stop = w_2 + x_2

        image = image[h_start:h_stop, w_start:w_stop]

        image = cv.resize(image, (target_x, target_x))
        cv.imwrite(f"{dest_path}/background_{i}.png", image)

In [26]:
def create_single_color_background(color: str, height: int = 640, width: int = 640):
    r = int(color[:2], base=16)
    g = int(color[2:4], base=16)
    b = int(color[4:], base=16)

    img = np.zeros((height, width, 3), np.uint8)
    img[:] = (b, g, r)
    return img

## Read images

In [27]:
dataset_path = get_dataset_path(Datasets.CARD_TEMPLATE)

In [28]:
FOREGROUND_IMAGES: list[tuple[np.ndarray, int]] = []

In [29]:
for image_path in dataset_path.glob("*"):
    image = cv.imread(image_path, cv.IMREAD_UNCHANGED)
    label = int(get_class_for_name(image_path.stem))

    FOREGROUND_IMAGES.append((image, label))
print(len(FOREGROUND_IMAGES))

36


In [30]:
BACKGROUND_IMAGES: list[np.ndarray] = []

In [31]:
for image_path in get_dataset_path(Datasets.BACKGROUNDS).glob("*"):
    BACKGROUND_IMAGES.append(cv.imread(image_path))
print(len(BACKGROUND_IMAGES))

99


## YOLO Dataset description

In [32]:
def create_dataset_description(classes: int, labels: list[str]):
    return (f"train: ./train/images\n"
            f"val: ./valid/images\n"
            f"test: ./test/images\n\n"
            f"nc: {classes}\n"
            f"names: {labels}\n")

## Create synthetic images

In [33]:
class PlacementError(Exception): ...

In [34]:
def iou(box1, box2):
    x1, y1, x2, y2 = box1
    x1b, y1b, x2b, y2b = box2
    xi1, yi1 = max(x1, x1b), max(y1, y1b)
    xi2, yi2 = min(x2, x2b), min(y2, y2b)
    inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)
    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x2b - x1b) * (y2b - y1b)
    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area else 0

In [52]:
def generate_synthetic_card_scene(
        bg: np.ndarray,
        card_images: list[np.ndarray],
        angles: list[float],
        scale: float = 0.7,
        iou_threshold: float = 0.1,
        augment_bg: list[Augmentation] = (),
        augment_final: list[Augmentation] = (),
) -> tuple[np.ndarray, list[str]]:
    """
    Generates a realistic card scene on a background.
    Returns final image and YOLO-format annotations.
    """
    shadow_size = 30
    num_cards = len(card_images)
    
    if augment_bg:
        bg = augment_image(bg, augment_bg)
    h_bg, w_bg = bg.shape[:2]

    rotated = [rotate_image(img, angle) for img, angle in zip(card_images, angles)]
    max_card = max(rotated, key=lambda x: sum(x.shape[:2]))
    h_max, w_max = max_card.shape[:2]
    ratio = min(h_bg / h_max, w_bg / w_max)

    processed_cards = []
    for img in rotated:
        r = ratio * scale
        shadowed = add_drop_shadow(img, (shadow_size, shadow_size), 15, 0.8)
        h, w = shadowed.shape[:2]
        h = int(h * r)
        w = int(w * r)
        processed_cards.append(cv.resize(shadowed, (w, h), interpolation=cv.INTER_AREA))

    placed = []
    card_regions = np.zeros((h_bg, w_bg), np.uint8)
    for i, (card, angle) in enumerate(zip(processed_cards, angles), 1):
        h_card, w_card = card.shape[:2]
        MAX_ATTEMPTS = 15
        for _ in range(MAX_ATTEMPTS):
            x = np.random.randint(0, w_bg - w_card)
            y = np.random.randint(0, h_bg - h_card)

            # Check for overlap
            bbox = (x, y, x + w_card, y + h_card)
            if all(iou(bbox, existing) < iou_threshold for existing in placed):
                placed.append(bbox)
                break
        else:
            raise PlacementError("Unable to place card without overlap")

        roi = bg[y:y + h_card, x:x + w_card]
        alpha = card[:, :, 3] / 255
        alpha_3ch = np.dstack([alpha] * 3)
        card_rgb = card[:, :, :3]
        blended = (roi * (1 - alpha_3ch) + card_rgb * alpha_3ch)
        bg[y:y + h_card, x:x + w_card] = blended

        mask = alpha > 0.9
        card_regions[y:y + h_card, x:x + w_card][mask] = i
    
    boxes = get_yolo_boxes(card_regions, num_cards)

    if augment_final:
        bg = augment_image(bg, augment_final)
    return bg, boxes

In [53]:
def get_handheld_positions(angles: list[float], cards: list[tuple[np.ndarray, tuple[int, int]]], bg: np.ndarray) -> list[tuple[int, int]]:
    hb, wb = bg.shape[:2]
    base_x = wb // 2
    base_y = 3 * hb // 4
    positions = []
    for angle, card in zip(angles, cards):
        hc, wc = card.shape[:2]
        c_x = math.cos(math.pi * angle / 180)
        c_y = math.sin(math.pi * angle / 180)
        o_x = (wc // 2) * c_x
        o_y = (hc // 2) * c_y
        o_x -= wc // 2
        o_y += hc // 2
        pos = (int(base_x + o_x), int(base_y - o_y))
        positions.append(pos)
    
    return positions
    

In [68]:
def generate_handheld_card_scene(
        bg: np.ndarray,
        card_images: list[np.ndarray],
        angles: list[float],
        scale: float = 0.7,
        augment_bg: list[Augmentation] = (),
        augment_final: list[Augmentation] = (),
) -> tuple[np.ndarray, list[str]]:
    """
    Generates a realistic card scene on a background.
    Returns final image and YOLO-format annotations.
    """
    shadow_size = 30
    num_cards = len(card_images)
    direction = random.choice([-1, 1])
    start_angle = random.uniform(5 * num_cards, 13 * num_cards) * (-direction)
    angles = [angle + start_angle + 90 for angle in list(accumulate([angle * direction for angle in angles]))]
    
    if augment_bg:
        bg = augment_image(bg, augment_bg)
    h_bg, w_bg = bg.shape[:2]

    rotated = [rotate_image(img, angle) for img, angle in zip(card_images, angles)]
    max_card = max(rotated, key=lambda x: sum(x.shape[:2]))
    h_max, w_max = max_card.shape[:2]
    ratio = min(h_bg / h_max, w_bg / w_max)
    
    processed_cards = []
    for img in rotated:
        r = ratio * scale
        shadowed = add_drop_shadow(img, (shadow_size, shadow_size), 15, 0.8)
        h, w = shadowed.shape[:2]
        h = int(h * r)
        w = int(w * r)
        processed_cards.append(cv.resize(shadowed, (w, h), interpolation=cv.INTER_AREA))
        
    positions = get_handheld_positions(angles, processed_cards, bg)
    card_regions = np.zeros((h_bg, w_bg), np.uint8)
    
    for i, (card, (x, y), angle) in enumerate(zip(processed_cards, positions, angles), 1):
        h_card, w_card = card.shape[:2]

        try:
            roi = bg[y:y + h_card, x:x + w_card]
            alpha = card[:, :, 3] / 255
            alpha_3ch = np.dstack([alpha] * 3)
            card_rgb = card[:, :, :3]
            blended = (roi * (1 - alpha_3ch) + card_rgb * alpha_3ch)
            bg[y:y + h_card, x:x + w_card] = blended
        except ValueError:
            raise PlacementError()
        
        mask = alpha > 0.9
        card_regions[y:y + h_card, x:x + w_card][mask] = i

    boxes = get_yolo_boxes(card_regions, num_cards)

    if augment_final:
        bg = augment_image(bg, augment_final)
    return bg, boxes

In [69]:
def assort_random_layout(background: np.ndarray, cards: list[np.ndarray], config: SynthConfig):
    num_cards = len(cards)
    angles = [random.gauss(0, 45.0) for _ in range(num_cards)]
    scale = random.uniform(0.6 / max(1.0, math.log2(num_cards * (2 - config.max_overlap))),
                           0.8 / max(1.0, math.log2(num_cards * (2 - config.max_overlap))))
    scale *= (1 + 0.5 * math.log10(num_cards))

    return generate_synthetic_card_scene(
        background,
        cards,
        angles,
        scale,
        config.max_overlap,
        config.augment_background,
        config.augment_final
    )
    

In [80]:
angle_range = (14, 22)  # in degrees

def assort_handheld_layout(background: np.ndarray, cards: list[np.ndarray], config: SynthConfig):
    num_cards = len(cards)
    angles = [0] + [random.uniform(*angle_range) for _ in range(num_cards - 1)]
    
    scale = random.uniform(0.5 / max(1.0, math.log2(num_cards * 4) / 3),
                           0.7 / max(1.0, math.log2(num_cards * 4) / 3))
    
    return generate_handheld_card_scene(
        background,
        cards,
        angles,
        scale,
        config.augment_background,
        config.augment_final
    )
    

In [81]:
def create_synthetic_images(
        images: list[np.ndarray],
        backgrounds: list[np.ndarray],
        num_images: int,
        image_dest: Path,
        label_dest: Path,
        set_name: str,
        config: SynthConfig
):
    imgs = iter(())
    i = 0
    while i < num_images:
        if not i % 128:
            random.shuffle(images)
            imgs = cycle(images)

        bg = random.choice(backgrounds).copy()
        num_cards = random.randint(config.min_cards_per_image, config.max_cards_per_image)
        cards = [next(imgs) for _ in range(num_cards)]
        labels = [card[1] for card in cards]
        cards = [card[0].copy() for card in cards]
        
        method = random.choice(config.generation_methods)

        try:
            synthetic_image, boxes = method(bg, cards, config)
        except PlacementError:
            continue

        # Save image
        cv.imwrite(f"{image_dest}/{set_name}_{i}.png", synthetic_image)
        with (label_dest / f"{set_name}_{i}.txt").open("w+", encoding="utf-8") as f:
            for label, box in zip(labels, boxes):
                f.write(f"{label} {box}\n")
                
        i += 1

In [82]:
def create_synthetic_dataset(
        dataset_dest: Path,
        config: SynthConfig
):
    if config.classes:
        images = [(i, l) for i, l in FOREGROUND_IMAGES if l in config.classes]
        labels = [CardClass(l).name.replace("_", " ").lower() for _, l in sorted(images, key=lambda x: x[1])]
        images = [(i, l) for l, (i, _) in enumerate(images)]
    else:
        images = FOREGROUND_IMAGES
        labels = [CardClass(l).name.replace("_", " ").lower() for _, l in sorted(images, key=lambda x: x[1])]

    if config.backgrounds:
        backgrounds = [create_single_color_background(color) for color in config.backgrounds]
    else:
        backgrounds = BACKGROUND_IMAGES

    # Create training data
    image_target = dataset_dest / "train" / "images"
    image_target.mkdir(parents=True, exist_ok=True)
    label_target = dataset_dest / "train" / "labels"
    label_target.mkdir(parents=True, exist_ok=True)
    create_synthetic_images(images, backgrounds, config.num_train, image_target, label_target, "train", config)

    # Create validation data
    image_target = dataset_dest / "valid" / "images"
    image_target.mkdir(parents=True, exist_ok=True)
    label_target = dataset_dest / "valid" / "labels"
    label_target.mkdir(parents=True, exist_ok=True)
    create_synthetic_images(images, backgrounds, config.num_valid, image_target, label_target, "valid", config)

    # Create test data
    image_target = dataset_dest / "test" / "images"
    image_target.mkdir(parents=True, exist_ok=True)
    label_target = dataset_dest / "test" / "labels"
    label_target.mkdir(parents=True, exist_ok=True)
    create_synthetic_images(images, backgrounds, config.num_test, image_target, label_target, "test", config)

    # Write data.yaml
    with (dataset_dest / "data.yaml").open("w+", encoding="utf-8") as f:
        f.write(create_dataset_description(len(labels), labels))


In [83]:
# Target Dataset
target = Datasets.S_1TO9_36C_OVLP

# Base configurations
config = SynthConfig()
config.num_train = 3600
config.num_valid = 360
config.num_test = 360
config.max_overlap = 0.5
config.min_cards_per_image = 2
config.max_cards_per_image = 9

# Augmentation configuration
config.augment_background = [
    RotateMult90(),
    # ColorJitter(hue=0.1)
]

config.augment_final = [
    # ColorJitter(0.3, 0.3, 0.2, 0.1),
    # LightingGradient(0.7),
    # LightSpots(),
    # Vignette(0.3)
]

# Classes and background configurations
config.classes = []

config.backgrounds = []

# Generation methods (choices: `assort_random_layout`, `assort_handheld_layout`)
config.generation_methods = [assort_handheld_layout, assort_random_layout]

In [84]:
random.seed(42)
create_synthetic_dataset(get_dataset_path(target), config)

print("Synthetic dataset created successfully!")

263 307 319 383


ValueError: operands could not be broadcast together with shapes (319,333,3) (319,383,3) 