Create a dataset from Swiss card images

In [1]:
%pip install opencv-python numpy

Note: you may need to restart the kernel to use updated packages.


In [1]:
import math
import numpy as np
import cv2 as cv
from pathlib import Path
import random
from itertools import cycle

from jassair.utils import get_dataset_path, Datasets

## Create background images

In [15]:
# Directory of the BACKGROUNDS dataset; the generated background PNGs are written here.
dest_path = get_dataset_path(Datasets.BACKGROUNDS)

In [16]:
# Side length (in pixels) of the square background images that are written out.
target_x = 640

# Sort the paths so the background_{i} numbering is stable across runs and machines.
for i, image_path in enumerate(sorted(Path("data/raw_backgrounds").glob("*.jpg")), 1):
    image = cv.imread(str(image_path))
    if image is None:
        # cv.imread signals an unreadable file by returning None instead of raising.
        raise ValueError(f"Could not read image {image_path}")

    h, w = image.shape[:2]
    x = min(h, w)  # side of the largest square that fits inside the image

    if x < target_x:
        raise ValueError(f"Image {image_path} is too small!, {image.shape[:2]}")

    # Centered square crop of side x (keeps the full side even when x is odd).
    h_start = (h - x) // 2
    w_start = (w - x) // 2
    image = image[h_start:h_start + x, w_start:w_start + x]

    image = cv.resize(image, (target_x, target_x))
    # cv.imwrite reports failure through its return value, so check it explicitly.
    if not cv.imwrite(f"{dest_path}/background_{i}.png", image):
        raise OSError(f"Could not write background image {i} to {dest_path}")

## Read images

In [2]:
# Root of the SWISS dataset; card images and labels are read from its "test" split.
dataset_path = get_dataset_path(Datasets.SWISS)

In [3]:
# (card image, class label) pairs used as foreground objects for the synthetic images.
FOREGROUND_IMAGES: list[tuple[np.ndarray, int]] = []

In [4]:
# Start from an empty list so that re-running this cell does not append duplicates.
FOREGROUND_IMAGES.clear()

# Sorted for a deterministic load order regardless of the file system.
for image_path in sorted((dataset_path / "test" / "images").glob("*.jpg")):
    image = cv.imread(str(image_path))
    if image is None:
        # cv.imread returns None for unreadable files; fail here rather than later.
        raise ValueError(f"Could not read image {image_path}")

    label_path = dataset_path / "test" / "labels" / f"{image_path.stem}.txt"
    with label_path.open("r", encoding="utf-8") as f:
        fields = f.readline().split()
    if not fields:
        raise ValueError(f"Label file {label_path} is empty")

    # The first token of the first label line is used as the class label.
    FOREGROUND_IMAGES.append((image, int(fields[0])))
print(len(FOREGROUND_IMAGES))

36


In [5]:
# Background images onto which the cards are pasted.
BACKGROUND_IMAGES : list[np.ndarray] = []

In [6]:
# Start from an empty list so that re-running this cell does not append duplicates.
BACKGROUND_IMAGES.clear()

# Sorted for a deterministic load order regardless of the file system.
for image_path in sorted(get_dataset_path(Datasets.BACKGROUNDS).glob("*.png")):
    image = cv.imread(str(image_path))
    if image is None:
        # cv.imread returns None for unreadable files; fail here rather than later.
        raise ValueError(f"Could not read image {image_path}")
    BACKGROUND_IMAGES.append(image)
print(len(BACKGROUND_IMAGES))

600


## YOLO Dataset description

In [7]:
# Contents of the data.yaml file written into the generated dataset directory:
# image directories of the three splits, the class count and the class names.
DESCRIPTION = """train: ./train/images
val: ./valid/images
test: ./test/images

nc: 36
names: ['Eichel 10', 'Eichel 6', 'Eichel 7', 'Eichel 8', 'Eichel 9', 'Eichel Ass', 'Eichel Konig', 'Eichel Ober', 'Eichel Under', 'Rose 10', 'Rose 6', 'Rose 7', 'Rose 8', 'Rose 9', 'Rose Ass', 'Rose Konig', 'Rose Ober', 'Rose Under', 'Schelle 10', 'Schelle 6', 'Schelle 7', 'Schelle 8', 'Schelle 9', 'Schelle Ass', 'Schelle Konig', 'Schelle Ober', 'Schelle Under', 'Schilte 10', 'Schilte 6', 'Schilte 7', 'Schilte 8', 'Schilte 9', 'Schilte Ass', 'Schilte Konig', 'Schilte Ober', 'Schilte Under']
"""

## Create synthetic images

In [8]:
def rotate_image(fg, angle):
    """Rotate ``fg`` by ``angle`` onto a canvas sized to the rotated bounding box.

    ``angle`` is passed unchanged to ``cv.getRotationMatrix2D``. The output
    canvas dimensions are truncated to whole pixels and the area not covered
    by the rotated image is filled with zeros (black).
    """
    height, width = fg.shape[:2]
    pivot_x, pivot_y = width // 2, height // 2

    # Plain rotation about the (integer) image centre, no scaling.
    matrix = cv.getRotationMatrix2D((pivot_x, pivot_y), angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # Size of the axis-aligned box that encloses the rotated image.
    out_w = int((height * abs_sin) + (width * abs_cos))
    out_h = int((height * abs_cos) + (width * abs_sin))

    # Shift the transform so the rotated image is centred on the new canvas.
    matrix[0, 2] += (out_w / 2) - pivot_x
    matrix[1, 2] += (out_h / 2) - pivot_y

    return cv.warpAffine(
        fg,
        matrix,
        (out_w, out_h),
        borderMode=cv.BORDER_CONSTANT,
        borderValue=(0, 0, 0, 0),
    )

In [9]:
def calculate_overlap_percentage(x1, y1, w1, h1, x2, y2, w2, h2):
    """Return the intersection-over-union of two axis-aligned rectangles.

    Each rectangle is given by its top-left corner ``(x, y)`` and its size
    ``(w, h)``. The result is a fraction between 0 and 1 (not a value out of
    100): 0 when the rectangles do not intersect, 1 when they coincide.
    Returns 0.0 when both rectangles have zero area, instead of dividing by
    zero.
    """
    intersection_width = max(0, min(x1 + w1, x2 + w2) - max(x1, x2))
    intersection_height = max(0, min(y1 + h1, y2 + h2) - max(y1, y2))
    intersection_area = intersection_width * intersection_height

    union_area = w1 * h1 + w2 * h2 - intersection_area
    if union_area <= 0:
        # Degenerate (zero-area) rectangles cannot overlap anything.
        return 0.0

    return intersection_area / union_area

In [10]:
def check_overlap(x, y, image, placed_images, overlap_threshold: float):
    """Tell whether ``image`` placed at ``(x, y)`` overlaps any placed image too much.

    ``placed_images`` holds ``((x, y), image)`` entries. Returns True as soon
    as the value from ``calculate_overlap_percentage`` for one of them is
    strictly greater than ``overlap_threshold``, otherwise False.
    """
    return any(
        calculate_overlap_percentage(
            x, y, image.shape[1], image.shape[0],
            left, top, other.shape[1], other.shape[0],
        ) > overlap_threshold
        for (left, top), other in placed_images
    )

In [11]:
def get_card_positions(bg_height, bg_width, cards, overlap_threshold: float):
    """Draw random top-left positions for ``cards`` inside the background.

    Cards are placed one after another. For each card up to 10 random
    positions are tried; a position is accepted when ``check_overlap`` reports
    no excessive overlap with the cards placed so far.

    Returns:
        A list of ``(x, y)`` integer positions in the order of ``cards``, or
        ``None`` when some card could not be placed within its 10 tries.

    Raises:
        ValueError: if a card is larger than the background in either dimension.
    """
    placed_images = []

    for card in cards:
        card_h, card_w = card.shape[:2]
        max_x = bg_width - card_w
        max_y = bg_height - card_h
        if max_x < 0 or max_y < 0:
            raise ValueError(
                f"Card of size {card_w}x{card_h} is larger than the "
                f"{bg_width}x{bg_height} background"
            )

        for _ in range(10):
            # randint's upper bound is exclusive; +1 lets the card touch the
            # right/bottom edge and allows a card that exactly fills the background.
            x = int(np.random.randint(0, max_x + 1))
            y = int(np.random.randint(0, max_y + 1))
            if not check_overlap(x, y, card, placed_images, overlap_threshold):
                break
        else:
            # No acceptable position found for this card.
            return None
        placed_images.append(((x, y), card))
    return [coords for coords, _ in placed_images]

In [12]:
def overlay_image(bg: np.ndarray, cards: list[np.ndarray], scale: float, angles: list[float], overlap_threshold: float, max_attempts: int = 10_000):
    """
    Rotate, resize and paste card images onto a background.

    Every card is rotated by its matching entry of ``angles``. All cards are
    then resized by one common factor: the factor that fits the largest rotated
    card (largest height + width) into the background, multiplied by ``scale``.
    Random layouts are drawn until one respects ``overlap_threshold``. Card
    pixels that are zero in every channel are treated as transparent.

    Args:
        bg: Background image; it is modified in place.
        cards: Card images to paste.
        scale: Extra factor applied on top of the fit-to-background ratio.
        angles: Rotation angle for each card, passed to ``rotate_image``.
        overlap_threshold: Maximum overlap accepted between two cards.
        max_attempts: Number of random layouts to try before giving up.

    Returns:
        ``(bg, boxes)`` where ``boxes`` holds one string per card of the form
        ``"x_center y_center width height"``, each value divided by the
        background width or height.

    Raises:
        RuntimeError: if no valid layout is found within ``max_attempts``.
    """
    h_bg, w_bg = bg.shape[:2]
    if not cards:
        # Nothing to paste: the background is returned untouched.
        return bg, []

    # Rotate first so the resize below works on the rotated bounding boxes.
    cards = [rotate_image(card, angle) for card, angle in zip(cards, angles)]

    max_card = max(cards, key=lambda c: sum(c.shape[:2]))
    h_max, w_max = max_card.shape[:2]
    ratio = min(h_bg / h_max, w_bg / w_max)
    shapes = [(int(card.shape[0] * ratio * scale), int(card.shape[1] * ratio * scale)) for card in cards]
    cards = [cv.resize(card, (shape[1], shape[0])) for card, shape in zip(cards, shapes)]

    # Bounded retry: an unsatisfiable layout raises instead of looping forever.
    for _ in range(max_attempts):
        positions = get_card_positions(h_bg, w_bg, cards, overlap_threshold)
        if positions is not None:
            break
    else:
        raise RuntimeError(
            f"Could not place {len(cards)} cards with overlap threshold "
            f"{overlap_threshold} after {max_attempts} attempts"
        )

    boxes = []
    for card, (x, y), (new_h, new_w) in zip(cards, positions, shapes):
        roi = bg[y:y+new_h, x:x+new_w]

        # A pixel belongs to the card when any channel is non-zero; fully black
        # pixels (e.g. the padding added by the rotation) keep the background.
        opaque = np.any(card > 0, axis=-1)
        blended = np.where(opaque[..., np.newaxis], card, roi)

        # Place blended region back onto background
        bg[y:y+new_h, x:x+new_w] = blended
        boxes.append(f"{(x + new_w / 2) / w_bg} {(y + new_h / 2) / h_bg} {new_w / w_bg} {new_h / h_bg}")

    return bg, boxes

## Augment images

In [34]:
def add_gaussian_blur(image, max_kernel=30):
    """Blur ``image`` with a square Gaussian kernel of random odd size below ``max_kernel``."""
    odd_sizes = range(1, max_kernel, 2)  # 1, 3, 5, ... (max_kernel itself excluded)
    ksize = random.choice(odd_sizes)
    return cv.GaussianBlur(image, (ksize, ksize), 0)

In [35]:
def add_noise(image, noise_level=10):
    """Add zero-mean Gaussian noise with standard deviation ``noise_level``.

    The noise is added in float32, the sum is clipped to [0, 255] and the
    result is returned as uint8 with the same shape as ``image``.
    """
    gaussian = np.random.normal(0, noise_level, image.shape).astype(np.float32)

    # Work in float so the sum can leave the 0..255 range before clipping.
    perturbed = np.clip(image.astype(np.float32) + gaussian, 0, 255)

    return perturbed.astype(np.uint8)

In [36]:
def shift_white_balance(image, shift_value=50):
    """Shift every colour channel by its own random offset in [-shift_value, shift_value].

    The image is split into three channels (named blue, green, red here), each
    channel gets an independent integer offset via ``cv.add``, and the channels
    are merged back in their original order.
    """
    blue, green, red = cv.split(image)

    # Offsets are drawn in the order blue, red, green.
    blue_shift, red_shift, green_shift = (
        random.randint(-shift_value, shift_value) for _ in range(3)
    )

    blue = cv.add(blue, blue_shift)
    red = cv.add(red, red_shift)
    green = cv.add(green, green_shift)
    return cv.merge((blue, green, red))

In [37]:
def add_light_spots(image, max_radius=280, min_radius=80, intensity=255):
    """Brighten ``image`` with one randomly placed, blurred circular highlight.

    A filled circle of brightness ``intensity`` and a random radius between
    ``min_radius`` and ``max_radius`` is drawn on a black canvas, blurred with
    a Gaussian kernel of about 0.6 x radius, and added to the image with
    weight 0.5.
    """
    height, width = image.shape[:2]
    base = image.copy()

    # Centre first (x, then y), then the radius.
    centre = (random.randint(0, width), random.randint(0, height))
    radius = random.randint(min_radius, max_radius)

    spot = np.zeros((height, width, 3), dtype=np.uint8)
    cv.circle(spot, centre, radius, (intensity, intensity, intensity), -1)

    # GaussianBlur needs an odd kernel size; setting the lowest bit bumps even values by one.
    kernel = int(radius * 0.6) | 1
    spot = cv.GaussianBlur(spot, (kernel, kernel), 0)

    return cv.addWeighted(base, 1, spot, 0.5, 0)

In [38]:
# Pool of augmentation functions; each takes an image and returns the augmented image.
AUGMENTATIONS = [add_gaussian_blur, add_noise, shift_white_balance, add_light_spots]

In [39]:
def augment_image(image):
    """Apply a random subset of ``AUGMENTATIONS`` in random order.

    The augmentations are visited in shuffled order. A random value in [0, 1)
    is added to a running total for each one; when the total exceeds 0.6 the
    augmentation is applied and the total is reset, otherwise it is skipped.

    Returns the augmented image.
    """
    # Shuffle a private copy: mutating the module-level list would make the
    # outcome depend on earlier calls, even after re-seeding `random`.
    augmentations = list(AUGMENTATIONS)
    random.shuffle(augmentations)

    threshold = 0
    for augmentation in augmentations:
        threshold += random.random()
        if threshold > 0.6:
            image = augmentation(image)
            threshold = 0
    return image

In [40]:
def create_synthetic_images(num_images: int, image_dest: Path, label_dest: Path, set_name: str, cards_per_image: tuple[int, int], overlap_threshold: float):
    """Generate ``num_images`` synthetic images with matching label files.

    For every image a random background is copied, a random number of cards
    (inclusive bounds ``cards_per_image``) is taken from a shuffled cycle over
    ``FOREGROUND_IMAGES``, and the cards are pasted with random rotation and
    scale before the result is augmented. The cycle is reshuffled every 128
    images.

    Args:
        num_images: Number of images to create.
        image_dest: Directory receiving ``{set_name}_{i}.png``.
        label_dest: Directory receiving ``{set_name}_{i}.txt`` with one
            ``"label box"`` line per card.
        set_name: Prefix of the generated file names.
        cards_per_image: ``(min, max)`` number of cards per image.
        overlap_threshold: Maximum overlap accepted between two cards.

    Raises:
        OSError: if an image cannot be written.
    """
    images = iter(())
    for i in range(num_images):
        if i % 128 == 0:
            # Shuffle a private copy so the module-level list keeps its order
            # and a re-seeded run starts from the same state.
            deck = list(FOREGROUND_IMAGES)
            random.shuffle(deck)
            images = cycle(deck)

        bg = random.choice(BACKGROUND_IMAGES).copy()

        num_cards = random.randint(*cards_per_image)
        drawn = [next(images) for _ in range(num_cards)]
        angles = [random.gauss(0, 45.0) for _ in range(num_cards)]
        labels = [label for _, label in drawn]
        cards = [image.copy() for image, _ in drawn]

        # More cards (and less permitted overlap) means smaller cards; the
        # divisor never drops below 1 so a single card keeps the 0.6..1.0 range.
        shrink = max(1.0, math.log2(num_cards * (2 - overlap_threshold)))
        scale = random.uniform(0.6 / shrink, 1.0 / shrink)

        # Overlay object onto background
        synthetic_image, boxes = overlay_image(bg, cards, scale, angles, overlap_threshold)
        synthetic_image = augment_image(synthetic_image)

        # Save image; cv.imwrite reports failure through its return value.
        image_file = f"{image_dest}/{set_name}_{i}.png"
        if not cv.imwrite(image_file, synthetic_image):
            raise OSError(f"Could not write image {image_file}")
        with (label_dest / f"{set_name}_{i}.txt").open("w", encoding="utf-8") as f:
            for label, box in zip(labels, boxes):
                f.write(f"{label} {box}\n")

In [41]:
def create_synthetic_dataset(dataset_dest: Path, num_train: int, num_val: int, num_test: int, cards_per_image: tuple[int, int], max_overlap: float):
    """Create the train/valid/test splits and the ``data.yaml`` of a synthetic dataset.

    For each split the ``images`` and ``labels`` directories are created below
    ``dataset_dest`` (existing directories are reused) and filled by
    ``create_synthetic_images``. Finally ``DESCRIPTION`` is written to
    ``dataset_dest / "data.yaml"``.

    Args:
        dataset_dest: Root directory of the dataset.
        num_train: Number of training images.
        num_val: Number of validation images.
        num_test: Number of test images.
        cards_per_image: ``(min, max)`` number of cards per image.
        max_overlap: Maximum overlap accepted between two cards.
    """
    # Splits are generated in this order; the split name is both the
    # directory name and the file-name prefix.
    splits = (("train", num_train), ("valid", num_val), ("test", num_test))
    for split_name, num_images in splits:
        image_target = dataset_dest / split_name / "images"
        image_target.mkdir(parents=True, exist_ok=True)
        label_target = dataset_dest / split_name / "labels"
        label_target.mkdir(parents=True, exist_ok=True)
        create_synthetic_images(num_images, image_target, label_target, split_name, cards_per_image, max_overlap)

    # Write data.yaml
    with (dataset_dest / "data.yaml").open("w", encoding="utf-8") as f:
        f.write(DESCRIPTION)
        

In [42]:
# Number of images to generate for each split.
num_train = 100
num_valid = 0
num_test = 0
# Overlap threshold handed to the generator as `max_overlap`.
overlap = 0.0
# (min, max) number of cards per image, both bounds inclusive.
cards_per_image = (1, 1)

In [43]:
# Seed both generators: the pipeline draws from `random` (card choice, angles,
# scale, augmentations) and from `np.random` (card positions, noise).
random.seed(42)
np.random.seed(42)
create_synthetic_dataset(get_dataset_path(Datasets.SYNTHETIC_SINGLE), num_train, num_valid, num_test, cards_per_image, overlap)

print("Synthetic dataset created successfully!")

Synthetic dataset created successfully!
