In [4]:
import cv2
import numpy as np
import random
import time
from pathlib import Path
from jassair.utils.paths import get_data_path, get_dataset_path, Datasets
from shapely.geometry import Polygon
from shapely.affinity import rotate, translate
from matplotlib import pyplot as plt
from typing import List

In [5]:
class Card:
    """One card image together with its label and its placement state.

    ``label`` is a list laid out as
    [label_id, x_centre, y_centre, width in %, height in %] (yolo11).
    All other attributes start as ``None`` and are populated after construction.
    """

    def __init__(self, img, label):
        self.img = img
        self.label = label
        # Placement state, unknown at construction time.
        self.rot_img = None
        self.rot = None
        self.centre_x = None
        self.centre_y = None
        self.poly = None
        self.mask = None

    def calc_area(self):
        """Return the pixel area (rows * columns) of ``self.img``."""
        rows, cols = self.img.shape[0], self.img.shape[1]
        return rows * cols

    def initial_polygon(self):
        """Set ``self.poly`` to the axis-aligned rectangle spanning ``self.img``."""
        bottom = self.img.shape[0]
        right = self.img.shape[1]
        corners = [(0, 0), (right, 0), (right, bottom), (0, bottom)]
        self.poly = Polygon(corners)

    def calc_mask(self):
        """Rasterise ``self.poly`` into a uint8 mask (255 inside) sized like ``self.rot_img``."""
        canvas = np.zeros(self.rot_img.shape[:2], dtype=np.uint8)
        # fillPoly needs integer vertices, so the exterior ring is rounded first.
        outline = np.array(self.poly.exterior.coords).round().astype(np.int32)
        cv2.fillPoly(canvas, [outline], 255)
        self.mask = canvas

    def get_label_string(self):
        """Return the label entries joined by single spaces."""
        return ' '.join(map(str, self.label))

    def plot_mask(self):
        """Show the mask in grayscale; does nothing while no mask exists."""
        if self.mask is None:
            return
        plt.imshow(self.mask, cmap='gray')
        plt.show()

    def plot(self):
        """Show the rotated image if there is one, otherwise the plain image."""
        shown = self.img if self.rot_img is None else self.rot_img
        # The image is converted BGR -> RGB before it is handed to matplotlib.
        plt.imshow(cv2.cvtColor(shown, cv2.COLOR_BGR2RGB))
        plt.axis("on")
        plt.show()

In [6]:
def get_data_creation_paths() -> tuple[Path, Path, Path]:
    """Return the folders used as input for picture generation.

    Returns:
        (card images folder, card labels folder, background images folder).
        The first two are the "test/images" and "test/labels" sub-folders of
        the SWISS dataset; the third is "background_images" under the data path.
    """
    test_split = get_dataset_path(Datasets.SWISS) / "test"
    backgrounds_folder = get_data_path() / "background_images"
    return test_split / "images", test_split / "labels", backgrounds_folder

In [7]:
def smart_cast(x: str):
    """Convert a token to ``int`` when it consists of digits only, else to ``float``."""
    if x.isdigit():
        return int(x)
    return float(x)

In [8]:
def get_card_list(ci_folder: Path, cl_folder: Path) -> List[Card]:
    """Load every ``*.jpg`` in ``ci_folder`` together with its label file.

    The label file is looked up in ``cl_folder`` under the image's stem with a
    ``.txt`` suffix; its whitespace-separated tokens are converted with
    ``smart_cast``.

    Images that cannot be read, that have no label file, or whose label has
    fewer than five entries are reported and skipped. Later steps assign to
    ``card.label[1]`` .. ``card.label[4]``, so a card without a complete label
    list would only fail further down the pipeline.

    Args:
        ci_folder: Folder containing the card images.
        cl_folder: Folder containing the label text files.

    Returns:
        One ``Card`` per usable image/label pair, ordered by image path.
    """
    card_list = []

    # sorted() makes the result independent of the directory listing order.
    for image_file in sorted(ci_folder.glob("*.jpg")):
        img = cv2.imread(str(image_file))
        if img is None:
            print(f"Image not found: {image_file}")
            continue

        label_file = cl_folder / (image_file.stem + ".txt")
        if not label_file.exists():
            print(f"Label not found: {image_file.name}")
            continue

        label_text = label_file.read_text(encoding="utf-8").strip()
        label_list = [smart_cast(x) for x in label_text.split()]
        if len(label_list) < 5:
            print(f"Label is incomplete: {image_file.name}")
            continue

        card_list.append(Card(img, label_list))

    return card_list

In [9]:
def get_background_images(bgi_folder: Path) -> List[np.ndarray]:
    """Load every readable image found directly inside ``bgi_folder``.

    ``cv2.imread`` returns ``None`` instead of raising when an entry cannot be
    decoded (sub-folders, hidden files, non-image files). Such entries are
    reported and skipped so that callers never receive ``None`` as a background.

    Args:
        bgi_folder: Folder containing the background images.

    Returns:
        The decoded images, ordered by path.
    """
    background_img_list = []

    for image_file in sorted(bgi_folder.iterdir()):
        background = cv2.imread(str(image_file))
        if background is None:
            print(f"Background image could not be read: {image_file}")
            continue
        background_img_list.append(background)
    return background_img_list

In [10]:
def choose_n_random_cards(n: int, card_list: List[Card]) -> List[Card]:
    """Draw ``n`` distinct cards from ``card_list`` and return independent copies.

    The later pipeline steps reassign ``card.img`` (scaling) and write into
    ``card.label``. Handing out the pool's own objects would therefore shrink
    the pool images a little more with every generated picture. Each returned
    card is a fresh ``Card`` that shares the (unmodified) image array with the
    pool card but owns its label list and starts with empty placement state.

    Args:
        n: Number of cards to draw.
        card_list: Pool to draw from; it is not modified.

    Returns:
        ``n`` new ``Card`` objects, no pool card being drawn twice.

    Raises:
        ValueError: From ``random.sample`` when ``n`` exceeds ``len(card_list)``
            or is negative.
    """
    # no duplicates
    drawn = random.sample(card_list, n)
    return [Card(card.img, list(card.label)) for card in drawn]

def choose_random_background(background_list: List[np.ndarray]) -> np.ndarray:
    """Return one randomly chosen element of ``background_list`` (not a copy)."""
    chosen = random.choice(background_list)
    return chosen

In [11]:
def calc_cards_area(random_card_list: List[Card]) -> int:
    """Return the summed pixel area of all cards, as reported by ``Card.calc_area``."""
    # The sum starts from 0.0 and the total is converted to int at the end.
    total = sum((card.calc_area() for card in random_card_list), 0.0)
    return int(total)

In [12]:
def scale_cards(rnd_card_list: List[Card], bg_img: np.ndarray, percentage_background: float, card_size_range: float) -> bool:
    """Shrink the cards so that together they fit the non-background share of ``bg_img``.

    An upper area scale is derived from the background area, the requested
    background fraction and the current total card area. Every card then draws
    its own area scale from ``[upper * card_size_range, upper]``; each side is
    multiplied by the square root of that scale. Cards are only ever shrunk:
    for an area scale of 1 or more the image is kept as it is. Afterwards the
    card's polygon is reset through ``Card.initial_polygon``.

    Args:
        rnd_card_list: Cards to scale; ``card.img`` and ``card.poly`` are overwritten.
        bg_img: Background image providing the available area.
        percentage_background: Fraction of the background area to keep free.
        card_size_range: Factor applied to the upper scale to get the lower bound.

    Returns:
        ``False`` as soon as a card would get a side shorter than 20 pixels
        (cards handled before that one stay scaled), otherwise ``True``.
    """
    background_area = bg_img.shape[0] * bg_img.shape[1]
    area_budget = (1 - percentage_background) * background_area
    upper_scale = area_budget / calc_cards_area(rnd_card_list)
    lower_scale = upper_scale * card_size_range

    for card in rnd_card_list:
        source = card.img
        area_scale = random.uniform(lower_scale, upper_scale)
        side_scale = area_scale ** (1/2)

        target_w = int(source.shape[1] * side_scale)
        target_h = int(source.shape[0] * side_scale)

        if min(target_w, target_h) < 20:
            print('Card is to small after scaling')
            return False

        if area_scale < 1:
            card.img = cv2.resize(source, (target_w, target_h), interpolation=cv2.INTER_AREA)

        card.initial_polygon()
    return True

In [13]:
def flip_y(polygon: Polygon, image_height: int) -> Polygon:
    """Return a polygon whose exterior points have ``y`` replaced by ``image_height - y``."""
    mirrored = []
    for x, y in polygon.exterior.coords:
        mirrored.append((x, image_height - y))
    return Polygon(mirrored)

def rotate_card(card: Card, angle: float) -> None:
    """Rotate ``card.img`` by ``angle`` and keep ``card.poly`` in step with it.

    The image is rotated about its integer centre on a canvas enlarged to the
    bounding size of the rotated rectangle; uncovered pixels are filled with 0.
    The result is stored in ``card.rot_img`` and the angle in ``card.rot``;
    ``card.img`` itself is left untouched.

    The polygon is rotated and shifted with shapely using the same angle, pivot
    and offset, then mirrored vertically within the new image height.
    NOTE(review): the mirroring presumably reconciles shapely's rotation
    direction with OpenCV's image coordinates (y pointing down) - confirm.

    Args:
        card: Card providing ``img`` and ``poly``; ``rot``, ``rot_img`` and
            ``poly`` are overwritten.
        angle: Rotation angle handed to ``cv2.getRotationMatrix2D`` and
            ``shapely.affinity.rotate``.
    """
    source = card.img
    card.rot = angle
    (height, width) = source.shape[:2]
    pivot = (width // 2, height // 2)

    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)

    # Size of the axis-aligned box that contains the rotated image.
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])
    out_w = int((height * abs_sin) + (width * abs_cos))
    out_h = int((height * abs_cos) + (width * abs_sin))

    # Shift the transform so the pivot lands in the middle of the new canvas.
    shift_x = (out_w / 2) - pivot[0]
    shift_y = (out_h / 2) - pivot[1]
    matrix[0, 2] += shift_x
    matrix[1, 2] += shift_y

    card.rot_img = cv2.warpAffine(source, matrix, (out_w, out_h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0,0))

    turned = rotate(card.poly, angle, origin=pivot)
    shifted = translate(turned, xoff=shift_x, yoff=shift_y)
    card.poly = flip_y(shifted, int(card.rot_img.shape[0]))

In [14]:
def cards_random_rotation(card_list: List[Card], min_rotation: float, max_rotation: float) -> None:
    """Rotate every card by its own random angle.

    The angle is drawn uniformly from ``[min_rotation, max_rotation]`` and
    truncated to an integer before it is passed to ``rotate_card``.
    """
    for card in card_list:
        angle = int(random.uniform(min_rotation, max_rotation))
        rotate_card(card, angle)

In [15]:
def move_polygon_centroid_to(polygon, target_x, target_y):
    """Return ``polygon`` translated so that its centroid lies at (target_x, target_y)."""
    centre = polygon.centroid
    return translate(polygon, xoff=target_x - centre.x, yoff=target_y - centre.y)

In [16]:
def is_overlapping_poly(poly_list: List[Polygon], polygon: Polygon) -> bool:
    """Return ``True`` if ``polygon`` intersects at least one polygon in ``poly_list``."""
    return any(existing.intersects(polygon) for existing in poly_list)

In [17]:
def define_random_positions_time_stop(card_list: List[Card], background: np.ndarray, time_break: float) -> bool:
    """Find a random, non-overlapping position for every card within a time budget.

    For each card a centre is drawn such that the rotated image stays inside
    the background; the draw is repeated while the card's polygon intersects
    an already placed one. Cards whose rotated image is too large for the
    background are reported and dropped from ``card_list``.

    Nothing is modified unless all remaining cards could be placed. On success
    each card receives its translated polygon, its pixel centre
    (``centre_x`` / ``centre_y``) and its label centre. The label centre is
    stored as a fraction of the background width/height, matching the
    normalised width/height that ``set_label_width_height`` writes into the
    same label.

    Args:
        card_list: Cards with ``rot_img`` and ``poly`` set; modified in place.
        background: Image the cards will be pasted on.
        time_break: Time budget in seconds, shared by all cards of this attempt.

    Returns:
        ``True`` when every remaining card was placed, ``False`` when the time
        budget ran out first.
    """
    bg_x, bg_y = background.shape[1], background.shape[0]
    # Monotonic clock: the budget must not be affected by system clock changes.
    start_time = time.monotonic()

    placed_polys = []
    placements = []  # (card, polygon, x, y) for every card that found a spot

    for card in card_list:
        half_w, half_h = card.rot_img.shape[1] // 2, card.rot_img.shape[0] // 2

        # No valid centre exists when the rotated image exceeds the background.
        if half_w > bg_x - half_w or half_h > bg_y - half_h:
            print('Card has no space and will be removed.')
            continue

        while True:
            x = random.randint(half_w, bg_x - half_w)
            y = random.randint(half_h, bg_y - half_h)
            candidate = move_polygon_centroid_to(card.poly, x, y)
            if not is_overlapping_poly(placed_polys, candidate):
                break
            if time.monotonic() - start_time > time_break:
                return False

        placed_polys.append(candidate)
        placements.append((card, candidate, x, y))

    # Drop the cards that had no space; done in place because callers keep
    # using the same list object.
    card_list[:] = [entry[0] for entry in placements]

    for card, poly, x, y in placements:
        card.poly = poly
        card.centre_x = x
        card.centre_y = y
        # Normalise the centre to the 0-1 range (fraction of the background size).
        card.label[1] = x / bg_x
        card.label[2] = y / bg_y
    return True

In [18]:
def define_random_positions(card_list: List[Card], background: np.ndarray, time_break: float, max_tries: int) -> bool:
    """Try up to ``max_tries`` times to place all cards without overlap.

    Each try is one call to ``define_random_positions_time_stop`` with its own
    time budget of ``time_break`` seconds. A success on any try, including the
    last permitted one, is reported as ``True``.

    Args:
        card_list: Cards to place; modified in place by a successful try.
        background: Image the cards will be pasted on.
        time_break: Time budget in seconds for a single try.
        max_tries: Maximum number of tries.

    Returns:
        ``True`` if a try succeeded, ``False`` if all tries failed.
    """
    for _ in range(max_tries):
        if define_random_positions_time_stop(card_list, background, time_break):
            return True
    return False

In [19]:
def set_label_width_height(card_list: List[Card], background: np.ndarray) -> None:
    """Write each card's box size into its label as a fraction of the background.

    ``label[3]`` becomes rotated-image width / background width and
    ``label[4]`` rotated-image height / background height.
    """
    bg_height, bg_width = background.shape[0], background.shape[1]

    for card in card_list:
        box_height, box_width = card.rot_img.shape[0], card.rot_img.shape[1]
        card.label[3] = box_width / bg_width
        card.label[4] = box_height / bg_height

In [20]:
def add_card_to_img(card: Card, img: np.ndarray) -> np.ndarray:
    """Paste the rotated card onto ``img`` wherever the card's mask equals 255.

    The paste window is centred on (``card.centre_x``, ``card.centre_y``) and
    has the size of the mask. Mask, card image and window are cropped to their
    common size before the first three channels are copied.

    Args:
        card: Card with ``rot_img``, ``mask``, ``centre_x`` and ``centre_y`` set.
        img: Target image; it is modified in place.

    Returns:
        The same ``img`` object.
    """
    sprite = card.rot_img
    stencil = card.mask

    box_h, box_w = stencil.shape[0], stencil.shape[1]
    left = int(card.centre_x - box_w // 2)
    top = int(card.centre_y - box_h // 2)
    right = left + box_w
    bottom = top + box_h

    window = img[top:bottom, left:right]

    # Slicing clips at the image border, so crop everything to the common size.
    rows = min(stencil.shape[0], sprite.shape[0], window.shape[0])
    cols = min(stencil.shape[1], sprite.shape[1], window.shape[1])
    stencil = stencil[:rows, :cols]
    sprite = sprite[:rows, :cols, :]
    window = window[:rows, :cols, :]

    inside_card = stencil == 255
    for channel in range(3):
        window[:, :, channel] = np.where(inside_card, sprite[:, :, channel], window[:, :, channel])
    img[top:bottom, left:right] = window

    return img

In [21]:
def add_cards_to_background(card_list: List[Card], background: np.ndarray) -> np.ndarray:
    """Paste all cards onto a copy of ``background`` and return that copy."""
    canvas = background.copy()
    for card in card_list:
        canvas = add_card_to_img(card, canvas)
    return canvas

In [22]:
def create_new_picture(card_list: List[Card], background_img_list: List[np.ndarray], max_cards: int, min_cards: int,
                       min_percentage_background: float, max_percentage_background: float, card_size_range: float,
                       min_rotation: float, max_rotation: float, time_break: float, max_tries: int) -> (np.ndarray, List[List[str]]):
    """Compose one synthetic picture of randomly scaled, rotated and placed cards.

    Steps: draw the number of cards, the cards and a background; scale the
    cards; rotate them; build their masks; place them without overlap; store
    the box size in the labels; paste the cards onto a copy of the background.

    Args:
        card_list: Pool of cards to draw from.
        background_img_list: Pool of background images to draw from.
        max_cards: Upper bound (inclusive) for the number of cards.
        min_cards: Lower bound (inclusive) for the number of cards.
        min_percentage_background: Lower bound of the background fraction.
        max_percentage_background: Upper bound of the background fraction.
        card_size_range: Passed through to ``scale_cards``.
        min_rotation: Lower bound of the rotation angle.
        max_rotation: Upper bound of the rotation angle.
        time_break: Time budget in seconds for one placement try.
        max_tries: Maximum number of placement tries.

    Returns:
        ``(image, labels)`` where ``labels`` holds one single-element list with
        the label string per card, or ``(None, None)`` when scaling or
        placement failed.
    """
    # random cards and background
    card_count = random.randint(min_cards, max_cards)
    picked_cards = choose_n_random_cards(card_count, card_list)
    background = choose_random_background(background_img_list)

    # scale cards
    background_share = random.uniform(min_percentage_background, max_percentage_background)
    if not scale_cards(picked_cards, background, background_share, card_size_range):
        return None, None

    # rotate cards
    cards_random_rotation(picked_cards, min_rotation, max_rotation)

    # binary mask per card: decides which pixels replace the background
    for card in picked_cards:
        card.calc_mask()

    # non-overlapping position per card; also sets the label x and y
    if not define_random_positions(picked_cards, background, time_break, max_tries):
        return None, None

    # label width and height per card
    set_label_width_height(picked_cards, background)

    # paste every card onto the background image
    picture = add_cards_to_background(picked_cards, background)

    # one label row per card
    picture_labels = [[card.get_label_string()] for card in picked_cards]

    return picture, picture_labels

In [23]:
def label_to_txt(label_list: List[List[str]], file_path: str) -> None:
    """Write the first entry of every row, stripped, as one line of ``file_path``.

    An existing file is overwritten.
    """
    with open(file_path, "w") as handle:
        handle.writelines(row[0].strip() + "\n" for row in label_list)

In [24]:
def img_to_jpg(img: np.ndarray, file_path: str) -> None:
    """Write ``img`` to ``file_path`` with ``cv2.imwrite``.

    The return value of ``cv2.imwrite`` is not inspected, so a failed write
    goes unnoticed here.
    """
    cv2.imwrite(file_path, img)

In [25]:
def plot_img(image):
    """Display ``image`` with matplotlib after converting it from BGR to RGB."""
    rgb_view = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_view)
    plt.axis("on")
    plt.show()

In [26]:
def _make_split_folders(dataset_folder: Path, split: str) -> tuple[Path, Path]:
    """Create ``<dataset>/<split>/images`` and ``.../labels`` and return both paths."""
    img_folder = dataset_folder / split / "images"
    label_folder = dataset_folder / split / "labels"
    img_folder.mkdir(parents=True, exist_ok=True)
    label_folder.mkdir(parents=True, exist_ok=True)
    return img_folder, label_folder


def _generate_split(split: str, count: int, img_folder: Path, label_folder: Path, picture_args: tuple) -> None:
    """Generate ``count`` pictures for one split and write each image/label pair to disk."""
    for i in range(count):
        print(f"Picture {split}_{i} is generating")
        new_img, label = create_new_picture(*picture_args)

        # A failed composition is skipped; its index is simply left out.
        if new_img is None:
            continue

        file_name = f"{split}_{i}"
        img_to_jpg(new_img, f"{img_folder / file_name}.jpg")
        label_to_txt(label, f"{label_folder / file_name}.txt")


def generate_images(max_cards: int, min_cards: int, min_percentage_background: float, max_percentage_background: float,
                    card_size_range: float, min_rotation: float, max_rotation: float, time_break: float, number_train: int,
                    number_valid: int, number_test: int, max_tries: int, name_dataset: str) -> None:
    """Generate a synthetic dataset with train, valid and test splits.

    The dataset is written to ``get_data_path() / name_dataset``. Every split
    gets an ``images`` and a ``labels`` folder; files are named
    ``<split>_<index>.jpg`` and ``<split>_<index>.txt``. Pictures whose
    composition fails are skipped, so a split can end up with fewer files than
    requested.

    Args:
        max_cards: Upper bound (inclusive) for the number of cards per picture.
        min_cards: Lower bound (inclusive) for the number of cards per picture.
        min_percentage_background: Lower bound of the background fraction.
        max_percentage_background: Upper bound of the background fraction.
        card_size_range: Passed through to ``create_new_picture``.
        min_rotation: Lower bound of the rotation angle.
        max_rotation: Upper bound of the rotation angle.
        time_break: Time budget in seconds for one placement try.
        number_train: Number of pictures to attempt for the train split.
        number_valid: Number of pictures to attempt for the valid split.
        number_test: Number of pictures to attempt for the test split.
        max_tries: Maximum number of placement tries per picture.
        name_dataset: Name of the dataset folder.
    """
    # create folders
    dataset_folder = get_data_path() / name_dataset
    dataset_folder.mkdir(parents=True, exist_ok=True)

    requested = (("train", number_train), ("valid", number_valid), ("test", number_test))
    folders = {split: _make_split_folders(dataset_folder, split) for split, _ in requested}

    # get all data to choose
    cif, clf, bf = get_data_creation_paths()
    all_cards = get_card_list(cif, clf)
    bg_img_list = get_background_images(bf)

    picture_args = (all_cards, bg_img_list, max_cards, min_cards, min_percentage_background,
                    max_percentage_background, card_size_range, min_rotation, max_rotation,
                    time_break, max_tries)

    # create images and labels split by split: train, valid, test
    for split, count in requested:
        img_folder, label_folder = folders[split]
        _generate_split(split, count, img_folder, label_folder, picture_args)

In [27]:
# Generation settings; they are passed positionally to generate_images in the next cell.
max_c = 6 # maximum number of cards per picture (inclusive)
min_c = 3 # minimum number of cards per picture (inclusive)
min_pbg = 0.6  # lower bound of the background-area fraction drawn per picture
max_pbg = 0.75  # upper bound of the background-area fraction drawn per picture
csr = 0.99  # card_size_range: each card's area scale is drawn from [csr * s, s], s being the largest allowed scale
min_rot = -90  # lower bound of the random rotation angle (the drawn angle is truncated to an int)
max_rot = 90  # upper bound of the random rotation angle
tb = 0.5  # time budget in seconds for one attempt to place all cards
n_train = 0 # number of pictures to attempt for the train set
n_valid = 0 # number of pictures to attempt for the valid set
n_test = 10 # number of pictures to attempt for the test set
max_t = 5 # maximum number of placement attempts per picture
name = 'data_v2.0'  # dataset folder name, created under get_data_path()

In [28]:
# Keyword arguments keep the thirteen settings from being mixed up by position.
generate_images(
    max_cards=max_c,
    min_cards=min_c,
    min_percentage_background=min_pbg,
    max_percentage_background=max_pbg,
    card_size_range=csr,
    min_rotation=min_rot,
    max_rotation=max_rot,
    time_break=tb,
    number_train=n_train,
    number_valid=n_valid,
    number_test=n_test,
    max_tries=max_t,
    name_dataset=name,
)

Picture test_0 is generating
Picture test_1 is generating
Picture test_2 is generating
Picture test_3 is generating
Picture test_4 is generating
Picture test_5 is generating
Picture test_6 is generating
Picture test_7 is generating
Picture test_8 is generating
Picture test_9 is generating
