In [1]:
from pathlib import Path
import shutil
import cv2 as cv

## Filter SRF images to keep only Swiss cards

In [18]:
# Dataset locations, relative to the notebook's working directory:
# `source` is read by the cleaning step, `target` receives its output.
ROOT = Path(".")
source = ROOT.joinpath("data", "jass-yolov8-v20-data")
target = ROOT.joinpath("data", "jass-yolov8-v20-clean")

In [3]:
# One slot per SRF class index; the slots are filled once both label lists are defined.
LABEL_MAP = [None for _ in range(36)]

In [4]:
# Card names of the SRF dataset. The position of a name in this list is the index that
# is looked up in LABEL_MAP, so the order must not change.
SRF_LABELS = [
    'schilte 7', 'schelle under', 'rose ass', 'schelle 7', 'schelle 10', 'schelle 6',
    'eichel 7', 'eichel 6', 'eichel 8', 'schelle 9', 'rose 8', 'eichel 9',
    'rose 6', 'rose 7', 'rose 9', 'rose konig', 'schilte 6', 'schilte 8',
    'rose under', 'rose ober', 'schelle 8', 'eichel under', 'schelle konig', 'eichel konig',
    'eichel ass', 'schilte ass', 'schilte 10', 'schilte konig', 'eichel ober', 'schilte ober',
    'schilte under', 'schelle ober', 'schelle ass', 'eichel 10', 'rose 10', 'schilte 9',
]

In [5]:
# Card names in our own class order, one suit per row. The position of a name in this
# list is the value stored in LABEL_MAP, so the order must not change.
OUR_LABELS = [
    'Eichel 10', 'Eichel 6', 'Eichel 7', 'Eichel 8', 'Eichel 9', 'Eichel Ass', 'Eichel Konig', 'Eichel Ober', 'Eichel Under',
    'Rose 10', 'Rose 6', 'Rose 7', 'Rose 8', 'Rose 9', 'Rose Ass', 'Rose Konig', 'Rose Ober', 'Rose Under',
    'Schelle 10', 'Schelle 6', 'Schelle 7', 'Schelle 8', 'Schelle 9', 'Schelle Ass', 'Schelle Konig', 'Schelle Ober', 'Schelle Under',
    'Schilte 10', 'Schilte 6', 'Schilte 7', 'Schilte 8', 'Schilte 9', 'Schilte Ass', 'Schilte Konig', 'Schilte Ober', 'Schilte Under',
]

In [6]:
# Build LABEL_MAP so that LABEL_MAP[srf_index] is the index of the same card in OUR_LABELS.
# Names are compared case-insensitively; setdefault keeps the first occurrence of a name,
# matching a first-match linear scan.
our_index_by_name = {}
for our_index, our_label in enumerate(OUR_LABELS):
    our_index_by_name.setdefault(our_label.lower(), our_index)

# Rebuilt from scratch on every run, so the cell gives the same result however often it
# is executed and does not depend on a pre-sized list.
LABEL_MAP = [our_index_by_name.get(srf_label.lower()) for srf_label in SRF_LABELS]

# Fail loudly: an unmatched label would otherwise leave None in the map instead of a
# class index.
unmatched = [srf_label for srf_label, mapped in zip(SRF_LABELS, LABEL_MAP) if mapped is None]
assert not unmatched, f"SRF labels without a counterpart in OUR_LABELS: {unmatched}"

print(LABEL_MAP)

[29, 26, 14, 20, 18, 19, 2, 1, 3, 22, 12, 4, 10, 11, 13, 15, 28, 30, 17, 16, 21, 8, 24, 6, 5, 32, 27, 33, 7, 34, 35, 25, 23, 0, 9, 31]


In [7]:
def clean_data_source(source_path: Path, target_path: Path):
    """Copy images that contain at least one mapped class and rewrite their labels.

    For every ``labels/*.txt`` file under ``source_path`` that has a matching
    ``images/<stem>.jpg``, the class index at the start of each label line is
    translated through the module-level ``LABEL_MAP``; the remaining fields of the
    line are kept verbatim. Lines whose class index has no mapping (outside the
    range of ``LABEL_MAP``, or mapped to ``None``) and blank lines are dropped.
    A file that ends up with no label lines is skipped together with its image.

    Args:
        source_path: Directory containing ``images`` and ``labels`` sub-directories.
        target_path: Directory that receives the ``images`` and ``labels``
            sub-directories; created if missing.

    Raises:
        ValueError: If the first field of a non-blank label line is not an integer.
    """
    images_dir = target_path / "images"
    labels_dir = target_path / "labels"
    # Loop-invariant, so the output directories are created once up front.
    images_dir.mkdir(exist_ok=True, parents=True)
    labels_dir.mkdir(exist_ok=True)

    for label_file in (source_path / "labels").glob("*.txt"):
        image_file = source_path / "images" / f"{label_file.stem}.jpg"
        if not image_file.exists():
            # Report label files that have no .jpg next to them, then move on.
            print(label_file)
            continue

        with label_file.open("r", encoding="utf-8") as f:
            rows = [line.split() for line in f]

        kept = []
        for row in rows:
            if not row:
                # Blank line (for example a trailing newline at the end of the file).
                continue
            class_index = int(row[0])
            # The explicit lower bound stops a negative index from wrapping around.
            if not 0 <= class_index < len(LABEL_MAP):
                continue
            mapped = LABEL_MAP[class_index]
            if mapped is None:
                # Never write the literal text "None" as a class id.
                continue
            kept.append(" ".join([str(mapped), *row[1:]]))

        if not kept:
            # Nothing of interest in this image: copy neither the labels nor the image.
            continue

        with (labels_dir / label_file.name).open("w", encoding="utf-8") as f:
            f.write("\n".join(kept))
        shutil.copy(image_file, images_dir)

In [8]:
# The split is called "validation" in the source data and "valid" in the cleaned copy.
clean_data_source(
    source_path=source / "validation",
    target_path=target / "valid",
)

In [9]:
# The training split keeps its name in the cleaned copy.
clean_data_source(
    source_path=source / "train",
    target_path=target / "train",
)

## Square & resize images, adjust labels accordingly

In [19]:
# Second stage: read the cleaned dataset and write the squared/resized copy.
# The input directory is spelled out instead of being taken from the previous value of
# `target`, so re-running this cell cannot turn the output directory into the input.
source = ROOT / "data" / "jass-yolov8-v20-clean"
target = ROOT / "data" / "jass-yolov8-v20-formatted"

In [20]:
# Create the output root, including missing parents; an existing directory is fine.
target.mkdir(parents=True, exist_ok=True)

In [21]:
# Edge length, in pixels, of the square images produced by the resize step.
target_size: int = 640

In [22]:
def pad_to_square_and_resize(image):
    """Centre ``image`` on a black square canvas and scale it to ``target_size``.

    The shorter dimension is padded on both sides until it equals the longer
    one, then the square is resized to ``target_size`` x ``target_size``
    (module-level setting) with area interpolation.

    Returns:
        A tuple ``(resized_image, w, h, left, top, size)``: the resized image,
        the original width and height, the padding added on the left and on the
        top, and the edge length of the padded square before resizing.
    """
    h, w = image.shape[:2]
    size = h if h > w else w

    # Split the missing rows/columns between both sides; an odd leftover pixel goes to
    # the bottom/right.
    top, extra_rows = divmod(size - h, 2)
    left, extra_cols = divmod(size - w, 2)

    squared = cv.copyMakeBorder(
        image,
        top, top + extra_rows,
        left, left + extra_cols,
        cv.BORDER_CONSTANT,
        value=(0, 0, 0),
    )

    output_shape = (target_size, target_size)
    resized = cv.resize(squared, output_shape, interpolation=cv.INTER_AREA)
    return resized, w, h, left, top, size

In [23]:
def adjust_labels(label_path, orig_w, orig_h, pad_left, pad_top, padded_size):
    """Re-express normalised box labels relative to the padded square image.

    Each non-blank line of ``label_path`` must hold five numbers,
    ``class x y w h``, where ``x``/``w`` are fractions of ``orig_w`` and
    ``y``/``h`` are fractions of ``orig_h``. The values are converted to
    pixels, ``x`` and ``y`` are shifted by the padding, and everything is
    divided by ``padded_size``. Blank lines are ignored.

    Args:
        label_path: Path-like object with an ``open`` method (e.g. ``pathlib.Path``).
        orig_w: Width of the image before padding, in pixels.
        orig_h: Height of the image before padding, in pixels.
        pad_left: Padding added to the left of the image, in pixels.
        pad_top: Padding added above the image, in pixels.
        padded_size: Edge length of the padded square, in pixels.

    Returns:
        A list of label lines (without newline characters), with the class as an
        integer and the four coordinates formatted with six decimals.

    Raises:
        ValueError: If a non-blank line does not hold exactly five numbers.
    """
    new_lines = []
    # Read as UTF-8 explicitly, matching how the cleaning step writes label files.
    with label_path.open('r', encoding="utf-8") as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank line (for example a trailing newline): nothing to convert.
                continue
            cls, x, y, w, h = map(float, fields)

            # Normalised -> pixels in the padded image.
            x_abs = x * orig_w + pad_left
            y_abs = y * orig_h + pad_top
            w_abs = w * orig_w
            h_abs = h * orig_h

            # Pixels -> normalised against the padded square.
            x_new = x_abs / padded_size
            y_new = y_abs / padded_size
            w_new = w_abs / padded_size
            h_new = h_abs / padded_size

            new_lines.append(f"{int(cls)} {x_new:.6f} {y_new:.6f} {w_new:.6f} {h_new:.6f}")
    return new_lines

In [24]:
def format_data_source(source_path: Path, target_path: Path):
    """Write a squared, resized copy of every image together with adjusted labels.

    For each ``images/*.jpg`` under ``source_path`` the image is passed through
    ``pad_to_square_and_resize`` and the label file with the same stem is
    converted with ``adjust_labels``. Results are written under
    ``target_path / "images"`` and ``target_path / "labels"``. Images that
    OpenCV cannot read are reported and skipped.

    Args:
        source_path: Directory containing ``images`` and ``labels`` sub-directories.
        target_path: Directory that receives the ``images`` and ``labels``
            sub-directories; created if missing.

    Raises:
        OSError: If OpenCV reports that an output image could not be written.
    """
    images_dir = target_path / "images"
    labels_dir = target_path / "labels"
    images_dir.mkdir(exist_ok=True, parents=True)
    labels_dir.mkdir(exist_ok=True)

    for image_file in (source_path / "images").glob("*.jpg"):
        label_file = source_path / "labels" / f"{image_file.stem}.txt"

        # str() keeps this working on OpenCV builds that only accept string paths.
        image = cv.imread(str(image_file))
        if image is None:
            # cv.imread signals an unreadable file by returning None instead of raising.
            print(f"Skipping unreadable image: {image_file}")
            continue

        image, orig_w, orig_h, pad_left, pad_top, padded_size = pad_to_square_and_resize(image)

        # Convert the labels before writing anything, so a label problem does not leave
        # an image without labels in the output.
        labels = adjust_labels(label_file, orig_w, orig_h, pad_left, pad_top, padded_size)

        output_image = images_dir / image_file.name
        # cv.imwrite reports failure through its return value.
        if not cv.imwrite(str(output_image), image):
            raise OSError(f"Could not write image: {output_image}")

        with open(labels_dir / label_file.name, 'w', encoding="utf-8") as f:
            f.write('\n'.join(labels))

In [25]:
# Square and resize the validation split.
format_data_source(
    source_path=source / "valid",
    target_path=target / "valid",
)

In [26]:
# Square and resize the training split.
format_data_source(
    source_path=source / "train",
    target_path=target / "train",
)