In [37]:
# Root directory of the dataset split to process; its contents are in YOLO format.
ROOT = r"X:\doc_layout_analysis\logo\test"
# Class names; a label's integer class id is used as an index into this list.
classes = ["logo"]

In [38]:
import cv2
import os
import matplotlib.pyplot as plt

def read_yolo_labels(label_path):
    """Parse a YOLO-format label file into a list of bounding-box tuples.

    Every non-blank line must hold exactly five whitespace-separated numbers:
    ``class_id x_center y_center width height``. Blank lines (for example a
    trailing newline at the end of the file) are skipped.

    Args:
        label_path: Path of the ``.txt`` label file to read.

    Returns:
        A list of ``(class_id, x_center, y_center, width, height)`` tuples in
        file order, where ``class_id`` is an ``int`` and the remaining values
        are ``float``. The list is empty when the file has no annotations.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields, or
            a field cannot be parsed as a number.
    """
    labels = []
    with open(label_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # A blank line carries no annotation; unpacking it would fail.
                continue
            if len(fields) != 5:
                raise ValueError(
                    f"{label_path}:{line_number}: expected 5 fields, got {len(fields)}"
                )
            class_id, x_center, y_center, width, height = map(float, fields)
            labels.append((int(class_id), x_center, y_center, width, height))
    return labels

def visualize_image_with_labels(image_path, labels, classes):
    """Load an image and draw YOLO bounding boxes with their class names on it.

    Args:
        image_path: Path of the image file to load with OpenCV.
        labels: Iterable of ``(class_id, x_center, y_center, width, height)``
            tuples whose coordinates are fractions of the image size; they are
            scaled by the image width/height before drawing.
        classes: Sequence of class names indexed by ``class_id``.

    Returns:
        The loaded image converted to RGB channel order, with one rectangle
        and one text caption drawn per label.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width, _ = image.shape
    print(labels)
    for class_id, x_center, y_center, w, h in labels:
        # Scale the normalised centre/size to pixel units.
        x_center, y_center, w, h = x_center * width, y_center * height, w * width, h * height
        # Convert centre/size to integer top-left and bottom-right corners.
        x1, y1 = int(x_center - w / 2), int(y_center - h / 2)
        x2, y2 = int(x_center + w / 2), int(y_center + h / 2)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        # Caption is anchored 10 px above the box's top-left corner.
        cv2.putText(image, classes[class_id], (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    return image

In [39]:
def merge_bounding_boxes(labels, class_id=0):
    """Merge several YOLO boxes into the single box that encloses all of them.

    Args:
        labels: Sequence of ``(class_id, x_center, y_center, width, height)``
            tuples. The per-box class ids are ignored.
        class_id: Class id assigned to the merged box. Defaults to ``0``.

    Returns:
        A ``(class_id, x_center, y_center, width, height)`` tuple describing
        the smallest axis-aligned box containing every input box, in the same
        coordinate units as the input. When ``labels`` is empty, an empty list
        ``[]`` is returned instead, so callers must check for that before
        unpacking the result.
    """
    if not labels:
        return []

    # Extreme edges over all boxes, derived from each box's centre and size.
    x_min = min(x_center - w / 2 for _, x_center, _, w, _ in labels)
    y_min = min(y_center - h / 2 for _, _, y_center, _, h in labels)
    x_max = max(x_center + w / 2 for _, x_center, _, w, _ in labels)
    y_max = max(y_center + h / 2 for _, _, y_center, _, h in labels)

    # Convert the enclosing corners back to centre/size form.
    return (class_id, (x_min + x_max) / 2, (y_min + y_max) / 2, x_max - x_min, y_max - y_min)


# Merge all boxes of every image under ROOT/images into one enclosing box and
# write the result, in YOLO text format, to ROOT/merged_labels/<stem>.txt.
image_dir = os.path.join(ROOT, 'images')
label_dir = os.path.join(ROOT, 'labels')

# The output directory does not depend on the image, so create it once.
new_label_dir = os.path.join(ROOT, 'merged_labels')
os.makedirs(new_label_dir, exist_ok=True)

# Compare against the real extension (dot included) case-insensitively, so
# "scan.JPG" is picked up and a name that merely ends in "jpg" is not.
image_files = [f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]
# Label files share the image's stem and use a .txt extension.
label_files = [os.path.splitext(f)[0] + '.txt' for f in image_files]

for i, (image_file, label_file) in enumerate(zip(image_files, label_files)):
    image_path = os.path.join(image_dir, image_file)
    label_path = os.path.join(label_dir, label_file)
    if not os.path.isfile(label_path):
        # One unlabeled image should not abort the whole run.
        print(f"Skipping {image_file}: no label file at {label_path}")
        continue

    labels = read_yolo_labels(label_path)
    merged_label = merge_bounding_boxes(labels)
    new_label_path = os.path.join(new_label_dir, label_file)
    with open(new_label_path, 'w') as file:
        # merge_bounding_boxes returns an empty list when there are no boxes;
        # in that case leave the output file empty instead of writing "\n".
        if merged_label:
            file.write(" ".join(map(str, merged_label)) + '\n')

    if not merged_label:
        # Nothing to draw, and the visualiser cannot unpack an empty label.
        continue

    viz_img = visualize_image_with_labels(image_path, [merged_label], classes)
    # Uncomment to preview the first few results inline:
    # plt.imshow(viz_img)
    # plt.show()
    # if i == 5:
    #     break

[(0, 0.49886363636363634, 0.49782608695652175, 0.42499999999999993, 0.834782608695652)]
[(0, 0.5285171102661597, 0.47719594594594594, 0.9125475285171103, 0.8766891891891893)]
[(0, 0.48977272727272725, 0.491304347826087, 0.9113636363636364, 0.8130434782608695)]
[(0, 0.4954545454545455, 0.5, 0.5613636363636363, 0.9652173913043478)]
[(0, 0.5085227272727273, 0.5130434782608696, 0.6988636363636365, 0.10000000000000003)]
[(0, 0.5, 0.49782608695652175, 0.4795454545454545, 0.8695652173913044)]
[(0, 0.5056818181818182, 0.5, 0.6068181818181817, 0.9217391304347826)]
[(0, 0.5017045454545455, 0.4956521739130435, 0.8397727272727273, 0.33913043478260874)]
[(0, 0.5, 0.5, 0.5659090909090909, 0.7565217391304349)]
[(0, 0.5011363636363636, 0.49782608695652175, 0.2886363636363636, 0.791304347826087)]
[(0, 0.5272727272727272, 0.5, 0.4545454545454546, 0.9347826086956523)]
[(0, 0.5215909090909091, 0.47173913043478266, 0.7727272727272727, 0.7652173913043478)]
[(0, 0.4988636363636363, 0.4923913043478261, 0.3681