## Import libraries

In [1]:
import xml.etree.ElementTree as ET
import glob
import cv2
import os

## Define paths

In [2]:
# Input: the XML annotation file and the folder that the image paths are appended to.
annotation_file_path = "../archive/annotations.xml"
images_folder_path = "../archive/"

# Output folders for the training split.
save_img_folder_path_train = "data/images/train/"
save_label_folder_path_train = "data/labels/train/"

# Output folders for the validation split.
save_img_folder_path_val = "data/images/val/"
save_label_folder_path_val = "data/labels/val/"

## Define a function to extract image names and bounding boxes from the XML annotation file

In [3]:
def parse_annotation(xml_path):
    """Read image names and normalized bounding boxes from an XML file.

    Every ``<image>`` element directly under the document root contributes
    its ``name`` attribute plus one entry per direct ``<box>`` child. Each
    box is converted from corner coordinates (``xtl``, ``ytl``, ``xbr``,
    ``ybr``) to ``[x_center, y_center, width, height]``, where the x values
    are divided by the image's ``width`` attribute and the y values by its
    ``height`` attribute.

    Args:
        xml_path: Path of the XML annotation file to parse.

    Returns:
        A tuple ``(images_path, all_bboxes)`` of two parallel lists: the
        ``name`` of each image, and for each image the list of its
        converted boxes (an empty list when the image has no ``<box>``).
    """
    document_root = ET.parse(xml_path).getroot()
    names = []
    boxes_per_image = []

    for image_node in document_root.findall("image"):
        attrs = image_node.attrib
        names.append(attrs['name'])
        img_w = float(attrs["width"])
        img_h = float(attrs["height"])

        normalized = []
        for box_node in image_node.findall("box"):
            left, top, right, bottom = (
                float(box_node.attrib[key]) for key in ("xtl", "ytl", "xbr", "ybr")
            )
            # Corner form -> centre/size form, scaled by the image dimensions.
            normalized.append([
                (left + right) / 2 / img_w,
                (top + bottom) / 2 / img_h,
                (right - left) / img_w,
                (bottom - top) / img_h,
            ])
        boxes_per_image.append(normalized)

    return names, boxes_per_image


## Load the annotations and split them into training and validation sets

In [4]:
# Parse the annotation file once; both results are parallel lists (one entry per image).
images_path, all_bboxes = parse_annotation(xml_path=annotation_file_path)

In [6]:
# Sequential (unshuffled) split: the first int(0.8 * N) images go to training,
# the remainder to validation. n_o_t is the number of training samples.
n_o_t = int(len(images_path) * 0.8)
images_path_train, images_path_val = images_path[:n_o_t], images_path[n_o_t:]
all_bboxes_train, all_bboxes_val = all_bboxes[:n_o_t], all_bboxes[n_o_t:]

## Save data

In [25]:
def save_data(img_folder, images_path, save_img_folder_path, save_label_folder_path, all_bboxes, class_id=0):
    """Re-save images as PNG and write one label text file per image.

    For every entry of ``images_path`` the image is read from
    ``img_folder``, written to ``<stem>.png`` inside
    ``save_img_folder_path``, and its boxes are written to ``<stem>.txt``
    inside ``save_label_folder_path``, one line per box in the form
    ``"<class_id> <box[0]> <box[1]> <box[2]> <box[3]>"``. ``<stem>`` is the
    image's file name without its directory and without its final extension.

    Args:
        img_folder: Folder that the entries of ``images_path`` are relative to.
        images_path: Relative image paths (e.g. ``"images/frame_0.png"``).
        save_img_folder_path: Destination folder for the PNG images.
        save_label_folder_path: Destination folder for the label files.
        all_bboxes: For each image, a list of 4-element boxes; must have the
            same length as ``images_path``.
        class_id: Value written as the first field of every label line.
            Defaults to ``0``, the value that was previously hard-coded.

    Raises:
        ValueError: If ``images_path`` and ``all_bboxes`` differ in length.
        OSError: If an image cannot be read or the PNG cannot be written.
    """
    if len(images_path) != len(all_bboxes):
        raise ValueError(
            f"images_path has {len(images_path)} entries but all_bboxes has {len(all_bboxes)}"
        )

    # Create the destinations up front: cv2.imwrite only returns False when the
    # folder is missing, and open() would fail with FileNotFoundError.
    for folder in (save_img_folder_path, save_label_folder_path):
        if folder:  # an empty string means the current directory
            os.makedirs(folder, exist_ok=True)

    for path, bboxes in zip(images_path, all_bboxes):
        # basename/splitext copes with paths that have no directory, several
        # directory levels, or dots inside the file name.
        file_name = os.path.splitext(os.path.basename(path))[0]

        source_path = os.path.join(img_folder, path)
        img = cv2.imread(source_path)
        if img is None:  # cv2.imread signals failure by returning None
            raise OSError(f"Could not read image (missing or unreadable): {source_path}")

        target_path = os.path.join(save_img_folder_path, file_name + ".png")
        if not cv2.imwrite(target_path, img):
            raise OSError(f"Could not write image: {target_path}")

        full_path = os.path.join(save_label_folder_path, file_name + ".txt")
        with open(full_path, 'w', encoding="utf-8") as f:
            f.writelines(
                f"{class_id} {box[0]} {box[1]} {box[2]} {box[3]}\n" for box in bboxes
            )

In [26]:
# Write the training split (images and label files).
save_data(
    img_folder=images_folder_path,
    images_path=images_path_train,
    save_img_folder_path=save_img_folder_path_train,
    save_label_folder_path=save_label_folder_path_train,
    all_bboxes=all_bboxes_train,
)

In [27]:
# Write the validation split (images and label files).
save_data(
    img_folder=images_folder_path,
    images_path=images_path_val,
    save_img_folder_path=save_img_folder_path_val,
    save_label_folder_path=save_label_folder_path_val,
    all_bboxes=all_bboxes_val,
)