# xml to txt

In [21]:
# Set up the dataset paths.
import os

# Root of the dataset; every other location is derived from it.
path = "./dataset"

# Directories holding the label files and the images.
label_path = os.path.join(path, "labels")
img_path = os.path.join(path, "images")

# Pascal VOC style XML annotations of the validation split.
annot_path = os.path.join(label_path, "val", "Annotations")

In [22]:
# Display the resolved annotation directory.
annot_path

'./dataset\\labels\\val\\Annotations'

In [23]:
import xml.etree.ElementTree as ET
import glob
import os
import json

# Function that converts an XML bbox into the YOLO bbox format

def xml_to_yolo_bbox(bbox, w, h):
    """Convert a corner-style box into a normalized center/size box.

    Args:
        bbox: Indexable box given as [xmin, ymin, xmax, ymax].
        w: Value the horizontal quantities are divided by (image width).
        h: Value the vertical quantities are divided by (image height).

    Returns:
        A list [x_center, y_center, width, height], each divided by
        ``w`` or ``h`` respectively.
    """
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    center = (((right + left) / 2) / w, ((bottom + top) / 2) / h)
    extent = ((right - left) / w, (bottom - top) / h)
    return [*center, *extent]


# Class names; a label's position in this list is its class index.
classes = ["RBC", "WBC", "Platelets"]

In [24]:
# Preview the XML annotation files found in the annotation directory.
glob.glob(os.path.join(annot_path, '*.xml'))

['./dataset\\labels\\val\\Annotations\\BloodImage_00002.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00004.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00007.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00011.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00017.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00019.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00024.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00028.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00031.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00039.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00045.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00046.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00048.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00049.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00050.xml',
 './dataset\\labels\\val\\Annotations\\BloodImage_00058.xml',
 './data

In [25]:
from tqdm import tqdm

# Convert every XML annotation into a YOLO label file: one text file per
# image, one "<class index> <x_center> <y_center> <width> <height>" line per
# annotated object.
files = glob.glob(os.path.join(annot_path, '*.xml'))
for fil in tqdm(files):
    # The label file reuses the annotation's base name without its extension.
    filename = os.path.splitext(os.path.basename(fil))[0]

    root = ET.parse(fil).getroot()
    size = root.find("size")
    width = int(size.find("width").text)
    height = int(size.find("height").text)

    result = []
    for obj in root.findall('object'):
        label = obj.find("name").text
        # Labels missing from `classes` are appended so they still get an index.
        if label not in classes:
            classes.append(label)
        index = classes.index(label)

        # Read the corners by tag name rather than by child position, so the
        # result does not depend on the order of the <bndbox> children.
        bndbox = obj.find("bndbox")
        pil_bbox = [
            int(bndbox.find(tag).text)
            for tag in ("xmin", "ymin", "xmax", "ymax")
        ]
        yolo_bbox = xml_to_yolo_bbox(pil_bbox, width, height)
        bbox_string = " ".join([str(x) for x in yolo_bbox])
        result.append(f"{index} {bbox_string}")

        print((f"{index} {bbox_string}"))

    # Annotations without any object produce no label file.
    if result:
        with open(os.path.join(label_path, f"{filename}.txt"), "w", encoding="utf-8") as f:
            f.write("\n".join(result))

100%|█████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 710.79it/s]

0 0.709375 0.30833333333333335 0.215625 0.20833333333333334
0 0.68984375 0.44166666666666665 0.1796875 0.2
0 0.25390625 0.35625 0.1921875 0.2125
0 0.284375 0.8197916666666667 0.1625 0.20625
0 0.32421875 0.896875 0.1453125 0.20625
0 0.10703125 0.5958333333333333 0.1703125 0.24166666666666667
0 0.56328125 0.73125 0.1703125 0.24166666666666667
0 0.90859375 0.3375 0.1578125 0.24583333333333332
0 0.84765625 0.6041666666666666 0.1578125 0.2375
0 0.64765625 0.6625 0.1578125 0.2375
0 0.49140625 0.24375 0.1578125 0.2375
0 0.4828125 0.4895833333333333 0.1875 0.25416666666666665
1 0.6640625 0.11145833333333334 0.44375 0.21875
0 0.24296875 0.103125 0.1453125 0.20208333333333334
0 0.28359375 0.23958333333333334 0.1453125 0.20416666666666666
0 0.8359375 0.8895833333333333 0.171875 0.22083333333333333
1 0.33828125 0.4739583333333333 0.3359375 0.38958333333333334
0 0.75 0.590625 0.15 0.17291666666666666
0 0.871875 0.31979166666666664 0.15 0.17291666666666666
0 0.840625 0.8708333333333333 0.175 0.2375





# create the train / valid / test image list files

In [26]:
# Dataset root plus its image and label sub-directories.
root_dir = "./dataset"
img_dir, label_dir = (
    os.path.join(root_dir, sub_dir) for sub_dir in ("images", "labels")
)

In [33]:
# Collect the JPEG paths of each split. glob returns matches in arbitrary
# order, so the lists are sorted to keep the generated files reproducible.
# The pattern includes the dot so only names ending in ".jpg" match.
train = sorted(glob.glob(os.path.join(img_dir, 'train', '*.jpg')))
valid = sorted(glob.glob(os.path.join(img_dir, 'val', '*.jpg')))
test = sorted(glob.glob(os.path.join(img_dir, 'test', '*.jpg')))

In [34]:
# Write train.txt, valid.txt and test.txt under the dataset root, each
# holding one image path per line followed by a trailing newline.
for list_name, image_paths in (
    ("train.txt", train),
    ("valid.txt", valid),
    ("test.txt", test),
):
    with open(os.path.join(root_dir, list_name), 'w') as f:
        f.write('\n'.join(image_paths) + '\n')

# create yaml 

In [35]:
import yaml

# Dataset description that is dumped to custom.yaml.
class_names = ['RBC', 'WBC', 'Platelets']

# NOTE(review): train/val/test already include root_dir while "path" is also
# set to root_dir; some loaders resolve these entries relative to "path" --
# confirm against the trainer that consumes this file.
yaml_data = dict(
    names=class_names,                          # class names
    nc=len(class_names),                        # number of classes
    path=root_dir,                              # dataset root directory
    train=os.path.join(root_dir, "train.txt"),  # path to train.txt
    val=os.path.join(root_dir, "valid.txt"),    # path to valid.txt
    test=os.path.join(root_dir, "test.txt"),    # path to test.txt
)

with open(os.path.join(root_dir, "custom.yaml"), "w") as f:
    yaml.dump(yaml_data, f)