In [5]:
import os
import shutil

# Input folder holding mixed image/label files, and the two output folders.
base_path = '/home/gxyu/yolov5-7.0/datasets/testdata_process/test'
image_output_path = '/home/gxyu/yolov5-7.0/datasets/testdata_process/test_xiuzheng_images'
label_output_path = '/home/gxyu/yolov5-7.0/datasets/testdata_process/test_unxiuzheng_labels'

# Create the output folders (no error if they already exist).
os.makedirs(image_output_path, exist_ok=True)
os.makedirs(label_output_path, exist_ok=True)

# File extensions that identify images and label files.
image_extensions = ('.jpg', '.jpeg', '.png')
label_extension = '.json'

# Walk every entry directly under base_path (os.listdir returns a snapshot list,
# so files moved during the loop may still appear later in the iteration).
for filename in os.listdir(base_path):
    file_path = os.path.join(base_path, filename)
    if not os.path.isfile(file_path):
        # Skips sub-directories and also labels that were already moved
        # together with their image earlier in this loop.
        continue

    # File name without its extension; used to pair an image with its label.
    prefix = os.path.splitext(filename)[0]

    if filename.endswith(image_extensions):  # adjust to the actual image formats
        shutil.move(file_path, os.path.join(image_output_path, filename))
        # Move the matching label file along with the image, if there is one.
        label_file = prefix + label_extension
        label_file_path = os.path.join(base_path, label_file)
        if os.path.exists(label_file_path):
            shutil.move(label_file_path, os.path.join(label_output_path, label_file))
    elif filename.endswith(label_extension):  # label files are assumed to be .json
        # Move a label on its own only when its image has already been moved.
        # The previous check was negated and looked at '.jpg' only, so it moved
        # orphan labels and left behind labels whose image was already moved.
        # A label listed before its image is handled by the image branch above
        # once the image is reached.
        image_already_moved = any(
            os.path.exists(os.path.join(image_output_path, prefix + ext))
            for ext in image_extensions
        )
        if image_already_moved:
            shutil.move(file_path, os.path.join(label_output_path, filename))


In [6]:
import os
import shutil

# Source folder and the two destination folders.
base_path = '/home/gxyu/yolov5-7.0/datasets/testdata_process/test'
image_output_path = '/home/gxyu/yolov5-7.0/datasets/testdata_process/test_xiuzheng_images'
label_output_path = '/home/gxyu/yolov5-7.0/datasets/testdata_process/test_unxiuzheng_labels'

# Make sure both destination folders exist.
for output_folder in (image_output_path, label_output_path):
    os.makedirs(output_folder, exist_ok=True)

# Per-category train/test file counts (not referenced by the rest of this cell).
categories = {
    'fhy': {'train': 111, 'test': 31},
    'mh': {'train': 124, 'test': 25},
    'bjtu': {'train': 263, 'test': 75},
    'zx': {'train': 117, 'test': 32},
    'tsg': {'train': 55, 'test': 10},
    'TYHT': {'train': 33, 'test': 7}
}

# Ordered routing table: the first suffix group that matches decides the destination.
destinations = (
    (('.jpg', '.jpeg', '.png'), image_output_path),  # adjust to the actual image formats
    ('.txt', label_output_path),                     # label files are assumed to be .txt
)

# Move every regular file directly under base_path to its destination folder.
for filename in os.listdir(base_path):
    file_path = os.path.join(base_path, filename)
    if not os.path.isfile(file_path):
        continue
    for suffixes, target_dir in destinations:
        if filename.endswith(suffixes):
            shutil.move(file_path, os.path.join(target_dir, filename))
            break


In [7]:
import json
import os
import xml.etree.ElementTree as ET

def create_xml(json_file_path, output_dir):
    """Convert one JSON annotation file into a VOC-style XML annotation file.

    The JSON is expected to provide the keys ``imagePath``, ``imageWidth``,
    ``imageHeight`` and ``shapes`` (each shape having ``label`` and ``points``);
    this looks like the LabelMe export layout -- confirm against the labelling tool.

    Args:
        json_file_path: Path of the JSON annotation file to read.
        output_dir: Existing directory that receives ``<json base name>.xml``.

    Raises:
        KeyError: If one of the expected JSON keys is missing.
        OSError: If the JSON cannot be read or the XML cannot be written.

    Notes:
        * The bounding box is the axis-aligned extent (min/max) of all points of
          a shape, so a rectangle drawn from any corner still yields
          ``xmin <= xmax`` and ``ymin <= ymax``.
        * Shapes with fewer than two points cannot describe a box and are skipped.
        * Coordinates are truncated to integers with ``int()``.
    """
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    root = ET.Element("annotation")

    folder = ET.SubElement(root, "folder")
    folder.text = "jiaoda"  # hard-coded folder name

    filename = ET.SubElement(root, "filename")
    filename.text = data["imagePath"]

    size = ET.SubElement(root, "size")
    width = ET.SubElement(size, "width")
    width.text = str(data["imageWidth"])
    height = ET.SubElement(size, "height")
    height.text = str(data["imageHeight"])
    depth = ET.SubElement(size, "depth")
    depth.text = "3"  # Assuming RGB images

    for shape in data["shapes"]:
        points = shape["points"]
        if len(points) < 2:
            # A single point (or no point) has no extent; nothing to box.
            continue

        xs = [point[0] for point in points]
        ys = [point[1] for point in points]

        # Named obj rather than object to avoid shadowing the builtin.
        obj = ET.SubElement(root, "object")
        name = ET.SubElement(obj, "name")
        name.text = shape["label"]

        bndbox = ET.SubElement(obj, "bndbox")
        xmin = ET.SubElement(bndbox, "xmin")
        xmin.text = str(int(min(xs)))
        ymin = ET.SubElement(bndbox, "ymin")
        ymin.text = str(int(min(ys)))
        xmax = ET.SubElement(bndbox, "xmax")
        xmax.text = str(int(max(xs)))
        ymax = ET.SubElement(bndbox, "ymax")
        ymax.text = str(int(max(ys)))

    tree = ET.ElementTree(root)
    base_name = os.path.splitext(os.path.basename(json_file_path))[0]
    xml_output_path = os.path.join(output_dir, base_name + ".xml")
    tree.write(xml_output_path, encoding='utf-8', xml_declaration=True)

def convert_json_to_xml(json_dir, output_dir):
    """Run ``create_xml`` on every ``.json`` file found directly in ``json_dir``.

    Args:
        json_dir: Directory that is scanned (non-recursively) for ``.json`` files.
        output_dir: Directory for the generated XML files; created when missing.
    """
    output_missing = not os.path.exists(output_dir)
    if output_missing:
        os.makedirs(output_dir)

    for entry in os.listdir(json_dir):
        if not entry.endswith('.json'):
            continue
        source_path = os.path.join(json_dir, entry)
        create_xml(source_path, output_dir)

# Example usage: convert every .json label in json_dir into an .xml file in output_dir.
# json_dir is the same folder the earlier cells use as label_output_path.
json_dir = '/home/gxyu/yolov5-7.0/datasets/testdata_process/test_unxiuzheng_labels'
# Destination for the XML files; convert_json_to_xml creates it when it is missing.
output_dir = '/home/gxyu/yolov5-7.0/datasets/testdata_process/xml_labels'
convert_json_to_xml(json_dir, output_dir)


In [8]:
import os
import xml.etree.ElementTree as ET

def update_label(xml_file, prefix_mapping):
    """Overwrite every object's label in one XML file based on the file name prefix.

    The first two characters of the file's base name are looked up in
    ``prefix_mapping``. On a hit, the ``<name>`` of each ``<object>`` that is a
    direct child of the root is set to the mapped label and the file is rewritten
    in place. On a miss the file is parsed but left untouched.

    Args:
        xml_file: Path of the XML annotation file.
        prefix_mapping: Dict from two-character file name prefix to new label.
    """
    document = ET.parse(xml_file)
    annotation = document.getroot()

    key = os.path.basename(xml_file)[:2]  # first two characters of the file name
    if key not in prefix_mapping:
        return

    replacement = prefix_mapping[key]
    for node in annotation.findall('object'):
        node.find('name').text = replacement

    document.write(xml_file, encoding='utf-8', xml_declaration=True)

def update_labels_in_directory(directory, prefix_mapping):
    """Apply ``update_label`` to every ``.xml`` file directly inside ``directory``.

    Args:
        directory: Folder scanned (non-recursively) for ``.xml`` files.
        prefix_mapping: Dict from two-character file name prefix to new label,
            passed through unchanged to ``update_label``.
    """
    xml_names = [entry for entry in os.listdir(directory) if entry.endswith('.xml')]
    for xml_name in xml_names:
        update_label(os.path.join(directory, xml_name), prefix_mapping)

if __name__ == "__main__":
    # Mapping from the first two characters of an XML file name to the label
    # that every object in that file should receive.
    prefix_mapping = {
        'nm': 'NM',
        'sj': 'SJZ',
        'sy': 'SY'
    }
    # Directory containing the XML files whose labels are to be updated.
    xml_files_path = r'/home/gxyu/yolov5-7.0/datasets/testdata_process/xml_labels'

    # Rewrites matching files in place; files with other prefixes are left as they are.
    update_labels_in_directory(xml_files_path, prefix_mapping)


In [9]:
import os
import xml.etree.ElementTree as ET

def convert(size, box):
    """Turn a pixel-space box into a normalised (x_center, y_center, width, height).

    Args:
        size: Indexable ``(image_width, image_height)``.
        box: Indexable ``(xmin, xmax, ymin, ymax)`` -- note the x pair comes first.

    Returns:
        Tuple ``(x, y, w, h)``: the box centre and extent, with x-values multiplied
        by ``1 / image_width`` and y-values by ``1 / image_height``.
    """
    # Reciprocals are computed first and multiplied in, one per axis.
    inv_width = 1.0 / size[0]
    inv_height = 1.0 / size[1]

    left, right = box[0], box[1]
    top, bottom = box[2], box[3]

    center_x = (left + right) / 2.0 * inv_width
    center_y = (top + bottom) / 2.0 * inv_height
    norm_w = (right - left) * inv_width
    norm_h = (bottom - top) * inv_height

    return (center_x, center_y, norm_w, norm_h)

def convert_annotation(xml_files_path, save_txt_files_path, classes):
    """Convert every VOC-style XML file in a folder into a YOLO-style txt label file.

    For each ``.xml`` file directly inside ``xml_files_path`` a text file with the
    same base name and a ``.txt`` extension is written to ``save_txt_files_path``.
    Each kept object becomes one line:
    ``<class_id> <x_center> <y_center> <width> <height>`` with six decimals, where
    the four numbers come from ``convert`` and ``class_id`` is the object's index
    in ``classes``. Objects whose name is not in ``classes`` are skipped, so a file
    without any kept object yields an empty txt file.

    Args:
        xml_files_path: Directory scanned (non-recursively) for ``.xml`` files.
        save_txt_files_path: Output directory; created when it does not exist.
        classes: List of class names; the position in the list is the class id.

    Raises:
        xml.etree.ElementTree.ParseError: If an XML file is malformed. The file is
            parsed and fully converted before its txt file is opened, so a failing
            XML does not leave an empty or partial label file behind.
    """
    os.makedirs(save_txt_files_path, exist_ok=True)

    for xml_name in os.listdir(xml_files_path):
        if not xml_name.endswith('.xml'):
            continue
        xml_file = os.path.join(xml_files_path, xml_name)

        # Parse first: opening the output in 'w' mode creates/truncates it.
        root = ET.parse(xml_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        lines = []
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in classes:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # Order each coordinate pair so a box stored with swapped corners
            # does not produce a negative width or height.
            x_low, x_high = sorted((float(xmlbox.find('xmin').text),
                                    float(xmlbox.find('xmax').text)))
            y_low, y_high = sorted((float(xmlbox.find('ymin').text),
                                    float(xmlbox.find('ymax').text)))
            bb = convert((w, h), (x_low, x_high, y_low, y_high))
            lines.append(f"{cls_id} " + " ".join([f"{a:.6f}" for a in bb]) + '\n')

        # Swap only the final extension; str.replace would also rewrite any
        # earlier '.xml' occurrence inside the file name.
        out_txt_path = os.path.join(save_txt_files_path, os.path.splitext(xml_name)[0] + '.txt')
        with open(out_txt_path, 'w') as out_txt_f:
            out_txt_f.writelines(lines)

if __name__ == "__main__":
    # Classes to convert; a class's position in this list is its id in the txt files.
    classes = ['FHY', 'MH', 'NM', 'SJZ', 'SY', 'TSG' , 'TYHT' , 'ZX']
    # Directory of the VOC-format XML label files.
    xml_files_path = r'/home/gxyu/yolov5-7.0/datasets/testdata_process/xml_labels'
    # Directory where the converted YOLO-format txt label files are stored.
    save_txt_files_path = r'/home/gxyu/yolov5-7.0/datasets/testdata_process/labels_yolo'

    # exist_ok=True replaces the separate exists() check: no window between the
    # check and the creation, and the same call style as the earlier cells.
    os.makedirs(save_txt_files_path, exist_ok=True)

    convert_annotation(xml_files_path, save_txt_files_path, classes)
