In [4]:
import os
import xml.etree.ElementTree as ET
from tqdm import tqdm  # Import tqdm for progress tracking

def convert_to_int(element, tag_name, xml_path):
    """Rewrite the text of a child tag as an integer string, in place.

    The text is parsed as a float and then truncated toward zero with
    ``int()``. If truncation changes the value, a "Float found" notice is
    printed. If the text cannot be converted, an error message is printed and
    the tag is left untouched.

    Args:
        element: XML element that is searched for the child tag.
        tag_name: Name (or path) passed to ``element.find``.
        xml_path: Path of the file being processed; used only in messages.
    """
    tag = element.find(tag_name)
    if tag is None:
        # Nothing to convert when the child tag is absent.
        return

    try:
        value = float(tag.text)
        truncated = int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: the tag is empty, so tag.text is None.
        # ValueError: non-numeric text, or NaN passed to int().
        # OverflowError: an infinite value passed to int().
        print(f"Error converting {tag_name} to int in file: {xml_path}")
        return

    if value != truncated:
        print(f"Float found in {xml_path}: {tag_name} = {value}")
    tag.text = str(truncated)

def process_xml_file(xml_path):
    """Parse one XML file, integer-convert its size and box values, and save it.

    The ``width`` and ``height`` children of the root's ``size`` tag (when
    present) and the ``xmin``, ``ymin``, ``xmax``, ``ymax`` children of every
    ``bndbox`` tag below the root are passed to ``convert_to_int``. The
    resulting tree is written back to ``xml_path``, overwriting the file.
    """
    document = ET.parse(xml_path)
    annotation = document.getroot()

    # Image dimensions live under the root's direct <size> child.
    size_node = annotation.find('size')
    if size_node is not None:
        for dimension in ('width', 'height'):
            convert_to_int(size_node, dimension, xml_path)

    # Bounding boxes may sit at any depth below the root.
    for box in annotation.findall('.//bndbox'):
        for corner in ('xmin', 'ymin', 'xmax', 'ymax'):
            convert_to_int(box, corner, xml_path)

    # Overwrite the source file with the updated tree.
    document.write(xml_path)

def iterate_xml_folder(input_folder):
    """Run ``process_xml_file`` on each ``.xml`` entry of ``input_folder``.

    Only the folder's direct entries are considered (no recursion). A tqdm
    progress bar tracks how many files have been processed.
    """
    # Collect the names of entries that end in '.xml'.
    annotation_names = []
    for entry in os.listdir(input_folder):
        if entry.endswith('.xml'):
            annotation_names.append(entry)

    # Wrap the list in tqdm so progress is displayed while iterating.
    for name in tqdm(annotation_names, desc="Processing XML files"):
        process_xml_file(os.path.join(input_folder, name))

if __name__ == "__main__":
    # Directory containing the XML files to convert; replace with your own path.
    # NOTE: process_xml_file() saves each file back to its original path, so the
    # files in this folder are overwritten in place.
    input_folder = '../data/VOCdevkit/VOC2012/Annotations'

    # Walk the folder and process every XML file, showing a progress bar.
    iterate_xml_folder(input_folder)


Processing XML files:  76%|███████▌  | 13040/17125 [02:23<00:44, 92.76it/s] 

Float found in ../data/VOCdevkit/VOC2012/Annotations\2011_003353.xml: ymin = 45.70000076293945


Processing XML files:  86%|████████▌ | 14748/17125 [02:42<00:29, 80.18it/s] 

Float found in ../data/VOCdevkit/VOC2012/Annotations\2011_006777.xml: ymin = 281.70000076293945


Processing XML files: 100%|██████████| 17125/17125 [03:07<00:00, 91.16it/s] 
