In [3]:
import os
import xml.etree.ElementTree as ET
import json
from collections import Counter
from tqdm import tqdm  # Import tqdm for progress tracking

def create_json_file(object_names_count, folder, filename):
    """Record *filename* in the JSON file named after an object composition.

    The JSON file name is built by joining ``<name><count>`` for every entry
    of *object_names_count* with underscores, followed by ``.json`` (for
    example ``{"person": 2, "dog": 1}`` -> ``person2_dog1.json``). The file
    holds a single object of the form ``{"fileNames": [...]}``; *filename* is
    appended to that list unless it is already present.

    Args:
        object_names_count: Mapping of object name to its occurrence count.
            The generated file name follows the mapping's iteration order.
        folder: Directory that holds the JSON files. It is created if it
            does not exist yet.
        filename: Value to record in the ``fileNames`` list.

    Raises:
        json.JSONDecodeError: If an existing JSON file cannot be parsed.
    """
    json_filename = "_".join(
        f"{obj}{count}" for obj, count in object_names_count.items()
    ) + ".json"
    json_filepath = os.path.join(folder, json_filename)

    # Make the function usable on its own instead of relying on the caller
    # to have created the directory. An empty folder means the current
    # directory, which os.makedirs would reject.
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Load the existing data, if any. Opening directly avoids the
    # check-then-open race of os.path.exists().
    try:
        with open(json_filepath, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)
    except FileNotFoundError:
        data = {}

    # Tolerate an existing file that lacks the "fileNames" key.
    file_names = data.setdefault("fileNames", [])
    if filename not in file_names:
        file_names.append(filename)

    # Explicit encoding keeps the output independent of the platform locale.
    with open(json_filepath, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

def process_xml_file(xml_path, output_folder):
    """Parse one XML annotation and record it under its object composition.

    Reads the ``<filename>`` element and the ``<name>`` of every ``<object>``
    element directly under the root, counts how often each name occurs, and
    passes the counts, *output_folder* and the filename on to
    ``create_json_file``.

    Args:
        xml_path: Path of the XML annotation file to parse.
        output_folder: Directory for the JSON files; created if missing.

    Raises:
        ValueError: If the annotation has no (or an empty) ``<filename>``.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_path).getroot()

    # findtext() returns None for a missing element instead of failing with
    # an opaque AttributeError on ``None.text``.
    filename = root.findtext('filename')
    if not filename:
        raise ValueError(f"{xml_path}: annotation has no <filename> element")

    # Objects without a usable <name> cannot contribute to the composition,
    # so they are skipped rather than aborting the whole file.
    candidate_names = (obj.findtext('name') for obj in root.findall('object'))
    object_names_count = Counter(name for name in candidate_names if name)

    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(output_folder, exist_ok=True)

    create_json_file(object_names_count, output_folder, filename)

def iterate_xml_folder(input_folder, output_folder):
    """Run ``process_xml_file`` on every ``.xml`` entry of *input_folder*.

    Entries are taken from ``os.listdir`` (non-recursive) and processed in
    the order it returns them, with a tqdm progress bar.

    Args:
        input_folder: Directory containing the XML files.
        output_folder: Directory handed to ``process_xml_file`` for output.
    """
    # Keep only the entries whose name ends in ".xml".
    annotation_names = []
    for entry in os.listdir(input_folder):
        if entry.endswith('.xml'):
            annotation_names.append(entry)

    # A list (not a generator) so the progress bar knows the total.
    progress = tqdm(annotation_names, desc="Processing XML files")
    for annotation_name in progress:
        process_xml_file(
            os.path.join(input_folder, annotation_name),
            output_folder,
        )

if __name__ == "__main__":
    # Directory containing the XML annotation files; replace with your path.
    input_folder = '../data/VOCdevkit/VOC2012/Annotations'
    # Directory where the JSON files are stored; replace with your path.
    output_folder = './relations'

    iterate_xml_folder(input_folder=input_folder, output_folder=output_folder)


Processing XML files: 100%|██████████| 17125/17125 [03:59<00:00, 71.35it/s]
