In [1]:
import os
import json
from collections import defaultdict
from tqdm import tqdm

def parse_json_filename(filename):
    """Parse a JSON file name into a mapping of object type to count.

    The name is expected to consist of underscore-separated parts of the
    form ``<name><count>``; for example ``"aeroplane1_person2.json"``
    yields ``{"Aeroplane": 1, "Person": 2}``.

    Args:
        filename: Base name of the file, with or without a trailing
            ``.json`` extension.

    Returns:
        A dict mapping each object name (normalized with
        ``str.capitalize``) to its integer count. Parts that contain no
        digit, or that have no name before their first digit, are ignored.
        If the same name appears in several parts, the last one wins.

    Raises:
        ValueError: If the text from a part's first digit onward is not a
            valid integer (e.g. ``"person1a"``).
    """
    suffix = ".json"
    # Strip the extension only where it is an extension (at the very end);
    # str.replace would also remove ".json" from the middle of the name.
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename

    object_counts = {}

    for part in stem.split('_'):
        # Index of the first digit: the name is everything before it and
        # the count is everything from it onward (aeroplane1 -> aeroplane, 1).
        first_digit = next(
            (i for i, ch in enumerate(part) if ch.isdigit()), None
        )
        # None -> the part has no count; 0 -> the part has no object name.
        # Neither describes an object, so skip instead of storing a "" key.
        if not first_digit:
            continue

        obj_name = part[:first_digit]
        # Capitalize to normalize object names
        object_counts[obj_name.capitalize()] = int(part[first_digit:])

    return object_counts

def process_json_file(json_path, object_counts):
    """Add one JSON file's object appearances to the running totals.

    The object types and their counts come from the file's base name (via
    ``parse_json_filename``); each count is multiplied by the number of
    entries in the file's ``"fileNames"`` list.

    Args:
        json_path: Path to the JSON file to read.
        object_counts: Mapping of object name to running total, updated in
            place with ``+=``. It must yield a number for names it has not
            seen yet (e.g. ``defaultdict(int)``).

    Raises:
        KeyError: If the JSON document has no ``"fileNames"`` key.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read as bytes so the json module detects the encoding (UTF-8/16/32)
    # itself; text mode would decode with the platform's locale encoding,
    # which breaks on non-ASCII content where the locale is not UTF-8.
    with open(json_path, 'rb') as f:
        data = json.load(f)

    # Extract object types and their counts from the file name
    json_object_counts = parse_json_filename(os.path.basename(json_path))

    # Number of entries listed in the current JSON file
    num_filenames = len(data["fileNames"])

    # Every listed entry contributes the per-name count from the file name
    for obj, count in json_object_counts.items():
        object_counts[obj] += num_filenames * count

def iterate_json_folder(input_folder, output_json):
    """Count object appearances across a folder of JSON files and save them.

    Every entry of ``input_folder`` whose name ends with ``.json`` is passed
    to ``process_json_file``; the accumulated totals are then written to
    ``output_json`` as indented JSON.

    Args:
        input_folder: Directory containing the JSON files to process.
        output_json: Path of the JSON file to write. Its parent directory
            is created if it does not exist yet.

    Raises:
        OSError: If ``input_folder`` cannot be listed or the output location
            cannot be created or written.
    """
    object_counts = defaultdict(int)  # Store object counts

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order (and so the key order of the output) reproducible.
    json_files = sorted(
        f for f in os.listdir(input_folder) if f.endswith('.json')
    )

    # Create the output directory up front so a missing folder cannot fail
    # the run at the very end, after all files have been processed.
    output_dir = os.path.dirname(output_json)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Iterate over the JSON files with a progress bar
    for json_file in tqdm(json_files, desc="Processing JSON files"):
        json_path = os.path.join(input_folder, json_file)
        process_json_file(json_path, object_counts)

    # Save the results to the output JSON file
    with open(output_json, 'w') as f:
        json.dump(object_counts, f, indent=4)

if __name__ == "__main__":
    # Script entry point: edit the two paths below to match your layout.
    # input_folder  - folder containing the JSON files to scan
    # output_json   - target JSON file that receives the totals
    input_folder, output_json = (
        './relations',
        './distribution/object_appearance_count.json',
    )

    # Walk the folder, tally object appearances, and write the result
    iterate_json_folder(input_folder=input_folder, output_json=output_json)

    print(f"Object appearance count saved to {output_json}")


Processing JSON files: 100%|██████████| 2309/2309 [00:32<00:00, 71.14it/s] 

Object appearance count saved to ./distribution/object_appearance_count.json



