In [26]:
import os
import json
from collections import defaultdict
import re

# Set the target count
TARGET_COUNT = 256

# Starting with the smallest total count
object_priority = ["Diningtable", "Sofa", "Pottedplant", "Tvmonitor", "Chair", "Bus", 
                   "Motorbike", "Bicycle", "Bottle", "Horse", "Cow", "Sheep", 
                   "Train", "Car", "Boat", "Dog", "Cat", "Bird", "Aeroplane", "Person"]

# Regular expression to extract object counts from filenames
pattern = re.compile(r"([a-zA-Z]+)(\d+)")

def parse_json_filename(filename):
    """Parse the JSON filename and extract object types and their counts."""
    filename = filename.replace(".json", "")
    parts = filename.split('_')
    
    object_counts = {}
    
    # Iterate through the parts and extract object name and count
    for part in parts:
        match = pattern.match(part)
        if match:
            obj_name = match.group(1).capitalize()  # Capitalize the object name for consistency
            count = int(match.group(2))  # Get the object count as an integer
            object_counts[obj_name] = count
    
    return object_counts

def collect_images_for_target_count(input_folder):
    """Collect images for each object type until the count reaches 256 for all."""
    object_collections = defaultdict(list)  # To store selected images for each object type
    image_tuples = []  # To store the tuples of (original_image, new_image_with_json)
    object_counts = defaultdict(int)  # To track the count of objects collected so far

    # Get the list of JSON files
    json_files = [f for f in os.listdir(input_folder) if f.endswith('.json')]
    
    # Iterate over object types in priority order (from smallest count to largest)
    for target_object in object_priority:
        # Continue if this object has already reached the target count
        if object_counts[target_object] >= TARGET_COUNT:
            continue

        # Iterate through the JSON files to collect images
        for json_file in json_files:
            json_path = os.path.join(input_folder, json_file)
            
            # Parse the object counts from the JSON filename
            json_object_counts = parse_json_filename(json_file)
            
            # Check if the target object is in this JSON file and hasn't reached the target count
            if target_object in json_object_counts and object_counts[target_object] < TARGET_COUNT:
                # Open the JSON and load the image file names
                with open(json_path, 'r') as f:
                    data = json.load(f)
                
                # Get the number of images in this JSON file
                num_images = len(data["fileNames"])
                
                # For each object in this JSON, update its count based on its multiplier
                for obj_name, multiplier in json_object_counts.items():
                    if obj_name in object_priority and object_counts[obj_name] < TARGET_COUNT:
                        # Calculate how many new counts this JSON adds for this object
                        new_count = num_images * multiplier
                        
                        # If the new count will exceed the target, cap it at the target count
                        if object_counts[obj_name] + new_count > TARGET_COUNT:
                            new_count = TARGET_COUNT - object_counts[obj_name]
                        
                        # Add the images to the collection for this object and create the tuple
                        for img_file in data["fileNames"]:
                            # Create new image name by appending the json name (without extension)
                            new_img_name = f"{os.path.splitext(img_file)[0]}_{os.path.splitext(json_file)[0]}.jpeg"
                            image_tuples.append((img_file, new_img_name))
                            object_collections[obj_name].append(new_img_name)
                        
                        # Update the count for this object
                        object_counts[obj_name] += new_count

                        # If we've reached the target count for this object, break out of the loop
                        if object_counts[obj_name] >= TARGET_COUNT:
                            break

    # Ensure only distinct rows remain in image_tuples by converting to a set and back to a list
    image_tuples = list(set(image_tuples))

    return image_tuples, object_counts

if __name__ == "__main__":
    input_folder = './relations'  # Folder containing JSON files
    
    # Collect images for each object type until the count reaches 256 for each
    image_tuples, object_counts = collect_images_for_target_count(input_folder)
    
    # Output the collected image tuples
    print("\nCollected image tuples (original image, new image with JSON name):")
    for original_img, new_img in image_tuples:
        print(f"Original: {original_img}, New: {new_img}")
    
    # Output the total number of unique tuples
    print(f"\nTotal unique image tuples: {len(image_tuples)}")



Collected image tuples (original image, new image with JSON name):
Original: 2010_005094.jpg, New: 2010_005094_chair2_diningtable1.jpeg
Original: 2011_000338.jpg, New: 2011_000338_chair4_diningtable1.jpeg
Original: 2009_003066.jpg, New: 2009_003066_aeroplane1.jpeg
Original: 2010_001760.jpg, New: 2010_001760_dog1.jpeg
Original: 2011_001320.jpg, New: 2011_001320_aeroplane1.jpeg
Original: 2011_000909.jpg, New: 2011_000909_train1.jpeg
Original: 2009_001537.jpg, New: 2009_001537_sheep1.jpeg
Original: 2011_001544.jpg, New: 2011_001544_horse1.jpeg
Original: 2009_000771.jpg, New: 2009_000771_cow2.jpeg
Original: 2010_001457.jpg, New: 2010_001457_cat1.jpeg
Original: 2009_002099.jpg, New: 2009_002099_aeroplane1.jpeg
Original: 2011_000243.jpg, New: 2011_000243_aeroplane1.jpeg
Original: 2008_000472.jpg, New: 2008_000472_bird1.jpeg
Original: 2009_001689.jpg, New: 2009_001689_cat1.jpeg
Original: 2009_003521.jpg, New: 2009_003521_cow2.jpeg
Original: 2010_001534.jpg, New: 2010_001534_cat1.jpeg
Origina

In [28]:
import os
import shutil

def copy_and_rename_images(image_tuples, input_folder, output_folder):
    """Copy and rename images from input_folder to output_folder based on image_tuples."""
    
    # Ensure the output folder exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Iterate through the image tuples and copy each image
    for original_img, new_img in image_tuples:
        original_img_path = os.path.join(input_folder, original_img)
        new_img_path = os.path.join(output_folder, new_img)
        
        # Check if the original image exists in the input folder
        if os.path.exists(original_img_path):
            # Copy and rename the image
            shutil.copy(original_img_path, new_img_path)
            print(f"Copied and renamed: {original_img_path} -> {new_img_path}")
        else:
            print(f"Error: {original_img_path} does not exist!")

if __name__ == "__main__":
    input_folder = '../data/VOCdevkit/VOC2012/JPEGImages'  # Folder containing original images
    output_folder = '../data/VOCdevkit/VOC2012/Train_Val_Test/Train'  # Folder to copy and rename images to

    # Move and rename images based on the tuples
    copy_and_rename_images(image_tuples, input_folder, output_folder)


Copied and renamed: ../data/VOCdevkit/VOC2012/JPEGImages\2010_005094.jpg -> ../data/VOCdevkit/VOC2012/Train_Val_Test/Train\2010_005094_chair2_diningtable1.jpeg
Copied and renamed: ../data/VOCdevkit/VOC2012/JPEGImages\2011_000338.jpg -> ../data/VOCdevkit/VOC2012/Train_Val_Test/Train\2011_000338_chair4_diningtable1.jpeg
Copied and renamed: ../data/VOCdevkit/VOC2012/JPEGImages\2009_003066.jpg -> ../data/VOCdevkit/VOC2012/Train_Val_Test/Train\2009_003066_aeroplane1.jpeg
Copied and renamed: ../data/VOCdevkit/VOC2012/JPEGImages\2010_001760.jpg -> ../data/VOCdevkit/VOC2012/Train_Val_Test/Train\2010_001760_dog1.jpeg
Copied and renamed: ../data/VOCdevkit/VOC2012/JPEGImages\2011_001320.jpg -> ../data/VOCdevkit/VOC2012/Train_Val_Test/Train\2011_001320_aeroplane1.jpeg
Copied and renamed: ../data/VOCdevkit/VOC2012/JPEGImages\2011_000909.jpg -> ../data/VOCdevkit/VOC2012/Train_Val_Test/Train\2011_000909_train1.jpeg
Copied and renamed: ../data/VOCdevkit/VOC2012/JPEGImages\2009_001537.jpg -> ../data/VO