## Version 1 - Patrones Nombre

In [2]:
import os
from collections import defaultdict

def analyze_images():
    # Path to the directory containing images
    #image_dir = "ANPR2.v1i.yolov8/train/images"
    image_dir = "Peru Plate Numbers.v3i.yolov8/train/images"

    # Read the first 30 images in alphabetical order
    images = sorted(os.listdir(image_dir))[:30]

    # Save the names of the files
    image_names = images

    # Group images by their potential original sets
    grouped_images = defaultdict(list)
    for name in image_names:
        # Assuming a pattern like "image_01_aug1.png", "image_01_aug2.png", etc.
        base_name = name.rsplit("_", 1)[0]  # Remove the augmentation suffix
        grouped_images[base_name].append(name)

    # Identify potential original images
    original_images = []
    for base, group in grouped_images.items():
        if len(group) == 3:  # Check if there are exactly 3 images
            # Assume the original is the one without augmentation identifiers
            original_candidate = [img for img in group if "aug" not in img.lower()]
            if original_candidate:
                original_images.append(original_candidate[0])

    # Save the 10 identified originals (or less if fewer sets match)
    identified_originals = original_images[:10]

    # Generalization check
    if len(grouped_images) < len(image_names) // 3:
        generalization_comment = "It might not generalize; some sets have fewer than 3 images."
    elif len(identified_originals) < 10:
        generalization_comment = "It's difficult to tell which ones are original in some cases."
    else:
        generalization_comment = "The pattern seems consistent and can likely be generalized."

    # Return results
    return {
        "image_names": image_names,
        "original_images": identified_originals,
        "generalization_comment": generalization_comment
    }


In [3]:
# Execute the analysis
result = analyze_images()

# Print the results
print("First 30 Image Names:", result["image_names"])
print("Identified Original Images:", result["original_images"])
print("Generalization Comment:", result["generalization_comment"])

First 30 Image Names: ['20231009_192435.rf.102ad47e3f2d2a9dad4dc37c33e5e30a.jpg', '20231009_192435.rf.14919a38e97c954674d53b1ecd39bb13.jpg', '20231009_192435.rf.77c0fc7846580623d2466871fadb5056.jpg', '20231009_192435_jpg.rf.34b97c88992b4dcc712e9379a28af6b3.jpg', '20231009_192435_jpg.rf.c147b08999ae83852b028d9f9afd00cc.jpg', '20231009_192435_jpg.rf.ee9a1439af23e23dfdd6054fa69f6d25.jpg', '20231009_192443_jpg.rf.497ccde5dfe549319cfe57a169a0bdeb.jpg', '20231009_192443_jpg.rf.831c95728d23b435d314e98eb107097d.jpg', '20231009_192443_jpg.rf.da5d595c40e613d9878567c3c34f2c1d.jpg', '20231009_192448_jpg.rf.33407f1c8685f1bd9b95b14d54cc2847.jpg', '20231009_192448_jpg.rf.3bd390650207d0d7fa765cb011769e37.jpg', '20231009_192448_jpg.rf.7116d5e98afd348ef687e046a5a78db7.jpg', '20231009_192500_jpg.rf.6d29e65988d0ce031534a9ecd2998ed0.jpg', '20231009_192500_jpg.rf.883007b8597abe20748d3eb9c1138cc5.jpg', '20231009_192500_jpg.rf.c5c397aa1c4a75432ef0e28cbf2ba5c7.jpg', '20231009_192603_jpg.rf.40219cc8ef2466795876

# Version 2 - Patrones Nombre

In [6]:
import os
from collections import defaultdict

# Path to the directory containing the images
image_dir = "ANPR2.v1i.yolov8/train/images"

# Define the mapping of original image positions based on provided examples
# This assumes the original is consistently identified by its position in sorted order
example_positions = {
    "000113368M": 0,  # First in sorted order
    "000123177W": 1,  # Second in sorted order
    "000150041M": 0,  # First in sorted order
    "000179492W": 1,  # Second in sorted order
    "000242724W": 1,  # Second in sorted order
    "000246942M": 1,  # Second in sorted order
    "000316312W": 1,  # Second in sorted order
    "000343880W": 1,  # Second in sorted order
    "000356269W": 0,  # First in sorted order
    "000380321W": 1,  # Second in sorted order
}

# Dictionary to store the original images
original_images = []

# Group files by their prefix (common part before ".rf.")
grouped_files = defaultdict(list)
for filename in os.listdir(image_dir):
    if filename.endswith(".jpg"):
        prefix = filename.split(".rf.")[0]
        grouped_files[prefix].append(filename)

# Identify the original images
for prefix, files in grouped_files.items():
    # Sort files to ensure consistent order
    files.sort()

    # Determine the position of the original image based on the prefix
    position = example_positions.get(prefix, 0)  # Default to the first if not specified

    # Add the original image to the list
    if position < len(files):
        original_images.append(files[position])

# Output the original images
print("Original Images:")
for img in original_images:
    print(img)


Original Images:
000113368M_jpg.rf.0c7e48f8aa7ae8114697332a3a49eeb5.jpg
000123177W_jpg.rf.0057f768325268105aebacfc1918cd36.jpg
000150041M_jpg.rf.c87ecb5147344fbbf62b293cc2fd13e4.jpg
000179492W_jpg.rf.43f290d38706a38642c90dbdd8eb307a.jpg
000242724W_jpg.rf.327e0780d0115657b54f978afd52a5ce.jpg
000246942M_jpg.rf.07663195722b223f6f00d412ee742e81.jpg
000316312W_jpg.rf.127162bfda83fd7cd28eb6d2193c24c2.jpg
000343880W_jpg.rf.1dc2b37bf149f590f3597ebb60e63e80.jpg
000356269W_jpg.rf.63b10ecf941a19ccd11971e472b11213.jpg
000380321W_jpg.rf.3472ba95c5293d621175f1c004caba8a.jpg
000386735W_jpg.rf.24beb513347e24ff7798c163236f8b05.jpg
000629019W_jpg.rf.b1b3135601dbbe00647da47c41a88d38.jpg
000644648W_jpg.rf.6d7b531c34133afc3955bc9387f691fe.jpg
000668274W_jpg.rf.0e722b16b24c5d5567436d183b537799.jpg
000689906W_jpg.rf.8982d575248d5d6fd5502a1e86e35041.jpg
000741242W_jpg.rf.294ef9ff04254f9495a6b34274fe522f.jpg
000793760W_jpg.rf.011eb97ebf45e266d8b083ab02aa8132.jpg
000983069W_jpg.rf.04c5997590ca744942227b15cbbe81

# Version 2.1 - Agrupamiento

In [1]:
import os
import shutil
from collections import defaultdict

In [7]:
# Path to the directory containing the images
image_dir = "ANPR2.v1i.yolov8/train/images"
filtered_dir = "ANPR2.v1i.yolov8/train_filter_v2/images"

# Ensure the filtered directory exists
os.makedirs(filtered_dir, exist_ok=True)

# Define the mapping of original image positions based on provided examples
# This assumes the original is consistently identified by its position in sorted order
example_positions = {
    "000113368M": 0,  # First in sorted order
    "000123177W": 1,  # Second in sorted order
    "000150041M": 0,  # First in sorted order
    "000179492W": 1,  # Second in sorted order
    "000242724W": 1,  # Second in sorted order
    "000246942M": 1,  # Second in sorted order
    "000316312W": 1,  # Second in sorted order
    "000343880W": 1,  # Second in sorted order
    "000356269W": 0,  # First in sorted order
    "000380321W": 1,  # Second in sorted order
}

# Dictionary to store the original images
original_images = []

# Group files by their prefix (common part before ".rf.")
grouped_files = defaultdict(list)
for filename in os.listdir(image_dir):
    if filename.endswith(".jpg"):
        prefix = filename.split(".rf.")[0]
        grouped_files[prefix].append(filename)

# Identify the original images
for prefix, files in grouped_files.items():
    # Sort files to ensure consistent order
    files.sort()

    # Determine the position of the original image based on the prefix
    position = example_positions.get(prefix, 0)  # Default to the first if not specified

    # Add the original image to the list
    if position < len(files):
        original_images.append(files[position])

# Copy the original images to the filtered directory
for img in original_images:
    src_path = os.path.join(image_dir, img)
    dst_path = os.path.join(filtered_dir, img)
    shutil.copy(src_path, dst_path)

# Output the original images
print("Original Images copied to filtered directory:")
for img in original_images:
    print(img)


Original Images copied to filtered directory:
000113368M_jpg.rf.0c7e48f8aa7ae8114697332a3a49eeb5.jpg
000123177W_jpg.rf.0057f768325268105aebacfc1918cd36.jpg
000150041M_jpg.rf.c87ecb5147344fbbf62b293cc2fd13e4.jpg
000179492W_jpg.rf.43f290d38706a38642c90dbdd8eb307a.jpg
000242724W_jpg.rf.327e0780d0115657b54f978afd52a5ce.jpg
000246942M_jpg.rf.07663195722b223f6f00d412ee742e81.jpg
000316312W_jpg.rf.127162bfda83fd7cd28eb6d2193c24c2.jpg
000343880W_jpg.rf.1dc2b37bf149f590f3597ebb60e63e80.jpg
000356269W_jpg.rf.63b10ecf941a19ccd11971e472b11213.jpg
000380321W_jpg.rf.3472ba95c5293d621175f1c004caba8a.jpg
000386735W_jpg.rf.24beb513347e24ff7798c163236f8b05.jpg
000629019W_jpg.rf.b1b3135601dbbe00647da47c41a88d38.jpg
000644648W_jpg.rf.6d7b531c34133afc3955bc9387f691fe.jpg
000668274W_jpg.rf.0e722b16b24c5d5567436d183b537799.jpg
000689906W_jpg.rf.8982d575248d5d6fd5502a1e86e35041.jpg
000741242W_jpg.rf.294ef9ff04254f9495a6b34274fe522f.jpg
000793760W_jpg.rf.011eb97ebf45e266d8b083ab02aa8132.jpg
000983069W_jpg.rf.0

# Version Dataset 2 - Agrupamiento

In [4]:
import os
import shutil
from collections import defaultdict

# Define paths
source_dir = "Peru Plate Numbers.v3i.yolov8/train/images"
target_dir = "Peru Plate Numbers.v3i.yolov8/train_filter_v2/images"

# Ensure target directories exist
os.makedirs(target_dir, exist_ok=True)

# Get list of images
image_files = sorted(os.listdir(source_dir))

# Group images by their prefix before '.rf.'
grouped_images = defaultdict(list)

for image in image_files:
    if ".rf." in image:
        prefix = image.split(".rf.")[0]  # Extract prefix before .rf.
        grouped_images[prefix].append(image)

# Identify the original images and copy them
for prefix, group in grouped_images.items():
    group.sort()  # Sort within the group (to maintain order)
    
    # Assume the last image in sorted order is the original
    original_image = group[-1]  

    # Copy the original image to the target directory
    src_path = os.path.join(source_dir, original_image)
    dest_path = os.path.join(target_dir, original_image)
    shutil.copy2(src_path, dest_path)

print("Original images copied successfully.")


Original images copied successfully.


# Print Nro


In [5]:
import os

# Define paths
source_dir = "Peru Plate Numbers.v3i.yolov8/train/images"
target_dir = "Peru Plate Numbers.v3i.yolov8/train_filter_v2/images"

# Function to count image files in a directory
def count_images(directory):
    if not os.path.exists(directory):
        return 0  # Return 0 if the directory does not exist
    return len([f for f in os.listdir(directory) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])

# Count images in both directories
source_count = count_images(source_dir)
target_count = count_images(target_dir)

# Print results
print(f"Number of images in source directory ({source_dir}): {source_count}")
print(f"Number of images in target directory ({target_dir}): {target_count}")


Number of images in source directory (Peru Plate Numbers.v3i.yolov8/train/images): 1470
Number of images in target directory (Peru Plate Numbers.v3i.yolov8/train_filter_v2/images): 490
