In [1]:
import os
import glob
import shutil # Used only for the demonstration setup (creates/cleans temporary files)
import tempfile # Used only for the demonstration setup

# --- SECTION 1: USER CONFIGURATION (YOU MUST EDIT THESE VARIABLES) ---

# 1. DATASET_FOLDER_PATH: The path to the folder containing your image files
#    (e.g., .jpg, .png) AND their corresponding label files (e.g., .txt).
#    IMPORTANT: YOU MUST CHANGE THIS TO YOUR ACTUAL FOLDER PATH.
#    Example: DATASET_FOLDER_PATH = "/home/youruser/my_dataset"
#    Example: DATASET_FOLDER_PATH = "C:\\Users\\YourName\\Documents\\project_data"
# Folder that holds the images together with their same-named .txt label files.
DATASET_FOLDER_PATH = "Bangla_License_Plate"  # <--- CHANGE THIS LINE!

# 2. TARGET_CLASS_IDS_FOR_SEARCH: Numeric class IDs whose images should be listed.
#    Example: [0, 1, 5]
#    Repeated IDs are harmless; the search converts this list to a set.
TARGET_CLASS_IDS_FOR_SEARCH = [
    102,
    103,
    105,
]

# --- END OF USER CONFIGURATION ---


# --- SECTION 2: CORE LOGIC FUNCTION ---

def _index_images_by_stem(
    folder_path: str,
    extensions: tuple[str, ...]
) -> dict[str, str]:
    """
    Build a lookup from file stem to the image filename present in a folder.

    Extensions are matched case-insensitively, so 'photo.JPG' is found as
    well as 'photo.jpg'. When several images share a stem, an exact
    (lowercase) extension match wins over a case variant, and within each
    group the order of `extensions` decides.

    Args:
        folder_path (str): Directory to list. An empty string means the
            current directory, mirroring how os.path.join treats it.
        extensions (tuple[str, ...]): Lowercase image extensions, including
            the leading dot, in order of preference.

    Returns:
        dict[str, str]: Mapping of stem -> chosen image filename. Empty if
        the folder cannot be listed.
    """
    priority = {ext: rank for rank, ext in enumerate(extensions)}
    best_by_stem = {}

    try:
        entries = os.listdir(folder_path or os.curdir)
    except OSError:
        return {}

    for entry in entries:
        stem, ext = os.path.splitext(entry)
        rank = priority.get(ext.lower())
        if rank is None:
            continue  # Not an image extension we recognise
        # Sort key: exact-case extensions first, then extension priority,
        # then the name itself so ties resolve deterministically.
        sort_key = (ext not in priority, rank, entry)
        if stem not in best_by_stem or sort_key < best_by_stem[stem][0]:
            best_by_stem[stem] = (sort_key, entry)

    return {stem: name for stem, (_, name) in best_by_stem.items()}


def find_and_list_images_by_class_id(
    folder_path_to_scan: str,
    target_ids: list[int]
) -> None:
    """
    Scans a folder for image and label files and lists image filenames
    that contain objects belonging to the specified numeric class IDs.

    Each '*.txt' file in the folder is treated as a label file whose lines
    start with an integer class ID. A label file is only considered when an
    image with the same base name (.jpg, .jpeg, .png, .bmp or .tiff, in any
    letter case) sits next to it. Results are printed to stdout, grouped by
    class ID; nothing is returned.

    Args:
        folder_path_to_scan (str): The directory containing both images and label files.
        target_ids (list[int]): A list of numeric class IDs to search for.
            Duplicates are ignored.
    """
    # Convert target_ids to a set for efficient lookup and to handle duplicates
    target_class_ids_set = set(target_ids)

    print(f"\n--- Searching for images containing class IDs: {sorted(target_class_ids_set)} ---")
    print(f"Scanning folder: '{folder_path_to_scan}'")

    if not target_class_ids_set:
        print("No valid target class IDs provided. Exiting.")
        return

    # A mistyped path would otherwise be reported as "no images found",
    # which hides the real problem from the user.
    if not os.path.isdir(folder_path_to_scan or os.curdir):
        print(f"Folder '{folder_path_to_scan}' does not exist or is not a directory. Exiting.")
        return

    # Dictionary to store results: {class_id: [list_of_image_filenames]}
    found_images_by_class_id = {class_id: [] for class_id in target_class_ids_set}

    # Escape the folder part so characters such as '[' or '*' in the path
    # are taken literally instead of being interpreted as glob patterns.
    label_pattern = os.path.join(glob.escape(folder_path_to_scan), '*.txt')
    label_files = glob.glob(label_pattern)
    print(f"Found {len(label_files)} label files to process.")

    # One directory listing replaces several os.path.exists() probes per label.
    images_by_stem = _index_images_by_stem(
        folder_path_to_scan,
        ('.jpg', '.jpeg', '.png', '.bmp', '.tiff'),
    )

    for label_file_path in label_files:
        base_filename = os.path.splitext(os.path.basename(label_file_path))[0]

        image_filename = images_by_stem.get(base_filename)
        if image_filename is None:
            continue  # Skip if no image is found for this label file

        # Collect the target IDs seen in this file in a set, so an image is
        # recorded once per class no matter how many objects it contains.
        matched_ids = set()
        try:
            with open(label_file_path, 'r', encoding='utf-8') as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue
                    try:
                        class_id_from_label = int(fields[0])
                    except ValueError:
                        # Line does not start with a valid integer class ID
                        continue
                    if class_id_from_label in target_class_ids_set:
                        matched_ids.add(class_id_from_label)
        except (OSError, UnicodeError) as e:
            # Best effort: report the problem and keep whatever was parsed
            # before the failure, then move on to the next label file.
            print(f"Error reading label file '{label_file_path}': {e}")

        for class_id in matched_ids:
            found_images_by_class_id[class_id].append(image_filename)

    # --- Print Final Results ---
    print("\n--- Results ---")
    any_images_found_overall = False
    # Sort by class ID for consistent output
    for class_id in sorted(found_images_by_class_id):
        image_list = found_images_by_class_id[class_id]
        if image_list:
            any_images_found_overall = True
            print(f"Images containing class ID '{class_id}':")
            for img_name in sorted(image_list):  # Sort filenames for consistent output
                print(f"  - {img_name}")
        else:
            print(f"Class ID '{class_id}': No images found.")

    if not any_images_found_overall:
        print("No images found for any of the specified target class IDs.")


# --- SECTION 3: DEMONSTRATION / EXECUTION (FOR TESTING THE CODE DIRECTLY) ---
# This section allows you to run the script immediately.
# If DATASET_FOLDER_PATH does not exist on disk (or is still the placeholder), it creates a temporary mock dataset.
# In your real usage, you will have set DATASET_FOLDER_PATH above to your actual data.

if __name__ == "__main__":
    # Path of the temporary mock folder, or None when real data is used.
    # Resetting it on every run avoids picking up a stale value left behind
    # by an earlier execution (e.g. a re-run notebook cell).
    temp_dir_for_demo = None

    # Check if the user has provided a real path or if we need to set up a mock one.
    needs_mock_dataset = (
        not os.path.exists(DATASET_FOLDER_PATH)
        or DATASET_FOLDER_PATH == "path/to/your/actual/dataset/folder"
    )

    try:
        if needs_mock_dataset:
            print("\n--- Setting up a mock dataset folder for demonstration ---")
            temp_dir_for_demo = tempfile.mkdtemp(prefix="mock_dataset_")
            DATASET_FOLDER_PATH = temp_dir_for_demo  # Use the temporary path for the demo
            print(f"Mock dataset created at: {DATASET_FOLDER_PATH}")
            print("REMINDER: For real use, change 'DATASET_FOLDER_PATH' at the top of the script.")

            # Mock image files are empty placeholders; only the label files
            # carry content. The mock labels use class IDs 4, 12 and 99, so
            # the demo only reports matches when TARGET_CLASS_IDS_FOR_SEARCH
            # includes one of those IDs.
            mock_files = {
                # image_A.jpg contains class ID 12
                'image_A.jpg': '',
                'image_A.txt': '12 0.5 0.5 0.1 0.1\n',
                # image_B.png contains class ID 4 and 12
                'image_B.png': '',
                'image_B.txt': '4 0.5 0.5 0.1 0.1\n12 0.6 0.6 0.2 0.2\n',
                # image_C.jpeg contains class ID 99
                'image_C.jpeg': '',
                'image_C.txt': '99 0.5 0.5 0.1 0.1\n',
                # image_D.jpg contains multiple instances of class ID 4
                'image_D.jpg': '',
                'image_D.txt': '4 0.1 0.1 0.1 0.1\n4 0.2 0.2 0.1 0.1\n',
            }
            for mock_name, mock_content in mock_files.items():
                mock_path = os.path.join(DATASET_FOLDER_PATH, mock_name)
                with open(mock_path, 'w', encoding='utf-8') as f:
                    f.write(mock_content)

        # --- Execute the main function with the defined variables ---
        find_and_list_images_by_class_id(DATASET_FOLDER_PATH, TARGET_CLASS_IDS_FOR_SEARCH)
    finally:
        # --- Clean up the mock dataset folder (ONLY FOR DEMONSTRATION) ---
        # Runs even if setup or the search raised, so the temporary folder
        # is never left behind. Skipped entirely when real data was used.
        if temp_dir_for_demo is not None and os.path.exists(temp_dir_for_demo):
            print(f"\n--- Cleaning up mock dataset folder: {temp_dir_for_demo} ---")
            shutil.rmtree(temp_dir_for_demo)
            print("Mock dataset folder removed.")



--- Searching for images containing class IDs: [102, 103, 105] ---
Scanning folder: 'Bangla_License_Plate'
Found 2669 label files to process.

--- Results ---
Images containing class ID '102':
  - 1844.jpg
Images containing class ID '103':
  - 2570.jpg
Images containing class ID '105':
  - 2584.jpg
