# notes: dataset sanitizer utility (move images with blackened regions) ~ detection dataset

In [None]:
import os
import shutil
import numpy as np
import cv2
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

def calculate_black_percentage(image_path, black_threshold=10):
    """
    Measure how much of an image is black, overall and along each border.

    The image is loaded as grayscale and every pixel whose intensity is
    strictly below ``black_threshold`` counts as black; because the
    comparison is strict, a threshold of 0 never matches any pixel.
    The border regions are the outer 20% strips on the top, bottom, left
    and right of the image.

    Args:
        image_path (str): Path to the image file
        black_threshold (int): Pixel value threshold for considering a pixel as black

    Returns:
        dict or None: ``{'overall': pct, 'regions': {'top': pct, 'bottom': pct,
        'left': pct, 'right': pct}}`` with percentages on a 0-100 scale, or
        None when the image cannot be read (a message is printed in that case).
        A border strip that ends up with no pixels (possible for images only a
        few pixels wide or tall) is reported as 0.0.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Failed to read image: {image_path}")
        return None

    # Boolean mask: True wherever the pixel is considered black
    black_mask = img < black_threshold
    height, width = img.shape

    def _percent_black(mask):
        # An empty slice would otherwise divide by zero and yield NaN,
        # which silently fails every ">= threshold" comparison downstream.
        if mask.size == 0:
            return 0.0
        return (np.count_nonzero(mask) / mask.size) * 100

    # Boundaries of the outer 20% strips on each side
    top_end, bottom_start = int(height * 0.2), int(height * 0.8)
    left_end, right_start = int(width * 0.2), int(width * 0.8)
    strips = {
        'top': black_mask[:top_end, :],
        'bottom': black_mask[bottom_start:, :],
        'left': black_mask[:, :left_end],
        'right': black_mask[:, right_start:],
    }

    return {
        'overall': _percent_black(black_mask),
        'regions': {name: _percent_black(strip) for name, strip in strips.items()},
    }

def is_black_region_significant(image_path, check_region=True, region_threshold=20, 
                              check_overall=False, overall_threshold=20, 
                              black_threshold=10):
    """
    Decide whether an image should be flagged for its amount of black pixels.

    Args:
        image_path (str): Path to the image file
        check_region (bool): Enable the per-border-region criterion
        region_threshold (float): Minimum black percentage (0-100) for a region to trigger
        check_overall (bool): Enable the whole-image criterion
        overall_threshold (float): Minimum black percentage (0-100) of the whole image to trigger
        black_threshold (int): Pixel value threshold for considering a pixel as black

    Returns:
        tuple: (is_significant, percentages, triggered_by). ``triggered_by`` lists
               the names of the criteria that fired ('overall' first, then region
               names). When the percentages cannot be computed the result is
               (False, None, None).
    """
    stats = calculate_black_percentage(image_path, black_threshold)
    if stats is None:
        return False, None, None

    # Whole-image criterion comes first so 'overall' leads the trigger list
    hits = ['overall'] if check_overall and stats['overall'] >= overall_threshold else []

    # Border-region criterion: every region at or above the threshold fires
    if check_region:
        hits.extend(
            name for name, pct in stats['regions'].items()
            if pct >= region_threshold
        )

    # Any fired criterion makes the image significant
    return bool(hits), stats, hits

def visualize_sample_images(source_dir, num_samples=5, check_region=True, region_threshold=20, 
                          check_overall=False, overall_threshold=20, black_threshold=10):
    """
    Visualize a few sample images with their black pixel masks for verification.

    Randomly picks up to ``num_samples`` images found anywhere under
    ``source_dir`` and shows, per image, the grayscale picture next to its
    black-pixel mask together with the measured percentages and the decision
    the mover would take.

    Args:
        source_dir (str): Directory searched recursively for images
        num_samples (int): Maximum number of images to display
        check_region (bool): Whether to check region-specific thresholds
        region_threshold (float): Percentage threshold for black pixels in regions (0-100)
        check_overall (bool): Whether to check the overall threshold
        overall_threshold (float): Percentage threshold for black pixels in the entire image (0-100)
        black_threshold (int): Pixel value threshold for considering a pixel as black

    Returns:
        None. Prints a message and returns early when no image is found.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.bmp')

    # Collect all image files including those in subfolders
    image_files = [
        os.path.join(root, name)
        for root, _, files in os.walk(source_dir)
        for name in files
        if name.lower().endswith(image_exts)
    ]

    if not image_files:
        print("No image files found.")
        return

    # Sample a few images without repetition
    sample_files = np.random.choice(image_files, min(num_samples, len(image_files)), replace=False)

    # squeeze=False keeps `axes` two-dimensional even for a single sample
    _fig, axes = plt.subplots(len(sample_files), 2,
                              figsize=(12, 4 * len(sample_files)), squeeze=False)

    # Enabled criteria, separated so that both together do not read "RegionOverall"
    enabled = [label for label, on in (('Region', check_region), ('Overall', check_overall)) if on]
    criteria_info = f"Criteria: {', '.join(enabled)}"

    for i, img_path in enumerate(sample_files):
        # Relative path is shorter and more readable in the plot title
        rel_path = os.path.relpath(img_path, source_dir)

        # Check if image meets criteria and get percentages
        is_significant, percentages, triggered_by = is_black_region_significant(
            img_path, check_region, region_threshold, check_overall, overall_threshold, black_threshold)

        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) if percentages is not None else None
        if img is None:
            # Unreadable file: mark the row instead of crashing the whole figure
            for ax in axes[i]:
                ax.set_title(f"{rel_path}\n(unreadable image)")
                ax.axis('off')
            continue

        black_mask = img < black_threshold

        # Left panel: original image
        axes[i, 0].imshow(img, cmap='gray')
        title = f"{rel_path}\nOverall black: {percentages['overall']:.1f}%"
        if check_overall:
            title += f" (threshold: {overall_threshold}%)"
        axes[i, 0].set_title(title)

        # Right panel: black mask with the decision details
        axes[i, 1].imshow(black_mask, cmap='gray')

        region_info = ""
        if check_region:
            region_values = [f"{k}: {v:.1f}%" for k, v in percentages['regions'].items()]
            region_info = f"Regions (threshold: {region_threshold}%): " + ", ".join(region_values)

        trigger_info = f"Triggered by: {', '.join(triggered_by) if triggered_by else 'None'}"

        title = f"Move: {'Yes' if is_significant else 'No'}\n{region_info}\n{trigger_info}\n{criteria_info}"
        axes[i, 1].set_title(title)

    plt.tight_layout()
    plt.show()

def move_images_with_annotations(source_dir, target_dir, check_region=True, region_threshold=20, 
                                check_overall=False, overall_threshold=20, 
                                annotation_ext='.txt', image_exts=('.jpg', '.jpeg', '.png', '.bmp'),
                                black_threshold=10, verbose=True):
    """
    Move images with significant black regions and their corresponding annotation files
    to the target directory, preserving subfolder structure.

    The annotation file is looked up next to the image, using the image's base
    name plus ``annotation_ext``. Directories located at or below ``target_dir``
    are skipped, so a target nested inside the source is never rescanned.

    Args:
        source_dir (str): Source directory containing images and annotations
        target_dir (str): Target directory to move files to
        check_region (bool): Whether to check region-specific thresholds
        region_threshold (float): Percentage threshold for black pixels in regions (0-100)
        check_overall (bool): Whether to check the overall threshold
        overall_threshold (float): Percentage threshold for black pixels in the entire image (0-100)
        annotation_ext (str): Extension of annotation files
        image_exts (tuple): Extensions of image files to check (matched case-insensitively)
        black_threshold (int): Pixel value threshold for considering a pixel as black
        verbose (bool): Print the percentages and trigger for every moved image

    Returns:
        tuple: (processed_count, moved_count) - number of images examined and
               number of images moved
    """
    # Create target directory if it doesn't exist
    os.makedirs(target_dir, exist_ok=True)

    # Normalised target location, used to keep the walk out of the target subtree
    target_root = os.path.normcase(os.path.abspath(target_dir))
    target_prefix = os.path.join(target_root, '')

    # File names are lower-cased before matching, so the extensions must be too
    exts = tuple(ext.lower() for ext in image_exts)

    moved_count = 0
    processed_count = 0

    # The walk is materialised up front so the progress bar knows its length
    for root, _, files in tqdm(list(os.walk(source_dir)), desc="Processing directories"):
        root_abs = os.path.normcase(os.path.abspath(root))
        if root_abs == target_root or root_abs.startswith(target_prefix):
            # Already-moved files live here; processing them again would nest
            # them one level deeper on every run.
            continue

        # Get relative path from source directory
        rel_path = os.path.relpath(root, source_dir)

        # Get all image files in current directory
        image_files = [f for f in files if f.lower().endswith(exts)]

        for img_file in tqdm(image_files, desc=f"Processing images in {rel_path}", leave=False):
            processed_count += 1
            img_path = os.path.join(root, img_file)

            # Check if image has significant black region and get percentages
            is_significant, percentages, triggered_by = is_black_region_significant(
                img_path, check_region, region_threshold, check_overall, overall_threshold, black_threshold)

            if not is_significant:
                continue

            # Mirror the source subfolder inside the target directory
            if rel_path != '.':
                target_subfolder = os.path.join(target_dir, rel_path)
                os.makedirs(target_subfolder, exist_ok=True)
            else:
                target_subfolder = target_dir

            # Print detailed percentages for the moved image (only if verbose)
            if verbose:
                print(f"\nMoving {os.path.join(rel_path, img_file)}:")
                print(f"  Overall black percentage: {percentages['overall']:.2f}%", end="")
                if check_overall:
                    print(f" (threshold: {overall_threshold}%)")
                else:
                    print(" (not used for detection)")

                if check_region:
                    print(f"  Region percentages (threshold: {region_threshold}%):")
                    for region, pct in percentages['regions'].items():
                        print(f"    {region}: {pct:.2f}%")
                else:
                    print("  Region percentages not used for detection")

                # Print what triggered the move
                print(f"  Triggered by: {', '.join(triggered_by)}")

            # Move image file
            shutil.move(img_path, os.path.join(target_subfolder, img_file))

            # Annotation shares the image's base name, with annotation_ext
            base_name = os.path.splitext(img_file)[0]
            ann_file = base_name + annotation_ext
            ann_path = os.path.join(root, ann_file)

            # Move annotation file if it exists
            if os.path.exists(ann_path):
                shutil.move(ann_path, os.path.join(target_subfolder, ann_file))
                if verbose:
                    print(f"  Moved annotation file: {ann_file}")
            elif verbose:
                print(f"  Warning: Annotation file {ann_file} not found")

            moved_count += 1

    return processed_count, moved_count

# Example usage with visualization (detection dataset)
# Note: run the notebook cells one at a time

# Source directory to scan ~ default : '/path/to/source/folder'

# example 01 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels' ~ detection
# example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_125' ~ detection
# example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_125' ~ detection
# example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_150' ~ detection
# example 05 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_150' ~ detection
# example 08 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_175' ~ detection
# example 09 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_175' ~ detection
# notes : automated labels + 25% bounding box area expansion + 480 by 480 pixels ;
# address : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\480_T_slicing_output_automated_labels_pc_125'

source_dir = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\480_T_slicing_output_automated_labels_pc_125'

# Target directory that receives the flagged files ~ default : '/path/to/target/folder'

# example 01 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_black_boxes' ~ detection
# example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_125_black_boxes' ~ detection
# example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_125_black_boxes' ~ detection
# example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_150_black_boxes' ~ detection
# example 05 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_150_black_boxes' ~ detection
# example 08 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_175_black_boxes' ~ detection
# example 09 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_175_black_boxes' ~ detection
# notes : automated labels + 25% bounding box area expansion + 480 by 480 pixels ;
# address : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\480_T_slicing_output_automated_labels_pc_125_black_boxes'

target_dir = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\480_T_slicing_output_automated_labels_pc_125_black_boxes'

# Threshold settings
check_region = False     # use the per-border-region criterion?
region_threshold = 50    # black percentage a border region must reach to trigger
check_overall = True     # use the whole-image criterion?
overall_threshold = 20   # black percentage the whole image must reach to trigger
# Pixels strictly below this value (0-255) count as black, so 0 matches nothing
# and no image would move; 1 is the lowest useful setting.
black_threshold = 1

# The same detection settings feed both the preview and the actual move
detection_kwargs = {
    'check_region': check_region,
    'region_threshold': region_threshold,
    'check_overall': check_overall,
    'overall_threshold': overall_threshold,
    'black_threshold': black_threshold,
}

# Preview a sample image to verify the detection before moving anything
visualize_sample_images(source_dir, num_samples=1, **detection_kwargs)

# Move flagged images together with their annotations
processed, moved = move_images_with_annotations(
    source_dir=source_dir,
    target_dir=target_dir,
    annotation_ext='.txt',  # change if the annotation files use another extension
    verbose=False,          # function default is True; False keeps large runs quiet
    **detection_kwargs,
)

print("\nSummary:")
print(f"Processed {processed} images")
print(f"Moved {moved} images with significant black regions")
print("Criteria used:")
for criterion_on, criterion_label, criterion_value in (
    (check_region, 'Region', region_threshold),
    (check_overall, 'Overall', overall_threshold),
):
    if criterion_on:
        print(f"- {criterion_label} threshold: {criterion_value}%")

# notes: dataset sanitizer utility (move images with blackened regions) ~ classification dataset

In [None]:
import os
import shutil
import numpy as np
import cv2
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

def calculate_black_percentage(image_path, black_threshold=10):
    """
    Measure how much of an image is black, overall and along each border.

    The image is loaded as grayscale and every pixel whose intensity is
    strictly below ``black_threshold`` counts as black; because the
    comparison is strict, a threshold of 0 never matches any pixel.
    The border regions are the outer 20% strips on the top, bottom, left
    and right of the image.

    Args:
        image_path (str): Path to the image file
        black_threshold (int): Pixel value threshold for considering a pixel as black

    Returns:
        dict or None: ``{'overall': pct, 'regions': {'top': pct, 'bottom': pct,
        'left': pct, 'right': pct}}`` with percentages on a 0-100 scale, or
        None when the image cannot be read (a message is printed in that case).
        A border strip that ends up with no pixels (possible for images only a
        few pixels wide or tall) is reported as 0.0.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Failed to read image: {image_path}")
        return None

    # Boolean mask: True wherever the pixel is considered black
    black_mask = img < black_threshold
    height, width = img.shape

    def _percent_black(mask):
        # An empty slice would otherwise divide by zero and yield NaN,
        # which silently fails every ">= threshold" comparison downstream.
        if mask.size == 0:
            return 0.0
        return (np.count_nonzero(mask) / mask.size) * 100

    # Boundaries of the outer 20% strips on each side
    top_end, bottom_start = int(height * 0.2), int(height * 0.8)
    left_end, right_start = int(width * 0.2), int(width * 0.8)
    strips = {
        'top': black_mask[:top_end, :],
        'bottom': black_mask[bottom_start:, :],
        'left': black_mask[:, :left_end],
        'right': black_mask[:, right_start:],
    }

    return {
        'overall': _percent_black(black_mask),
        'regions': {name: _percent_black(strip) for name, strip in strips.items()},
    }

def is_black_region_significant(image_path, check_region=True, region_threshold=20, 
                              check_overall=False, overall_threshold=20, 
                              black_threshold=10):
    """
    Decide whether an image should be flagged for its amount of black pixels.

    Args:
        image_path (str): Path to the image file
        check_region (bool): Enable the per-border-region criterion
        region_threshold (float): Minimum black percentage (0-100) for a region to trigger
        check_overall (bool): Enable the whole-image criterion
        overall_threshold (float): Minimum black percentage (0-100) of the whole image to trigger
        black_threshold (int): Pixel value threshold for considering a pixel as black

    Returns:
        tuple: (is_significant, percentages, triggered_by). ``triggered_by`` lists
               the names of the criteria that fired ('overall' first, then region
               names). When the percentages cannot be computed the result is
               (False, None, None).
    """
    stats = calculate_black_percentage(image_path, black_threshold)
    if stats is None:
        return False, None, None

    # Whole-image criterion comes first so 'overall' leads the trigger list
    hits = ['overall'] if check_overall and stats['overall'] >= overall_threshold else []

    # Border-region criterion: every region at or above the threshold fires
    if check_region:
        hits.extend(
            name for name, pct in stats['regions'].items()
            if pct >= region_threshold
        )

    # Any fired criterion makes the image significant
    return bool(hits), stats, hits

def visualize_sample_images(source_dir, num_samples=5, check_region=True, region_threshold=20, 
                          check_overall=False, overall_threshold=20, black_threshold=10):
    """
    Visualize a few sample images with their black pixel masks for verification.

    Randomly picks up to ``num_samples`` images found anywhere under
    ``source_dir`` and shows, per image, the grayscale picture next to its
    black-pixel mask together with the measured percentages and the decision
    the mover would take.

    Args:
        source_dir (str): Directory searched recursively for images
        num_samples (int): Maximum number of images to display
        check_region (bool): Whether to check region-specific thresholds
        region_threshold (float): Percentage threshold for black pixels in regions (0-100)
        check_overall (bool): Whether to check the overall threshold
        overall_threshold (float): Percentage threshold for black pixels in the entire image (0-100)
        black_threshold (int): Pixel value threshold for considering a pixel as black

    Returns:
        None. Prints a message and returns early when no image is found.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.bmp')

    # Collect all image files including those in subfolders
    image_files = [
        os.path.join(root, name)
        for root, _, files in os.walk(source_dir)
        for name in files
        if name.lower().endswith(image_exts)
    ]

    if not image_files:
        print("No image files found.")
        return

    # Sample a few images without repetition
    sample_files = np.random.choice(image_files, min(num_samples, len(image_files)), replace=False)

    # squeeze=False keeps `axes` two-dimensional even for a single sample
    _fig, axes = plt.subplots(len(sample_files), 2,
                              figsize=(12, 4 * len(sample_files)), squeeze=False)

    # Enabled criteria, separated so that both together do not read "RegionOverall"
    enabled = [label for label, on in (('Region', check_region), ('Overall', check_overall)) if on]
    criteria_info = f"Criteria: {', '.join(enabled)}"

    for i, img_path in enumerate(sample_files):
        # Relative path is shorter and more readable in the plot title
        rel_path = os.path.relpath(img_path, source_dir)

        # Check if image meets criteria and get percentages
        is_significant, percentages, triggered_by = is_black_region_significant(
            img_path, check_region, region_threshold, check_overall, overall_threshold, black_threshold)

        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) if percentages is not None else None
        if img is None:
            # Unreadable file: mark the row instead of crashing the whole figure
            for ax in axes[i]:
                ax.set_title(f"{rel_path}\n(unreadable image)")
                ax.axis('off')
            continue

        black_mask = img < black_threshold

        # Left panel: original image
        axes[i, 0].imshow(img, cmap='gray')
        title = f"{rel_path}\nOverall black: {percentages['overall']:.1f}%"
        if check_overall:
            title += f" (threshold: {overall_threshold}%)"
        axes[i, 0].set_title(title)

        # Right panel: black mask with the decision details
        axes[i, 1].imshow(black_mask, cmap='gray')

        region_info = ""
        if check_region:
            region_values = [f"{k}: {v:.1f}%" for k, v in percentages['regions'].items()]
            region_info = f"Regions (threshold: {region_threshold}%): " + ", ".join(region_values)

        trigger_info = f"Triggered by: {', '.join(triggered_by) if triggered_by else 'None'}"

        title = f"Move: {'Yes' if is_significant else 'No'}\n{region_info}\n{trigger_info}\n{criteria_info}"
        axes[i, 1].set_title(title)

    plt.tight_layout()
    plt.show()

def move_images_with_annotations(source_dir, target_dir, check_region=True, region_threshold=20, 
                                check_overall=False, overall_threshold=20, 
                                annotation_ext='.txt', image_exts=('.jpg', '.jpeg', '.png', '.bmp'),
                                black_threshold=10, verbose=True):
    """
    Move images with significant black regions and their corresponding annotation files
    to the target directory, preserving subfolder structure.

    The annotation file is looked up next to the image, using the image's base
    name plus ``annotation_ext``. Directories located at or below ``target_dir``
    are skipped, so a target nested inside the source is never rescanned.

    Args:
        source_dir (str): Source directory containing images and annotations
        target_dir (str): Target directory to move files to
        check_region (bool): Whether to check region-specific thresholds
        region_threshold (float): Percentage threshold for black pixels in regions (0-100)
        check_overall (bool): Whether to check the overall threshold
        overall_threshold (float): Percentage threshold for black pixels in the entire image (0-100)
        annotation_ext (str): Extension of annotation files
        image_exts (tuple): Extensions of image files to check (matched case-insensitively)
        black_threshold (int): Pixel value threshold for considering a pixel as black
        verbose (bool): Print the percentages and trigger for every moved image

    Returns:
        tuple: (processed_count, moved_count) - number of images examined and
               number of images moved
    """
    # Create target directory if it doesn't exist
    os.makedirs(target_dir, exist_ok=True)

    # Normalised target location, used to keep the walk out of the target subtree
    target_root = os.path.normcase(os.path.abspath(target_dir))
    target_prefix = os.path.join(target_root, '')

    # File names are lower-cased before matching, so the extensions must be too
    exts = tuple(ext.lower() for ext in image_exts)

    moved_count = 0
    processed_count = 0

    # The walk is materialised up front so the progress bar knows its length
    for root, _, files in tqdm(list(os.walk(source_dir)), desc="Processing directories"):
        root_abs = os.path.normcase(os.path.abspath(root))
        if root_abs == target_root or root_abs.startswith(target_prefix):
            # Already-moved files live here; processing them again would nest
            # them one level deeper on every run.
            continue

        # Get relative path from source directory
        rel_path = os.path.relpath(root, source_dir)

        # Get all image files in current directory
        image_files = [f for f in files if f.lower().endswith(exts)]

        for img_file in tqdm(image_files, desc=f"Processing images in {rel_path}", leave=False):
            processed_count += 1
            img_path = os.path.join(root, img_file)

            # Check if image has significant black region and get percentages
            is_significant, percentages, triggered_by = is_black_region_significant(
                img_path, check_region, region_threshold, check_overall, overall_threshold, black_threshold)

            if not is_significant:
                continue

            # Mirror the source subfolder inside the target directory
            if rel_path != '.':
                target_subfolder = os.path.join(target_dir, rel_path)
                os.makedirs(target_subfolder, exist_ok=True)
            else:
                target_subfolder = target_dir

            # Print detailed percentages for the moved image (only if verbose)
            if verbose:
                print(f"\nMoving {os.path.join(rel_path, img_file)}:")
                print(f"  Overall black percentage: {percentages['overall']:.2f}%", end="")
                if check_overall:
                    print(f" (threshold: {overall_threshold}%)")
                else:
                    print(" (not used for detection)")

                if check_region:
                    print(f"  Region percentages (threshold: {region_threshold}%):")
                    for region, pct in percentages['regions'].items():
                        print(f"    {region}: {pct:.2f}%")
                else:
                    print("  Region percentages not used for detection")

                # Print what triggered the move
                print(f"  Triggered by: {', '.join(triggered_by)}")

            # Move image file
            shutil.move(img_path, os.path.join(target_subfolder, img_file))

            # Annotation shares the image's base name, with annotation_ext
            base_name = os.path.splitext(img_file)[0]
            ann_file = base_name + annotation_ext
            ann_path = os.path.join(root, ann_file)

            # Move annotation file if it exists
            if os.path.exists(ann_path):
                shutil.move(ann_path, os.path.join(target_subfolder, ann_file))
                if verbose:
                    print(f"  Moved annotation file: {ann_file}")
            elif verbose:
                print(f"  Warning: Annotation file {ann_file} not found")

            moved_count += 1

    return processed_count, moved_count

# Example usage with visualization (classification dataset)
# Note: run the notebook cells one at a time

# Source directory to scan ~ default : '/path/to/source/folder'

# example 10 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_manual-labels_T_640_100-pc' ~ classification
# example 11 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_automated-labels_T_640_100-pc' ~ classification
# example 12 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_manual-labels_T_640_125-pc' ~ classification
# example 13 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_automated-labels_T_640_125-pc' ~ classification
# example 14 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_manual-labels_T_640_150-pc' ~ classification
# example 15 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_automated-labels_T_640_150-pc' ~ classification

source_dir = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_automated-labels_T_640_150-pc'

# Target directory that receives the flagged files ~ default : '/path/to/target/folder'

# example 10 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_manual-labels_T_640_100-pc_black-boxes' ~ classification
# example 11 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_automated-labels_T_640_100-pc_black-boxes' ~ classification
# example 12 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_manual-labels_T_640_125-pc_black-boxes' ~ classification
# example 13 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_automated-labels_T_640_125-pc_black-boxes' ~ classification
# example 14 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_manual-labels_T_640_150-pc_black-boxes' ~ classification
# example 15 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_automated-labels_T_640_150-pc_black-boxes' ~ classification

target_dir = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th-setup_task-04_new-work-03\\vet_images_sliced\\TrainingStepSet_automated-labels_T_640_150-pc_black-boxes'

# Threshold settings
check_region = False     # use the per-border-region criterion?
region_threshold = 50    # black percentage a border region must reach to trigger
check_overall = True     # use the whole-image criterion?
overall_threshold = 20   # black percentage the whole image must reach to trigger
# Pixels strictly below this value (0-255) count as black, so 0 matches nothing
# and no image would move; 1 is the lowest useful setting.
black_threshold = 1

# The same detection settings feed both the preview and the actual move
detection_kwargs = {
    'check_region': check_region,
    'region_threshold': region_threshold,
    'check_overall': check_overall,
    'overall_threshold': overall_threshold,
    'black_threshold': black_threshold,
}

# Preview a sample image to verify the detection before moving anything
visualize_sample_images(source_dir, num_samples=1, **detection_kwargs)

# Move flagged images together with their annotations
processed, moved = move_images_with_annotations(
    source_dir=source_dir,
    target_dir=target_dir,
    annotation_ext='.txt',  # change if the annotation files use another extension
    verbose=False,          # function default is True; False keeps large runs quiet
    **detection_kwargs,
)

print("\nSummary:")
print(f"Processed {processed} images")
print(f"Moved {moved} images with significant black regions")
print("Criteria used:")
for criterion_on, criterion_label, criterion_value in (
    (check_region, 'Region', region_threshold),
    (check_overall, 'Overall', overall_threshold),
):
    if criterion_on:
        print(f"- {criterion_label} threshold: {criterion_value}%")

# Notes: The end.