In [None]:
### ... ~ version 01 A

In [None]:
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob
from collections import defaultdict

def analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.2, visualize=True):
    """
    Analyze image dimensions and copy every image into a "square" or "lopsided" folder.

    Images are searched recursively under ``input_folder``. Each readable image is
    classified by how far its width/height ratio deviates from 1.0 and is copied
    (the originals stay in place) to ``output_folder/square`` or
    ``output_folder/lopsided``, keeping its path relative to ``input_folder``.

    Parameters:
    - input_folder: Path to the folder containing input images
    - output_folder: Path to the folder where images will be saved
    - aspect_ratio_threshold: Maximum allowed deviation of width/height from square (1.0)
                             e.g., 0.2 means aspect ratios between 0.8 and 1.2 are considered "square".
                             Only width/height is tested, so the check is not symmetric:
                             a 1:2 portrait image deviates by 0.5, a 2:1 landscape image by 1.0.
    - visualize: Whether to show the histogram, scatter plot and example images

    Returns:
    - Dictionary with statistics about the dataset:
        'total_images'       number of image files found
        'processed_images'   number of images that could be read and were copied
        'square_count'       images classified as square (whole dataset)
        'lopsided_count'     images classified as lopsided (whole dataset)
        'aspect_ratio_stats' min / max / avg / median of width/height
        'dimension_stats'    min / max / avg of width and height
        'folder_stats'       per-subfolder total / square / lopsided counts
      All numeric statistics are 0 when no image could be read.
    """
    def percent(part, whole):
        # Guarded percentage so empty datasets do not raise ZeroDivisionError
        return part / whole * 100 if whole else 0.0

    # Create output directories
    square_folder = os.path.join(output_folder, "square")
    lopsided_folder = os.path.join(output_folder, "lopsided")

    os.makedirs(square_folder, exist_ok=True)
    os.makedirs(lopsided_folder, exist_ok=True)

    # Find all image files in the input folder and its subfolders
    image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.bmp', '*.tif', '*.tiff']
    image_paths = []

    for ext in image_extensions:
        image_paths.extend(glob.glob(os.path.join(input_folder, '**', ext), recursive=True))

    total_images = len(image_paths)

    # Count child folders (the input folder itself is reported as '.' and excluded)
    child_folders = {
        rel for rel in (os.path.relpath(os.path.dirname(path), input_folder) for path in image_paths)
        if rel != '.'
    }

    print(f"Found {total_images} images in {len(child_folders)} child folders.")

    # Nothing to do: return an all-zero result instead of failing on the
    # percentage / max() computations further down.
    if not image_paths:
        print("No images found; nothing to analyze.")
        return {
            'total_images': 0,
            'processed_images': 0,
            'square_count': 0,
            'lopsided_count': 0,
            'aspect_ratio_stats': {'min': 0, 'max': 0, 'avg': 0, 'median': 0},
            'dimension_stats': {
                'width': {'min': 0, 'max': 0, 'avg': 0},
                'height': {'min': 0, 'max': 0, 'avg': 0}
            },
            'folder_stats': {}
        }

    # Dataset-wide counters and per-image measurements
    square_count = 0
    lopsided_count = 0
    aspect_ratios = []
    widths = []
    heights = []

    # Track subfolder statistics
    folder_stats = defaultdict(lambda: {'total': 0, 'square': 0, 'lopsided': 0})

    # Store example images for visualization
    first_square = None
    first_lopsided = None

    # Process each image with progress bar
    for image_path in tqdm(image_paths, desc="Processing images", unit="image"):
        # cv2.imread decodes the whole image and returns None (no exception)
        # when the file cannot be read or decoded.
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read image {image_path}. Skipping.")
            continue

        height, width = img.shape[:2]

        # Calculate aspect ratio (width / height)
        aspect_ratio = width / height if height > 0 else 0

        # Store for statistics
        aspect_ratios.append(aspect_ratio)
        widths.append(width)
        heights.append(height)

        # Get the relative folder path
        rel_folder = os.path.relpath(os.path.dirname(image_path), input_folder)
        if rel_folder == '.':
            rel_folder = 'root'

        # Update folder statistics
        folder_stats[rel_folder]['total'] += 1

        # For a square image, aspect ratio should be close to 1.0
        is_square = abs(aspect_ratio - 1.0) <= aspect_ratio_threshold

        # Determine destination path, preserving the sub-path below input_folder
        rel_path = os.path.relpath(image_path, input_folder)
        dest_folder = square_folder if is_square else lopsided_folder
        dest_path = os.path.join(dest_folder, rel_path)

        # Create subdirectory if needed
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)

        # Copy the image (with metadata); the original is left untouched
        shutil.copy2(image_path, dest_path)

        # Update counters
        if is_square:
            square_count += 1
            folder_stats[rel_folder]['square'] += 1
            if first_square is None and visualize:
                first_square = (img, image_path, width, height, aspect_ratio)
        else:
            lopsided_count += 1
            folder_stats[rel_folder]['lopsided'] += 1
            if first_lopsided is None and visualize:
                first_lopsided = (img, image_path, width, height, aspect_ratio)

    processed_images = len(aspect_ratios)
    skipped_images = total_images - processed_images

    # Overall statistics; everything stays 0 if every file was unreadable
    if aspect_ratios:
        min_width, max_width = min(widths), max(widths)
        min_height, max_height = min(heights), max(heights)
        min_aspect_ratio, max_aspect_ratio = min(aspect_ratios), max(aspect_ratios)
        avg_width = np.mean(widths)
        avg_height = np.mean(heights)
        avg_aspect_ratio = np.mean(aspect_ratios)
        median_aspect_ratio = np.median(aspect_ratios)
        max_deviation = max(abs(ar - 1.0) for ar in aspect_ratios)
    else:
        min_width = max_width = avg_width = 0
        min_height = max_height = avg_height = 0
        min_aspect_ratio = max_aspect_ratio = 0
        avg_aspect_ratio = median_aspect_ratio = 0
        max_deviation = 0

    # Print detailed statistics (percentages are relative to readable images)
    print("\nImage Dimension Analysis:")
    print(f"Total images processed: {processed_images}")
    if skipped_images:
        print(f"Unreadable images skipped: {skipped_images}")
    print(f"Square images: {square_count} ({percent(square_count, processed_images):.2f}%)")
    print(f"Lopsided images: {lopsided_count} ({percent(lopsided_count, processed_images):.2f}%)")

    print("\nDimension Statistics:")
    print(f"Width: min={min_width}, max={max_width}, avg={avg_width:.2f}")
    print(f"Height: min={min_height}, max={max_height}, avg={avg_height:.2f}")
    print(f"Aspect Ratio (width/height): min={min_aspect_ratio:.4f}, max={max_aspect_ratio:.4f}")
    print(f"Average aspect ratio: {avg_aspect_ratio:.4f}")
    print(f"Median aspect ratio: {median_aspect_ratio:.4f}")
    print(f"Maximum deviation from square: {max_deviation:.4f}")

    # Print folder statistics. Folder-local names are used on purpose so the
    # dataset-wide square_count / lopsided_count returned below are not overwritten.
    print("\nFolder Distribution:")
    for folder, stats in sorted(folder_stats.items()):
        folder_total = stats['total']
        folder_square = stats['square']
        folder_lopsided = stats['lopsided']

        print(f"  {folder}: Total={folder_total}, "
              f"Square={folder_square} ({percent(folder_square, folder_total):.2f}%), "
              f"Lopsided={folder_lopsided} ({percent(folder_lopsided, folder_total):.2f}%)")

    # Create visualizations
    if visualize and aspect_ratios:
        plt.figure(figsize=(12, 10))

        # 1. Histogram of aspect ratios
        plt.subplot(2, 2, 1)
        plt.hist(aspect_ratios, bins=50, alpha=0.75)
        plt.axvline(x=1.0, color='r', linestyle='--', label='Square (1.0)')
        plt.axvline(x=1.0+aspect_ratio_threshold, color='g', linestyle='--',
                   label=f'Threshold ({1.0+aspect_ratio_threshold:.2f})')
        plt.axvline(x=1.0-aspect_ratio_threshold, color='g', linestyle='--')
        plt.title('Distribution of Aspect Ratios')
        plt.xlabel('Aspect Ratio (width/height)')
        plt.ylabel('Count')
        plt.legend()

        # 2. Scatter plot of width vs height
        plt.subplot(2, 2, 2)
        plt.scatter(widths, heights, alpha=0.5, s=3)
        plt.plot([0, max_width], [0, max_width], 'r--', label='Square (w=h)')
        plt.title('Width vs Height')
        plt.xlabel('Width (pixels)')
        plt.ylabel('Height (pixels)')
        plt.legend()

        # 3. Example images: each one is drawn when available, so a dataset that
        #    contains only one class still shows its example.
        examples = (("Square", first_square, 3), ("Lopsided", first_lopsided, 4))
        for label, example, position in examples:
            if example is None:
                continue
            ex_img, ex_path, ex_width, ex_height, ex_ratio = example
            plt.subplot(2, 2, position)
            # OpenCV loads images as BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(ex_img, cv2.COLOR_BGR2RGB))
            plt.title(f"{label} Example: {os.path.basename(ex_path)}\n"
                      f"{ex_width}x{ex_height} (AR: {ex_ratio:.2f})")
            plt.axis('off')

        plt.tight_layout()
        plt.show()

    return {
        'total_images': total_images,
        'processed_images': processed_images,
        'square_count': square_count,
        'lopsided_count': lopsided_count,
        'aspect_ratio_stats': {
            'min': min_aspect_ratio,
            'max': max_aspect_ratio,
            'avg': avg_aspect_ratio,
            'median': median_aspect_ratio
        },
        'dimension_stats': {
            'width': {'min': min_width, 'max': max_width, 'avg': avg_width},
            'height': {'min': min_height, 'max': max_height, 'avg': avg_height}
        },
        'folder_stats': dict(folder_stats)
    }

# Define input and output folders - replace these with your actual paths
# NOTE: the call at the bottom of this cell runs immediately and copies every
# readable image from input_folder into output_folder/square or output_folder/lopsided.
input_folder = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\vet_images_sliced\\TrainingStepSet_automated-labels_T_full-size_150-pc"
output_folder = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\vet_images_sliced\\TrainingStepSet_automated-labels_T_full-size_150-pc_categorized"

# Example usage:
# Run with default threshold (images with aspect ratio between 0.8 and 1.2 are considered square)
# results = analyze_and_separate_images(input_folder, output_folder)

# For more lenient threshold (images with aspect ratio between 0.7 and 1.3 are considered square)
# results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.3)

# For stricter threshold (images with aspect ratio between 0.9 and 1.1 are considered square)
# results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.1)

# Very permissive threshold: with aspect_ratio_threshold=1 the check
# abs(width/height - 1.0) <= 1 accepts every ratio from 0.0 to 2.0, so all
# portrait images and landscape images up to 2:1 are classified as "square"
results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=1)

In [None]:
### ... ~ version 02 A

In [None]:
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob
from collections import defaultdict

def analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.2, visualize=True):
    """
    Analyze image dimensions and move every image into a "square" or "lopsided" folder.

    Images are searched recursively under ``input_folder``. A first pass reads
    each image once, collects overall and per-folder statistics and records the
    classification; a second pass then MOVES the files (``input_folder`` is
    modified) to ``output_folder/square`` or ``output_folder/lopsided``, keeping
    each file's path relative to ``input_folder``.

    Parameters:
    - input_folder: Path to the folder containing input images
    - output_folder: Path to the folder where images will be moved
    - aspect_ratio_threshold: Maximum allowed deviation of width/height from square (1.0)
                             e.g., 0.2 means aspect ratios between 0.8 and 1.2 are considered "square".
                             Only width/height is tested, so the check is not symmetric:
                             a 1:2 portrait image deviates by 0.5, a 2:1 landscape image by 1.0.
    - visualize: Whether to show visualizations of aspect ratios and examples

    Returns:
    - Dictionary with statistics about the dataset:
        'total_images'       number of image files found
        'processed_images'   number of images that could be read and classified
        'square_count'       images classified as square (whole dataset)
        'lopsided_count'     images classified as lopsided (whole dataset)
        'aspect_ratio_stats' min / max / avg / median of width/height
        'dimension_stats'    min / max / avg of width and height
        'folder_stats'       per-subfolder counts plus the raw widths, heights
                             and aspect ratios of the images in that folder
      All numeric statistics are 0 when no image could be read.
    """
    def percent(part, whole):
        # Guarded percentage so empty datasets do not raise ZeroDivisionError
        return part / whole * 100 if whole else 0.0

    # Create output directories
    square_folder = os.path.join(output_folder, "square")
    lopsided_folder = os.path.join(output_folder, "lopsided")

    os.makedirs(square_folder, exist_ok=True)
    os.makedirs(lopsided_folder, exist_ok=True)

    # Find all image files in the input folder and its subfolders
    image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.bmp', '*.tif', '*.tiff']
    image_paths = []

    for ext in image_extensions:
        image_paths.extend(glob.glob(os.path.join(input_folder, '**', ext), recursive=True))

    total_images = len(image_paths)

    # Count child folders (the input folder itself is reported as '.' and excluded)
    child_folders = {
        rel for rel in (os.path.relpath(os.path.dirname(path), input_folder) for path in image_paths)
        if rel != '.'
    }

    print(f"Found {total_images} images in {len(child_folders)} child folders.")

    # Nothing to do: return an all-zero result instead of failing on the
    # percentage / max() computations further down.
    if not image_paths:
        print("No images found; nothing to analyze.")
        return {
            'total_images': 0,
            'processed_images': 0,
            'square_count': 0,
            'lopsided_count': 0,
            'aspect_ratio_stats': {'min': 0, 'max': 0, 'avg': 0, 'median': 0},
            'dimension_stats': {
                'width': {'min': 0, 'max': 0, 'avg': 0},
                'height': {'min': 0, 'max': 0, 'avg': 0}
            },
            'folder_stats': {}
        }

    # Dataset-wide counters and per-image measurements
    square_count = 0
    lopsided_count = 0
    aspect_ratios = []
    widths = []
    heights = []

    # (image_path, is_square) for every readable image, so the move pass
    # does not have to decode each file a second time
    classified = []

    # Track subfolder statistics including dimension stats
    folder_stats = defaultdict(lambda: {
        'total': 0,
        'square': 0,
        'lopsided': 0,
        'widths': [],
        'heights': [],
        'aspect_ratios': []
    })

    # Store example images for visualization
    first_square = None
    first_lopsided = None

    # First pass: Collect all statistics without moving files
    print("Analyzing image dimensions...")
    for image_path in tqdm(image_paths, desc="Analyzing images", unit="image"):
        # cv2.imread decodes the whole image and returns None (no exception)
        # when the file cannot be read or decoded.
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read image {image_path}. Skipping.")
            continue

        height, width = img.shape[:2]

        # Calculate aspect ratio (width / height)
        aspect_ratio = width / height if height > 0 else 0

        # Store for overall statistics
        aspect_ratios.append(aspect_ratio)
        widths.append(width)
        heights.append(height)

        # Get the relative folder path
        rel_folder = os.path.relpath(os.path.dirname(image_path), input_folder)
        if rel_folder == '.':
            rel_folder = 'root'

        # Update folder statistics
        per_folder = folder_stats[rel_folder]
        per_folder['total'] += 1
        per_folder['widths'].append(width)
        per_folder['heights'].append(height)
        per_folder['aspect_ratios'].append(aspect_ratio)

        # For a square image, aspect ratio should be close to 1.0
        is_square = abs(aspect_ratio - 1.0) <= aspect_ratio_threshold
        classified.append((image_path, is_square))

        # Update counters for classification
        if is_square:
            square_count += 1
            per_folder['square'] += 1
            if first_square is None and visualize:
                first_square = (img, image_path, width, height, aspect_ratio)
        else:
            lopsided_count += 1
            per_folder['lopsided'] += 1
            if first_lopsided is None and visualize:
                first_lopsided = (img, image_path, width, height, aspect_ratio)

    processed_images = len(aspect_ratios)
    skipped_images = total_images - processed_images

    # Overall statistics; everything stays 0 if every file was unreadable
    if aspect_ratios:
        min_width, max_width = min(widths), max(widths)
        min_height, max_height = min(heights), max(heights)
        min_aspect_ratio, max_aspect_ratio = min(aspect_ratios), max(aspect_ratios)
        avg_width = np.mean(widths)
        avg_height = np.mean(heights)
        avg_aspect_ratio = np.mean(aspect_ratios)
        median_aspect_ratio = np.median(aspect_ratios)
        max_deviation = max(abs(ar - 1.0) for ar in aspect_ratios)
    else:
        min_width = max_width = avg_width = 0
        min_height = max_height = avg_height = 0
        min_aspect_ratio = max_aspect_ratio = 0
        avg_aspect_ratio = median_aspect_ratio = 0
        max_deviation = 0

    # Print detailed overall statistics (percentages are relative to readable images)
    print("\n===== OVERALL IMAGE DIMENSION ANALYSIS =====")
    print(f"Total images processed: {processed_images}")
    if skipped_images:
        print(f"Unreadable images skipped: {skipped_images}")
    print(f"Square images: {square_count} ({percent(square_count, processed_images):.2f}%)")
    print(f"Lopsided images: {lopsided_count} ({percent(lopsided_count, processed_images):.2f}%)")

    print("\nDimension Statistics:")
    print(f"Width: min={min_width}, max={max_width}, avg={avg_width:.2f}")
    print(f"Height: min={min_height}, max={max_height}, avg={avg_height:.2f}")
    print(f"Aspect Ratio (width/height): min={min_aspect_ratio:.4f}, max={max_aspect_ratio:.4f}")
    print(f"Average aspect ratio: {avg_aspect_ratio:.4f}")
    print(f"Median aspect ratio: {median_aspect_ratio:.4f}")
    print(f"Maximum deviation from square: {max_deviation:.4f}")

    # Print folder-specific statistics. Folder-local names are used on purpose so the
    # dataset-wide square_count / lopsided_count returned below are not overwritten.
    print("\n===== FOLDER-SPECIFIC STATISTICS =====")
    for folder, stats in sorted(folder_stats.items()):
        folder_total = stats['total']
        folder_square = stats['square']
        folder_lopsided = stats['lopsided']

        folder_widths = stats['widths']
        folder_heights = stats['heights']
        folder_aspect_ratios = stats['aspect_ratios']

        if not folder_widths:  # Only report folders that have images
            continue

        print(f"\nFolder: {folder}")
        print(f"  Image Distribution: Total={folder_total}, "
              f"Square={folder_square} ({percent(folder_square, folder_total):.2f}%), "
              f"Lopsided={folder_lopsided} ({percent(folder_lopsided, folder_total):.2f}%)")
        print("  Dimension Statistics:")
        print(f"    Width: min={min(folder_widths)}, max={max(folder_widths)}, "
              f"avg={np.mean(folder_widths):.2f}")
        print(f"    Height: min={min(folder_heights)}, max={max(folder_heights)}, "
              f"avg={np.mean(folder_heights):.2f}")
        print(f"    Aspect Ratio: min={min(folder_aspect_ratios):.4f}, max={max(folder_aspect_ratios):.4f}, "
              f"avg={np.mean(folder_aspect_ratios):.4f}, median={np.median(folder_aspect_ratios):.4f}")

    # Second pass: Move files to their destination, reusing the classification
    # recorded above (unreadable files were never recorded and stay in place)
    print("\nMoving images to respective folders...")
    for image_path, is_square in tqdm(classified, desc="Moving images", unit="image"):
        # Determine destination path, preserving the sub-path below input_folder
        rel_path = os.path.relpath(image_path, input_folder)
        dest_folder = square_folder if is_square else lopsided_folder
        dest_path = os.path.join(dest_folder, rel_path)

        try:
            # Create subdirectory if needed
            os.makedirs(os.path.dirname(dest_path), exist_ok=True)

            # Move the image instead of copying
            shutil.move(image_path, dest_path)
        except OSError as e:
            # Best effort: report the failure and continue with the next file
            print(f"Error processing {image_path}: {str(e)}")

    # Create visualizations
    if visualize and aspect_ratios:
        plt.figure(figsize=(15, 12))

        # 1. Histogram of aspect ratios
        plt.subplot(2, 2, 1)
        plt.hist(aspect_ratios, bins=50, alpha=0.75)
        plt.axvline(x=1.0, color='r', linestyle='--', label='Square (1.0)')
        plt.axvline(x=1.0+aspect_ratio_threshold, color='g', linestyle='--',
                   label=f'Threshold ({1.0+aspect_ratio_threshold:.2f})')
        plt.axvline(x=1.0-aspect_ratio_threshold, color='g', linestyle='--')
        plt.title('Distribution of Aspect Ratios')
        plt.xlabel('Aspect Ratio (width/height)')
        plt.ylabel('Count')
        plt.legend()

        # 2. Enhanced scatter plot of width vs height with reference lines
        plt.subplot(2, 2, 2)
        plt.scatter(widths, heights, alpha=0.5, s=3)

        # Square reference line
        plt.plot([0, max_width], [0, max_width], 'r--', label='Square (w=h)')

        # Average width and height lines
        plt.axvline(x=avg_width, color='g', linestyle=':', label=f'Avg Width ({avg_width:.1f}px)')
        plt.axhline(y=avg_height, color='g', linestyle=':', label=f'Avg Height ({avg_height:.1f}px)')

        # Maximum width and height lines
        plt.axvline(x=max_width, color='orange', linestyle=':', label=f'Max Width ({max_width}px)')
        plt.axhline(y=max_height, color='orange', linestyle=':', label=f'Max Height ({max_height}px)')

        # Minimum width and height lines
        plt.axvline(x=min_width, color='purple', linestyle=':', label=f'Min Width ({min_width}px)')
        plt.axhline(y=min_height, color='purple', linestyle=':', label=f'Min Height ({min_height}px)')

        plt.title('Width vs Height with Reference Lines')
        plt.xlabel('Width (pixels)')
        plt.ylabel('Height (pixels)')
        plt.legend(loc='upper left', fontsize='small')

        # 3. Example images: each one is drawn when available, so a dataset that
        #    contains only one class still shows its example. The pixel data was
        #    kept in memory during the first pass, so the move above does not matter.
        examples = (("Square", first_square, 3), ("Lopsided", first_lopsided, 4))
        for label, example, position in examples:
            if example is None:
                continue
            ex_img, ex_path, ex_width, ex_height, ex_ratio = example
            plt.subplot(2, 2, position)
            # OpenCV loads images as BGR; matplotlib expects RGB
            plt.imshow(cv2.cvtColor(ex_img, cv2.COLOR_BGR2RGB))
            plt.title(f"{label} Example: {os.path.basename(ex_path)}\n"
                      f"{ex_width}x{ex_height} (AR: {ex_ratio:.2f})")
            plt.axis('off')

        plt.tight_layout()
        plt.show()

    return {
        'total_images': total_images,
        'processed_images': processed_images,
        'square_count': square_count,
        'lopsided_count': lopsided_count,
        'aspect_ratio_stats': {
            'min': min_aspect_ratio,
            'max': max_aspect_ratio,
            'avg': avg_aspect_ratio,
            'median': median_aspect_ratio
        },
        'dimension_stats': {
            'width': {'min': min_width, 'max': max_width, 'avg': avg_width},
            'height': {'min': min_height, 'max': max_height, 'avg': avg_height}
        },
        'folder_stats': dict(folder_stats)
    }

# Define input and output folders - replace these with your actual paths
# NOTE: the call at the bottom of this cell runs immediately and MOVES every
# readable image out of input_folder into output_folder/square or output_folder/lopsided.
input_folder = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\vet_images_sliced\\TrainingStepSet_automated-labels_T_full-size_150-pc" 
output_folder = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\vet_images_sliced\\TrainingStepSet_automated-labels_T_full-size_150-pc_categorized" 

# Example usage:
# Run with default threshold (images with aspect ratio between 0.8 and 1.2 are considered square)
# results = analyze_and_separate_images(input_folder, output_folder)

# For more lenient threshold (images with aspect ratio between 0.7 and 1.3 are considered square)
# results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.3)

# For stricter threshold (images with aspect ratio between 0.9 and 1.1 are considered square)
# results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.1)

# Very permissive threshold: with aspect_ratio_threshold=1 the check
# abs(width/height - 1.0) <= 1 accepts every ratio from 0.0 to 2.0, so all
# portrait images and landscape images up to 2:1 are classified as "square"
results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=1)

In [None]:
### ... ~ version 01 C

In [None]:
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob
from collections import defaultdict

def analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.2, visualize=True):
    """
    Analyzes image dimensions and separates images based on their aspect ratio.
    
    Parameters:
    - input_folder: Path to the folder containing input images
    - output_folder: Path to the folder where images will be moved
    - aspect_ratio_threshold: Maximum allowed deviation from square (1.0)
                             e.g., 0.2 means aspect ratios between 0.8 and 1.2 are considered "square"
    - visualize: Whether to show visualizations of aspect ratios and examples
    
    Returns:
    - Dictionary with statistics about the dataset
    """
    # Create output directories
    square_folder = os.path.join(output_folder, "square")
    lopsided_folder = os.path.join(output_folder, "lopsided")
    
    os.makedirs(square_folder, exist_ok=True)
    os.makedirs(lopsided_folder, exist_ok=True)
    
    # Find all image files in the input folder and its subfolders
    image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.bmp', '*.tif', '*.tiff']
    image_paths = []
    
    for ext in image_extensions:
        image_paths.extend(glob.glob(os.path.join(input_folder, '**', ext), recursive=True))
    
    # Initialize statistics and data collection
    total_images = len(image_paths)
    square_count = 0
    lopsided_count = 0
    
    # Collect aspect ratios for analysis (both h/w and w/h)
    hw_aspect_ratios = []  # height/width
    wh_aspect_ratios = []  # width/height
    widths = []
    heights = []
    
    # Additional statistics for detailed analysis
    max_width = 0
    min_width = float('inf')
    max_height = 0
    min_height = float('inf')
    max_hw_aspect_ratio = 0
    min_hw_aspect_ratio = float('inf')
    max_wh_aspect_ratio = 0
    min_wh_aspect_ratio = float('inf')
    
    # Track subfolder statistics including dimension stats
    folder_stats = defaultdict(lambda: {
        'total': 0, 
        'square': 0, 
        'lopsided': 0,
        'widths': [],
        'heights': [],
        'hw_aspect_ratios': [],
        'wh_aspect_ratios': []
    })
    
    # Count child folders
    child_folders = set()
    for path in image_paths:
        rel_path = os.path.relpath(os.path.dirname(path), input_folder)
        if rel_path != '.':
            child_folders.add(rel_path)
    
    print(f"Found {total_images} images in {len(child_folders)} child folders.")
    
    # Store example images for visualization
    first_square = None
    first_lopsided = None
    
    # First pass: Collect all statistics without moving files
    print("Analyzing image dimensions...")
    for image_path in tqdm(image_paths, desc="Analyzing images", unit="image"):
        # Read the image dimensions
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read image {image_path}. Skipping.")
            continue
        
        height, width = img.shape[:2]
        
        # Calculate both aspect ratios
        hw_ratio = height / width if width > 0 else float('inf')  # height to width
        wh_ratio = width / height if height > 0 else float('inf')  # width to height
        
        # Store for overall statistics
        hw_aspect_ratios.append(hw_ratio)
        wh_aspect_ratios.append(wh_ratio)
        widths.append(width)
        heights.append(height)
        
        # Update min/max statistics
        max_width = max(max_width, width)
        min_width = min(min_width, width)
        max_height = max(max_height, height)
        min_height = min(min_height, height)
        max_hw_aspect_ratio = max(max_hw_aspect_ratio, hw_ratio)
        min_hw_aspect_ratio = min(min_hw_aspect_ratio, hw_ratio)
        max_wh_aspect_ratio = max(max_wh_aspect_ratio, wh_ratio)
        min_wh_aspect_ratio = min(min_wh_aspect_ratio, wh_ratio)
        
        # Get the relative folder path
        rel_folder = os.path.relpath(os.path.dirname(image_path), input_folder)
        if rel_folder == '.':
            rel_folder = 'root'
            
        # Update folder statistics
        folder_stats[rel_folder]['total'] += 1
        folder_stats[rel_folder]['widths'].append(width)
        folder_stats[rel_folder]['heights'].append(height)
        folder_stats[rel_folder]['hw_aspect_ratios'].append(hw_ratio)
        folder_stats[rel_folder]['wh_aspect_ratios'].append(wh_ratio)
        
        # Determine if image is square or lopsided based on BOTH aspect ratios
        # For a square image, both h/w and w/h should be close to 1.0
        hw_deviation = abs(hw_ratio - 1.0)
        wh_deviation = abs(wh_ratio - 1.0)
        
        # Image is square only if BOTH aspect ratios are within threshold
        is_square = max(hw_deviation, wh_deviation) <= aspect_ratio_threshold
        
        # Update counters for classification
        if is_square:
            square_count += 1
            folder_stats[rel_folder]['square'] += 1
            if first_square is None and visualize:
                first_square = (img, image_path, width, height, hw_ratio, wh_ratio)
        else:
            lopsided_count += 1
            folder_stats[rel_folder]['lopsided'] += 1
            if first_lopsided is None and visualize:
                first_lopsided = (img, image_path, width, height, hw_ratio, wh_ratio)
    
    # Calculate additional overall statistics
    avg_width = np.mean(widths) if widths else 0
    avg_height = np.mean(heights) if heights else 0
    avg_hw_aspect_ratio = np.mean(hw_aspect_ratios) if hw_aspect_ratios else 0
    avg_wh_aspect_ratio = np.mean(wh_aspect_ratios) if wh_aspect_ratios else 0
    median_hw_aspect_ratio = np.median(hw_aspect_ratios) if hw_aspect_ratios else 0
    median_wh_aspect_ratio = np.median(wh_aspect_ratios) if wh_aspect_ratios else 0
    
    # Print detailed overall statistics
    print(f"\n===== OVERALL IMAGE DIMENSION ANALYSIS =====")
    print(f"Total images processed: {total_images}")
    print(f"Square images: {square_count} ({square_count/total_images*100:.2f}%)")
    print(f"Lopsided images: {lopsided_count} ({lopsided_count/total_images*100:.2f}%)")
    
    print(f"\nDimension Statistics:")
    print(f"Width: min={min_width}, max={max_width}, avg={avg_width:.2f}")
    print(f"Height: min={min_height}, max={max_height}, avg={avg_height:.2f}")
    print(f"Height/Width Ratio: min={min_hw_aspect_ratio:.4f}, max={max_hw_aspect_ratio:.4f}, "
          f"avg={avg_hw_aspect_ratio:.4f}, median={median_hw_aspect_ratio:.4f}")
    print(f"Width/Height Ratio: min={min_wh_aspect_ratio:.4f}, max={max_wh_aspect_ratio:.4f}, "
          f"avg={avg_wh_aspect_ratio:.4f}, median={median_wh_aspect_ratio:.4f}")
    print(f"Maximum H/W deviation from square: {max(abs(ar - 1.0) for ar in hw_aspect_ratios):.4f}")
    print(f"Maximum W/H deviation from square: {max(abs(ar - 1.0) for ar in wh_aspect_ratios):.4f}")
    
    # Print folder-specific statistics
    print(f"\n===== FOLDER-SPECIFIC STATISTICS =====")
    for folder, stats in sorted(folder_stats.items()):
        square_count = stats['square']
        lopsided_count = stats['lopsided']
        total = stats['total']
        
        # Calculate folder-specific dimension statistics
        folder_widths = stats['widths']
        folder_heights = stats['heights']
        folder_hw_ratios = stats['hw_aspect_ratios']
        folder_wh_ratios = stats['wh_aspect_ratios']
        
        if folder_widths:  # Only process if folder has images
            folder_min_width = min(folder_widths)
            folder_max_width = max(folder_widths)
            folder_avg_width = np.mean(folder_widths)
            
            folder_min_height = min(folder_heights)
            folder_max_height = max(folder_heights)
            folder_avg_height = np.mean(folder_heights)
            
            folder_min_hw = min(folder_hw_ratios)
            folder_max_hw = max(folder_hw_ratios)
            folder_avg_hw = np.mean(folder_hw_ratios)
            folder_median_hw = np.median(folder_hw_ratios)
            
            folder_min_wh = min(folder_wh_ratios)
            folder_max_wh = max(folder_wh_ratios)
            folder_avg_wh = np.mean(folder_wh_ratios)
            folder_median_wh = np.median(folder_wh_ratios)
            
            print(f"\nFolder: {folder}")
            print(f"  Image Distribution: Total={total}, Square={square_count} ({square_count/total*100:.2f}%), "
                  f"Lopsided={lopsided_count} ({lopsided_count/total*100:.2f}%)")
            print(f"  Dimension Statistics:")
            print(f"    Width: min={folder_min_width}, max={folder_max_width}, avg={folder_avg_width:.2f}")
            print(f"    Height: min={folder_min_height}, max={folder_max_height}, avg={folder_avg_height:.2f}")
            print(f"    Height/Width Ratio: min={folder_min_hw:.4f}, max={folder_max_hw:.4f}, "
                  f"avg={folder_avg_hw:.4f}, median={folder_median_hw:.4f}")
            print(f"    Width/Height Ratio: min={folder_min_wh:.4f}, max={folder_max_wh:.4f}, "
                  f"avg={folder_avg_wh:.4f}, median={folder_median_wh:.4f}")
    
    # Second pass: Move files to their destination
    print("\nMoving images to respective folders...")
    for image_path in tqdm(image_paths, desc="Moving images", unit="image"):
        try:
            img = cv2.imread(image_path)
            if img is None:
                continue  # Skip already processed in analysis phase
            
            height, width = img.shape[:2]
            
            # Calculate both aspect ratios
            hw_ratio = height / width if width > 0 else float('inf')
            wh_ratio = width / height if height > 0 else float('inf')
            
            # Check both deviations
            hw_deviation = abs(hw_ratio - 1.0)
            wh_deviation = abs(wh_ratio - 1.0)
            
            # Image is square only if BOTH aspect ratios are within threshold
            is_square = max(hw_deviation, wh_deviation) <= aspect_ratio_threshold
            
            # Determine destination path
            rel_path = os.path.relpath(image_path, input_folder)
            dest_folder = square_folder if is_square else lopsided_folder
            dest_path = os.path.join(dest_folder, rel_path)
            
            # Create subdirectory if needed
            os.makedirs(os.path.dirname(dest_path), exist_ok=True)
            
            # Move the image instead of copying
            shutil.move(image_path, dest_path)
        except Exception as e:
            print(f"Error processing {image_path}: {str(e)}")
    
    # Create enhanced visualizations
    if visualize and hw_aspect_ratios and wh_aspect_ratios:
        plt.figure(figsize=(16, 14))
        
        # 1. Histogram of height/width aspect ratios
        plt.subplot(3, 2, 1)
        plt.hist(hw_aspect_ratios, bins=50, alpha=0.75)
        plt.axvline(x=1.0, color='r', linestyle='--', label='Square (1.0)')
        plt.axvline(x=1.0+aspect_ratio_threshold, color='g', linestyle='--', 
                   label=f'Upper Threshold ({1.0+aspect_ratio_threshold:.2f})')
        plt.axvline(x=1.0-aspect_ratio_threshold, color='g', linestyle='--',
                   label=f'Lower Threshold ({1.0-aspect_ratio_threshold:.2f})')
        plt.title('Distribution of Height/Width Ratios')
        plt.xlabel('Height/Width Ratio')
        plt.ylabel('Count')
        plt.legend()
        
        # 2. Histogram of width/height aspect ratios
        plt.subplot(3, 2, 2)
        plt.hist(wh_aspect_ratios, bins=50, alpha=0.75)
        plt.axvline(x=1.0, color='r', linestyle='--', label='Square (1.0)')
        plt.axvline(x=1.0+aspect_ratio_threshold, color='g', linestyle='--', 
                   label=f'Upper Threshold ({1.0+aspect_ratio_threshold:.2f})')
        plt.axvline(x=1.0-aspect_ratio_threshold, color='g', linestyle='--',
                   label=f'Lower Threshold ({1.0-aspect_ratio_threshold:.2f})')
        plt.title('Distribution of Width/Height Ratios')
        plt.xlabel('Width/Height Ratio')
        plt.ylabel('Count')
        plt.legend()
        
        # 3. Enhanced scatter plot of width vs height with reference lines
        plt.subplot(3, 2, 3)
        plt.scatter(widths, heights, alpha=0.5, s=3)
        
        # Square reference line
        plt.plot([0, max(widths)], [0, max(widths)], 'r--', label='Square (w=h)')
        
        # Average width and height lines
        plt.axvline(x=avg_width, color='g', linestyle=':', label=f'Avg Width ({avg_width:.1f}px)')
        plt.axhline(y=avg_height, color='g', linestyle=':', label=f'Avg Height ({avg_height:.1f}px)')
        
        # Maximum width and height lines
        plt.axvline(x=max_width, color='orange', linestyle=':', label=f'Max Width ({max_width}px)')
        plt.axhline(y=max_height, color='orange', linestyle=':', label=f'Max Height ({max_height}px)')
        
        # Minimum width and height lines
        plt.axvline(x=min_width, color='purple', linestyle=':', label=f'Min Width ({min_width}px)')
        plt.axhline(y=min_height, color='purple', linestyle=':', label=f'Min Height ({min_height}px)')
        
        plt.title('Width vs Height with Reference Lines')
        plt.xlabel('Width (pixels)')
        plt.ylabel('Height (pixels)')
        plt.legend(loc='upper left', fontsize='small')
        
        # 4. Scatter plot of height/width vs width/height ratios
        plt.subplot(3, 2, 4)
        plt.scatter(hw_aspect_ratios, wh_aspect_ratios, alpha=0.5, s=3)
        
        # Add threshold box
        plt.axvline(x=1.0-aspect_ratio_threshold, color='g', linestyle='--')
        plt.axvline(x=1.0+aspect_ratio_threshold, color='g', linestyle='--')
        plt.axhline(y=1.0-aspect_ratio_threshold, color='g', linestyle='--')
        plt.axhline(y=1.0+aspect_ratio_threshold, color='g', linestyle='--')
        
        # Add center point
        plt.plot(1.0, 1.0, 'ro', label='Square (1.0, 1.0)')
        
        plt.title('Height/Width vs Width/Height Ratios')
        plt.xlabel('Height/Width Ratio')
        plt.ylabel('Width/Height Ratio')
        plt.legend()
        
        # 5 & 6. Example images if available
        if first_square is not None and first_lopsided is not None:
            plt.subplot(3, 2, 5)
            plt.imshow(cv2.cvtColor(first_square[0], cv2.COLOR_BGR2RGB))
            plt.title(f"Square Example: {os.path.basename(first_square[1])}\n"
                      f"{first_square[2]}x{first_square[3]} (H/W: {first_square[4]:.2f}, W/H: {first_square[5]:.2f})")
            plt.axis('off')
            
            plt.subplot(3, 2, 6)
            plt.imshow(cv2.cvtColor(first_lopsided[0], cv2.COLOR_BGR2RGB))
            plt.title(f"Lopsided Example: {os.path.basename(first_lopsided[1])}\n"
                      f"{first_lopsided[2]}x{first_lopsided[3]} (H/W: {first_lopsided[4]:.2f}, W/H: {first_lopsided[5]:.2f})")
            plt.axis('off')
        
        plt.tight_layout()
        plt.show()
    
    return {
        'total_images': total_images,
        'square_count': square_count,
        'lopsided_count': lopsided_count,
        'aspect_ratio_stats': {
            'hw': {'min': min_hw_aspect_ratio, 'max': max_hw_aspect_ratio, 
                   'avg': avg_hw_aspect_ratio, 'median': median_hw_aspect_ratio},
            'wh': {'min': min_wh_aspect_ratio, 'max': max_wh_aspect_ratio, 
                   'avg': avg_wh_aspect_ratio, 'median': median_wh_aspect_ratio}
        },
        'dimension_stats': {
            'width': {'min': min_width, 'max': max_width, 'avg': avg_width},
            'height': {'min': min_height, 'max': max_height, 'avg': avg_height}
        },
        'folder_stats': dict(folder_stats)
    }

# Define input and output folders (absolute paths on the author's Windows machine).
# NOTE: the call at the bottom MOVES the image files out of input_folder into
# output_folder/square and output_folder/lopsided; it does not copy them.
input_folder = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\vet_images_sliced\\TrainingStepSet_automated-labels_T_full-size_150-pc_undivided\\all-classes-mixed"
output_folder = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\task-05_dataset\\vet_images_sliced\\TrainingStepSet_automated-labels_T_full-size_150-pc_undivided_categorized\\all-classes-mixed"

# Example usage:
# For balanced classification (images with both H/W and W/H ratios between 0.8 and 1.2)
# results = analyze_and_separate_images(input_folder, output_folder)

# For stricter threshold (images with both H/W and W/H ratios between 0.9 and 1.1)
# results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.1)

# For more lenient threshold (images with both H/W and W/H ratios between 0.7 and 1.3)
# results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.3)

# For practical threshold: a threshold of 1.0 accepts every image whose H/W and W/H
# ratios both lie between 0.5 and 2.0, i.e. whose longer side is at most twice the shorter side.
results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=1.0)

In [None]:
### ... ~ version alt

In [None]:
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob
from collections import defaultdict

def _summarize_values(values):
    """Return min/max/avg/median of *values*; every entry is 0 when *values* is empty."""
    if not values:
        return {'min': 0, 'max': 0, 'avg': 0, 'median': 0}
    return {
        'min': min(values),
        'max': max(values),
        'avg': np.mean(values),
        'median': np.median(values),
    }


def _build_analysis_results(total_images, square_count, lopsided_count,
                            width_stats, height_stats, hw_stats, wh_stats, folder_stats):
    """Assemble the statistics dictionary returned by analyze_and_separate_images."""
    dimension_keys = ('min', 'max', 'avg')
    return {
        'total_images': total_images,
        'square_count': square_count,
        'lopsided_count': lopsided_count,
        'aspect_ratio_stats': {
            'hw': dict(hw_stats),
            'wh': dict(wh_stats)
        },
        'dimension_stats': {
            'width': {key: width_stats[key] for key in dimension_keys},
            'height': {key: height_stats[key] for key in dimension_keys}
        },
        'folder_stats': dict(folder_stats)
    }


def _plot_dimension_analysis(widths, heights, hw_aspect_ratios, wh_aspect_ratios,
                             aspect_ratio_threshold, first_square, first_lopsided):
    """Draw the 3x2 figure: ratio histograms, dimension scatter plots and example images."""
    lower = 1.0 - aspect_ratio_threshold
    upper = 1.0 + aspect_ratio_threshold

    plt.figure(figsize=(16, 14))

    # 1 & 2. Histograms of the height/width and width/height ratios
    histograms = (
        (1, hw_aspect_ratios, 'Height/Width'),
        (2, wh_aspect_ratios, 'Width/Height'),
    )
    for position, ratios, ratio_name in histograms:
        plt.subplot(3, 2, position)
        plt.hist(ratios, bins=50, alpha=0.75)
        plt.axvline(x=1.0, color='r', linestyle='--', label='Square (1.0)')
        plt.axvline(x=upper, color='g', linestyle='--',
                    label=f'Upper Threshold ({upper:.2f})')
        plt.axvline(x=lower, color='g', linestyle='--',
                    label=f'Lower Threshold ({lower:.2f})')
        plt.title(f'Distribution of {ratio_name} Ratios')
        plt.xlabel(f'{ratio_name} Ratio')
        plt.ylabel('Count')
        plt.legend()

    # 3. Scatter plot of width vs height with reference lines
    min_width, max_width, avg_width = min(widths), max(widths), np.mean(widths)
    min_height, max_height, avg_height = min(heights), max(heights), np.mean(heights)

    plt.subplot(3, 2, 3)
    plt.scatter(widths, heights, alpha=0.5, s=3)
    plt.plot([0, max_width], [0, max_width], 'r--', label='Square (w=h)')
    plt.axvline(x=avg_width, color='g', linestyle=':', label=f'Avg Width ({avg_width:.1f}px)')
    plt.axhline(y=avg_height, color='g', linestyle=':', label=f'Avg Height ({avg_height:.1f}px)')
    plt.axvline(x=max_width, color='orange', linestyle=':', label=f'Max Width ({max_width}px)')
    plt.axhline(y=max_height, color='orange', linestyle=':', label=f'Max Height ({max_height}px)')
    plt.axvline(x=min_width, color='purple', linestyle=':', label=f'Min Width ({min_width}px)')
    plt.axhline(y=min_height, color='purple', linestyle=':', label=f'Min Height ({min_height}px)')
    plt.title('Width vs Height with Reference Lines')
    plt.xlabel('Width (pixels)')
    plt.ylabel('Height (pixels)')
    plt.legend(loc='upper left', fontsize='small')

    # 4. Scatter plot of height/width vs width/height ratios with the threshold box
    plt.subplot(3, 2, 4)
    plt.scatter(hw_aspect_ratios, wh_aspect_ratios, alpha=0.5, s=3)
    plt.axvline(x=lower, color='g', linestyle='--')
    plt.axvline(x=upper, color='g', linestyle='--')
    plt.axhline(y=lower, color='g', linestyle='--')
    plt.axhline(y=upper, color='g', linestyle='--')
    plt.plot(1.0, 1.0, 'ro', label='Square (1.0, 1.0)')
    plt.title('Height/Width vs Width/Height Ratios')
    plt.xlabel('Height/Width Ratio')
    plt.ylabel('Width/Height Ratio')
    plt.legend()

    # 5 & 6. Example images, shown only when one of each class was seen
    if first_square is not None and first_lopsided is not None:
        examples = ((5, 'Square', first_square), (6, 'Lopsided', first_lopsided))
        for position, kind, (img, path, width, height, hw_ratio, wh_ratio) in examples:
            plt.subplot(3, 2, position)
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.title(f"{kind} Example: {os.path.basename(path)}\n"
                      f"{width}x{height} (H/W: {hw_ratio:.2f}, W/H: {wh_ratio:.2f})")
            plt.axis('off')

    plt.tight_layout()
    plt.show()


def analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.2, visualize=True):
    """
    Analyzes image dimensions and separates images based on their aspect ratio.

    Every image found under input_folder (recursively) is read once, classified as
    "square" or "lopsided", and then MOVED (not copied) to
    output_folder/square/<relative path> or output_folder/lopsided/<relative path>.
    An image is "square" when both |height/width - 1| and |width/height - 1| are
    at most aspect_ratio_threshold.

    Parameters:
    - input_folder: Path to the folder containing input images
    - output_folder: Path to the folder where images will be moved
    - aspect_ratio_threshold: Maximum allowed deviation from square (1.0)
                             e.g., 0.2 means aspect ratios between 0.8 and 1.2 are considered "square"
    - visualize: Whether to show visualizations of aspect ratios and examples

    Returns:
    - Dictionary with statistics about the dataset:
      'total_images' (number of matching files found, readable or not),
      'square_count' / 'lopsided_count' (dataset-wide totals),
      'aspect_ratio_stats', 'dimension_stats' and per-folder 'folder_stats'.
      When no image could be read, all min/max/avg/median values are 0.
    """
    # Create output directories
    square_folder = os.path.join(output_folder, "square")
    lopsided_folder = os.path.join(output_folder, "lopsided")

    os.makedirs(square_folder, exist_ok=True)
    os.makedirs(lopsided_folder, exist_ok=True)

    # Find all image files in the input folder and its subfolders
    image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.bmp', '*.tif', '*.tiff']
    image_paths = []

    for ext in image_extensions:
        image_paths.extend(glob.glob(os.path.join(input_folder, '**', ext), recursive=True))

    # Initialize statistics and data collection
    total_images = len(image_paths)
    square_count = 0
    lopsided_count = 0

    # Collect aspect ratios for analysis (both h/w and w/h)
    hw_aspect_ratios = []  # height/width
    wh_aspect_ratios = []  # width/height
    widths = []
    heights = []

    # Track subfolder statistics including dimension stats
    folder_stats = defaultdict(lambda: {
        'total': 0,
        'square': 0,
        'lopsided': 0,
        'widths': [],
        'heights': [],
        'hw_aspect_ratios': [],
        'wh_aspect_ratios': []
    })

    # Count child folders
    child_folders = set()
    for path in image_paths:
        rel_path = os.path.relpath(os.path.dirname(path), input_folder)
        if rel_path != '.':
            child_folders.add(rel_path)

    print(f"Found {total_images} images in {len(child_folders)} child folders.")

    # Nothing to analyze: return zeroed statistics instead of dividing by zero below
    if not image_paths:
        print("No images found; nothing to analyze or move.")
        empty = _summarize_values([])
        return _build_analysis_results(0, 0, 0, empty, empty, empty, empty, folder_stats)

    # Store example images for visualization
    first_square = None
    first_lopsided = None

    # Classification of every readable image, reused by the move pass so that
    # no image has to be decoded a second time
    move_plan = []  # (image_path, is_square)

    # First pass: Collect all statistics without moving files
    print("Analyzing image dimensions...")
    for image_path in tqdm(image_paths, desc="Analyzing images", unit="image"):
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read image {image_path}. Skipping.")
            continue

        height, width = img.shape[:2]

        # Calculate both aspect ratios
        hw_ratio = height / width if width > 0 else float('inf')  # height to width
        wh_ratio = width / height if height > 0 else float('inf')  # width to height

        # Store for overall statistics
        hw_aspect_ratios.append(hw_ratio)
        wh_aspect_ratios.append(wh_ratio)
        widths.append(width)
        heights.append(height)

        # Get the relative folder path
        rel_folder = os.path.relpath(os.path.dirname(image_path), input_folder)
        if rel_folder == '.':
            rel_folder = 'root'

        # Update folder statistics
        per_folder = folder_stats[rel_folder]
        per_folder['total'] += 1
        per_folder['widths'].append(width)
        per_folder['heights'].append(height)
        per_folder['hw_aspect_ratios'].append(hw_ratio)
        per_folder['wh_aspect_ratios'].append(wh_ratio)

        # Image is square only if BOTH aspect ratios are within threshold
        is_square = max(abs(hw_ratio - 1.0), abs(wh_ratio - 1.0)) <= aspect_ratio_threshold
        move_plan.append((image_path, is_square))

        # Update counters for classification
        if is_square:
            square_count += 1
            per_folder['square'] += 1
            if first_square is None and visualize:
                first_square = (img, image_path, width, height, hw_ratio, wh_ratio)
        else:
            lopsided_count += 1
            per_folder['lopsided'] += 1
            if first_lopsided is None and visualize:
                first_lopsided = (img, image_path, width, height, hw_ratio, wh_ratio)

    # Calculate overall statistics (all zeros when no image could be read)
    width_stats = _summarize_values(widths)
    height_stats = _summarize_values(heights)
    hw_stats = _summarize_values(hw_aspect_ratios)
    wh_stats = _summarize_values(wh_aspect_ratios)

    # Print detailed overall statistics
    print("\n===== OVERALL IMAGE DIMENSION ANALYSIS =====")
    print(f"Total images processed: {total_images}")
    print(f"Square images: {square_count} ({square_count/total_images*100:.2f}%)")
    print(f"Lopsided images: {lopsided_count} ({lopsided_count/total_images*100:.2f}%)")

    print("\nDimension Statistics:")
    print(f"Width: min={width_stats['min']}, max={width_stats['max']}, avg={width_stats['avg']:.2f}")
    print(f"Height: min={height_stats['min']}, max={height_stats['max']}, avg={height_stats['avg']:.2f}")
    print(f"Height/Width Ratio: min={hw_stats['min']:.4f}, max={hw_stats['max']:.4f}, "
          f"avg={hw_stats['avg']:.4f}, median={hw_stats['median']:.4f}")
    print(f"Width/Height Ratio: min={wh_stats['min']:.4f}, max={wh_stats['max']:.4f}, "
          f"avg={wh_stats['avg']:.4f}, median={wh_stats['median']:.4f}")
    # default= keeps these lines from raising when every file was unreadable
    max_hw_deviation = max((abs(ar - 1.0) for ar in hw_aspect_ratios), default=0.0)
    max_wh_deviation = max((abs(ar - 1.0) for ar in wh_aspect_ratios), default=0.0)
    print(f"Maximum H/W deviation from square: {max_hw_deviation:.4f}")
    print(f"Maximum W/H deviation from square: {max_wh_deviation:.4f}")

    # Print folder-specific statistics. The per-folder values use their own
    # names so the dataset-wide square_count / lopsided_count stay intact
    # for the returned dictionary.
    print("\n===== FOLDER-SPECIFIC STATISTICS =====")
    for folder, stats in sorted(folder_stats.items()):
        if not stats['widths']:  # Only process if folder has images
            continue

        folder_total = stats['total']
        folder_square = stats['square']
        folder_lopsided = stats['lopsided']

        folder_width = _summarize_values(stats['widths'])
        folder_height = _summarize_values(stats['heights'])
        folder_hw = _summarize_values(stats['hw_aspect_ratios'])
        folder_wh = _summarize_values(stats['wh_aspect_ratios'])

        print(f"\nFolder: {folder}")
        print(f"  Image Distribution: Total={folder_total}, "
              f"Square={folder_square} ({folder_square/folder_total*100:.2f}%), "
              f"Lopsided={folder_lopsided} ({folder_lopsided/folder_total*100:.2f}%)")
        print("  Dimension Statistics:")
        print(f"    Width: min={folder_width['min']}, max={folder_width['max']}, "
              f"avg={folder_width['avg']:.2f}")
        print(f"    Height: min={folder_height['min']}, max={folder_height['max']}, "
              f"avg={folder_height['avg']:.2f}")
        print(f"    Height/Width Ratio: min={folder_hw['min']:.4f}, max={folder_hw['max']:.4f}, "
              f"avg={folder_hw['avg']:.4f}, median={folder_hw['median']:.4f}")
        print(f"    Width/Height Ratio: min={folder_wh['min']:.4f}, max={folder_wh['max']:.4f}, "
              f"avg={folder_wh['avg']:.4f}, median={folder_wh['median']:.4f}")

    # Second pass: Move files to their destination using the stored classification
    print("\nMoving images to respective folders...")
    for image_path, is_square in tqdm(move_plan, desc="Moving images", unit="image"):
        rel_path = os.path.relpath(image_path, input_folder)
        dest_folder = square_folder if is_square else lopsided_folder
        dest_path = os.path.join(dest_folder, rel_path)
        try:
            # Create subdirectory if needed, then move the image instead of copying
            os.makedirs(os.path.dirname(dest_path), exist_ok=True)
            shutil.move(image_path, dest_path)
        except OSError as e:
            # Best effort: report the failure and keep moving the remaining files
            print(f"Error processing {image_path}: {str(e)}")

    # Create enhanced visualizations
    if visualize and hw_aspect_ratios and wh_aspect_ratios:
        _plot_dimension_analysis(widths, heights, hw_aspect_ratios, wh_aspect_ratios,
                                 aspect_ratio_threshold, first_square, first_lopsided)

    return _build_analysis_results(total_images, square_count, lopsided_count,
                                   width_stats, height_stats, hw_stats, wh_stats, folder_stats)

# Define input and output folders (absolute paths on the author's Windows machine).
# NOTE: the call at the bottom MOVES the image files out of input_folder into
# output_folder/square and output_folder/lopsided; it does not copy them.
input_folder = r"C:\Users\praam\Desktop\havetai+vetcyto\task-05_dataset\vet_images_sliced\TrainingStepSet_automated-labels_T_full-size_150-pc_divided" 
output_folder = r"C:\Users\praam\Desktop\havetai+vetcyto\task-05_dataset\vet_images_sliced\TrainingStepSet_automated-labels_T_full-size_150-pc_divided_categorized" 

# Example usage:
# For balanced classification (images with both H/W and W/H ratios between 0.8 and 1.2)
# results = analyze_and_separate_images(input_folder, output_folder)

# For stricter threshold (images with both H/W and W/H ratios between 0.9 and 1.1)
# results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.1)

# For more lenient threshold (images with both H/W and W/H ratios between 0.7 and 1.3)
# results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=0.3)

# For practical threshold: a threshold of 1.0 accepts every image whose H/W and W/H
# ratios both lie between 0.5 and 2.0, i.e. whose longer side is at most twice the shorter side.
results = analyze_and_separate_images(input_folder, output_folder, aspect_ratio_threshold=1.0)