## Imports

In [1]:
import glob
import json
import multiprocessing
import os
import shutil
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from loguru import logger
from PIL import Image
from tqdm.auto import tqdm
# from ultralytics import YOLO
import albumentations as A

## Variables

In [2]:
# Timestamp shared by every path/name created during this run.
todaysdate = datetime.now().strftime("%Y%m%d_%H%M%S")
#------------------------
# Constants
#------------------------

# Root folder of the dataset produced by this run (timestamped per run).
CREATE_DATASET_PROCESSED_PATH = f"datasets/supervisely/yolo_processed_{todaysdate}"

# NOTE(review): presumably the GPU index handed to the trainer/evaluator — confirm at the call site.
DEVICE = "2"

# Convert binary masks to YOLO format
ANNOTATIONS_BINARY_PNG_PATH = "datasets/supervisely/dataset_processed_20250523-173715/masks"
YOLO_ANNOTATIONS_OUTPUT_PATH = f"{CREATE_DATASET_PROCESSED_PATH}/labels"
TEST_MASK_OUTPUT_PATH = os.path.join(CREATE_DATASET_PROCESSED_PATH, "test_masks")

# Datasets
DATASET_PATH = "datasets/supervisely/341575_free_space_rooftop_geneva_20250511_yolo"
# Fold index -> text file listing the images of that fold.
FOLD_PATHS = {
    0: "datasets/supervisely/dataset_processed_20250523-173715/fold_0_dataset.txt",
    1: "datasets/supervisely/dataset_processed_20250523-173715/fold_1_dataset.txt",
    2: "datasets/supervisely/dataset_processed_20250523-173715/fold_2_dataset.txt",
    3: "datasets/supervisely/dataset_processed_20250523-173715/fold_3_dataset.txt",
    4: "datasets/supervisely/dataset_processed_20250523-173715/fold_4_dataset.txt"
}
TEST_DATASET_TXT_PATH = "datasets/supervisely/dataset_processed_20250523-173715/test_dataset.txt"
IMG_DATASET_PATH = "datasets/supervisely/dataset_processed_20250523-173715/images"

# Augmentations
NUM_AUGMENTATIONS_PER_IMAGE = 10  # Number of augmented versions per original
AUGMENTATION_WORKERS = 8  # Number of parallel workers for augmentation

# Train
# Unlike CREATE_DATASET_PROCESSED_PATH this points at a fixed, previously
# generated dataset, so training does not depend on the current timestamp.
DATASET_PROCESSED_PATH = "datasets/supervisely/yolo_processed_20250618_201019"
MODEL_NAME = "yolo12x-seg.yaml" # yolo11n-seg.pt
OUTPUT_DIR_YOLO = "training_yolo"
PROJECT_NAME = f"yolo_free_space_rooftop_{todaysdate}"
# Single-class problem; shared by the training and evaluation stages.
CLASS_NAMES = ["free_space"]
CUSTOM_PARAMS = {
    'epochs': 5000,
    'batch': 1,
    'imgsz': 1280,
    'patience': 30,
    'lr0': 0.005,
}

# Evaluation (CLASS_NAMES is defined once, in the Train section above)
OUTPUT_EVALUATE_TEST_DIR = os.path.join(OUTPUT_DIR_YOLO, f"auto_cv_evaluation_results_{todaysdate}")
CONF_THRESHOLD = 0.5
IOU_THRESHOLD = 0.7

#------------------------
# Feature switches
#------------------------

CONVERT_BINARY_MASKS_TO_YOLO_FORMAT = True
SPLIT_DATASET = True
APPLY_AUGMENTATION = True
TRAIN_YOLO = False
EVALUATE_YOLO = False

In [3]:
# Create the output folders needed by whichever stages are switched on.
if CONVERT_BINARY_MASKS_TO_YOLO_FORMAT:
    Path(CREATE_DATASET_PROCESSED_PATH).mkdir(parents=True, exist_ok=True)
    Path(YOLO_ANNOTATIONS_OUTPUT_PATH).mkdir(parents=True, exist_ok=True)
    Path(TEST_MASK_OUTPUT_PATH).mkdir(parents=True, exist_ok=True)
if EVALUATE_YOLO:
    Path(OUTPUT_EVALUATE_TEST_DIR).mkdir(parents=True, exist_ok=True)

In [4]:
# Albumentations pipeline meant to be applied to an image together with its
# mask. Each top-level entry is attempted with its own probability `p`; an
# `A.OneOf` group, when it fires, applies a single one of its members.
# NOTE(review): 'mask' is already a standard Compose target name, so the
# `additional_targets` mapping below looks redundant — confirm before removing.
AUGMENTATION_PIPELINE = A.Compose([
    # Basic Geometric
    A.SquareSymmetry(p=0.5),
    # Affine and Perspective
    A.Affine(
        scale=(0.95, 1.05), translate_percent=0.1, rotate=(-45, 45), p=0.6
    ),
    # Blur (at most one of the three kinds, 20% of the time)
    A.OneOf(
        [
            A.GaussianBlur(blur_limit=(3, 7), p=0.5),
            A.MedianBlur(blur_limit=5, p=0.5),
            A.MotionBlur(blur_limit=(3, 7), p=0.5),
        ],
        p=0.2,
    ),
    # Noise (at most one of the four kinds, 20% of the time)
    A.OneOf(
        [
            A.GaussNoise(p=0.5),
            A.ISONoise(
                color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.5
            ),
            A.MultiplicativeNoise(
                multiplier=(0.9, 1.1), per_channel=True, p=0.5
            ),
            A.SaltAndPepper(p=0.5),
        ],
        p=0.2,
    ),
    # Weather effects
    A.RandomSunFlare(p=0.2),
    A.RandomFog(p=0.2),
], additional_targets={'mask': 'mask'})

## Convert binary format to YOLO

In [5]:
def contours_join(parent_contour, child_contour):
    """
    Splice a child (hole) contour into its parent contour.

    The result is one vertex sequence that follows the parent up to the vertex
    closest to the child, walks around the child, returns to that same parent
    vertex and finishes the parent. This lets a "donut" region be written as a
    single polygon whose inside is not detected.
    https://github.com/ultralytics/ultralytics/issues/3085

    Args:
        parent_contour: Outer contour, indexed as contour[i][0] == (x, y).
        child_contour: Inner contour with the same layout.

    Returns:
        np.int32 array containing the merged vertex sequence.
    """
    def _edge_sum_is_negative(points):
        """Orientation test: True when the summed edge terms are negative."""
        total = 0
        count = len(points)
        for position, current in enumerate(points):
            following = points[(position + 1) % count]  # wraps to the first vertex
            total += (following[0][0] - current[0][0]) * (following[0][1] + current[0][1])
        return total < 0

    def _closest_vertex_pair(outer, inner):
        """Return (outer index, inner index) of the closest pair of vertices."""
        best_distance = float('inf')
        outer_idx, inner_idx = 0, 0
        inner_xy = inner[:, 0]
        for candidate_idx, vertex in enumerate(outer):
            # Squared distances from this outer vertex to every inner vertex.
            squared = np.sum((inner_xy - vertex[0]) ** 2, axis=1)
            nearest = np.argmin(squared)
            if squared[nearest] < best_distance:
                best_distance = squared[nearest]
                outer_idx, inner_idx = candidate_idx, nearest
        return outer_idx, inner_idx

    # Force opposite orientations: the parent must pass the orientation test,
    # the child must fail it.
    outer = parent_contour if _edge_sum_is_negative(parent_contour) else parent_contour[::-1]
    inner = child_contour[::-1] if _edge_sum_is_negative(child_contour) else child_contour

    outer_idx, inner_idx = _closest_vertex_pair(outer, inner)

    # parent[..bridge] + child[bridge..] + child[..bridge] + parent[bridge..]
    merged = np.concatenate(
        [
            outer[:outer_idx + 1],
            inner[inner_idx:],
            inner[:inner_idx + 1],
            outer[outer_idx:],
        ],
        axis=0,
    )
    return merged.astype(np.int32)


def group_child_contours_with_parent(hierarchy):
    """
    Group contour indices by their top-level parent.

    Args:
        hierarchy: Hierarchy array as returned by cv2.findContours; any shape
            that flattens to rows of four integers whose last entry is the
            parent index (-1 for a top-level contour). None is accepted and
            yields an empty result.

    Returns:
        {
            parent_key: {
                "parent": parent_key,
                "child": [child_keys]
            }
        }
        Keys and indices are plain Python ints.
    """
    groups = {}
    if hierarchy is None:
        return groups

    # reshape (not squeeze) keeps one row per contour even when there is a
    # single contour; squeeze would collapse (1, 1, 4) to a flat (4,) vector.
    rows = np.asarray(hierarchy).reshape(-1, 4)

    for i, row in enumerate(rows):
        parent_index = int(row[3])
        # A child names its parent; a top-level contour is its own group key.
        key = i if parent_index == -1 else parent_index
        group = groups.setdefault(key, {"parent": key, "child": []})
        if parent_index != -1:
            group["child"].append(i)
    return groups


def convert_mask_to_yolo_seg_label(mask_path, create_test_mask=True):
    """
    Convert a single mask to YOLO segmentation format

    Args:
        mask_path: Path to the mask file
        create_test_mask: Whether to create test mask for verification

    Returns:
        A (label_str, test_mask, error) tuple:
        - label_str: one line per polygon, each starting with the hard-coded
          class id "0" followed by normalized "x y" pairs; "" when nothing
          usable was found.
        - test_mask: uint8 image with the emitted contours filled with 255,
          or None when not requested / nothing was found.
        - error: None on success, otherwise a message. Any exception raised
          inside the function is caught and returned as ("", None, str(e)).
    """
    try:
        label_str = ""
        test_mask = None

        # Read mask
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            return label_str, test_mask, f"Could not read mask from {mask_path}"

        height, width = mask.shape

        # Threshold (optimized for 0/1 masks): every pixel above 0.5 becomes 255
        _, thresh = cv2.threshold(mask, 0.5, 255, cv2.THRESH_BINARY)

        # Find contours; RETR_CCOMP also returns the hierarchy used below to
        # attach hole contours to their parent
        contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

        # No foreground at all: empty label, no error
        if not contours:
            return label_str, test_mask, None

        # Initialize test mask only if needed
        if create_test_mask:
            test_mask = np.zeros((height, width), dtype=np.uint8)

        # Process contours
        if len(contours) > 1 and hierarchy is not None:
            contour_groups = group_child_contours_with_parent(hierarchy)
            for contour_group in contour_groups.values():
                parent_contour = contours[contour_group["parent"]]

                # Join with child contours (each join returns the merged
                # polygon, which becomes the parent for the next child)
                for child in contour_group["child"]:
                    parent_contour = contours_join(parent_contour=parent_contour, child_contour=contours[child])

                # Process contour; an empty result means it was rejected and
                # no label line is written for it
                contour_label = process_contour(parent_contour, width, height)
                if contour_label:
                    label_str += f"0{contour_label}\n"

                # Draw test mask (done even when the label line was skipped)
                if create_test_mask and test_mask is not None:
                    # Wrap the merged contour in a leading axis so it is passed
                    # as a one-element collection of contours
                    parent_contour = np.expand_dims(parent_contour, axis=0)
                    cv2.drawContours(test_mask, parent_contour, -1, 255, -1)
        else:
            # Single contour
            contour_label = process_contour(contours[0], width, height)
            if contour_label:
                label_str += f"0{contour_label}\n"

            if create_test_mask:
                # Re-allocates the mask already created above (redundant but harmless)
                test_mask = np.zeros((height, width), dtype=np.uint8)
                cv2.drawContours(test_mask, [contours[0]], -1, 255, -1)

        label_str = label_str.rstrip()  # Remove last \n
        return label_str, test_mask, None

    except Exception as e:
        # Best-effort: report the failure to the caller instead of raising
        return "", None, str(e)


def process_contour(contour, width, height):
    """
    Turn one contour into the coordinate part of a YOLO segmentation line.

    Args:
        contour: Contour array; singleton axes are squeezed away first.
        width: Image width used to normalize x.
        height: Image height used to normalize y.

    Returns:
        A string of " x y" pairs (each rounded to 6 decimals, with a leading
        space), or "" when the contour has fewer than three points.
    """
    points = contour.squeeze()

    # A lone point squeezes down to a 1-D vector.
    if points.ndim == 1:
        return ""

    vertices = points.tolist()
    if len(vertices) < 3:
        return ""

    # Keep only well-formed [x, y] entries and normalize them.
    return "".join(
        f" {round(float(vertex[0]) / float(width), 6)} {round(float(vertex[1]) / float(height), 6)}"
        for vertex in vertices
        if isinstance(vertex, list) and len(vertex) == 2
    )


def process_single_mask(args):
    """
    Worker entry point: convert one mask file and write its outputs.

    Args:
        args: Tuple (mask_path, yolo_output_path, test_output_path,
            create_test_masks), packed so it can be submitted to an executor.

    Returns:
        (True, stem) on success, otherwise (False, message) where the message
        is either "Error processing ..." or "No valid contours found in ...".
    """
    mask_path, yolo_output_path, test_output_path, create_test_masks = args
    stem = Path(mask_path).stem

    label_str, test_mask, error = convert_mask_to_yolo_seg_label(mask_path, create_test_masks)

    # Bail out early on conversion problems; nothing is written in that case.
    if error:
        return False, f"Error processing {stem}: {error}"
    if not label_str:
        return False, f"No valid contours found in {stem}"

    # YOLO label, named after the mask.
    with open(os.path.join(yolo_output_path, f"{stem}.txt"), 'w') as label_file:
        label_file.write(label_str)

    # Optional verification mask.
    if create_test_masks and test_mask is not None:
        cv2.imwrite(os.path.join(test_output_path, f"{stem}_test.png"), test_mask)

    return True, stem


def batch_convert_masks_to_yolo(ANNOTATIONS_BINARY_PNG_PATH, 
                               YOLO_ANNOTATIONS_OUTPUT_PATH, 
                               TEST_MASK_OUTPUT_PATH,
                               create_test_masks=True,
                               num_workers=None,
                               create_empty_labels=True):
    """
    Convert all mask files in the input directory to YOLO format

    Masks are converted in parallel by `process_single_mask`. Masks that yield
    no usable contour optionally get an empty label file, so the matching
    image is still usable as a sample without annotations.

    Args:
        ANNOTATIONS_BINARY_PNG_PATH: Input directory with mask files
        YOLO_ANNOTATIONS_OUTPUT_PATH: Output directory for YOLO labels
        TEST_MASK_OUTPUT_PATH: Output directory for test masks
        create_test_masks: Whether to create test masks for verification
        num_workers: Number of parallel workers (None = auto-detect)
        create_empty_labels: Whether to create empty label files for images without valid contours

    Returns:
        None. Results are reported through printed output only.
    """

    # Find all mask files
    mask_extensions = ['*.png', '*.jpg', '*.jpeg', '*.bmp', '*.tif', '*.tiff']
    mask_files = []

    print("Searching for mask files...")
    for extension in mask_extensions:
        pattern = os.path.join(ANNOTATIONS_BINARY_PNG_PATH, extension)
        mask_files.extend(glob.glob(pattern))

    if not mask_files:
        print(f"No mask files found in {ANNOTATIONS_BINARY_PNG_PATH}")
        return

    mask_files.sort()  # deterministic submission order
    print(f"Found {len(mask_files)} mask files to process...")

    # Workers open their output files directly, so the folders must exist.
    os.makedirs(YOLO_ANNOTATIONS_OUTPUT_PATH, exist_ok=True)
    if create_test_masks:
        os.makedirs(TEST_MASK_OUTPUT_PATH, exist_ok=True)

    # Set up multiprocessing
    if num_workers is None:
        num_workers = min(multiprocessing.cpu_count(), len(mask_files))

    print(f"Using {num_workers} workers for parallel processing...")

    # Prepare arguments for multiprocessing
    process_args = [
        (mask_path, YOLO_ANNOTATIONS_OUTPUT_PATH, TEST_MASK_OUTPUT_PATH, create_test_masks)
        for mask_path in mask_files
    ]

    # Prefix of the status message process_single_mask returns for a mask
    # without any usable contour.
    no_contour_prefix = "No valid contours found in "

    processed_count = 0
    errors = []            # messages of genuine failures
    empty_mask_stems = []  # masks that only need an empty label file

    # Process files with progress bar
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # Map each future to the mask path it was submitted with, so the file
        # is identified from the submission rather than from message text.
        futures = {
            executor.submit(process_single_mask, args): args[0]
            for args in process_args
        }

        with tqdm(total=len(mask_files), desc="Converting masks", unit="files") as pbar:
            for future in as_completed(futures):
                try:
                    success, message = future.result()
                except Exception as e:
                    errors.append(f"Unexpected error: {str(e)}")
                else:
                    if success:
                        processed_count += 1
                    elif create_empty_labels and message.startswith(no_contour_prefix):
                        empty_mask_stems.append(Path(futures[future]).stem)
                    else:
                        errors.append(message)

                pbar.set_postfix({
                    "Success": processed_count,
                    "Empty": len(empty_mask_stems),
                    "Errors": len(errors),
                })
                pbar.update(1)

    # Create empty label files for images without valid contours
    empty_labels_created = 0
    if empty_mask_stems:
        print(f"\nCreating empty label files for {len(empty_mask_stems)} images without valid contours...")

        for stem in empty_mask_stems:
            empty_label_path = os.path.join(YOLO_ANNOTATIONS_OUTPUT_PATH, f"{stem}.txt")
            try:
                with open(empty_label_path, 'w'):
                    pass  # Empty file
                empty_labels_created += 1
            except OSError as e:
                print(f"Failed to create empty label for {stem}: {e}")
                errors.append(f"Failed to create empty label for {stem}: {e}")

        print(f"Created {empty_labels_created} empty label files")
        processed_count += empty_labels_created  # Update count to include empty labels

    # Print results
    print("\n" + "="*50)
    print("CONVERSION COMPLETED!")
    print("="*50)
    print(f"Successfully processed: {processed_count} files")
    print(f"Errors: {len(errors)} files")
    if empty_labels_created:
        print(f"Empty labels created: {empty_labels_created} files (images without annotations)")
    print(f"Success rate: {processed_count/(len(mask_files))*100:.1f}%")
    print(f"YOLO labels saved to: {YOLO_ANNOTATIONS_OUTPUT_PATH}")
    if create_test_masks:
        print(f"Test masks saved to: {TEST_MASK_OUTPUT_PATH}")

    # Show first few errors if any
    if errors:
        print(f"\nRemaining errors ({len(errors)}):")
        for error in errors[:5]:
            print(f"  - {error}")
        if len(errors) > 5:
            print(f"  ... and {len(errors) - 5} more errors")

    if empty_labels_created:
        print(f"\nNote: {empty_labels_created} images had no valid annotations and got empty label files.")
        print("This is normal for datasets where some images contain no objects of interest.")

In [6]:
def analyze_failed_masks(ANNOTATIONS_BINARY_PNG_PATH, failed_filenames, sample_size=5):
    """
    Print a short diagnosis for masks that produced no YOLO label.

    Args:
        ANNOTATIONS_BINARY_PNG_PATH: Path to mask directory
        failed_filenames: Mask names without extension
        sample_size: How many of them to inspect (taken from the front)
    """
    if not failed_filenames:
        print("No failed masks to analyze")
        return

    print(f"\nAnalyzing {min(sample_size, len(failed_filenames))} failed masks...")

    # Tried in this order; the first existing file wins.
    candidate_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')

    for filename in failed_filenames[:sample_size]:
        mask_path = None
        for ext in candidate_extensions:
            candidate = os.path.join(ANNOTATIONS_BINARY_PNG_PATH, f"{filename}{ext}")
            if os.path.exists(candidate):
                mask_path = candidate
                break

        if mask_path is None:
            print(f"  {filename}: File not found")
            continue

        try:
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask is None:
                print(f"  {filename}: Could not read image")
                continue

            rows, cols = mask.shape
            values_present = np.unique(mask)
            n_foreground = np.sum(mask > 0)
            foreground_share = (n_foreground / (rows * cols)) * 100

            print(f"  {filename}:")
            print(f"    - Size: {cols}x{rows}")
            print(f"    - Unique values: {values_present}")
            print(f"    - Foreground pixels: {n_foreground} ({foreground_share:.2f}%)")

            if n_foreground == 0:
                print("    - Issue: Completely empty mask (no annotations)")
                continue
            if n_foreground < 10:
                print("    - Issue: Very few foreground pixels (likely noise)")
                continue

            # Enough foreground: look at the outer contours instead.
            _, binary = cv2.threshold(mask, 0.5, 255, cv2.THRESH_BINARY)
            found, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if not found:
                print("    - Issue: No contours detected")
                continue

            print(f"    - Contours found: {len(found)}")
            print(f"    - Contour sizes: {[len(c) for c in found]}")
            print("    - Issue: Contours too small (< 3 points) or invalid shape")

        except Exception as e:
            print(f"  {filename}: Error analyzing - {e}")

In [7]:
# Stage 1: convert every binary mask into a YOLO polygon label file.
# Runs only when the stage is switched on in the configuration cell.
if CONVERT_BINARY_MASKS_TO_YOLO_FORMAT:
    print("Converting binary masks to YOLO format...")
    batch_convert_masks_to_yolo(
        ANNOTATIONS_BINARY_PNG_PATH=ANNOTATIONS_BINARY_PNG_PATH,
        YOLO_ANNOTATIONS_OUTPUT_PATH=YOLO_ANNOTATIONS_OUTPUT_PATH,
        TEST_MASK_OUTPUT_PATH=TEST_MASK_OUTPUT_PATH,
        create_test_masks=True,
        num_workers=16,  # fixed pool size for this run
        create_empty_labels=True,  # masks without contours get an empty .txt
    )

Converting binary masks to YOLO format...
Searching for mask files...
Found 530 mask files to process...
Using 16 workers for parallel processing...


Converting masks:   0%|          | 0/530 [00:00<?, ?files/s]


Creating empty label files for 46 images without valid contours...
Created 46 empty label files

CONVERSION COMPLETED!
Successfully processed: 530 files
Errors: 0 files
Empty labels created: 46 files (images without annotations)
Success rate: 100.0%
YOLO labels saved to: datasets/supervisely/yolo_processed_20250619_151249/labels
Test masks saved to: datasets/supervisely/yolo_processed_20250619_151249/test_masks

Note: 46 images had no valid annotations and got empty label files.
This is normal for datasets where some images contain no objects of interest.


In [8]:
# Uncomment the lines below if you want to investigate the failed conversions
# failed_files = [
#     "24991113_tile_1_3_14c592", "25001124_tile_18_16_c9d875", 
#     "24971118_tile_15_17_5212bb", "25001121_tile_16_12_6e2b70", 
#     "24921119_tile_4_17_b09eb4"
# ]
# analyze_failed_masks(ANNOTATIONS_BINARY_PNG_PATH, failed_files)

## Split dataset

In [9]:
class FastYOLOAugmentationPipeline:
    """
    Optimized data augmentation pipeline for YOLO segmentation datasets

    For every source image the polygon label is rasterized into a binary
    mask, image and mask go through the augmentation pipeline together, and
    the augmented mask is traced back into YOLO polygon lines.
    """

    def __init__(self, augmentation_pipeline=None, num_augmentations=10):
        """
        Args:
            augmentation_pipeline: Callable invoked as
                `pipeline(image=..., mask=...)` that returns a mapping with
                'image' and 'mask' entries. None selects a lightweight default.
            num_augmentations: Number of augmented copies written per image.
        """
        if augmentation_pipeline is None:
            # Lightweight default: flips, 90-degree rotations and mild
            # brightness/contrast jitter (no blur, to keep it fast).
            self.aug_pipeline = A.Compose([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.3),
                A.RandomRotate90(p=0.5),
                A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.4),
            ], additional_targets={'mask': 'mask'})
        else:
            self.aug_pipeline = augmentation_pipeline

        self.num_augmentations = num_augmentations

    def yolo_label_to_mask_fast(self, label_path, img_width, img_height):
        """
        Rasterize a YOLO segmentation label file into a binary mask.

        Args:
            label_path: Path of the label file; a missing or empty file gives
                an all-zero mask.
            img_width: Width of the mask to create, in pixels.
            img_height: Height of the mask to create, in pixels.

        Returns:
            uint8 array of shape (img_height, img_width), 255 inside polygons.
            Lines with fewer than three points are skipped; malformed lines
            are skipped with a warning.
        """
        mask = np.zeros((img_height, img_width), dtype=np.uint8)

        label_path = Path(label_path)
        if not label_path.exists() or label_path.stat().st_size == 0:
            return mask

        try:
            with open(label_path, 'r') as f:
                content = f.read().strip()
        except (OSError, UnicodeDecodeError) as e:
            logger.warning(f"Could not read label file {label_path}: {e}")
            return mask

        scale = np.array([img_width, img_height])
        upper_bound = [img_width - 1, img_height - 1]

        for line_number, line in enumerate(content.split('\n'), start=1):
            parts = line.split()
            # class id + at least three (x, y) pairs
            if len(parts) < 7:
                continue

            try:
                coords = np.array([float(x) for x in parts[1:]]).reshape(-1, 2)
            except ValueError as e:
                # Non-numeric token or odd number of coordinates: skip this
                # polygon only, keep the others.
                logger.warning(f"Skipping malformed line {line_number} in {label_path}: {e}")
                continue

            # Round to the nearest pixel: labels store coordinates with six
            # decimals, so x * width can land just below the original integer
            # and plain truncation would move the vertex by one pixel.
            pixel_coords = np.rint(np.clip(coords * scale, 0, upper_bound)).astype(np.int32)

            if len(pixel_coords) >= 3:
                cv2.fillPoly(mask, [pixel_coords], 255)

        return mask

    def mask_to_yolo_label_fast(self, mask, img_width, img_height, class_id=0):
        """
        Trace a binary mask back into YOLO segmentation label text.

        Args:
            mask: Single-channel mask; non-zero pixels are foreground.
            img_width: Width used to normalize x coordinates.
            img_height: Height used to normalize y coordinates.
            class_id: Class id written at the start of every line.

        Returns:
            One "class x1 y1 x2 y2 ..." line per kept contour, joined with
            newlines; "" when the mask is empty or nothing is kept.

        NOTE(review): only outer contours are retrieved (RETR_EXTERNAL), so
        holes present in the mask are not represented in the label.
        """
        if mask.max() == 0:  # Empty mask
            return ""

        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if not contours:
            return ""

        label_lines = []
        scale = np.array([img_width, img_height])

        for contour in contours:
            # Simplify with a tolerance of 0.2% of the contour perimeter
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)

            # Drop degenerate or tiny polygons
            if len(approx) < 3 or cv2.contourArea(approx) < 5:
                continue

            normalized = np.clip(approx.reshape(-1, 2) / scale, 0, 1)

            coords_str = ' '.join([f"{coord:.6f}" for coord in normalized.flatten()])
            label_lines.append(f"{class_id} {coords_str}")

        return '\n'.join(label_lines)

    def augment_single_image_batch(self, image_path, label_path, output_images_dir, output_labels_dir):
        """
        Write all augmented copies of one image together with their labels.

        Args:
            image_path: Source image.
            label_path: YOLO label of the source image (may be missing/empty).
            output_images_dir: Folder receiving "<stem>_aug<k>.png".
            output_labels_dir: Folder receiving "<stem>_aug<k>.txt".

        Returns:
            (successful, failed, message) where the two counts add up to
            `num_augmentations`.
        """
        try:
            output_images_dir = Path(output_images_dir)
            output_labels_dir = Path(output_labels_dir)

            # Load image once
            image = cv2.imread(str(image_path))
            if image is None:
                message = f"Could not load image: {image_path}"
                logger.warning(message)
                return 0, self.num_augmentations, message

            img_height, img_width = image.shape[:2]
            stem = Path(image_path).stem

            # Convert YOLO label to mask once
            mask = self.yolo_label_to_mask_fast(label_path, img_width, img_height)

            successful = 0
            failed = 0

            for aug_idx in range(1, self.num_augmentations + 1):
                try:
                    augmented = self.aug_pipeline(image=image, mask=mask)
                    aug_image = augmented['image']
                    aug_mask = augmented['mask']

                    aug_image_path = output_images_dir / f"{stem}_aug{aug_idx}.png"
                    aug_label_path = output_labels_dir / f"{stem}_aug{aug_idx}.txt"

                    # cv2.imwrite reports failure through its return value
                    # instead of raising, so check it explicitly.
                    if not cv2.imwrite(str(aug_image_path), aug_image, [cv2.IMWRITE_PNG_COMPRESSION, 1]):
                        raise IOError(f"cv2.imwrite could not write {aug_image_path}")

                    # Normalize against the augmented image size: transforms
                    # such as a 90-degree rotation swap width and height of a
                    # non-square image.
                    aug_height, aug_width = aug_image.shape[:2]
                    yolo_label = self.mask_to_yolo_label_fast(aug_mask, aug_width, aug_height)
                    with open(aug_label_path, 'w') as f:
                        f.write(yolo_label)

                    successful += 1

                except Exception as e:
                    failed += 1
                    if failed <= 2:  # Limit error messages
                        logger.warning(f"Aug {aug_idx} failed for {stem}: {e}")

            return successful, failed, f"Processed {stem}"

        except Exception as e:
            message = f"Error processing {Path(image_path).name}: {e}"
            logger.warning(message)
            return 0, self.num_augmentations, message

    def augment_dataset_folder_fast(self, images_dir, labels_dir, output_images_dir, output_labels_dir, 
                                   num_workers=None, use_threads=True):
        """
        Augment every image of a folder, in parallel when possible.

        Args:
            images_dir: Folder with source images (*.png, *.jpg, *.jpeg, *.tif).
            labels_dir: Folder with the matching "<stem>.txt" label files.
            output_images_dir: Destination for augmented images (created if missing).
            output_labels_dir: Destination for augmented labels (created if missing).
            num_workers: Thread count; None picks min(cpu count, image count, 8).
            use_threads: If True and more than one image is found, use a
                ThreadPoolExecutor; otherwise images are processed
                sequentially in the calling thread.

        Returns:
            (total_successful, total_failed) counts of augmented copies.
        """
        images_dir = Path(images_dir)
        labels_dir = Path(labels_dir)
        output_images_dir = Path(output_images_dir)
        output_labels_dir = Path(output_labels_dir)

        # Find all images
        image_files = []
        for ext in ['*.png', '*.jpg', '*.jpeg', '*.tif']:
            image_files.extend(images_dir.glob(ext))
        image_files.sort()  # deterministic processing order

        if not image_files:
            logger.warning(f"No images found in {images_dir}")
            return 0, 0

        # Writers do not create folders themselves.
        output_images_dir.mkdir(parents=True, exist_ok=True)
        output_labels_dir.mkdir(parents=True, exist_ok=True)

        logger.info(f"Found {len(image_files)} images to augment")
        logger.info(f"Creating {self.num_augmentations} versions each = {len(image_files) * self.num_augmentations} total")

        if num_workers is None:
            num_workers = min(multiprocessing.cpu_count(), len(image_files), 8)

        total_successful = 0
        total_failed = 0

        start_time = time.time()

        if use_threads and len(image_files) > 1:
            logger.info(f"Using {num_workers} threads for parallel augmentation")

            with ThreadPoolExecutor(max_workers=num_workers) as executor:
                future_to_image = {}
                for image_path in image_files:
                    label_path = labels_dir / f"{image_path.stem}.txt"
                    future = executor.submit(
                        self.augment_single_image_batch,
                        image_path, label_path, output_images_dir, output_labels_dir
                    )
                    future_to_image[future] = image_path

                with tqdm(total=len(image_files), desc="Augmenting images (threaded)") as pbar:
                    for future in as_completed(future_to_image):
                        # Futures yielded by as_completed are already done,
                        # so result() returns immediately.
                        try:
                            successful, failed, _ = future.result()
                        except Exception as e:
                            total_failed += self.num_augmentations
                            logger.error(f"Task failed: {e}")
                        else:
                            total_successful += successful
                            total_failed += failed

                        # Bookkeeping lives outside the try so a display
                        # problem can never be counted as a failed task.
                        elapsed = max(time.time() - start_time, 1e-9)
                        pbar.update(1)
                        pbar.set_postfix({
                            "Success": total_successful, 
                            "Failed": total_failed,
                            "Rate": f"{total_successful/elapsed:.1f}/s"
                        })

        else:
            logger.info("Using sequential processing")

            for image_path in tqdm(image_files, desc="Augmenting images (sequential)"):
                label_path = labels_dir / f"{image_path.stem}.txt"
                successful, failed, _ = self.augment_single_image_batch(
                    image_path, label_path, output_images_dir, output_labels_dir
                )
                total_successful += successful
                total_failed += failed

        duration = time.time() - start_time
        rate = total_successful / duration if duration > 0 else 0

        logger.info(f"Augmentation completed in {duration:.1f}s")
        logger.info(f"Rate: {rate:.1f} augmentations/second")
        logger.info(f"Results: {total_successful} successful, {total_failed} failed")

        return total_successful, total_failed

In [10]:
def read_image_list(txt_path):
    """Return the non-blank lines of a text file, stripped of surrounding whitespace."""
    with open(txt_path, 'r') as handle:
        stripped_lines = (raw_line.strip() for raw_line in handle)
        return [entry for entry in stripped_lines if entry]

def get_image_name_without_ext(filename):
    """Return the final path component of `filename` with its last suffix removed."""
    image_path = Path(filename)
    return image_path.stem

def create_directory_structure(base_path):
    """Create <base>/{train,val,test}/{images,labels}; existing folders are left alone."""
    root = Path(base_path)
    for split_name in ('train', 'val', 'test'):
        split_dir = root / split_name
        for leaf in ('images', 'labels'):
            (split_dir / leaf).mkdir(parents=True, exist_ok=True)

def convert_tiff_to_png(tiff_path, png_path, quality=95):
    """
    Re-save an image file as an RGB PNG.

    Args:
        tiff_path: Source image path.
        png_path: Destination PNG path.
        quality: Accepted but not used by the body.

    Returns:
        True when the PNG was written, False when anything failed (the
        failure is logged as a warning).

    NOTE(review): `optimize=True` and `compress_level=1` are passed together;
    check the Pillow PNG writer documentation to confirm which one takes
    effect.
    """
    try:
        with Image.open(tiff_path) as source:
            # Anything that is not already RGB is converted first.
            rgb_image = source if source.mode == 'RGB' else source.convert('RGB')
            rgb_image.save(png_path, 'PNG', optimize=True, compress_level=1)
    except Exception as exc:
        logger.warning(f"Failed to convert {tiff_path} to {png_path}: {exc}")
        return False
    return True

def copy_label_file(source_labels_dir, target_labels_dir, image_filename):
    """Copy the label file that belongs to an image into another directory.

    The label file is named after the image stem with a ``.txt`` extension.

    Args:
        source_labels_dir: Directory that holds the existing label files.
        target_labels_dir: Directory that receives the copy.
        image_filename: Image file name (or path) whose label should be copied.

    Returns:
        bool: True when the label was copied, False when the source label
        does not exist (a warning is logged in that case).
    """
    label_name = get_image_name_without_ext(image_filename) + ".txt"
    src = Path(source_labels_dir) / label_name
    dst = Path(target_labels_dir) / label_name

    # Guard clause: nothing to copy when the label is missing.
    if not src.exists():
        logger.warning(f"Label file not found: {src}")
        return False

    shutil.copy2(src, dst)
    return True

def validate_setup():
    """Check that every input needed for dataset creation is present.

    Verifies, in order: the original dataset path, the YOLO labels directory,
    the image directory, the processed output directory (created when
    missing), each fold list file, the test list file, and finally that the
    first ten images of fold 0 each have a ``.txt`` label and a ``.tif``
    image. Every finding is logged.

    Returns:
        bool: True when no check failed, False otherwise.
    """
    logger.info("Validating setup for YOLO dataset creation...")

    is_valid = True

    # Original dataset path (kept for reference).
    if Path(DATASET_PATH).exists():
        logger.success(f"Original dataset path exists: {DATASET_PATH}")
    else:
        logger.warning(f"Original dataset path does not exist: {DATASET_PATH}")
        is_valid = False

    # YOLO labels directory (main requirement).
    labels_root = Path(YOLO_ANNOTATIONS_OUTPUT_PATH)
    if labels_root.exists():
        n_labels = len(list(labels_root.glob("*.txt")))
        logger.success(f"YOLO labels directory exists: {labels_root} ({n_labels} label files)")
    else:
        logger.error(f"YOLO labels directory does not exist: {labels_root}")
        logger.error("Please run the binary mask to YOLO conversion first!")
        is_valid = False

    # Source images; only .tif files are counted.
    images_root = Path(IMG_DATASET_PATH)
    if images_root.exists():
        n_images = len(list(images_root.glob("*.tif")))
        logger.success(f"Image dataset path exists: {IMG_DATASET_PATH} ({n_images} image files)")
    else:
        logger.warning(f"Image dataset path does not exist: {IMG_DATASET_PATH}")
        is_valid = False

    # Output directory: a missing one is created here, not reported as an error.
    output_root = Path(CREATE_DATASET_PROCESSED_PATH)
    if output_root.exists():
        logger.success(f"Processed dataset path exists: {CREATE_DATASET_PROCESSED_PATH}")
    else:
        logger.info(f"Creating processed dataset directory: {CREATE_DATASET_PROCESSED_PATH}")
        output_root.mkdir(parents=True, exist_ok=True)

    # Fold list files.
    fold_image_total = 0
    for fold_num, txt_path in FOLD_PATHS.items():
        if Path(txt_path).exists():
            fold_images = read_image_list(txt_path)
            fold_image_total += len(fold_images)
            logger.success(f"Fold {fold_num}: {len(fold_images)} images")
        else:
            logger.warning(f"Fold {fold_num} txt file does not exist: {txt_path}")
            is_valid = False

    # Test list file.
    if Path(TEST_DATASET_TXT_PATH).exists():
        test_images = read_image_list(TEST_DATASET_TXT_PATH)
        logger.success(f"Test set: {len(test_images)} images")
        logger.info(f"Total dataset size: {fold_image_total + len(test_images)} images")
    else:
        logger.warning(f"Test dataset txt file does not exist: {TEST_DATASET_TXT_PATH}")
        is_valid = False

    # Spot-check image/label correspondence on the first ten entries of fold 0.
    first_fold_txt = FOLD_PATHS.get(0)
    if labels_root.exists() and first_fold_txt and Path(first_fold_txt).exists():
        logger.info("Validating image-label correspondence...")

        sample_stems = [
            get_image_name_without_ext(image_filename)
            for image_filename in read_image_list(first_fold_txt)[:10]
        ]
        missing_labels = [
            f"{stem}.txt"
            for stem in sample_stems
            if not (labels_root / f"{stem}.txt").exists()
        ]
        missing_images = [
            f"{stem}.tif"
            for stem in sample_stems
            if not (images_root / f"{stem}.tif").exists()
        ]

        if missing_labels:
            logger.error(f"Missing label files: {missing_labels}")
            is_valid = False

        if missing_images:
            logger.error(f"Missing image files: {missing_images}")
            is_valid = False

        if not (missing_labels or missing_images):
            logger.success("Image-label correspondence validation passed!")

    # Summary.
    if not is_valid:
        logger.error("Setup validation failed. Please fix the issues above before proceeding.")
    else:
        logger.success("All validations passed! Ready to create cross-validation datasets.")
        logger.info(f"Output will be saved to: {CREATE_DATASET_PROCESSED_PATH}")

    return is_valid

def process_image_list(image_list, split_name, target_base_dir):
    """Convert one split's TIFF images to PNG and copy their YOLO label files.

    For every entry the ``.tif`` source is looked up in ``IMG_DATASET_PATH``,
    converted to ``<target_base_dir>/<split_name>/images/<stem>.png`` and its
    label is copied from ``YOLO_ANNOTATIONS_OUTPUT_PATH`` to
    ``<target_base_dir>/<split_name>/labels/<stem>.txt``.

    An entry only counts as successful when both the image and its label end
    up in the split. When the label copy fails, the PNG written just before is
    removed again; otherwise an item reported as failed would still sit in the
    dataset as an image without a label file.

    Args:
        image_list: Image file names (any extension; only the stem is used).
        split_name: Name of the split sub-directory, e.g. ``"train"``.
        target_base_dir: Root directory of the dataset being built.

    Returns:
        tuple[int, int]: ``(successful, failed)`` counts.
    """
    source_labels_dir = Path(YOLO_ANNOTATIONS_OUTPUT_PATH)
    source_images_dir = Path(IMG_DATASET_PATH)
    split_dir = Path(target_base_dir) / split_name
    target_images_dir = split_dir / "images"
    target_labels_dir = split_dir / "labels"

    successful_copies = 0
    failed_copies = 0

    for image_filename in tqdm(image_list, desc=f"Processing {split_name}", leave=False):
        image_name = get_image_name_without_ext(image_filename)

        # Sources are always .tif; targets are always .png.
        source_tif = source_images_dir / f"{image_name}.tif"
        target_png = target_images_dir / f"{image_name}.png"

        if not source_tif.exists():
            logger.warning(f"Source image not found: {image_name}.tif")
            failed_copies += 1
            continue

        if not convert_tiff_to_png(source_tif, target_png):
            failed_copies += 1
            continue

        if copy_label_file(source_labels_dir, target_labels_dir, image_filename):
            successful_copies += 1
        else:
            # Keep images and labels paired: drop the PNG whose label is missing.
            if target_png.exists():
                target_png.unlink()
            failed_copies += 1

    logger.info(f"{split_name}: {successful_copies} successful, {failed_copies} failed")
    return successful_copies, failed_copies

def create_single_fold_dataset(val_fold, fold_data, test_data):
    """Build the train/val/test dataset for one cross-validation fold.

    The fold ``val_fold`` becomes the validation split, all remaining folds
    are concatenated into the training split, and ``test_data`` is used as the
    test split. Output goes to
    ``CREATE_DATASET_PROCESSED_PATH/fold_<val_fold>_dataset``.

    Args:
        val_fold: Key in ``fold_data`` of the fold used for validation.
        fold_data: Mapping of fold key to its list of image file names.
        test_data: List of image file names of the test split.

    Returns:
        tuple[int, int]: ``(successful, failed)`` counts summed over the
        three splits.
    """
    dataset_name = f"fold_{val_fold}_dataset"
    dataset_path = Path(CREATE_DATASET_PROCESSED_PATH) / dataset_name

    logger.info(f"Creating {dataset_name}")

    create_directory_structure(dataset_path)

    # Held-out fold validates; every other fold trains.
    val_data = fold_data[val_fold]
    train_data = [
        image
        for fold_num, images in fold_data.items()
        if fold_num != val_fold
        for image in images
    ]

    logger.info(f"Training: {len(train_data)} images")
    logger.info(f"Validation: {len(val_data)} images")
    logger.info(f"Test: {len(test_data)} images")

    started_at = time.time()

    total_success = 0
    total_fail = 0
    splits = (("train", train_data), ("val", val_data), ("test", test_data))
    for split_name, split_images in splits:
        n_ok, n_failed = process_image_list(split_images, split_name, dataset_path)
        total_success += n_ok
        total_fail += n_failed

    duration = time.time() - started_at

    logger.success(f"{dataset_name} completed in {duration:.1f}s")
    logger.success(f"Total: {total_success} successful, {total_fail} failed")

    return total_success, total_fail

def create_single_fold_dataset_with_augmentation_fast(val_fold, fold_data, test_data, apply_augmentation=None):
    """
    Build one fold's dataset, then optionally augment its training split in place.

    Args:
        val_fold: Key in ``fold_data`` of the fold used for validation.
        fold_data: Mapping of fold key to its list of image file names.
        test_data: List of image file names of the test split.
        apply_augmentation: Whether to augment the training split. ``None``
            (the default) falls back to the module-level ``APPLY_AUGMENTATION``
            flag, which is what this function always used before the
            parameter existed.

    Returns:
        tuple[int, int]: ``(successful, failed)`` counts of the base dataset
        plus, when augmentation ran, the augmentation counts.
    """
    # First create normal dataset
    total_success, total_fail = create_single_fold_dataset(val_fold, fold_data, test_data)

    # An explicit argument wins; otherwise keep the historical global lookup.
    if apply_augmentation is None:
        apply_augmentation = APPLY_AUGMENTATION

    if not apply_augmentation:
        return total_success, total_fail

    logger.info(f"Applying FAST augmentation to fold {val_fold} training set...")

    dataset_path = Path(CREATE_DATASET_PROCESSED_PATH) / f"fold_{val_fold}_dataset"
    train_images_dir = dataset_path / "train" / "images"
    train_labels_dir = dataset_path / "train" / "labels"

    if not (train_images_dir.exists() and train_labels_dir.exists()):
        logger.warning(f"Training directories not found for fold {val_fold}")
        return total_success, total_fail

    # Count original images
    original_count = len(list(train_images_dir.glob("*.png")))
    logger.info(f"  Original training images: {original_count}")

    # Initialize fast augmenter
    augmenter = FastYOLOAugmentationPipeline(
        augmentation_pipeline=AUGMENTATION_PIPELINE,
        num_augmentations=NUM_AUGMENTATIONS_PER_IMAGE
    )

    # Apply fast augmentation.
    # NOTE(review): outputs go into the same folders the inputs are read from;
    # presumably the augmenter lists its inputs before writing - confirm.
    start_time = time.time()
    aug_success, aug_fail = augmenter.augment_dataset_folder_fast(
        images_dir=train_images_dir,
        labels_dir=train_labels_dir,
        output_images_dir=train_images_dir,  # In-place
        output_labels_dir=train_labels_dir,  # In-place
        num_workers=AUGMENTATION_WORKERS,
        use_threads=True  # Set to False for CPU-intensive augmentations
    )

    duration = time.time() - start_time
    final_count = len(list(train_images_dir.glob("*.png")))

    logger.success(f"Fold {val_fold} augmentation completed in {duration:.1f}s:")
    logger.success(f"  Created: {aug_success} augmented images")
    logger.success(f"  Failed: {aug_fail} augmentations")
    logger.success(f"  Final training set: {final_count} images ({original_count} → {final_count})")

    return total_success + aug_success, total_fail + aug_fail

def create_cross_validation_datasets(APPLY_AUGMENTATION):
    """Create one cross-validation dataset per fold in FOLD_PATHS, with optional augmentation.

    Each fold in turn serves as the validation split while the remaining
    folds form the training split; the test split is the same for all.

    Args:
        APPLY_AUGMENTATION: Whether to augment every training split. The name
            shadows the module-level flag of the same name; the value passed
            here is the one that is used, including inside the per-fold helper.

    Returns:
        tuple[int, int] | None: ``(successful, failed)`` counts summed over all
        folds, or ``None`` when ``SPLIT_DATASET`` is falsy and nothing was done.
    """

    if not SPLIT_DATASET:
        logger.warning("Dataset creation is disabled. Set SPLIT_DATASET = True to enable.")
        return

    logger.info("Starting cross-validation dataset creation...")
    if APPLY_AUGMENTATION:
        logger.info("Augmentation is ENABLED - will augment training sets")
        logger.info(f"Augmentations per image: {NUM_AUGMENTATIONS_PER_IMAGE}")
    else:
        logger.info("Augmentation is DISABLED")

    start_time = time.time()

    # Read all fold datasets
    fold_data = {}
    for fold_num, txt_path in FOLD_PATHS.items():
        fold_data[fold_num] = read_image_list(txt_path)
        logger.info(f"Fold {fold_num}: {len(fold_data[fold_num])} images")

    # Read test dataset
    test_data = read_image_list(TEST_DATASET_TXT_PATH)
    logger.info(f"Test set: {len(test_data)} images")

    total_success = 0
    total_fail = 0

    # Iterate the folds that were actually loaded instead of assuming exactly
    # five folds keyed 0..4, so changing FOLD_PATHS needs no edit here.
    for val_fold in fold_data:
        logger.info(f"\n{'='*60}")
        logger.info(f"PROCESSING FOLD {val_fold}")
        logger.info(f"{'='*60}")

        if APPLY_AUGMENTATION:
            logger.info(f"Creating dataset for fold {val_fold} WITH augmentation...")
            # Pass the decision explicitly so the helper does not re-read the
            # module-level flag, which may differ from this function's argument.
            success, fail = create_single_fold_dataset_with_augmentation_fast(
                val_fold, fold_data, test_data, apply_augmentation=True
            )
        else:
            logger.info(f"Creating dataset for fold {val_fold} WITHOUT augmentation...")
            success, fail = create_single_fold_dataset(val_fold, fold_data, test_data)

        total_success += success
        total_fail += fail

        logger.info(f"Fold {val_fold} completed: {success} successful, {fail} failed")

    end_time = time.time()
    total_duration = end_time - start_time

    logger.success("\n" + "="*60)
    logger.success("ALL CROSS-VALIDATION DATASETS COMPLETED!")
    logger.success("="*60)
    logger.success(f"Total time: {total_duration/60:.1f} minutes")
    logger.success(f"Overall: {total_success} successful, {total_fail} failed")

    if APPLY_AUGMENTATION:
        logger.success(f"Augmentation applied to all training sets with {NUM_AUGMENTATIONS_PER_IMAGE} versions per image")

    return total_success, total_fail

In [11]:
# Build the cross-validation datasets only when splitting is enabled and the
# setup checks pass; a failed validation is reported and nothing is created.
if SPLIT_DATASET:
    if not validate_setup():
        logger.error("Setup validation failed. Please fix the issues before proceeding.")
    else:
        create_cross_validation_datasets(APPLY_AUGMENTATION=APPLY_AUGMENTATION)

[32m2025-06-19 15:12:53.045[0m | [1mINFO    [0m | [36m__main__[0m:[36mvalidate_setup[0m:[36m52[0m - [1mValidating setup for YOLO dataset creation...[0m
[32m2025-06-19 15:12:53.046[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mvalidate_setup[0m:[36m61[0m - [32m[1mOriginal dataset path exists: datasets/supervisely/341575_free_space_rooftop_geneva_20250511_yolo[0m
[32m2025-06-19 15:12:53.048[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mvalidate_setup[0m:[36m71[0m - [32m[1mYOLO labels directory exists: datasets/supervisely/yolo_processed_20250619_151249/labels (530 label files)[0m
[32m2025-06-19 15:12:53.050[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mvalidate_setup[0m:[36m79[0m - [32m[1mImage dataset path exists: datasets/supervisely/dataset_processed_20250523-173715/images (530 image files)[0m
[32m2025-06-19 15:12:53.050[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mvalidate_setup[0m:[36m86[0m - [32m[1mProcessed datase

Processing train:   0%|          | 0/353 [00:00<?, ?it/s]

[32m2025-06-19 15:14:25.881[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtrain: 353 successful, 0 failed[0m


Processing val:   0%|          | 0/88 [00:00<?, ?it/s]

[32m2025-06-19 15:14:49.235[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mval: 88 successful, 0 failed[0m


Processing test:   0%|          | 0/89 [00:00<?, ?it/s]

[32m2025-06-19 15:15:18.876[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtest: 89 successful, 0 failed[0m
[32m2025-06-19 15:15:18.877[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m223[0m - [32m[1mfold_0_dataset completed in 145.8s[0m
[32m2025-06-19 15:15:18.877[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m224[0m - [32m[1mTotal: 530 successful, 0 failed[0m
[32m2025-06-19 15:15:18.877[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m237[0m - [1mApplying FAST augmentation to fold 0 training set...[0m
[32m2025-06-19 15:15:18.879[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m246[0m - [1m  Original training images: 353[0m
[32m2025-06-19 15:15:18.881[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast

Augmenting images (threaded):   0%|          | 0/353 [00:00<?, ?it/s]

[32m2025-06-19 15:19:05.815[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m229[0m - [1mAugmentation completed in 226.9s[0m
[32m2025-06-19 15:19:05.818[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m230[0m - [1mRate: 15.6 augmentations/second[0m
[32m2025-06-19 15:19:05.818[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m231[0m - [1mResults: 3530 successful, 0 failed[0m
[32m2025-06-19 15:19:05.829[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m268[0m - [32m[1mFold 0 augmentation completed in 226.9s:[0m
[32m2025-06-19 15:19:05.830[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m269[0m - [32m[1m  Created: 3530 augmented images[0m
[32m2025-06-19 15:19:05.830[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_d

Processing train:   0%|          | 0/353 [00:00<?, ?it/s]

[32m2025-06-19 15:20:35.240[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtrain: 353 successful, 0 failed[0m


Processing val:   0%|          | 0/88 [00:00<?, ?it/s]

[32m2025-06-19 15:21:00.676[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mval: 88 successful, 0 failed[0m


Processing test:   0%|          | 0/89 [00:00<?, ?it/s]

[32m2025-06-19 15:21:30.106[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtest: 89 successful, 0 failed[0m
[32m2025-06-19 15:21:30.107[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m223[0m - [32m[1mfold_1_dataset completed in 144.3s[0m
[32m2025-06-19 15:21:30.107[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m224[0m - [32m[1mTotal: 530 successful, 0 failed[0m
[32m2025-06-19 15:21:30.107[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m237[0m - [1mApplying FAST augmentation to fold 1 training set...[0m
[32m2025-06-19 15:21:30.109[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m246[0m - [1m  Original training images: 353[0m
[32m2025-06-19 15:21:30.111[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast

Augmenting images (threaded):   0%|          | 0/353 [00:00<?, ?it/s]

[32m2025-06-19 15:25:21.634[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m229[0m - [1mAugmentation completed in 231.5s[0m
[32m2025-06-19 15:25:21.635[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m230[0m - [1mRate: 15.2 augmentations/second[0m
[32m2025-06-19 15:25:21.636[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m231[0m - [1mResults: 3530 successful, 0 failed[0m
[32m2025-06-19 15:25:21.648[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m268[0m - [32m[1mFold 1 augmentation completed in 231.5s:[0m
[32m2025-06-19 15:25:21.648[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m269[0m - [32m[1m  Created: 3530 augmented images[0m
[32m2025-06-19 15:25:21.649[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_d

Processing train:   0%|          | 0/353 [00:00<?, ?it/s]

[32m2025-06-19 15:26:55.310[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtrain: 353 successful, 0 failed[0m


Processing val:   0%|          | 0/88 [00:00<?, ?it/s]

[32m2025-06-19 15:27:16.302[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mval: 88 successful, 0 failed[0m


Processing test:   0%|          | 0/89 [00:00<?, ?it/s]

[32m2025-06-19 15:27:45.809[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtest: 89 successful, 0 failed[0m
[32m2025-06-19 15:27:45.810[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m223[0m - [32m[1mfold_2_dataset completed in 144.2s[0m
[32m2025-06-19 15:27:45.811[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m224[0m - [32m[1mTotal: 530 successful, 0 failed[0m
[32m2025-06-19 15:27:45.811[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m237[0m - [1mApplying FAST augmentation to fold 2 training set...[0m
[32m2025-06-19 15:27:45.813[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m246[0m - [1m  Original training images: 353[0m
[32m2025-06-19 15:27:45.815[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast

Augmenting images (threaded):   0%|          | 0/353 [00:00<?, ?it/s]

[32m2025-06-19 15:31:30.815[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m229[0m - [1mAugmentation completed in 225.0s[0m
[32m2025-06-19 15:31:30.816[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m230[0m - [1mRate: 15.7 augmentations/second[0m
[32m2025-06-19 15:31:30.816[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m231[0m - [1mResults: 3530 successful, 0 failed[0m
[32m2025-06-19 15:31:30.828[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m268[0m - [32m[1mFold 2 augmentation completed in 225.0s:[0m
[32m2025-06-19 15:31:30.828[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m269[0m - [32m[1m  Created: 3530 augmented images[0m
[32m2025-06-19 15:31:30.829[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_d

Processing train:   0%|          | 0/353 [00:00<?, ?it/s]

[32m2025-06-19 15:33:01.871[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtrain: 353 successful, 0 failed[0m


Processing val:   0%|          | 0/88 [00:00<?, ?it/s]

[32m2025-06-19 15:33:25.584[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mval: 88 successful, 0 failed[0m


Processing test:   0%|          | 0/89 [00:00<?, ?it/s]

[32m2025-06-19 15:33:54.998[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtest: 89 successful, 0 failed[0m
[32m2025-06-19 15:33:54.999[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m223[0m - [32m[1mfold_3_dataset completed in 144.2s[0m
[32m2025-06-19 15:33:54.999[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m224[0m - [32m[1mTotal: 530 successful, 0 failed[0m
[32m2025-06-19 15:33:54.999[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m237[0m - [1mApplying FAST augmentation to fold 3 training set...[0m
[32m2025-06-19 15:33:55.001[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m246[0m - [1m  Original training images: 353[0m
[32m2025-06-19 15:33:55.003[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast

Augmenting images (threaded):   0%|          | 0/353 [00:00<?, ?it/s]

[32m2025-06-19 15:37:44.948[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m229[0m - [1mAugmentation completed in 229.9s[0m
[32m2025-06-19 15:37:44.948[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m230[0m - [1mRate: 15.4 augmentations/second[0m
[32m2025-06-19 15:37:44.949[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m231[0m - [1mResults: 3530 successful, 0 failed[0m
[32m2025-06-19 15:37:44.960[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m268[0m - [32m[1mFold 3 augmentation completed in 229.9s:[0m
[32m2025-06-19 15:37:44.960[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m269[0m - [32m[1m  Created: 3530 augmented images[0m
[32m2025-06-19 15:37:44.961[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_d

Processing train:   0%|          | 0/352 [00:00<?, ?it/s]

[32m2025-06-19 15:39:18.025[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtrain: 352 successful, 0 failed[0m


Processing val:   0%|          | 0/89 [00:00<?, ?it/s]

[32m2025-06-19 15:39:39.536[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mval: 89 successful, 0 failed[0m


Processing test:   0%|          | 0/89 [00:00<?, ?it/s]

[32m2025-06-19 15:40:08.932[0m | [1mINFO    [0m | [36m__main__[0m:[36mprocess_image_list[0m:[36m183[0m - [1mtest: 89 successful, 0 failed[0m
[32m2025-06-19 15:40:08.933[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m223[0m - [32m[1mfold_4_dataset completed in 144.0s[0m
[32m2025-06-19 15:40:08.933[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset[0m:[36m224[0m - [32m[1mTotal: 530 successful, 0 failed[0m
[32m2025-06-19 15:40:08.933[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m237[0m - [1mApplying FAST augmentation to fold 4 training set...[0m
[32m2025-06-19 15:40:08.935[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m246[0m - [1m  Original training images: 352[0m
[32m2025-06-19 15:40:08.937[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast

Augmenting images (threaded):   0%|          | 0/352 [00:00<?, ?it/s]

[32m2025-06-19 15:43:55.324[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m229[0m - [1mAugmentation completed in 226.4s[0m
[32m2025-06-19 15:43:55.326[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m230[0m - [1mRate: 15.5 augmentations/second[0m
[32m2025-06-19 15:43:55.326[0m | [1mINFO    [0m | [36m__main__[0m:[36maugment_dataset_folder_fast[0m:[36m231[0m - [1mResults: 3520 successful, 0 failed[0m
[32m2025-06-19 15:43:55.337[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m268[0m - [32m[1mFold 4 augmentation completed in 226.4s:[0m
[32m2025-06-19 15:43:55.338[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_dataset_with_augmentation_fast[0m:[36m269[0m - [32m[1m  Created: 3520 augmented images[0m
[32m2025-06-19 15:43:55.338[0m | [32m[1mSUCCESS [0m | [36m__main__[0m:[36mcreate_single_fold_d

## Train

In [12]:
class YOLOSegmentationTrainer:
    """
    Transfer learning pipeline for YOLO segmentation models.
    """
    
    def __init__(self, 
                 dataset_processed_path: str,
                 model_name: str = "yolo11n-seg.pt",
                 project_name: str = "rooftop_segmentation",
                 output_dir: str = None,
                 device: str = "auto"):
        """
        Initialize the trainer.
        
        Args:
            dataset_processed_path: Path to processed CV datasets
            model_name: Pre-trained model to use (must be segmentation model)
            project_name: Project name for saving results
            output_dir: Custom directory to save training results (default: current directory)
            device: Device to use ('auto', 'cpu', '0', '1', etc.)
        """
        self.dataset_processed_path = Path(dataset_processed_path)
        self.model_name = model_name
        self.project_name = project_name
        self.device = device
        
        # Set up output directory
        if output_dir:
            self.output_dir = Path(output_dir)
            self.output_dir.mkdir(parents=True, exist_ok=True)
            self.full_project_path = self.output_dir / project_name
        else:
            self.output_dir = Path(".")
            self.full_project_path = Path(project_name)
        
        # Training parameters
        self.training_params = {
            'epochs': 100,
            'imgsz': 640,
            'batch': 16,
            'lr0': 0.01,
            'lrf': 0.1,
            'momentum': 0.937,
            'weight_decay': 0.0005,
            'warmup_epochs': 3,
            'warmup_momentum': 0.8,
            'warmup_bias_lr': 0.1,
            'patience': 10,
            'save_period': 10,
            'cache': False,
            'workers': 8,
            'close_mosaic': 10,
        }
        
        # Results storage
        self.results = {}
        self.cv_results = []

        
        logger.info("Initialized YOLO Segmentation Trainer")
        logger.info(f"Model: {model_name}")
        logger.info(f"Device: {device}")
        logger.info(f"Dataset path: {dataset_processed_path}")
        logger.info(f"Output directory: {self.full_project_path}")
        
        # Check if dataset path exists
        if not self.dataset_processed_path.exists():
            logger.error(f"Dataset path not found: {self.dataset_processed_path}")
        else:
            logger.success(f"Dataset path found: {self.dataset_processed_path}")
    
    def _cleanup_project_directory(self):
        """Clean up project directory to avoid auto-increment naming issues."""
        if self.full_project_path.exists():
            logger.info(f"Cleaning up existing project directory: {self.full_project_path}")
            shutil.rmtree(self.full_project_path)
            logger.info("Project directory cleaned up")
    
    def create_dataset_yaml(self, fold_path: Path, class_names: list = None):
        """
        Create YOLO dataset configuration file.
        
        Args:
            fold_path: Path to specific fold dataset
            class_names: List of class names (default: ["free_space"])
        """
        if class_names is None:
            class_names = ["free_space"]
        
        dataset_config = {
            'path': str(fold_path.absolute()),
            'train': 'train/images',
            'val': 'val/images', 
            'test': 'test/images',
            'nc': len(class_names),
            'names': {i: name for i, name in enumerate(class_names)}
        }
        
        yaml_path = fold_path / 'dataset.yaml'
        with open(yaml_path, 'w') as f:
            yaml.dump(dataset_config, f, default_flow_style=False)
        
        logger.info(f"Created dataset config: {yaml_path}")
        return yaml_path
    
    def train_single_fold(self, 
                         fold_num: int, 
                         class_names: list = None,
                         custom_params: dict = None):
        """
        Train on a single cross-validation fold.
        
        Args:
            fold_num: Fold number (0-4)
            class_names: List of class names
            custom_params: Custom training parameters
        """
        logger.info(f"Starting training for fold {fold_num}")
        
        # Setup paths
        fold_path = self.dataset_processed_path / f"fold_{fold_num}_dataset"
        
        if not fold_path.exists():
            raise ValueError(f"Fold dataset not found: {fold_path}")
        
        # Use existing dataset.yaml if it exists, otherwise create one
        yaml_path = fold_path / 'dataset.yaml'
        if not yaml_path.exists():
            yaml_path = self.create_dataset_yaml(fold_path, class_names)
        else:
            logger.info(f"Using existing dataset config: {yaml_path}")
        
        # Load pre-trained model with validation
        model = YOLO(self.model_name)
        logger.info(f"Loaded pre-trained model: {self.model_name}")
        
        # Merge custom parameters
        train_params = self.training_params.copy()
        if custom_params:
            train_params.update(custom_params)
        
        # Clean up any existing folder to avoid auto-increment naming
        fold_dir = self.full_project_path / f"fold_{fold_num}"
        if fold_dir.exists():
            logger.info(f"Removing existing fold directory: {fold_dir}")
            shutil.rmtree(fold_dir)
        
        # Train the model
        start_time = time.time()
        
        results = model.train(
            data=str(yaml_path),
            project=str(self.output_dir),  # Use output directory
            name=str(self.full_project_path.name) + f"/fold_{fold_num}",  # Include project name in the path
            device=self.device,
            exist_ok=True,  # Allow overwriting
            **train_params
        )
        
        training_time = time.time() - start_time
        
        # Validate the model on the best checkpoint
        val_results = model.val(data=str(yaml_path))
        
        # Extract comprehensive results from training (with API compatibility)
        try:
            # Try to get metrics - handle different YOLO versions
            if hasattr(val_results, 'seg'):
                # Segmentation metrics
                metrics = {
                    'mAP50': float(val_results.seg.map50),
                    'mAP50-95': float(val_results.seg.map),
                    'precision': float(val_results.seg.mp),
                    'recall': float(val_results.seg.mr),
                }
            elif hasattr(val_results, 'box'):
                # Detection metrics (fallback)
                logger.warning(f"Using detection metrics for fold {fold_num} - ensure you're using a segmentation model!")
                metrics = {
                    'mAP50': float(val_results.box.map50),
                    'mAP50-95': float(val_results.box.map),
                    'precision': float(val_results.box.mp),
                    'recall': float(val_results.box.mr),
                }
            else:
                # Fallback metrics
                logger.warning(f"Could not extract detailed metrics for fold {fold_num}")
                metrics = {
                    'mAP50': 0.0,
                    'mAP50-95': 0.0,
                    'precision': 0.0,
                    'recall': 0.0,
                }
            
            # Extract training info with safe attribute access
            total_epochs = len(getattr(results, 'results', [])) if hasattr(results, 'results') else getattr(results, 'epochs', 0)
            best_epoch = getattr(results, 'best_epoch', total_epochs)
            best_fitness = float(getattr(results, 'best_fitness', 0.0))
            
            # FIXED: Get save directory with proper API compatibility
            if hasattr(results, 'save_dir'):
                save_dir = str(results.save_dir)
            else:
                # Fallback: construct expected path
                save_dir = str(self.full_project_path / f"fold_{fold_num}")
            
            # Try to get training losses safely
            training_metrics = {}
            if hasattr(results, 'results') and results.results and len(results.results) > 0:
                try:
                    last_result = results.results[-1]
                    if isinstance(last_result, (list, tuple)) and len(last_result) >= 2:
                        training_metrics = {
                            'final_train_loss': float(last_result[0]),
                            'final_val_loss': float(last_result[1]),
                            'convergence_epoch': best_epoch,
                        }
                except (IndexError, TypeError, ValueError):
                    pass
            
            fold_results = {
                'fold': fold_num,
                'training_time': training_time,
                'total_epochs': total_epochs,
                'best_epoch': best_epoch,
                'best_fitness': best_fitness,
                'final_metrics': metrics,
                'training_metrics': training_metrics,
                'model_path': str(Path(save_dir) / "weights" / "best.pt"),
                'results_dir': save_dir,
                'plots_saved': True if Path(save_dir).exists() else False
            }
            
        except Exception as e:
            logger.error(f"Error extracting results for fold {fold_num}: {e}")
            # Import traceback for detailed error info
            import traceback
            logger.error(f"Full traceback: {traceback.format_exc()}")
            
            # Minimal fallback results
            save_dir = str(self.full_project_path / f"fold_{fold_num}")
            fold_results = {
                'fold': fold_num,
                'training_time': training_time,
                'total_epochs': 0,
                'best_epoch': 0,
                'best_fitness': 0.0,
                'final_metrics': {'mAP50': 0.0, 'mAP50-95': 0.0, 'precision': 0.0, 'recall': 0.0},
                'training_metrics': {},
                'model_path': str(Path(save_dir) / "weights" / "best.pt"),
                'results_dir': save_dir,
                'plots_saved': False
            }
        
        self.cv_results.append(fold_results)
        
        # Log detailed results
        self._log_fold_results(fold_results, results)
        
        logger.success(f"Fold {fold_num} completed in {training_time/60:.1f} minutes")
        logger.info(f"Best mAP50: {fold_results['final_metrics']['mAP50']:.4f}")
        
        return fold_results
    
    def _log_fold_results(self, fold_results, training_results):
        """Log detailed results from training."""
        logger.info("=" * 60)
        logger.info(f"FOLD {fold_results['fold']} TRAINING SUMMARY")
        logger.info("=" * 60)
        logger.info(f"Training time: {fold_results['training_time']/60:.1f} minutes")
        logger.info(f"Total epochs: {fold_results['total_epochs']}")
        logger.info(f"Best epoch: {fold_results['best_epoch']}")
        logger.info(f"Best fitness: {fold_results['best_fitness']:.4f}")
        logger.info("Final Metrics:")
        for metric, value in fold_results['final_metrics'].items():
            logger.info(f"  {metric}: {value:.4f}")
        logger.info(f"Results saved to: {fold_results['results_dir']}")
        logger.info("=" * 60)
    
    def train_cross_validation(self, 
                              folds: list = None,
                              class_names: list = None,
                              custom_params: dict = None):
        """
        Train all cross-validation folds.
        
        Args:
            folds: List of fold numbers to train (default: [0,1,2,3,4])
            class_names: List of class names
            custom_params: Custom training parameters
        """
        if folds is None:
            folds = list(range(5))
        
        logger.info(f"Starting cross-validation training for folds: {folds}")
        
        # Clean up any existing project directory to avoid naming conflicts
        self._cleanup_project_directory()
        
        for fold_num in folds:
            try:
                self.train_single_fold(fold_num, class_names, custom_params)
            except Exception as e:
                logger.error(f"Failed to train fold {fold_num}: {e}")
                import traceback
                logger.error(f"Full traceback: {traceback.format_exc()}")
                continue
        
        # Analyze results
        self.analyze_cv_results()
        
        logger.success("Cross-validation training completed!")
    
    def analyze_cv_results(self):
        """Analyze cross-validation results."""
        if not self.cv_results:
            logger.warning("No CV results to analyze")
            return
        
        # Convert to DataFrame
        df = pd.DataFrame(self.cv_results)
        
        # Calculate statistics
        metrics = ['mAP50', 'mAP50-95', 'precision', 'recall']
        stats = {}
        
        for metric in metrics:
            values = [r['final_metrics'][metric] for r in self.cv_results]
            stats[metric] = {
                'mean': np.mean(values),
                'std': np.std(values),
                'min': np.min(values),
                'max': np.max(values)
            }
        
        # Log summary
        logger.info("Cross-Validation Results Summary:")
        logger.info("-" * 50)
        for metric, stat in stats.items():
            logger.info(f"{metric:12s}: {stat['mean']:.4f} ± {stat['std']:.4f} "
                       f"(min: {stat['min']:.4f}, max: {stat['max']:.4f})")
        
        # Create visualization
        self.plot_cv_results(df)
        
        # Save results
        self.save_cv_results(df, stats)
        
        # Plot training curves if available
        self.plot_training_curves()
        
        return stats
    
    def plot_training_curves(self):
        """Plot training curves for all folds with API compatibility."""
        if not self.cv_results:
            return
            
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        axes = axes.flatten()
        
        for i, fold_result in enumerate(self.cv_results):
            # Try to load training results
            results_dir = Path(fold_result['results_dir'])
            results_csv = results_dir / 'results.csv'
            
            if results_csv.exists():
                try:
                    df = pd.read_csv(results_csv)
                    
                    # Plot training curves
                    if len(df) > 1:
                        color = f'C{i}'
                        label = f"Fold {fold_result['fold']}"
                        
                        # Loss curves - handle different column names
                        train_loss_cols = ['train/box_loss', 'train/loss', 'train/total_loss']
                        val_loss_cols = ['val/box_loss', 'val/loss', 'val/total_loss']
                        
                        train_loss_col = next((col for col in train_loss_cols if col in df.columns), None)
                        val_loss_col = next((col for col in val_loss_cols if col in df.columns), None)
                        
                        if train_loss_col:
                            axes[0].plot(df['epoch'], df[train_loss_col], 
                                       color=color, label=f"{label} Train", alpha=0.7)
                        if val_loss_col:
                            axes[0].plot(df['epoch'], df[val_loss_col], 
                                       color=color, linestyle='--', label=f"{label} Val", alpha=0.7)
                        
                        # mAP curves - handle different column names
                        map_cols = ['metrics/mAP50(M)', 'metrics/mAP50-95(M)', 'val/mAP50', 'val/mAP50-95']
                        map_col = next((col for col in map_cols if col in df.columns), None)
                        if map_col:
                            axes[1].plot(df['epoch'], df[map_col], 
                                       color=color, label=label, alpha=0.7)
                        
                        # Precision curves - handle different column names
                        prec_cols = ['metrics/precision(M)', 'val/precision', 'metrics/precision']
                        prec_col = next((col for col in prec_cols if col in df.columns), None)
                        if prec_col:
                            axes[2].plot(df['epoch'], df[prec_col], 
                                       color=color, label=label, alpha=0.7)
                        
                        # Recall curves - handle different column names
                        rec_cols = ['metrics/recall(M)', 'val/recall', 'metrics/recall']
                        rec_col = next((col for col in rec_cols if col in df.columns), None)
                        if rec_col:
                            axes[3].plot(df['epoch'], df[rec_col], 
                                       color=color, label=label, alpha=0.7)
                        
                except Exception as e:
                    logger.warning(f"Could not plot training curves for fold {fold_result['fold']}: {e}")
        
        # Customize plots
        titles = ['Training/Validation Loss', 'mAP50', 'Precision', 'Recall']
        for ax, title in zip(axes, titles):
            ax.set_title(title)
            ax.set_xlabel('Epoch')
            ax.legend()
            ax.grid(True, alpha=0.3)
        
        plt.tight_layout()
        
        # Save plot
        curves_path = self.full_project_path / "training_curves.png"
        plt.savefig(curves_path, dpi=300, bbox_inches='tight')
        logger.info(f"Training curves saved: {curves_path}")
        plt.show()
    
    def plot_cv_results(self, df):
        """Create visualizations of CV results."""
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        
        metrics = ['mAP50', 'mAP50-95', 'precision', 'recall']
        
        for i, metric in enumerate(metrics):
            ax = axes[i//2, i%2]
            values = [r['final_metrics'][metric] for r in self.cv_results]
            
            # Bar plot
            bars = ax.bar(df['fold'], values, alpha=0.7, color=f'C{i}')
            ax.set_xlabel('Fold')
            ax.set_ylabel(metric)
            ax.set_title(f'{metric} across folds')
            ax.set_ylim(0, 1)
            
            # Add value labels on bars
            for bar, value in zip(bars, values):
                ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                       f'{value:.3f}', ha='center', va='bottom')
            
            # Add mean line
            mean_val = np.mean(values)
            ax.axhline(y=mean_val, color='red', linestyle='--', alpha=0.7,
                      label=f'Mean: {mean_val:.3f}')
            ax.legend()
        
        plt.tight_layout()
        
        # Save plot
        plot_path = self.full_project_path / "cv_results.png"
        plt.savefig(plot_path, dpi=300, bbox_inches='tight')
        logger.info(f"Results plot saved: {plot_path}")
        plt.show()
    
    def save_cv_results(self, df, stats):
        """Save detailed CV results."""
        results_dir = self.full_project_path / "cv_analysis"
        results_dir.mkdir(parents=True, exist_ok=True)
        
        # Save detailed results
        df.to_csv(results_dir / "detailed_results.csv", index=False)
        
        # Save summary statistics
        stats_df = pd.DataFrame(stats).T
        stats_df.to_csv(results_dir / "summary_stats.csv")
        
        # Save comprehensive results including training metrics
        comprehensive_results = []
        for result in self.cv_results:
            flat_result = {
                'fold': result['fold'],
                'training_time_minutes': result['training_time'] / 60,
                'total_epochs': result.get('total_epochs', 'N/A'),
                'best_epoch': result['best_epoch'],
                'best_fitness': result['best_fitness'],
                'final_mAP50': result['final_metrics']['mAP50'],
                'final_mAP50_95': result['final_metrics']['mAP50-95'],
                'final_precision': result['final_metrics']['precision'],
                'final_recall': result['final_metrics']['recall'],
                'model_path': result['model_path'],
                'results_directory': result.get('results_dir', 'N/A')
            }
            
            # Add training metrics if available
            if 'training_metrics' in result:
                flat_result.update({
                    'final_train_loss': result['training_metrics'].get('final_train_loss', 'N/A'),
                    'final_val_loss': result['training_metrics'].get('final_val_loss', 'N/A'),
                    'convergence_epoch': result['training_metrics'].get('convergence_epoch', 'N/A')
                })
            
            comprehensive_results.append(flat_result)
        
        comprehensive_df = pd.DataFrame(comprehensive_results)
        comprehensive_df.to_csv(results_dir / "comprehensive_results.csv", index=False)
        
        # Save configuration
        config = {
            'model_name': self.model_name,
            'training_params': self.training_params,
            'dataset_path': str(self.dataset_processed_path),
            'project_name': self.project_name,
            'total_folds_trained': len(self.cv_results),
            'average_training_time_minutes': np.mean([r['training_time']/60 for r in self.cv_results]),
            'best_performing_fold': max(self.cv_results, key=lambda x: x['final_metrics']['mAP50'])['fold'] if self.cv_results else 'N/A',
        }
        
        with open(results_dir / "training_config.yaml", 'w') as f:
            yaml.dump(config, f, default_flow_style=False)
        
        logger.info(f"Results saved to: {results_dir}")
        logger.info(f"Comprehensive results saved: {results_dir / 'comprehensive_results.csv'}")
        
        return results_dir
    
    def predict_on_test_set(self, best_fold: int = None):
        """
        Use the best model to predict on test sets.
        
        Args:
            best_fold: Fold number of best model (auto-select if None)
        """
        if not self.cv_results:
            logger.error("No trained models available")
            return
        
        # Select best fold based on mAP50
        if best_fold is None:
            best_fold = max(self.cv_results, 
                          key=lambda x: x['final_metrics']['mAP50'])['fold']
        
        logger.info(f"Using model from fold {best_fold} for predictions")
        
        # Load best model
        model_path = self.full_project_path / f"fold_{best_fold}" / "weights" / "best.pt"
        
        if not model_path.exists():
            logger.error(f"Model not found: {model_path}")
            return
        
        model = YOLO(str(model_path))
        
        # Test on all folds
        for fold_num in range(5):
            fold_path = self.dataset_processed_path / f"fold_{fold_num}_dataset"
            test_images = fold_path / "test" / "images"
            
            if test_images.exists():
                logger.info(f"Predicting on fold {fold_num} test set")
                
                try:
                    results = model.predict(
                        source=str(test_images),
                        project=str(self.output_dir),
                        name=str(self.full_project_path.name) + f"/predictions_fold_{fold_num}",
                        save=True,
                        save_txt=True,
                        save_conf=True,
                        conf=0.5,
                        iou=0.7
                    )
                    
                    logger.success(f"Predictions saved for fold {fold_num}")
                    
                except Exception as e:
                    logger.error(f"Failed to predict on fold {fold_num}: {e}")
            else:
                logger.warning(f"Test images not found for fold {fold_num}: {test_images}")

In [13]:
# Training is gated behind the TRAIN_YOLO switch: when it is falsy this cell does nothing.
# NOTE(review): TRAIN_YOLO, PROJECT_NAME, OUTPUT_DIR_YOLO, CLASS_NAMES and CUSTOM_PARAMS are
# not defined in this cell; they are presumably set in the configuration cell - confirm.
if TRAIN_YOLO:
    # Initialize trainer (logs its configuration and whether the dataset path exists)
    trainer = YOLOSegmentationTrainer(
        dataset_processed_path=DATASET_PROCESSED_PATH,
        model_name=MODEL_NAME,
        project_name=PROJECT_NAME,
        output_dir=OUTPUT_DIR_YOLO,
        device=DEVICE
    )

    # Full cross-validation: no `folds` argument, so folds 0-4 are trained.
    # This call deletes any existing project output directory before training.
    logger.info("Starting full cross-validation training...")
    trainer.train_cross_validation(
        class_names=CLASS_NAMES,
        custom_params=CUSTOM_PARAMS
    )

    # Make predictions on test sets with the fold that reached the highest mAP50
    trainer.predict_on_test_set()

## Evaluate on test dataset

In [14]:
class AutoCVEvaluator:
    """
    Automatically find and evaluate all fold models in training directory
    """
    
    def __init__(self,
                 training_base_dir: str,
                 dataset_base_path: str,
                 output_dir: str = "auto_cv_evaluation_results",
                 conf_threshold: float = 0.5,
                 iou_threshold: float = 0.7,
                 class_names: list = None):
        """
        Initialize Auto CV evaluator for comprehensive model comparison
        
        Args:
            training_base_dir: Base directory containing yolo_* directories (e.g., "training_yolo")
            dataset_base_path: Base path to dataset folds
            output_dir: Directory to save CV evaluation results
            conf_threshold: Confidence threshold for predictions
            iou_threshold: IoU threshold for NMS
            class_names: List of class names
        """
        self.training_base_dir = Path(training_base_dir)
        self.dataset_base_path = Path(dataset_base_path)
        self.output_dir = Path(output_dir)
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        self.class_names = class_names or ["free_space"]
        
        # Create output directory
        self.output_dir.mkdir(parents=True, exist_ok=True)
        
        # Results storage
        self.all_per_image_results = []  # Master dataframe data
        self.fold_summaries = []
        self.cv_summary = {}
        
        print(f"Initialized Comprehensive Model Evaluator:")
        print(f"  Training base: {self.training_base_dir}")
        print(f"  Dataset base: {self.dataset_base_path}")
        print(f"  Output: {self.output_dir}")
        print(f"  Will evaluate ALL yolo_* projects with 3+ folds")
    
    def find_fold_models(self):
        """
        Find the fold models of one yolo_* project under the training base directory.

        Every ``yolo_*`` sub-directory is scanned for ``fold_<n>`` folders that
        contain a ``best.pt`` somewhere below them. If ``self.target_project``
        is set, that project's models are returned; otherwise the project with
        the most fold models is selected automatically. Directories are visited
        in name order so the printed report and the automatic choice between
        equally sized projects are reproducible.

        Returns:
            dict: ``{fold_number: Path to best.pt}`` for the selected project;
            empty when nothing is found or the requested project is unknown.
        """
        fold_models = {}
        
        print("Searching for fold models in ALL yolo_* directories...")
        
        # A missing base directory would make iterdir() raise; report it instead
        if not self.training_base_dir.is_dir():
            print(f"Training base directory not found: {self.training_base_dir}")
            return fold_models
        
        # Find all yolo_* project directories
        yolo_projects = sorted(
            (item for item in self.training_base_dir.iterdir()
             if item.is_dir() and item.name.startswith('yolo_')),
            key=lambda path: path.name,
        )
        
        if not yolo_projects:
            print("No yolo_* directories found!")
            return fold_models
        
        print(f"Found {len(yolo_projects)} yolo_* project directories:")
        
        # Search through ALL yolo_* projects and collect info
        project_models = {}
        for project_dir in yolo_projects:
            print(f"\nSearching in project: {project_dir.name}")
            
            # Look for fold_X directories within this project
            project_folds = {}
            for item in sorted(project_dir.iterdir(), key=lambda path: path.name):
                if not (item.is_dir() and item.name.startswith('fold_')):
                    continue
                
                try:
                    # Extract fold number
                    fold_num = int(item.name[len('fold_'):])
                except ValueError:
                    # Not a valid fold_X format
                    continue
                
                # Deep search for best.pt files
                model_path = self.deep_search_best_pt(item)
                
                if model_path:
                    project_folds[fold_num] = model_path
                    print(f"  ✓ Found Fold {fold_num}: {model_path}")
                else:
                    print(f"  ✗ Fold {fold_num} directory found but no best.pt: {item}")
            
            if project_folds:
                project_models[project_dir.name] = {
                    'folds': project_folds,
                    'count': len(project_folds),
                    'path': project_dir
                }
                print(f"  → Found {len(project_folds)} fold models in {project_dir.name}")
        
        if not project_models:
            print("\nNo fold models found in any yolo_* directories!")
            return fold_models
        
        # Display options
        print("\nAvailable projects with models:")
        print("-" * 60)
        for proj_name, info in project_models.items():
            folds = sorted(info['folds'].keys())
            print(f"{proj_name}: {info['count']} folds {folds}")
        
        # Select project based on target_project or auto-select. The attribute is
        # read defensively because it may not have been set on this instance.
        target_project = getattr(self, 'target_project', None)
        if target_project:
            if target_project not in project_models:
                print(f"\nSpecified project '{target_project}' not found!")
                print("Available projects:", list(project_models.keys()))
                return fold_models
            selected_project = target_project
            selected_models = project_models[target_project]['folds']
            print(f"\nUsing specified project: {selected_project}")
        else:
            # Auto-select the project with the most folds
            selected_project, best_info = max(project_models.items(), key=lambda x: x[1]['count'])
            selected_models = best_info['folds']
            print(f"\nAuto-selected: {selected_project} ({best_info['count']} folds)")
        
        print("Final model selection:")
        for fold_num in sorted(selected_models.keys()):
            print(f"  Fold {fold_num}: {selected_models[fold_num]}")
        
        return selected_models
    
    def deep_search_best_pt(self, fold_dir):
        """Walk ``fold_dir`` and return the path of the first ``best.pt`` encountered, else ``None``."""
        hits = (
            Path(current) / "best.pt"
            for current, _subdirs, filenames in os.walk(fold_dir)
            if "best.pt" in filenames
        )
        # The generator is lazy, so the walk stops at the first match
        return next(hits, None)
    
    def find_shared_test_dataset(self):
        """
        Locate the test split used for every fold model.

        Fold-0 style folders under the dataset base path are tried first, in
        this order: ``fold_0_dataset``, ``fold0_dataset``, ``fold_0``,
        ``fold0``; then a ``test`` folder directly under the base path. A
        location only counts when both ``test/images`` and ``test/labels`` exist.

        Returns:
            tuple: ``(images_dir, labels_dir)`` as paths, or ``(None, None)``
            when no location qualifies.
        """
        def split_dirs(root):
            # Image and label folders of the test split below ``root``
            return root / "test" / "images", root / "test" / "labels"

        for candidate in ("fold_0_dataset", "fold0_dataset", "fold_0", "fold0"):
            fold_root = self.dataset_base_path / candidate
            images_dir, labels_dir = split_dirs(fold_root)
            if not (images_dir.exists() and labels_dir.exists()):
                continue
            print(f"  ✓ Using shared test dataset: {fold_root}")
            return images_dir, labels_dir

        # No fold folder matched: fall back to a test split at the base path itself
        images_dir, labels_dir = split_dirs(self.dataset_base_path)
        if images_dir.exists() and labels_dir.exists():
            print(f"  ✓ Using direct test dataset: {self.dataset_base_path / 'test'}")
            return images_dir, labels_dir

        print(f"  ✗ Could not find shared test dataset in {self.dataset_base_path}")
        return None, None
    
    def evaluate_single_fold_simple(self, fold_num, model_path, shared_test_images_dir, shared_test_labels_dir, project_name=None):
        """Evaluate one fold's checkpoint on the shared test images.

        Loads the checkpoint with Ultralytics ``YOLO``, scores every test image
        through ``self.evaluate_single_image_simple`` and aggregates the results.
        On success the per-image rows are appended to
        ``self.all_per_image_results`` and the summary to ``self.fold_summaries``.

        Args:
            fold_num: Fold identifier stored in the results.
            model_path: Path of the checkpoint to load.
            shared_test_images_dir: ``Path`` of the directory with the test images.
            shared_test_labels_dir: ``Path`` of the directory with the label files.
            project_name: Name of the training project the checkpoint belongs to;
                used in log lines and stored in the results.

        Returns:
            dict: Fold summary. ``overall_*`` values are computed from the
            confusion counts pooled over all images, ``mean_*`` / ``std_*`` are
            statistics of the per-image metrics. None is returned when the model
            cannot be loaded, no test image is found, no image yields a result,
            or any other exception occurs (the error is printed).
        """
        print(f"\nEvaluating {project_name} - Fold {fold_num}...")
        print(f"  Model: {model_path}")

        try:
            # Imported here because the module-level ultralytics import is commented out.
            from ultralytics import YOLO

            model = YOLO(str(model_path))
            print(f"  ✓ Loaded model successfully")

            # Match extensions case-insensitively: lower-case glob patterns skip
            # files such as "IMG_01.JPG" on case-sensitive filesystems.
            image_suffixes = {'.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff'}
            if shared_test_images_dir.is_dir():
                image_files = sorted(
                    path for path in shared_test_images_dir.iterdir()
                    if path.suffix.lower() in image_suffixes
                )
            else:
                image_files = []
            print(f"  Found {len(image_files)} test images")

            if not image_files:
                print(f"  ✗ No test images found in {shared_test_images_dir}")
                return None

            # Score each image and pool the pixel-level confusion counts.
            fold_results = []
            all_tp, all_tn, all_fp, all_fn = 0, 0, 0, 0

            for image_file in tqdm(image_files, desc=f"  Processing {project_name} fold {fold_num}", leave=False):
                try:
                    result = self.evaluate_single_image_simple(
                        image_file, shared_test_labels_dir, model, fold_num, project_name
                    )

                    if result:
                        fold_results.append(result)
                        all_tp += result['true_positives']
                        all_tn += result['true_negatives']
                        all_fp += result['false_positives']
                        all_fn += result['false_negatives']

                except Exception as e:
                    # One bad image must not abort the whole fold.
                    print(f"    Error processing {image_file.name}: {e}")
                    continue

            if not fold_results:
                print(f"  ✗ No valid results for {project_name} fold {fold_num}")
                return None

            fold_df = pd.DataFrame(fold_results)

            # Metrics over the pooled counts; epsilon avoids division by zero.
            epsilon = 1e-7
            overall_precision = all_tp / (all_tp + all_fp + epsilon)
            overall_recall = all_tp / (all_tp + all_fn + epsilon)
            overall_f1 = 2 * (overall_precision * overall_recall) / (overall_precision + overall_recall + epsilon)
            overall_iou = all_tp / (all_tp + all_fp + all_fn + epsilon)
            overall_accuracy = (all_tp + all_tn) / (all_tp + all_tn + all_fp + all_fn + epsilon)

            fold_summary = {
                'project': project_name,
                'fold': fold_num,
                'model_path': str(model_path),
                'total_images': len(fold_results),
                'overall_iou': overall_iou,
                'overall_f1_score': overall_f1,
                'overall_precision': overall_precision,
                'overall_recall': overall_recall,
                'overall_accuracy': overall_accuracy,
                'mean_iou': fold_df['iou'].mean(),
                'mean_f1_score': fold_df['f1_score'].mean(),
                'mean_precision': fold_df['precision'].mean(),
                'mean_recall': fold_df['recall'].mean(),
                'std_iou': fold_df['iou'].std(),
                'std_f1_score': fold_df['f1_score'].std(),
                'total_tp': int(all_tp),
                'total_tn': int(all_tn),
                'total_fp': int(all_fp),
                'total_fn': int(all_fn)
            }

            # Publish to the evaluator-wide collections only once the fold succeeded.
            self.all_per_image_results.extend(fold_results)
            self.fold_summaries.append(fold_summary)

            print(f"  ✓ {project_name} fold {fold_num} completed: {len(fold_results)} images, IoU={overall_iou:.4f}")

            return fold_summary

        except Exception as e:
            print(f"  ✗ Error evaluating {project_name} fold {fold_num}: {e}")
            return None
    
    def evaluate_single_image_simple(self, image_file, test_labels_dir, model, fold_num, project_name=None):
        """Score one test image against its ground-truth mask.

        Args:
            image_file: ``Path`` of the image to evaluate.
            test_labels_dir: Directory passed on to ``load_ground_truth_mask_simple``.
            model: Model passed on to ``get_prediction_mask_simple``.
            fold_num: Fold identifier stored under ``'fold'``.
            project_name: Project name stored under ``'project'``.

        Returns:
            The dict produced by ``calculate_metrics_simple`` extended with the
            keys ``project``, ``fold``, ``image_name`` and ``image_path``; None
            when the ground truth or the prediction is unavailable, or when any
            step raises (the error is printed).
        """
        try:
            gt_mask = self.load_ground_truth_mask_simple(image_file, test_labels_dir)
            # The model is only queried once a ground-truth mask is available.
            pred_mask = (
                self.get_prediction_mask_simple(image_file, model)
                if gt_mask is not None
                else None
            )
            if pred_mask is None:
                return None

            metrics = self.calculate_metrics_simple(pred_mask, gt_mask)
            # Attach the identifying metadata to the metrics row.
            metrics.update(
                project=project_name,
                fold=fold_num,
                image_name=image_file.name,
                image_path=str(image_file),
            )
            return metrics

        except Exception as e:
            print(f"Error evaluating {image_file.name}: {e}")
            return None
    
    def load_ground_truth_mask_simple(self, image_file, test_labels_dir):
        """Rasterize an image's YOLO polygon label file into a binary mask.

        The image is read only to obtain its height and width. Each polygon line
        of ``<test_labels_dir>/<image stem>.txt`` is filled with 255 on a zeroed
        ``uint8`` canvas of that size. The leading class id of a line is not
        used, so polygons of all classes end up in the same foreground.

        Args:
            image_file: ``Path`` of the image the label belongs to.
            test_labels_dir: ``Path`` of the directory holding the ``.txt`` labels.

        Returns:
            ``(height, width)`` ``uint8`` array with values 0 and 255. The mask is
            all zeros when the label file is missing or empty. None is returned
            when the image cannot be read or when parsing raises, e.g. for a
            non-numeric token or an odd number of coordinates (the error is
            printed).
        """
        try:
            import cv2

            image = cv2.imread(str(image_file))
            if image is None:
                return None
            img_height, img_width = image.shape[:2]

            gt_mask = np.zeros((img_height, img_width), dtype=np.uint8)

            label_file = test_labels_dir / f"{image_file.stem}.txt"
            if not label_file.exists() or label_file.stat().st_size <= 0:
                # No annotation for this image: the ground truth is pure background.
                return gt_mask

            with open(label_file, 'r') as label_handle:
                raw_lines = label_handle.readlines()

            for raw_line in raw_lines:
                tokens = raw_line.strip().split()
                # A polygon needs the class id plus at least three (x, y) pairs;
                # blank lines produce no tokens and are skipped here as well.
                if len(tokens) < 7:
                    continue

                coords = [float(token) for token in tokens[1:]]

                # Scale the normalized coordinates to integer pixel positions.
                polygon = [
                    [int(coords[k] * img_width), int(coords[k + 1] * img_height)]
                    for k in range(0, len(coords), 2)
                ]

                if len(polygon) >= 3:
                    cv2.fillPoly(gt_mask, [np.array(polygon, dtype=np.int32)], 255)

            return gt_mask

        except Exception as e:
            print(f"Error loading GT for {image_file.name}: {e}")
            return None
    
    def get_prediction_mask_simple(self, image_file, model):
        """Run the model on one image and merge all instance masks into one mask.

        Args:
            image_file: ``Path`` of the image to segment.
            model: Loaded segmentation model exposing an Ultralytics-style
                ``predict`` method.

        Returns:
            ``uint8`` array shaped like ``result.orig_shape`` holding 255 where
            at least one predicted instance covers the pixel and 0 elsewhere
            (all zeros when nothing is detected). None is returned when
            inference or post-processing raises, or when the image cannot be
            read on the empty-results path (the error is printed).
        """
        try:
            import cv2

            # retina_masks=True asks Ultralytics for masks already mapped back to
            # the original image. Without it ``masks.data`` is in the letterboxed
            # network-input frame, and resizing that straight to the original size
            # also stretches the padding, which misplaces the mask on images whose
            # aspect ratio required padding.
            results = model.predict(
                source=str(image_file),
                conf=self.conf_threshold,
                iou=self.iou_threshold,
                retina_masks=True,
                verbose=False
            )

            if not results:
                # No result object at all: fall back to the image's own size.
                img = cv2.imread(str(image_file))
                if img is None:
                    print(f"Error getting prediction for {image_file.name}: could not read image")
                    return None
                return np.zeros(img.shape[:2], dtype=np.uint8)

            result = results[0]
            img_height, img_width = result.orig_shape

            pred_mask = np.zeros((img_height, img_width), dtype=np.uint8)

            masks = getattr(result, 'masks', None)
            if masks is not None:
                for mask in masks.data.cpu().numpy():
                    if mask.shape[:2] != (img_height, img_width):
                        # Safety net for a backend that did not honour retina_masks.
                        mask = cv2.resize(mask.astype(np.float32), (img_width, img_height))
                    # Union of all instances, binarized at 0.5.
                    pred_mask[mask > 0.5] = 255

            return pred_mask

        except Exception as e:
            print(f"Error getting prediction for {image_file.name}: {e}")
            return None
    
    def calculate_metrics_simple(self, pred_mask, gt_mask):
        """Compute pixel-wise binary segmentation metrics for one image.

        Pixels with a value above 127 count as foreground in both masks.

        Args:
            pred_mask: Predicted mask array.
            gt_mask: Ground-truth mask array of the same shape.

        Returns:
            dict with ``precision``, ``recall``, ``f1_score``, ``iou``,
            ``accuracy`` and ``dice_coefficient`` plus the integer confusion
            counts ``true_positives``, ``true_negatives``, ``false_positives``
            and ``false_negatives``. A small epsilon is added to every
            denominator, so a ratio whose numerator is zero evaluates to 0.0
            (for instance IoU when both masks are empty).
        """
        pred_fg = pred_mask > 127
        gt_fg = gt_mask > 127
        pred_bg = ~pred_fg
        gt_bg = ~gt_fg

        # Confusion counts over all pixels.
        tp = np.sum(pred_fg & gt_fg)
        tn = np.sum(pred_bg & gt_bg)
        fp = np.sum(pred_fg & gt_bg)
        fn = np.sum(pred_bg & gt_fg)

        epsilon = 1e-7
        precision = tp / (tp + fp + epsilon)
        recall = tp / (tp + fn + epsilon)

        metrics = {
            'precision': precision,
            'recall': recall,
            'f1_score': 2 * (precision * recall) / (precision + recall + epsilon),
            'iou': tp / (tp + fp + fn + epsilon),
            'accuracy': (tp + tn) / (tp + tn + fp + fn + epsilon),
            'dice_coefficient': 2 * tp / (2 * tp + fp + fn + epsilon),
        }
        metrics.update(
            true_positives=int(tp),
            true_negatives=int(tn),
            false_positives=int(fp),
            false_negatives=int(fn),
        )
        return metrics
    
    def evaluate_all_folds(self):
        """Evaluate every discovered project and fold on the shared test set.

        Locates the shared test split and all project checkpoints, evaluates
        each fold with ``evaluate_single_fold_simple`` and then analyzes and
        saves the collected results.

        Returns:
            ``(master_df, fold_summary_df)`` built from
            ``self.all_per_image_results`` and ``self.fold_summaries``, or
            ``(None, None)`` when the test split or the models cannot be found
            or when no fold evaluation succeeds.
        """
        banner = "=" * 60
        print(banner)
        print("STARTING COMPREHENSIVE MODEL EVALUATION")
        print(banner)

        # The same test split is used for every fold.
        print("\nFinding shared test dataset...")
        shared_test_images_dir, shared_test_labels_dir = self.find_shared_test_dataset()
        if shared_test_images_dir is None or shared_test_labels_dir is None:
            print("❌ Could not find shared test dataset. Exiting.")
            return None, None

        all_project_models = self.find_all_project_models()
        if not all_project_models:
            print("❌ No project models found. Exiting.")
            return None, None

        print(f"\nWill evaluate {len(all_project_models)} projects against the same test dataset")

        # One boolean per attempted fold: True when a summary was produced.
        outcomes = []
        for project_name, fold_models in all_project_models.items():
            print(f"\n{'='*60}")
            print(f"EVALUATING PROJECT: {project_name}")
            print(f"{'='*60}")

            for fold_num, model_path in sorted(fold_models.items()):
                fold_summary = self.evaluate_single_fold_simple(
                    fold_num, model_path, shared_test_images_dir, shared_test_labels_dir,
                    project_name=project_name
                )
                outcomes.append(bool(fold_summary))

        total_evaluations = len(outcomes)
        successful_evaluations = sum(outcomes)

        if successful_evaluations == 0:
            print("❌ No evaluations were successful.")
            return None, None

        print(f"\n✅ Successfully completed {successful_evaluations}/{total_evaluations} evaluations")

        master_df = pd.DataFrame(self.all_per_image_results)
        fold_summary_df = pd.DataFrame(self.fold_summaries)

        self.analyze_comprehensive_results(master_df, fold_summary_df)
        self.save_comprehensive_results(master_df, fold_summary_df)

        return master_df, fold_summary_df
    
    def find_all_project_models(self, min_folds=3):
        """Collect ``best.pt`` checkpoints for every ``yolo_*`` project with enough folds.

        Scans ``self.training_base_dir`` for directories named ``yolo_*`` and,
        inside each, for ``fold_<N>`` directories. Each fold directory is
        searched with ``self.deep_search_best_pt``. Directories are visited in
        name order so the result and the printed log are reproducible.

        Args:
            min_folds: Minimum number of folds with a checkpoint a project needs
                in order to be included. Defaults to 3.

        Returns:
            dict mapping project directory name to ``{fold_number: checkpoint_path}``.
            Empty when no ``yolo_*`` directory exists or no project reaches
            ``min_folds``.
        """
        print("Searching for fold models in ALL yolo_* directories...")

        # Sorted because iterdir() yields entries in arbitrary order.
        yolo_projects = sorted(
            (item for item in self.training_base_dir.iterdir()
             if item.is_dir() and item.name.startswith('yolo_')),
            key=lambda path: path.name,
        )

        if not yolo_projects:
            print("No yolo_* directories found!")
            return {}

        print(f"Found {len(yolo_projects)} yolo_* project directories:")

        all_project_models = {}

        for project_dir in yolo_projects:
            print(f"\nSearching in project: {project_dir.name}")

            project_folds = {}
            for item in sorted(project_dir.iterdir(), key=lambda path: path.name):
                if not (item.is_dir() and item.name.startswith('fold_')):
                    continue

                try:
                    fold_num = int(item.name.split('fold_')[1])
                except ValueError:
                    # Not a fold_<N> directory (e.g. "fold_backup").
                    continue

                model_path = self.deep_search_best_pt(item)
                if model_path:
                    project_folds[fold_num] = model_path
                    print(f"  ✓ Found Fold {fold_num}: {model_path}")
                else:
                    print(f"  ✗ Fold {fold_num} directory found but no best.pt: {item}")

            if project_folds:
                if len(project_folds) >= min_folds:
                    all_project_models[project_dir.name] = project_folds
                    print(f"  → Will evaluate {len(project_folds)} folds from {project_dir.name}")
                else:
                    print(f"  → Skipping {project_dir.name} (only {len(project_folds)} folds, need {min_folds}+)")

        if not all_project_models:
            print("\nNo projects with sufficient folds found!")
            return {}

        # Summarize what is going to be evaluated.
        print(f"\nFinal evaluation plan:")
        print("-" * 60)
        total_folds = 0
        for proj_name, folds in all_project_models.items():
            fold_list = sorted(folds.keys())
            print(f"{proj_name}: {len(fold_list)} folds {fold_list}")
            total_folds += len(fold_list)

        print(f"\nTotal evaluations to perform: {total_folds}")

        return all_project_models
    
    def analyze_comprehensive_results(self, master_df, fold_summary_df):
        """Print a cross-project comparison and store it in ``self.cv_summary``.

        For every project the mean, standard deviation, minimum and maximum of
        the ``overall_*`` metrics across its folds are computed. The best single
        model by IoU and by F1 is identified and the projects are ranked by
        mean IoU. All values stored in ``self.cv_summary`` are converted to
        built-in Python types so that they serialize to JSON as numbers.

        Args:
            master_df: Per-image results; only its row count is used.
            fold_summary_df: One row per evaluated fold with at least the columns
                ``project``, ``fold``, ``total_images``, ``overall_iou`` and
                ``overall_f1_score``.

        Returns:
            None. When ``fold_summary_df`` is empty a message is printed and
            ``self.cv_summary`` is left untouched.
        """
        print("\n" + "="*60)
        print("COMPREHENSIVE ANALYSIS ACROSS ALL MODELS")
        print("="*60)

        if fold_summary_df.empty:
            print("No results to analyze")
            return

        def as_native(value):
            """Convert a numpy scalar to the equivalent built-in Python value."""
            return value.item() if hasattr(value, 'item') else value

        metrics_to_analyze = ['overall_iou', 'overall_f1_score', 'overall_precision',
                              'overall_recall', 'overall_accuracy']

        # Slice the summary once per project; reused for statistics and rankings.
        rows_by_project = {
            project: fold_summary_df[fold_summary_df['project'] == project]
            for project in fold_summary_df['project'].unique()
        }

        project_stats = {}

        print("Results by Project:")
        print("-" * 50)

        for project, project_data in rows_by_project.items():
            project_metrics = {
                metric: {
                    'mean': as_native(project_data[metric].mean()),
                    'std': as_native(project_data[metric].std()),
                    'min': as_native(project_data[metric].min()),
                    'max': as_native(project_data[metric].max())
                }
                for metric in metrics_to_analyze
                if metric in project_data.columns
            }
            total_images = as_native(project_data['total_images'].sum())

            project_stats[project] = {
                'folds_evaluated': len(project_data),
                'total_images': total_images,
                'metrics': project_metrics
            }

            print(f"\n{project}:")
            print(f"  Folds: {len(project_data)} | Images: {total_images}")
            for metric, stats in project_metrics.items():
                metric_name = metric.replace('overall_', '').replace('_', ' ').title()
                print(f"  {metric_name:12s}: {stats['mean']:.4f} ± {stats['std']:.4f}")

        # Best single models.
        print(f"\n" + "="*50)
        print("BEST PERFORMING MODELS")
        print("="*50)

        best_iou = fold_summary_df.loc[fold_summary_df['overall_iou'].idxmax()]
        best_f1 = fold_summary_df.loc[fold_summary_df['overall_f1_score'].idxmax()]

        print(f"Best IoU: {best_iou['project']} (Fold {best_iou['fold']}) - IoU: {best_iou['overall_iou']:.4f}")
        print(f"Best F1:  {best_f1['project']} (Fold {best_f1['fold']}) - F1: {best_f1['overall_f1_score']:.4f}")

        # Project rankings.
        print(f"\n" + "="*40)
        print("PROJECT RANKINGS (by mean IoU)")
        print("="*40)

        project_rankings = [
            {
                'project': project,
                'mean_iou': as_native(project_data['overall_iou'].mean()),
                'mean_f1': as_native(project_data['overall_f1_score'].mean()),
                'folds': len(project_data)
            }
            for project, project_data in rows_by_project.items()
        ]
        project_rankings.sort(key=lambda x: x['mean_iou'], reverse=True)

        for i, proj in enumerate(project_rankings, 1):
            print(f"{i}. {proj['project']}")
            print(f"   Mean IoU: {proj['mean_iou']:.4f} | Mean F1: {proj['mean_f1']:.4f} | Folds: {proj['folds']}")

        # Stored for save_comprehensive_results, which dumps it as JSON.
        self.cv_summary = {
            'total_projects': len(rows_by_project),
            'total_evaluations': len(fold_summary_df),
            'total_images': master_df.shape[0] if not master_df.empty else 0,
            'project_statistics': project_stats,
            'project_rankings': project_rankings,
            'best_models': {
                'best_iou': {
                    'project': best_iou['project'],
                    'fold': as_native(best_iou['fold']),
                    'value': as_native(best_iou['overall_iou'])
                },
                'best_f1': {
                    'project': best_f1['project'],
                    'fold': as_native(best_f1['fold']),
                    'value': as_native(best_f1['overall_f1_score'])
                }
            },
            'evaluation_date': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
    
    def save_comprehensive_results(self, master_df, fold_summary_df):
        """Write the evaluation results to ``self.output_dir`` under one timestamp.

        Creates ``all_per_image_results_<ts>.csv``, ``fold_summary_<ts>.csv`` and
        ``cv_analysis_<ts>.json`` (a dump of ``self.cv_summary``) and then calls
        ``create_summary_plots`` with the same timestamp.

        Args:
            master_df: Per-image results dataframe.
            fold_summary_df: Per-fold summary dataframe.
        """
        run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        out_dir = self.output_dir

        # Per-image results.
        master_path = out_dir / f"all_per_image_results_{run_stamp}.csv"
        master_df.to_csv(master_path, index=False)
        print(f"\n✓ Master results saved: {master_path}")
        print(f"  Shape: {master_df.shape}")

        # Per-fold summary.
        summary_path = out_dir / f"fold_summary_{run_stamp}.csv"
        fold_summary_df.to_csv(summary_path, index=False)
        print(f"✓ Fold summary saved: {summary_path}")

        # Cross-validation analysis; default=str stringifies values json cannot encode.
        cv_path = out_dir / f"cv_analysis_{run_stamp}.json"
        with open(cv_path, 'w') as json_handle:
            json.dump(self.cv_summary, json_handle, indent=2, default=str)
        print(f"✓ CV analysis saved: {cv_path}")

        self.create_summary_plots(master_df, fold_summary_df, run_stamp)

        print(f"\n🎉 All results saved to: {self.output_dir}")
    
    def create_summary_plots(self, master_df, fold_summary_df, timestamp):
        """Render and save the comparison figures for all evaluated projects.

        Up to three PNG files are written to ``self.output_dir``:

        * ``project_fold_comparison_<timestamp>.png`` - grouped bars per fold,
        * ``project_performance_boxplot_<timestamp>.png`` - per-project box plots,
        * ``per_image_distributions_<timestamp>.png`` - per-image histograms.

        The first two need a ``project`` column in ``fold_summary_df``; the third
        needs a non-empty ``master_df`` with a ``project`` column.

        Plotting is best-effort: any exception is printed with its traceback and
        swallowed, so a plotting problem does not propagate to the caller.

        Args:
            master_df: Per-image results dataframe.
            fold_summary_df: Per-fold summary dataframe.
            timestamp: String appended to every output file name.
        """
        try:
            import matplotlib.pyplot as plt

            plt.style.use('default')

            def pretty(metric_name):
                """Turn a column name such as ``overall_iou`` into ``Overall Iou``."""
                return metric_name.replace('_', ' ').title()

            has_fold_projects = 'project' in fold_summary_df.columns
            has_image_projects = not master_df.empty and 'project' in master_df.columns

            # Resolve projects/colors up front so the per-image plot also works
            # when the fold summary carries no 'project' column.
            if has_fold_projects:
                projects = fold_summary_df['project'].unique()
            elif has_image_projects:
                projects = master_df['project'].unique()
            else:
                projects = []
            colors = plt.cm.Set3(np.linspace(0, 1, len(projects)))

            if has_fold_projects:
                metrics_to_plot = ['overall_iou', 'overall_f1_score', 'overall_precision',
                                   'overall_recall', 'overall_accuracy', 'mean_iou']

                # Plot 1: grouped bars, one group per fold and one bar per project.
                fold_ids = sorted(fold_summary_df['fold'].unique())
                fold_slot = {fold_id: slot for slot, fold_id in enumerate(fold_ids)}
                width = 0.15
                group_stride = width * len(projects) + 0.1

                fig, axes = plt.subplots(2, 3, figsize=(20, 12))
                axes = axes.flatten()

                for ax, metric in zip(axes, metrics_to_plot):
                    if metric not in fold_summary_df.columns:
                        continue

                    for j, project in enumerate(projects):
                        project_data = fold_summary_df[fold_summary_df['project'] == project]
                        values = project_data[metric].values

                        # Place each bar by its fold id, not by row order, so a
                        # project that lacks a fold stays aligned with the ticks.
                        bar_centers = [
                            fold_slot[fold_id] * group_stride + j * width
                            for fold_id in project_data['fold'].values
                        ]
                        bars = ax.bar(bar_centers, values, width,
                                      label=project.replace('yolo_', ''),
                                      color=colors[j], alpha=0.8)

                        for bar, value in zip(bars, values):
                            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.005,
                                    f'{value:.3f}', ha='center', va='bottom', fontsize=8, rotation=90)

                    ax.set_xlabel('Fold')
                    ax.set_ylabel(pretty(metric))
                    ax.set_title(f'{pretty(metric)} by Project and Fold')
                    ax.set_ylim(0, 1)
                    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
                    ax.grid(True, alpha=0.3)

                    # One tick per fold, centred under its group of bars.
                    ax.set_xticks([slot * group_stride + width * (len(projects) - 1) / 2
                                   for slot in range(len(fold_ids))])
                    ax.set_xticklabels([f'F{fold_id}' for fold_id in fold_ids])

                fig.tight_layout()
                fig.savefig(self.output_dir / f"project_fold_comparison_{timestamp}.png",
                            dpi=300, bbox_inches='tight')
                plt.close(fig)

                # Plot 2: distribution of each metric across folds, per project.
                fig, axes = plt.subplots(2, 3, figsize=(18, 12))
                axes = axes.flatten()
                box_labels = [project.replace('yolo_', '') for project in projects]

                for ax, metric in zip(axes, metrics_to_plot):
                    if metric not in fold_summary_df.columns:
                        continue

                    data_for_box = [
                        fold_summary_df[fold_summary_df['project'] == project][metric].values
                        for project in projects
                    ]

                    # Tick labels are set separately: boxplot's ``labels`` keyword
                    # is deprecated in recent Matplotlib releases.
                    bp = ax.boxplot(data_for_box, patch_artist=True)
                    ax.set_xticklabels(box_labels)

                    for patch, color in zip(bp['boxes'], colors):
                        patch.set_facecolor(color)
                        patch.set_alpha(0.7)

                    ax.set_ylabel(pretty(metric))
                    ax.set_title(f'{pretty(metric)} Distribution by Project')
                    ax.tick_params(axis='x', rotation=45)
                    ax.grid(True, alpha=0.3)

                fig.tight_layout()
                fig.savefig(self.output_dir / f"project_performance_boxplot_{timestamp}.png",
                            dpi=300, bbox_inches='tight')
                plt.close(fig)

            # Plot 3: per-image metric histograms, overlaid per project.
            if has_image_projects:
                fig, axes = plt.subplots(2, 3, figsize=(18, 12))
                axes = axes.flatten()

                image_metrics = ['iou', 'f1_score', 'precision', 'recall', 'accuracy', 'dice_coefficient']

                for ax, metric in zip(axes, image_metrics):
                    if metric not in master_df.columns:
                        continue

                    for j, project in enumerate(projects):
                        project_data = master_df[master_df['project'] == project]
                        ax.hist(project_data[metric], bins=30, alpha=0.6,
                                label=project.replace('yolo_', ''), color=colors[j])

                    ax.set_xlabel(pretty(metric))
                    ax.set_ylabel('Frequency')
                    ax.set_title(f'{pretty(metric)} Distribution')
                    ax.legend()
                    ax.grid(True, alpha=0.3)

                fig.tight_layout()
                fig.savefig(self.output_dir / f"per_image_distributions_{timestamp}.png",
                            dpi=300, bbox_inches='tight')
                plt.close(fig)

            print(f"✓ Comprehensive plots saved")

        except Exception as e:
            print(f"Warning: Could not create plots: {e}")
            import traceback
            traceback.print_exc()

In [None]:
if EVALUATE_YOLO:
    # =============================================================================
    # RUN EVALUATION
    # =============================================================================
    # Evaluate every discovered project/fold checkpoint on the shared test set and
    # print a short overview of the resulting dataframes.

    evaluator = AutoCVEvaluator(
        training_base_dir=OUTPUT_DIR_YOLO,
        dataset_base_path=DATASET_PROCESSED_PATH,
        output_dir=OUTPUT_EVALUATE_TEST_DIR,
        conf_threshold=CONF_THRESHOLD,
        iou_threshold=IOU_THRESHOLD,
        class_names=CLASS_NAMES,
    )
    # Both values are None when the evaluation could not be carried out.
    master_df, fold_summary_df = evaluator.evaluate_all_folds()

    if master_df is not None:
        print("\n📊 COMPREHENSIVE EVALUATION RESULTS:")
        print(f"   📋 Per-Image DataFrame shape: {master_df.shape}")
        print(f"      Columns: {list(master_df.columns)}")
        print(f"   📈 Per-Fold DataFrame shape: {fold_summary_df.shape}")
        print(f"      Columns: {list(fold_summary_df.columns)}")

        # Number of per-image rows contributed by each project.
        if 'project' in master_df.columns:
            unique_projects = master_df['project'].unique()
            print(f"   🚀 Projects evaluated: {len(unique_projects)}")
            for proj in unique_projects:
                count = len(master_df[master_df['project'] == proj])
                print(f"      - {proj}: {count} evaluations")

        # Only columns that actually exist are shown, so a missing metric does
        # not raise a KeyError here.
        print("\n📋 Sample per-image results:")
        display_cols = ['project', 'fold', 'image_name', 'iou', 'f1_score', 'precision', 'recall']
        available_cols = [col for col in display_cols if col in master_df.columns]
        print(master_df[available_cols].head(10))

        print("\n📊 Per-fold summary:")
        summary_cols = ['project', 'fold', 'total_images', 'overall_iou', 'overall_f1_score', 'mean_iou']
        available_summary_cols = [col for col in summary_cols if col in fold_summary_df.columns]
        print(fold_summary_df[available_summary_cols].round(4))

        # File names match what save_comprehensive_results / create_summary_plots write.
        print("\n💾 Files saved:")
        print("   - all_per_image_results_*.csv (Per-Image DataFrame)")
        print("   - fold_summary_*.csv (Per-Fold DataFrame)")
        print("   - cv_analysis_*.json (Comprehensive analysis)")
        print("   - project_fold_comparison_*.png (Per-fold bar charts)")
        print("   - project_performance_boxplot_*.png (Per-project box plots)")
        print("   - per_image_distributions_*.png (Per-image histograms)")

: 