# Notes: slice each image into 640x640 tiles for the object-detection training step (manual labels)

In [None]:
# "D_01_new-work-03_task-03_SliceDivideCode_640[dot]ipynb"
import os
import json
from PIL import Image
from tqdm import tqdm
import glob
from pathlib import Path
import numpy as np

class ImageAnnotationProcessor:
    """
    Cut annotated images into square slices and write YOLO-style label files.

    Every ``.jpg``/``.png`` image in ``input_dir`` is paired with the JSON file
    of the same base name. The JSON is expected to hold a ``shapes`` list whose
    entries carry ``points`` and (optionally) ``shape_type``. Each full-size
    slice that contains at least one bounding box is saved to ``output_dir`` as
    ``<name>_slice_<x>_<y>.png`` together with a ``.txt`` file holding one
    ``class x_center y_center width height`` line per box, normalized to the
    slice size. All boxes are written with class id 0.
    """

    def __init__(self, input_dir, output_dir, slice_size=640):
        """
        Store the configuration, create the output directory and reset counters.

        Args:
            input_dir (str): Directory containing images and JSON annotations
            output_dir (str): Directory to save processed images and labels
            slice_size (int): Edge length of the square slices in pixels (default: 640)
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.slice_size = slice_size
        os.makedirs(output_dir, exist_ok=True)

        # Running counters reported by print_statistics().
        # The three annotation counters count each shape once per image.
        self.stats = {
            "total_files": 0,
            "processed_files": 0,
            "total_slices": 0,
            "saved_slices": 0,
            "skipped_slices": 0,
            "annotations_processed": 0,
            "failed_annotations": 0,
            "rectangle_annotations": 0,
            "polygon_annotations": 0
        }

    def find_matching_files(self):
        """
        Pair every image in the input directory with its JSON annotation file.

        Returns:
            list: (image_path, json_path) tuples for images that have a JSON
                file with the same base name; images without one are reported
                with a warning and left out.
        """
        image_files = glob.glob(os.path.join(self.input_dir, "*.jpg")) + \
                     glob.glob(os.path.join(self.input_dir, "*.png"))
        self.stats["total_files"] = len(image_files)

        pairs = []
        for image_path in image_files:
            json_path = os.path.join(self.input_dir, f"{Path(image_path).stem}.json")
            if os.path.exists(json_path):
                pairs.append((image_path, json_path))
            else:
                print(f"Warning: No matching JSON found for {image_path}")

        return pairs

    def normalize_points(self, points, shape_type):
        """
        Reduce an annotation to its axis-aligned bounding box.

        Rectangles and polygons are treated alike: the box is the min/max of
        all point coordinates. For rectangles this also puts the two corners
        in (top-left, bottom-right) order regardless of the order in which
        they were stored.

        Args:
            points: Sequence of [x, y] points from the JSON file
            shape_type: Type of shape ('rectangle' or 'polygon')

        Returns:
            list: [[x_min, y_min], [x_max, y_max]] as Python floats, or None
                if the shape type is unsupported or the points are malformed
        """
        try:
            if shape_type == "rectangle":
                counter = "rectangle_annotations"
            elif shape_type == "polygon":
                counter = "polygon_annotations"
            else:
                print(f"Unsupported shape type: {shape_type}")
                return None

            self.stats[counter] += 1

            # min/max per column; raises on empty or non-(N, 2) input,
            # which the handler below turns into None.
            coords = np.asarray(points, dtype=float)
            x_min, y_min = coords.min(axis=0)
            x_max, y_max = coords.max(axis=0)
            return [[float(x_min), float(y_min)], [float(x_max), float(y_max)]]

        except Exception as e:
            print(f"Error normalizing points: {e}, shape_type: {shape_type}")
            return None

    def convert_to_yolov8_format(self, points, slice_bounds, image_bounds):
        """
        Convert a bounding box to YOLO format relative to one slice.

        The box is clipped to the slice; boxes that miss the slice or are
        thinner than 2 pixels after clipping are rejected.

        Args:
            points: Bounding box as [[x_min, y_min], [x_max, y_max]] in image coordinates
            slice_bounds: (x_min, y_min, x_max, y_max) of the current slice
            image_bounds: (width, height) of the slice

        Returns:
            list: [class_id, x_center, y_center, width, height] with the last
                four values normalized by the slice size, or None if the box
                is rejected or the input is malformed
        """
        try:
            x_min, y_min = points[0]
            x_max, y_max = points[1]
            slice_x_min, slice_y_min, slice_x_max, slice_y_max = slice_bounds
            slice_width, slice_height = image_bounds[0], image_bounds[1]

            # Reject boxes that lie completely outside the slice
            if (x_max < slice_x_min or x_min > slice_x_max or
                y_max < slice_y_min or y_min > slice_y_max):
                return None

            # Shift into slice coordinates and clip to the slice area
            new_x_min = max(0, min(x_min - slice_x_min, slice_width))
            new_y_min = max(0, min(y_min - slice_y_min, slice_height))
            new_x_max = max(0, min(x_max - slice_x_min, slice_width))
            new_y_max = max(0, min(y_max - slice_y_min, slice_height))

            box_width = new_x_max - new_x_min
            box_height = new_y_max - new_y_min

            # Skip slivers left over after clipping
            if box_width < 2 or box_height < 2:
                return None

            x_center = new_x_min + box_width / 2
            y_center = new_y_min + box_height / 2

            # Final sanity check on the clipped box
            if (x_center < 0 or x_center > slice_width or
                y_center < 0 or y_center > slice_height or
                box_width <= 0 or box_height <= 0):
                return None

            return [0,  # class_id (assuming single class)
                   x_center / slice_width,
                   y_center / slice_height,
                   box_width / slice_width,
                   box_height / slice_height]

        except Exception as e:
            print(f"Error converting coordinates: {e}")
            self.stats["failed_annotations"] += 1
            return None

    def _collect_boxes(self, shapes):
        """Normalize every shape of one image once and return the usable boxes."""
        boxes = []
        for shape in shapes:
            self.stats["annotations_processed"] += 1
            # Default to rectangle for files that carry no shape_type
            shape_type = shape.get("shape_type", "rectangle")
            normalized = self.normalize_points(shape["points"], shape_type)
            if normalized is not None:
                boxes.append(normalized)
        return boxes

    def _save_slice(self, slice_image, slice_boxes, slice_filename):
        """Write one slice as PNG plus its YOLO label file into the output directory."""
        slice_image.save(os.path.join(self.output_dir, f"{slice_filename}.png"))

        label_output = os.path.join(self.output_dir, f"{slice_filename}.txt")
        with open(label_output, 'w') as f:
            for box in slice_boxes:
                # Cast to native Python types so the text form is plain numbers
                class_id = int(box[0])
                x_center = float(box[1])
                y_center = float(box[2])
                width = float(box[3])
                height = float(box[4])
                f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

    def process_image(self, image_path, json_path):
        """
        Slice a single image and save every full slice that contains boxes.

        Slices that would extend past the right or bottom image edge are
        skipped, as are slices without any box. On success the
        ``processed_files`` counter is incremented; on any error a message is
        printed and 0 is returned.

        Args:
            image_path (str): Path to the image file
            json_path (str): Path to the JSON annotation file

        Returns:
            int: Number of slices saved for this image
        """
        try:
            # The context manager releases the image file even if slicing fails
            with Image.open(image_path) as image:
                image_width, image_height = image.size

                with open(json_path, 'r') as f:
                    data = json.load(f)

                # Shapes do not depend on the slice, so normalize them once
                boxes = self._collect_boxes(data['shapes'])

                # Number of grid cells (including partial ones) for the progress bar
                num_slices_x = (image_width - 1) // self.slice_size + 1
                num_slices_y = (image_height - 1) // self.slice_size + 1
                total_slices = num_slices_x * num_slices_y
                self.stats["total_slices"] += total_slices

                base_name = Path(image_path).stem
                slice_dims = (self.slice_size, self.slice_size)
                processed_count = 0

                with tqdm(total=total_slices, desc=f"Processing {base_name}") as pbar:
                    for x in range(0, image_width, self.slice_size):
                        for y in range(0, image_height, self.slice_size):
                            pbar.update(1)
                            slice_bounds = (x, y, x + self.slice_size, y + self.slice_size)

                            # Skip partial slices at the right/bottom border
                            if slice_bounds[2] > image_width or slice_bounds[3] > image_height:
                                self.stats["skipped_slices"] += 1
                                continue

                            slice_boxes = []
                            for box in boxes:
                                yolo_box = self.convert_to_yolov8_format(box, slice_bounds, slice_dims)
                                if yolo_box:
                                    slice_boxes.append(yolo_box)

                            # Slices without any box are not written
                            if not slice_boxes:
                                self.stats["skipped_slices"] += 1
                                continue

                            self._save_slice(
                                image.crop(slice_bounds),
                                slice_boxes,
                                f"{base_name}_slice_{x}_{y}"
                            )
                            processed_count += 1
                            self.stats["saved_slices"] += 1

            # Reached only when the whole image was handled without an error
            self.stats["processed_files"] += 1
            return processed_count

        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            return 0

    def process_all(self):
        """Process all matching image-annotation pairs in the input directory."""
        pairs = self.find_matching_files()
        if not pairs:
            print("No valid image-annotation pairs found!")
            return

        total_processed = 0
        for image_path, json_path in pairs:
            total_processed += self.process_image(image_path, json_path)

        self.print_statistics()
        print(f"\nProcessing complete! Generated {total_processed} valid slices.")

    def print_statistics(self):
        """Print the counters collected so far."""
        print("\n====== Processing Statistics ======")
        print(f"Total image files found: {self.stats['total_files']}")
        print(f"Files successfully processed: {self.stats['processed_files']}")
        print(f"Total slices created: {self.stats['saved_slices']} / {self.stats['total_slices']}")
        print(f"Slices skipped (no annotations): {self.stats['skipped_slices']}")
        print("Annotation Statistics:")
        print(f"  - Total processed: {self.stats['annotations_processed']}")
        print(f"  - Rectangle annotations: {self.stats['rectangle_annotations']}")
        print(f"  - Polygon annotations: {self.stats['polygon_annotations']}")
        print(f"  - Failed annotations: {self.stats['failed_annotations']}")
        print("==================================")

# Example usage
if __name__ == "__main__":
    # Source folder holding the images and their JSON annotation files.
    # Folders used in earlier runs:
    # example 01 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\HM_aligned_to_SC_renamed_files_manual_labels_T_(new_OM_images)'
    # example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\25_pc_expanded_manual_labels_with_aligned_HM_images_T'
    # example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\50_pc_expanded_manual_labels_with_aligned_HM_images_T'
    # example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\75_pc_expanded_manual_labels_with_aligned_HM_images_T'
    # example 05 : ''
    input_dir = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\75_pc_expanded_manual_labels_with_aligned_HM_images_T"

    # Destination folder for the sliced images and label files.
    # Folders used in earlier runs:
    # example 01 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels'
    # example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_125'
    # example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_150'
    # example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_175'
    # example 05 : ''
    output_dir = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_175"

    # Slice size: either a derivative (512) of the size used originally (500)
    # in the script, or the recommended size for YOLO models (640).
    processor = ImageAnnotationProcessor(input_dir, output_dir, slice_size=640)
    processor.process_all()

# Notes: slice each image into 640x640 tiles for the object-detection training step (automated labels)

In [None]:
import os
import json
from PIL import Image
from tqdm import tqdm
import glob
from pathlib import Path
import numpy as np

class ImageAnnotationProcessor:
    """
    Cut annotated images into square slices and write YOLO-style label files.

    Every ``.jpg``/``.png`` image in ``input_dir`` is paired with the JSON file
    of the same base name. The JSON is expected to hold a ``shapes`` list whose
    entries carry ``points`` and (optionally) ``shape_type``. Each full-size
    slice that contains at least one bounding box is saved to ``output_dir`` as
    ``<name>_slice_<x>_<y>.png`` together with a ``.txt`` file holding one
    ``class x_center y_center width height`` line per box, normalized to the
    slice size. All boxes are written with class id 0.
    """

    def __init__(self, input_dir, output_dir, slice_size=640):
        """
        Store the configuration, create the output directory and reset counters.

        Args:
            input_dir (str): Directory containing images and JSON annotations
            output_dir (str): Directory to save processed images and labels
            slice_size (int): Edge length of the square slices in pixels (default: 640)
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.slice_size = slice_size
        os.makedirs(output_dir, exist_ok=True)

        # Running counters reported by print_statistics().
        # The three annotation counters count each shape once per image.
        self.stats = {
            "total_files": 0,
            "processed_files": 0,
            "total_slices": 0,
            "saved_slices": 0,
            "skipped_slices": 0,
            "annotations_processed": 0,
            "failed_annotations": 0,
            "rectangle_annotations": 0,
            "polygon_annotations": 0
        }

    def find_matching_files(self):
        """
        Pair every image in the input directory with its JSON annotation file.

        Returns:
            list: (image_path, json_path) tuples for images that have a JSON
                file with the same base name; images without one are reported
                with a warning and left out.
        """
        image_files = glob.glob(os.path.join(self.input_dir, "*.jpg")) + \
                     glob.glob(os.path.join(self.input_dir, "*.png"))
        self.stats["total_files"] = len(image_files)

        pairs = []
        for image_path in image_files:
            json_path = os.path.join(self.input_dir, f"{Path(image_path).stem}.json")
            if os.path.exists(json_path):
                pairs.append((image_path, json_path))
            else:
                print(f"Warning: No matching JSON found for {image_path}")

        return pairs

    def normalize_points(self, points, shape_type):
        """
        Reduce an annotation to its axis-aligned bounding box.

        Rectangles and polygons are treated alike: the box is the min/max of
        all point coordinates. For rectangles this also puts the two corners
        in (top-left, bottom-right) order regardless of the order in which
        they were stored.

        Args:
            points: Sequence of [x, y] points from the JSON file
            shape_type: Type of shape ('rectangle' or 'polygon')

        Returns:
            list: [[x_min, y_min], [x_max, y_max]] as Python floats, or None
                if the shape type is unsupported or the points are malformed
        """
        try:
            if shape_type == "rectangle":
                counter = "rectangle_annotations"
            elif shape_type == "polygon":
                counter = "polygon_annotations"
            else:
                print(f"Unsupported shape type: {shape_type}")
                return None

            self.stats[counter] += 1

            # min/max per column; raises on empty or non-(N, 2) input,
            # which the handler below turns into None.
            coords = np.asarray(points, dtype=float)
            x_min, y_min = coords.min(axis=0)
            x_max, y_max = coords.max(axis=0)
            return [[float(x_min), float(y_min)], [float(x_max), float(y_max)]]

        except Exception as e:
            print(f"Error normalizing points: {e}, shape_type: {shape_type}")
            return None

    def convert_to_yolov8_format(self, points, slice_bounds, image_bounds):
        """
        Convert a bounding box to YOLO format relative to one slice.

        The box is clipped to the slice; boxes that miss the slice or are
        thinner than 2 pixels after clipping are rejected.

        Args:
            points: Bounding box as [[x_min, y_min], [x_max, y_max]] in image coordinates
            slice_bounds: (x_min, y_min, x_max, y_max) of the current slice
            image_bounds: (width, height) of the slice

        Returns:
            list: [class_id, x_center, y_center, width, height] with the last
                four values normalized by the slice size, or None if the box
                is rejected or the input is malformed
        """
        try:
            x_min, y_min = points[0]
            x_max, y_max = points[1]
            slice_x_min, slice_y_min, slice_x_max, slice_y_max = slice_bounds
            slice_width, slice_height = image_bounds[0], image_bounds[1]

            # Reject boxes that lie completely outside the slice
            if (x_max < slice_x_min or x_min > slice_x_max or
                y_max < slice_y_min or y_min > slice_y_max):
                return None

            # Shift into slice coordinates and clip to the slice area
            new_x_min = max(0, min(x_min - slice_x_min, slice_width))
            new_y_min = max(0, min(y_min - slice_y_min, slice_height))
            new_x_max = max(0, min(x_max - slice_x_min, slice_width))
            new_y_max = max(0, min(y_max - slice_y_min, slice_height))

            box_width = new_x_max - new_x_min
            box_height = new_y_max - new_y_min

            # Skip slivers left over after clipping
            if box_width < 2 or box_height < 2:
                return None

            x_center = new_x_min + box_width / 2
            y_center = new_y_min + box_height / 2

            # Final sanity check on the clipped box
            if (x_center < 0 or x_center > slice_width or
                y_center < 0 or y_center > slice_height or
                box_width <= 0 or box_height <= 0):
                return None

            return [0,  # class_id (assuming single class)
                   x_center / slice_width,
                   y_center / slice_height,
                   box_width / slice_width,
                   box_height / slice_height]

        except Exception as e:
            print(f"Error converting coordinates: {e}")
            self.stats["failed_annotations"] += 1
            return None

    def _collect_boxes(self, shapes):
        """Normalize every shape of one image once and return the usable boxes."""
        boxes = []
        for shape in shapes:
            self.stats["annotations_processed"] += 1
            # Default to rectangle for files that carry no shape_type
            shape_type = shape.get("shape_type", "rectangle")
            normalized = self.normalize_points(shape["points"], shape_type)
            if normalized is not None:
                boxes.append(normalized)
        return boxes

    def _save_slice(self, slice_image, slice_boxes, slice_filename):
        """Write one slice as PNG plus its YOLO label file into the output directory."""
        slice_image.save(os.path.join(self.output_dir, f"{slice_filename}.png"))

        label_output = os.path.join(self.output_dir, f"{slice_filename}.txt")
        with open(label_output, 'w') as f:
            for box in slice_boxes:
                # Cast to native Python types so the text form is plain numbers
                class_id = int(box[0])
                x_center = float(box[1])
                y_center = float(box[2])
                width = float(box[3])
                height = float(box[4])
                f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

    def process_image(self, image_path, json_path):
        """
        Slice a single image and save every full slice that contains boxes.

        Slices that would extend past the right or bottom image edge are
        skipped, as are slices without any box. On success the
        ``processed_files`` counter is incremented; on any error a message is
        printed and 0 is returned.

        Args:
            image_path (str): Path to the image file
            json_path (str): Path to the JSON annotation file

        Returns:
            int: Number of slices saved for this image
        """
        try:
            # The context manager releases the image file even if slicing fails
            with Image.open(image_path) as image:
                image_width, image_height = image.size

                with open(json_path, 'r') as f:
                    data = json.load(f)

                # Shapes do not depend on the slice, so normalize them once
                boxes = self._collect_boxes(data['shapes'])

                # Number of grid cells (including partial ones) for the progress bar
                num_slices_x = (image_width - 1) // self.slice_size + 1
                num_slices_y = (image_height - 1) // self.slice_size + 1
                total_slices = num_slices_x * num_slices_y
                self.stats["total_slices"] += total_slices

                base_name = Path(image_path).stem
                slice_dims = (self.slice_size, self.slice_size)
                processed_count = 0

                with tqdm(total=total_slices, desc=f"Processing {base_name}") as pbar:
                    for x in range(0, image_width, self.slice_size):
                        for y in range(0, image_height, self.slice_size):
                            pbar.update(1)
                            slice_bounds = (x, y, x + self.slice_size, y + self.slice_size)

                            # Skip partial slices at the right/bottom border
                            if slice_bounds[2] > image_width or slice_bounds[3] > image_height:
                                self.stats["skipped_slices"] += 1
                                continue

                            slice_boxes = []
                            for box in boxes:
                                yolo_box = self.convert_to_yolov8_format(box, slice_bounds, slice_dims)
                                if yolo_box:
                                    slice_boxes.append(yolo_box)

                            # Slices without any box are not written
                            if not slice_boxes:
                                self.stats["skipped_slices"] += 1
                                continue

                            self._save_slice(
                                image.crop(slice_bounds),
                                slice_boxes,
                                f"{base_name}_slice_{x}_{y}"
                            )
                            processed_count += 1
                            self.stats["saved_slices"] += 1

            # Reached only when the whole image was handled without an error
            self.stats["processed_files"] += 1
            return processed_count

        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            return 0

    def process_all(self):
        """Process all matching image-annotation pairs in the input directory."""
        pairs = self.find_matching_files()
        if not pairs:
            print("No valid image-annotation pairs found!")
            return

        total_processed = 0
        for image_path, json_path in pairs:
            total_processed += self.process_image(image_path, json_path)

        self.print_statistics()
        print(f"\nProcessing complete! Generated {total_processed} valid slices.")

    def print_statistics(self):
        """Print the counters collected so far."""
        print("\n====== Processing Statistics ======")
        print(f"Total image files found: {self.stats['total_files']}")
        print(f"Files successfully processed: {self.stats['processed_files']}")
        print(f"Total slices created: {self.stats['saved_slices']} / {self.stats['total_slices']}")
        print(f"Slices skipped (no annotations): {self.stats['skipped_slices']}")
        print("Annotation Statistics:")
        print(f"  - Total processed: {self.stats['annotations_processed']}")
        print(f"  - Rectangle annotations: {self.stats['rectangle_annotations']}")
        print(f"  - Polygon annotations: {self.stats['polygon_annotations']}")
        print(f"  - Failed annotations: {self.stats['failed_annotations']}")
        print("==================================")

# Example usage
if __name__ == "__main__":
    # Source folder holding the images and their JSON annotation files.
    # Folders used in earlier runs:
    # example 01 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\HM_aligned_to_SC_renamed_files_automated_labels_T'
    # example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\25_pc_expanded_automated_labels_with_aligned_HM_images_T'
    # example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\50_pc_expanded_automated_labels_with_aligned_HM_images_T'
    # example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\75_pc_expanded_automated_labels_with_aligned_HM_images_T'
    # example 05 : ''
    input_dir = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\75_pc_expanded_automated_labels_with_aligned_HM_images_T"

    # Destination folder for the sliced images and label files.
    # Folders used in earlier runs:
    # example 01 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_slicing_output_automated_labels_T'
    # example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_125'
    # example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_150'
    # example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_175'
    # example 05 : ''
    output_dir = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_175"

    # Slice size: either a derivative (512) of the size used originally (500)
    # in the script, or the recommended size for YOLO models (640).
    processor = ImageAnnotationProcessor(input_dir, output_dir, slice_size=640)
    processor.process_all()

# Notes: divide the sliced data into train/valid/test sets (a single hold-out split used for the validation step)

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split
from tqdm import tqdm

def create_yolo_directories(output_path):
    """
    Build the folder skeleton for the split dataset under ``output_path``.

    The following directories are created (existing ones are left untouched):
    - train/images/ and train/labels/
    - valid/images/ and valid/labels/
    - test/images/ and test/labels/

    Parameters:
        output_path (str): Base directory where the dataset structure will be created
    """
    subfolders = ('images', 'labels')
    for split_name in ('train', 'valid', 'test'):
        # Images and their label files live in sibling folders of each split
        for subfolder in subfolders:
            os.makedirs(os.path.join(output_path, split_name, subfolder), exist_ok=True)

def get_file_pairs(source_path):
    """
    Find matching pairs of images and their label files.

    An image (``.png``, ``.jpg`` or ``.jpeg``, extension compared
    case-insensitively) is paired with the ``.txt`` file of the same base
    name. Pairs are only returned when the label file exists and is not
    empty; every skipped image is reported on stdout.

    The result is sorted by image file name so that it does not depend on the
    order in which the operating system lists the directory. This keeps any
    seeded shuffle or split applied to the list reproducible.

    Parameters:
        source_path (str): Directory containing the image and label files

    Returns:
        list: Pairs of (image_file, label_file) names that exist in the source directory
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # One directory scan; a set makes each label lookup O(1)
    entries = sorted(os.listdir(source_path))
    label_files = {name for name in entries if name.endswith('.txt')}

    file_pairs = []
    for img_file in entries:
        if not img_file.lower().endswith(image_extensions):
            continue

        label_file = os.path.splitext(img_file)[0] + '.txt'  # YOLO format label file
        if label_file not in label_files:
            print(f"Skipping {img_file}: no corresponding label file found")
            continue

        # An empty label file means the image has no bounding boxes
        if os.path.getsize(os.path.join(source_path, label_file)) > 0:
            file_pairs.append((img_file, label_file))
        else:
            print(f"Skipping {img_file}: label file exists but is empty (no bounding boxes)")

    return file_pairs

def copy_file_pair(source_path, output_path, split, image_file, label_file):
    """
    Copy one image and its label file into the folders of a dataset split.

    The image goes to ``<output_path>/<split>/images`` and the label to
    ``<output_path>/<split>/labels``; both keep their file names. The target
    folders must already exist.

    Parameters:
        source_path (str): Source directory containing original files
        output_path (str): Base directory of the YOLO dataset
        split (str): Dataset split ('train', 'valid', or 'test')
        image_file (str): Name of the image file
        label_file (str): Name of the label file
    """
    split_root = os.path.join(output_path, split)

    # Image first, then its label, each into its own subfolder of the split
    for subfolder, filename in (('images', image_file), ('labels', label_file)):
        shutil.copy(
            os.path.join(source_path, filename),
            os.path.join(split_root, subfolder, filename)
        )

def split_dataset(source_folder, output_folder, train_size=0.7, val_size=0.15, test_size=0.15):
    """
    Split the dataset into train, validation, and test sets while maintaining
    the relationship between images and their label files.

    The function expects that the source_folder contains:
    - Image files (.png, .jpg, .jpeg)
    - YOLO format label files (.txt) with the same names as their corresponding images

    An existing output_folder is deleted and recreated, but only after at
    least one valid image-label pair has been found in source_folder.

    Parameters:
        source_folder (str): Directory containing the image and label files
        output_folder (str): Directory where the split dataset will be created
        train_size (float): Expected proportion of data for training (default: 0.7).
            It is not passed to the splitter: the training share is whatever
            val_size and test_size leave over. A warning is printed when the
            three proportions do not sum to 1.
        val_size (float): Proportion of data for validation (default: 0.15)
        test_size (float): Proportion of data for testing (default: 0.15)

    Raises:
        ValueError: If output_folder is source_folder itself or one of its
            parent directories (replacing it would delete the source data).
    """
    # The output directory gets wiped below, so it must never be (or contain) the source
    source_abs = os.path.normcase(os.path.abspath(source_folder))
    output_abs = os.path.normcase(os.path.abspath(output_folder))
    if source_abs == output_abs or source_abs.startswith(output_abs.rstrip(os.sep) + os.sep):
        raise ValueError(
            f"Refusing to replace output directory '{output_folder}': "
            f"it is or contains the source directory '{source_folder}'."
        )

    print(f"Processing source directory: {source_folder}")

    # Only val_size and test_size drive the split; flag ratios that do not add up
    held_out = val_size + test_size
    if abs(train_size + held_out - 1.0) > 1e-9:
        print(
            f"Warning: train_size ({train_size}), val_size ({val_size}) and test_size ({test_size}) "
            f"do not sum to 1; train_size is ignored and the training share will be {1 - held_out:.3f}."
        )

    # Get all valid image-label pairs before touching the output directory
    file_pairs = get_file_pairs(source_folder)
    total_pairs = len(file_pairs)
    print(f"Found {total_pairs} valid image-label pairs with bounding boxes")

    if not file_pairs:
        print("No valid image-label pairs found. Exiting.")
        return

    # Create fresh YOLO directory structure
    if os.path.exists(output_folder):
        print(f"The output directory '{output_folder}' already exists. It will be replaced.")
        shutil.rmtree(output_folder)
    print(f"Creating output directory: '{output_folder}'")
    create_yolo_directories(output_folder)

    # Perform the train-val-test split while keeping pairs together:
    # first carve off the held-out part, then divide it into validation and test
    train_pairs, temp_pairs = train_test_split(
        file_pairs,
        test_size=held_out,
        random_state=42
    )

    val_pairs, test_pairs = train_test_split(
        temp_pairs,
        test_size=test_size / held_out,
        random_state=42
    )

    # Process each split
    splits = {
        'train': train_pairs,
        'valid': val_pairs,
        'test': test_pairs
    }

    # Copy files to their respective directories with progress tracking.
    # A failing pair is reported and skipped so one bad file does not stop the run.
    failed_pairs = 0
    for split, pairs in splits.items():
        print(f"\nProcessing {split} split ({len(pairs)} pairs)")
        for img_file, label_file in tqdm(pairs, desc=f"Copying {split} files"):
            try:
                copy_file_pair(source_folder, output_folder, split, img_file, label_file)
            except Exception as e:
                failed_pairs += 1
                print(f"Error processing files {img_file}, {label_file}: {str(e)}")

    # Print final statistics (planned split sizes)
    print("\nDataset split complete!")
    print(f"Train set: {len(train_pairs)} pairs ({len(train_pairs)/total_pairs*100:.1f}%)")
    print(f"Validation set: {len(val_pairs)} pairs ({len(val_pairs)/total_pairs*100:.1f}%)")
    print(f"Test set: {len(test_pairs)} pairs ({len(test_pairs)/total_pairs*100:.1f}%)")
    if failed_pairs:
        print(f"Warning: {failed_pairs} pair(s) could not be copied; the counts above include them.")

# Example usage
#
# Source folder (sliced images + YOLO label files). Previously used locations:
# default    : '/Users/gustavszviedris/Desktop/vet_images_sliced_copy/Training'
# example 01 : 'E:\\-_EDI_-\\notes\\havetai+vetcyto\\vet_images_sliced\\Training'
# example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels'
# example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels'
# example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_125'
# example 05 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_125'
# example 06 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_150'
# example 07 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_150'
# example 08 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_manual_labels_pc_175'
# example 09 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_175'
# example 10 : ''
source_folder = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\640_T_slicing_output_automated_labels_pc_175'

# Output folder (train/valid/test tree; replaced if it already exists). Previously used locations:
# default    : '/Users/gustavszviedris/Desktop/vet_images_sliced_split'
# example 01 : 'E:\\-_EDI_-\\notes\\havetai+vetcyto\\vet_images_sliced_split'
# example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640'
# example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640'
# example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_125'
# example 05 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_125'
# example 06 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_150'
# example 07 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_150'
# example 08 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_175'
# example 09 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_175'
# example 10 : ''
output_folder = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_175'

# Run the split with the default 70/15/15 proportions
split_dataset(source_folder=source_folder, output_folder=output_folder)

# notes: rewriting permissions for files (such as undesirable results)

In [None]:
# o-c

import os

def set_permissions_recursive(path, dir_mode=0o777, file_mode=0o666):
    """
    Apply one permission mode to every directory and another to every file
    below (and including) the folder `path`.

    `dir_mode` (default 777: read, write, execute) is used for `path` itself
    and all nested directories; `file_mode` (default 666: read and write) is
    used for all files.

    Note: directories need the execute bit, otherwise their contents cannot
    be accessed.

    Raises:
        NotADirectoryError: if `path` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise NotADirectoryError(f"'{path}' is not a valid directory.")

    # The top-level folder is handled explicitly; os.walk only lists what is inside it
    os.chmod(path, dir_mode)

    for current_dir, subdir_names, file_names in os.walk(path):
        # Directories of this level first, then its files
        entries = [(name, dir_mode) for name in subdir_names]
        entries += [(name, file_mode) for name in file_names]
        for entry_name, mode in entries:
            os.chmod(os.path.join(current_dir, entry_name), mode)

# --- Example Usage ---

# Folder whose permissions should be opened up (directories 777, files 666):
results_folder = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_500\\results"  # <-- update this path accordingly ~ default : '/path/to/your/results_folder'

# Any failure is reported on stdout instead of stopping the notebook cell
try:
    set_permissions_recursive(results_folder)
    print(f"Permissions updated for folder: {results_folder}")
except Exception as error:
    print(f"Error: {error}")

# a-c

import os
import subprocess
import stat

def set_permissions(path):
    """
    Recursively make a directory and its contents readable/writable by everyone.

    `path` itself and every directory and file below it are given mode 777
    (rwx for all) through os.chmod, on every platform. On Windows os.chmod can
    only change the read-only flag, so there this amounts to clearing the
    read-only attribute of each entry; no external command or shell is used.

    Parameters:
        path (str): Directory (or file) whose permissions should be opened up

    Raises:
        Exception: whatever os.chmod raised; the error is printed first and
            then re-raised unchanged.
    """
    full_access = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    try:
        # Open the root first: os.walk silently skips a directory it cannot list,
        # which would leave everything below an unreadable root untouched
        os.chmod(path, full_access)

        # Top-down walk: each directory is opened up before os.walk descends into it
        for root, dirs, files in os.walk(path):
            for name in dirs + files:
                os.chmod(os.path.join(root, name), full_access)

        print(f"Successfully set permissions for: {path}")
    except Exception as e:
        print(f"Error setting permissions: {str(e)}")
        raise

# Example usage
# Windows paths look like "C:\Users\user\project\results", Linux/macOS paths like "/home/user/project/results".
# The backslashes below are escaped, so the literal must NOT carry an r-prefix
# (a raw string would keep every doubled backslash in the path).
results_path = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_500\\results"
set_permissions(results_path)

# notes: cleaning up garbage files (such as discarded results)

In [None]:
import shutil

# Change into the data set folder and list its contents (IPython shell escape; `dir` is the
# Windows listing command), to spot read-only folders that were created by accident.
! echo "# test 01" && cd "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8" && dir
# Disabled on purpose: uncomment a line to delete the corresponding discarded output folder.
#shutil.rmtree("C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640\\results")
#shutil.rmtree("C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640\\runs")


# notes: counting images & instances in data sets

In [None]:
import os
import numpy as np

# provide address
# 'original-00' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\02nd_task-03\\October15.v1i.yolov8\\data.yaml'
# 'attempt-00' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\google_colab-splitting_output_manual_labels-20250306T170409Z-001\\splitting_output_manual_labels_T_500\\data.yaml'
# 'attempt-01' = [not used for any step]
# 'attempt-02' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_500\\data.yaml'
# 'attempt-03' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_640\\data.yaml'
# 'attempt-04' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_automated_labels_T_640\\data.yaml'
# 'attempt-05' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_640_pc_125\\data.yaml'
# 'attempt-06' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_automated_labels_T_640_pc_125\\data.yaml'
# 'attempt-07' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_150\\data.yaml'
# 'attempt-08' = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_150\\data.yaml'
# 'attempt-09' = ?
# 'attempt-10' = ?
data_yaml_path = "C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_150\\data.yaml"
# Define paths (adjust as needed) ~ train or valid or test ~ 'train/labels' vs 'valid/labels' vs 'test/labels'
# The labels folder is resolved relative to the folder that holds data.yaml
labels_path = os.path.join(os.path.dirname(data_yaml_path), 'train/labels')

# Get all txt files (labels); the image count is taken to be the number of label files
label_files = [f for f in os.listdir(labels_path) if f.endswith('.txt')]
image_count = len(label_files)

# Count object instances: one per non-blank line, so trailing or stray empty
# lines in a label file are not counted as objects
instance_count = 0
for label_file in label_files:
    file_path = os.path.join(labels_path, label_file)
    with open(file_path, 'r') as f:
        instance_count += sum(1 for line in f if line.strip())

print(f"Training/Validating/Testing set: {image_count} images with {instance_count} object instances...")

#  notes: reviewing model details for comparison

In [None]:
from ultralytics import YOLO
# "original-00" = "task-03" = "experiment-step-02" [ManualAnnotations] ~ 100%
# model_path = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\02nd_task-03\\October15.v1i.yolov8\\results\\200_epochs-\\weights\\'
# “attempt-00” = “experiment-step-04” [ManualAnnotations] ~ 100% (models missing! deleted to make space? transferred to external memory drive?)
# model_path = '' 
# “attempt-02” = “experiment-step-05” [ManualAnnotations] ~ 100%
# model_path = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_500\\results\\yolov8s_training_20250224_005615\\weights\\'
# "attempt-03" = "experiment-step-06" [ManualAnnotations] ~ 100%
# model_path = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_640\\results\\yolov8s_training_20250226_013840\\weights\\'
# "attempt-04" = "experiment-step-07" [AutomatedAnnotations] ~ 100%
# model_path = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_automated_labels_T_640\\results\\yolov8s_training_20250308_124715\\weights\\'
# “attempt-05” = “experiment-step-08” [ManualAnnotations] ~ 125%
# model_path = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_640_pc_125\\results\\yolov8s_training_20250308_214357\\weights\\'
# “attempt-06” = “experiment-step-09” [AutomatedAnnotations] ~ 125%
# model_path = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_125\\results\\yolov8s_training_20250311_013323\\weights\\'
# "attempt-07" = "experiment-step-10" [ManualAnnotations] ~ 150%
# model_path = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_150\\results\\yolov8s_training_20250311_172821\\weights\\'
# "attempt-08" = "experiment-step-11" [AutomatedAnnotations] ~ 150%
# model_path = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_150\\results\\yolov8s_training_20250311_221910\\weights\\'
# "attempt-09" = ?
# 'attempt-10' = ?
def _review_model(label, weights_path, data_yaml, split=''):
    """
    Load one checkpoint, print its details, validate it and print the box metrics.

    Parameters:
        label (str): Word used in the progress message ('last' or 'best')
        weights_path (str): Path to the .pt checkpoint to load
        data_yaml (str): Path to the data.yaml handed to the validation run
        split (str): Value forwarded as the `split` argument of `model.val`

    Returns:
        tuple: (loaded model, validation results)
    """
    print(f"\nLoading the {label} resulting model...")
    model = YOLO(weights_path)

    # Access model information
    print("\nmodel's info:\n", model.info())  # Basic model information
    print("\nmodel's names:\n", model.names)   # Class names
    #print("\nmodel's ckpt:\n", model.ckpt)    # Checkpoint information (uses up too much space)
    # If trained with validation, metrics are stored in model.metrics
    if hasattr(model, 'metrics'):
        print("\nSaved metrics:", model.metrics)

    # Run validation and print the box metrics of this checkpoint
    results = model.val(data=data_yaml, split=split)
    print(f"Box Precision: {results.box.p}")
    print(f"Box Recall: {results.box.r}")
    print(f"Box MAP50: {results.box.map50}")
    print(f"Box MAP50-95: {results.box.map}")
    return model, results


# Weights folder of the training run under review (pick one of the model_path values listed above)
# default = '{project_path}/{name}/weights/last.pt' and '{project_path}/{name}/weights/best.pt'
review_weights_dir = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_150\\results\\yolov8s_training_20250311_221910\\weights\\'

# default = 'path/to/your/validation/data.yaml'
# "original-00" = "task-03" = "experiment-step-02" [ManualAnnotations] ~ 100%
# data yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\02nd_task-03\\October15.v1i.yolov8\\data.yaml'
# "attempt-00" = "experiment-step-04" [ManualAnnotations] ~ 100% (models missing! deleted to make space? transferred to external memory drive?)
# data yaml = ''
# "attempt-02" = "experiment-step-05" [ManualAnnotations] ~ 100%
# data yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_500\\data.yaml'
# "attempt-03" = "experiment-step-06" [ManualAnnotations] ~ 100%
# data yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_640\\data.yaml'
# "attempt-04" = "experiment-step-07" [AutomatedAnnotations] ~ 100%
# data yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_automated_labels_T_640\\data.yaml'
# "attempt-05" = "experiment-step-08" [ManualAnnotations] ~ 125%
# data yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\_legacy_\\splitting_output_manual_labels_T_640_pc_125\\data.yaml'
# "attempt-06" = "experiment-step-09" [AutomatedAnnotations] ~ 125%
# data yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_125\\data.yaml'
# "attempt-07" = "experiment-step-10" [ManualAnnotations] ~ 150%
# data yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_150\\data.yaml'
# "attempt-08" = "experiment-step-11" [AutomatedAnnotations] ~ 150%
# data yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_150\\data.yaml'
# "attempt-09" = ?
# 'attempt-10' = ?
review_data_yaml = 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_150\\data.yaml'

# Split forwarded to model.val: train or val or test
# NOTE(review): left empty here; presumably meant to be filled in with one of the names above -- confirm
review_split = ''

# Review the last checkpoint of the run
model_path_last = review_weights_dir + 'last.pt'
model_last, test_results = _review_model('last', model_path_last, review_data_yaml, split=review_split)

# Review the best checkpoint of the same run (test_results now refers to this one)
model_path_best = review_weights_dir + 'best.pt'
model_best, test_results = _review_model('best', model_path_best, review_data_yaml, split=review_split)

# Notes: The End.