## Notes: Second alternative implementation of the object detection model training step (the previous code was discarded due to a mismatch).

In [None]:
# Install required packages
# pip is run through the current interpreter so the packages land in the
# kernel's own environment, and a non-zero exit status is turned into an
# exception. (A `!pip ...` shell escape never raises, so a failed install
# would still fall through to the success message.)
import subprocess
import sys

packages = ['ultralytics', 'torch', 'torchvision', 'torchaudio']
try:
    for package in packages:
        completed = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', package],
            capture_output=True,
            text=True,
        )
        # Echo pip's output so it stays visible in the cell output.
        if completed.stdout:
            print(completed.stdout)
        if completed.returncode != 0:
            if completed.stderr:
                print(completed.stderr)
            raise RuntimeError(
                f"pip install failed for '{package}' "
                f"(exit code {completed.returncode})"
            )
    print("\n! ~ All required packages installed successfully ~ !\n")
except Exception as e:
    print(f"Error installing packages: {str(e)}")
    raise

In [None]:
# Import necessary packages with error handling
try:
    from ultralytics import YOLO
    from matplotlib import pyplot as plt
    from PIL import Image
    from ultralytics.data.utils import check_det_dataset
    from ultralytics.utils.plotting import plot_results
    from pathlib import Path
    import torch
    import yaml
    import os
    import json
    import shutil
    from datetime import datetime
    import stat
    import subprocess
    from ultralytics import settings
    from ultralytics.utils.checks import check_yolo
    #from ultralytics.cfg import get_cfg, set_cfg
    from ultralytics.utils import DEFAULT_CFG, ROOT
    from ultralytics.data.dataset import YOLODataset
    from ultralytics.data.base import BaseDataset
    import torch
    import gc
    print("\n! ~ All required libraries imported successfully ~ !\n")
except ImportError as e:
    print(f"Error importing required packages: {str(e)}")
    raise

In [None]:
# preparing helper functions
def validate_yaml_content(yaml_path):
    """
    Check that a dataset YAML file is complete and points at existing data.

    Three checks are applied, in order:
      1. the keys train, val, test, nc and names are all present;
      2. every non-empty split path exists on disk once all '../' segments
         are stripped and the remainder is joined to the YAML file's folder;
      3. the number of entries in names equals nc.

    Any exception raised while reading or checking the file is reported
    through the return value instead of being propagated.

    Returns tuple of (is_valid, message)
    """
    mandatory = ('train', 'val', 'test', 'nc', 'names')

    try:
        with open(yaml_path, 'r') as handle:
            config = yaml.safe_load(handle)

        # 1. Required keys
        absent = [name for name in mandatory if name not in config]
        if absent:
            return False, f"Missing required keys in YAML: {absent}"

        # 2. Split paths, resolved relative to the folder holding the YAML
        yaml_dir = os.path.dirname(yaml_path)
        for split in ('train', 'val', 'test'):
            if split not in config or not config[split]:
                continue
            # '../' is dropped because resolution starts from the YAML folder
            relative = config[split].replace('../', '')
            candidate = os.path.join(yaml_dir, relative)
            if not os.path.exists(candidate):
                return False, f"Path specified in YAML for {split} does not exist: {candidate}"

        # 3. Class count consistency
        declared = config['nc']
        actual = len(config['names'])
        if actual != declared:
            return False, f"Number of class names ({actual}) doesn't match nc ({declared})"

        return True, "YAML content validation successful"

    except Exception as exc:
        return False, f"Error validating YAML content: {str(exc)}"

def _label_file_is_valid(label_path):
    """Return True when every line of *label_path* is `class x y w h` with valid values."""
    try:
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:
                    return False
                class_id = int(parts[0])
                coords = [float(p) for p in parts[1:]]
                # `0 <= c <= 1` (rather than `c < 0 or c > 1`) also rejects NaN
                if class_id < 0 or not all(0 <= c <= 1 for c in coords):
                    return False
    except (OSError, ValueError):
        # Unreadable file, undecodable text, or a non-numeric field
        return False
    return True


def validate_image_label_pairs(base_path):
    """
    Validate that each image has a corresponding, well-formed label file.

    The train, valid and test subsets are each expected to contain an
    `images` and a `labels` folder under *base_path*. Image files are
    matched by extension (.png/.jpg/.jpeg, case-insensitive) and paired with
    `<stem>.txt` in the sibling `labels` folder.

    Returns tuple of (is_valid, error_message). A missing subset folder stops
    the check immediately; otherwise all missing and malformed labels are
    collected and reported together.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    mismatched_pairs = []
    missing_labels = []

    # Check all three directories: train, valid, test
    for subset in ['train', 'valid', 'test']:
        img_dir = os.path.join(base_path, subset, 'images')
        label_dir = os.path.join(base_path, subset, 'labels')

        if not os.path.exists(img_dir) or not os.path.exists(label_dir):
            return False, f"Directory not found: {img_dir} or {label_dir}"

        for img_file in os.listdir(img_dir):
            # Case-insensitive so that e.g. 'IMG_01.JPG' is not silently skipped
            if not img_file.lower().endswith(image_suffixes):
                continue

            label_file = os.path.splitext(img_file)[0] + '.txt'
            label_path = os.path.join(label_dir, label_file)

            if not os.path.exists(label_path):
                missing_labels.append(f"{subset}/{img_file}")
            elif not _label_file_is_valid(label_path):
                mismatched_pairs.append(f"{subset}/{img_file}")

    if missing_labels or mismatched_pairs:
        error_msg = ""
        if missing_labels:
            error_msg += f"\nMissing label files for: {missing_labels}"
        if mismatched_pairs:
            error_msg += f"\nInvalid label format in: {mismatched_pairs}"
        return False, error_msg

    return True, "All image-label pairs validated successfully"

def get_device_config():
    """
    Pick the training device and a batch size suited to it.

    Without CUDA the result is the CPU with a batch size of 4. With CUDA,
    the batch size is derived from the total memory of GPU 0:
    16 for >= 6 GB, 8 for >= 4 GB, otherwise 4.

    Returns tuple of (device, batch_size, device_info)
    """
    if not torch.cuda.is_available():
        cpu_info = {
            "device_name": "CPU",
            "memory": "N/A",
            "reason": "No GPU detected"
        }
        return "cpu", 4, cpu_info

    total_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    gpu_info = {
        "device_name": torch.cuda.get_device_name(0),
        "gpu_memory": f"{total_gb:.2f} GB",
        "cuda_version": torch.version.cuda
    }

    # (minimum GB, batch size) tiers, checked from largest to smallest
    for minimum_gb, tier_batch in ((6, 16), (4, 8)):
        if total_gb >= minimum_gb:
            return "cuda", tier_batch, gpu_info
    return "cuda", 4, gpu_info

def debug_paths(yaml_path):
    """
    Print how the train/val/test entries of a dataset YAML resolve on disk.

    Each entry is joined to the folder containing the YAML file, made
    absolute, and printed together with whether that path exists.
    """
    with open(yaml_path) as f:
        config = yaml.safe_load(f)

    yaml_dir = os.path.dirname(yaml_path)
    print("\n[DEBUG] Path resolution:")
    for split in ('train', 'val', 'test'):
        raw = config.get(split, '')
        resolved = os.path.abspath(os.path.join(yaml_dir, raw))
        found = os.path.exists(resolved)
        print(f"{split.upper():<6} | Original: {raw:<20} | Absolute: {resolved} | Exists: {found}")


def create_directory(path):
    """
    Creates a directory with full permissions (777) and clears read-only attributes.

    Missing parent directories are created as well, and an already existing
    directory is accepted. On Windows the read-only attribute is cleared
    recursively with `attrib`; on every platform the directory itself is
    then chmod-ed to rwx for user, group and others.

    Raises:
        Exception: whatever the permission step raised, after printing it.
    """
    # Clear the umask so mode=0o777 is not masked for newly created
    # directories. It is read outside the try block so the finally clause
    # always has a value to restore.
    original_umask = os.umask(0)
    try:
        Path(path).mkdir(parents=True, exist_ok=True, mode=0o777)
    finally:
        os.umask(original_umask)  # Restore original umask

    try:
        # Windows: Remove read-only attribute. An argument list (no shell)
        # keeps characters such as '%' or '&' in the path from being
        # interpreted by cmd.exe.
        if os.name == 'nt':
            subprocess.run(
                ['attrib', '-r', os.path.abspath(path), '/s', '/d'],
                check=True,
            )
        # Set permissions for all platforms
        os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    except Exception as e:
        # The directory already exists at this point; only the permission
        # step can have failed.
        print(f"Error setting permissions on directory {path}: {str(e)}")
        raise

def safe_delete(path):
    """
    Force-deletes directories with proper permission handling

    On Windows the read-only attribute is cleared recursively first. While
    the tree is being removed, any entry that cannot be deleted is made
    accessible to its owner and the failed operation is retried once.

    Raises:
        Exception: whatever the attribute reset or the removal raised, after
            printing it.
    """
    def _unlock_and_retry(func, failed_path, _exc_info):
        # Retrying without changing anything would fail the same way, so
        # grant the owner full access before calling the operation again.
        os.chmod(failed_path, stat.S_IRWXU)
        func(failed_path)

    try:
        # `attrib` only exists on Windows; running it elsewhere would make
        # every call fail before anything is deleted.
        if os.name == 'nt':
            subprocess.run(
                ['attrib', '-r', os.path.abspath(path), '/s', '/d'],
                check=True,
            )

        shutil.rmtree(path, onerror=_unlock_and_retry)

    except Exception as e:
        print(f"Error deleting {path}: {str(e)}")
        raise

def check_directory_contents(directory):
    """Print how many entries *directory* holds, or a notice if it does not exist."""
    if os.path.exists(directory):
        print(f"Contents of {directory}:")
        entry_count = len(os.listdir(directory))
        print(f"  - {entry_count} items")
    else:
        print(f"Directory {directory} does not exist!")

def set_permissions_recursive(path, mode):
    """
    Recursively sets permissions on all directories and files in the given path.

    Every directory and file found below *path* is chmod-ed to *mode*; on
    Windows its read-only attribute is cleared as well. The top-level *path*
    itself is not modified. Failures are printed per entry and do not stop
    the walk.

    Args:
        path: Root of the tree to process.
        mode: Permission bits passed to os.chmod.
    """
    for root, dirs, files in os.walk(path):
        # Directories first, then files - the same handling applies to both
        for entry_name in dirs + files:
            entry_path = os.path.join(root, entry_name)
            try:
                os.chmod(entry_path, mode)
                if os.name == 'nt':
                    # Argument list (no shell) so special characters in the
                    # path are passed through literally
                    subprocess.run(['attrib', '-r', entry_path], check=True)
            except Exception as e:
                print(f"Error setting permissions for {entry_path}: {str(e)}")

def _label_path_for_image(image_path):
    """Map an image path to its label path by swapping the nearest `images` folder for `labels`."""
    parts = list(image_path.parts)
    # Walk the directory components from the innermost outwards (the file
    # name itself is excluded) and rename only the first exact match, so
    # that names which merely contain "images" are left untouched.
    for idx in range(len(parts) - 2, -1, -1):
        if parts[idx] == 'images':
            parts[idx] = 'labels'
            break
    return Path(*parts).with_suffix('.txt')


def remove_corrupted_files(folder):
    """
    Delete unreadable images below *folder* together with their label files.

    Every .jpg/.jpeg/.png file (case-insensitive) found recursively is
    opened with PIL and verified. A file that fails is removed, and so is
    the `.txt` file of the same stem in the sibling `labels` folder when it
    exists. If no enclosing folder is named `images`, the `.txt` file next
    to the image is used instead.

    Returns:
        list: Paths (as strings) of the images that were removed.
    """
    from PIL import Image

    image_suffixes = ('.jpg', '.jpeg', '.png')
    corrupt_files = []
    for file in Path(folder).rglob('*.*'):
        if file.suffix.lower() not in image_suffixes:
            continue
        try:
            with Image.open(file) as img:
                img.verify()
        except (IOError, SyntaxError):
            corrupt_files.append(str(file))
            os.remove(file)
            # A plain str.replace('images', 'labels') would also rewrite
            # unrelated parts of the path (e.g. a parent folder called
            # 'vet_images_sliced_split'), so the label would never be found.
            label_file = _label_path_for_image(file)
            if label_file.exists():
                os.remove(label_file)
    return corrupt_files

def verify_labels(label_dir, verbose=False, max_errors=100):
    """
    Verify that label files follow the YOLO format requirements.

    Each line of every `*.txt` file in *label_dir* must hold exactly five
    whitespace-separated fields: an integer class ID followed by x, y, w, h
    as floats in the range 0-1. Only the first problem of a file is
    reported.

    Args:
        label_dir (str): Directory containing label files
        verbose (bool): Whether to show the progress bar
        max_errors (int): Maximum number of errors to collect before stopping

    Returns:
        list: One `"<file name>: <problem>"` string per bad file. When
        *max_errors* is reached a trailing "... and potentially more" entry
        is added. If the list grows beyond 50 entries, a shortened summary
        (a header line, the first 10 entries and "...") is returned instead.
    """
    from pathlib import Path
    from tqdm.notebook import tqdm
    import os

    bad_labels = []
    label_files = list(Path(label_dir).glob('*.txt'))

    # Use tqdm for a progress bar instead of printing status for each file
    progress_bar = tqdm(label_files, desc=f"Validating {os.path.basename(label_dir)}",
                         disable=not verbose)

    for label_file in progress_bar:
        try:
            with open(label_file) as f:
                for line_num, line in enumerate(f, 1):
                    parts = line.strip().split()

                    # Check if we have exactly 5 values per line (class, x, y, w, h)
                    if len(parts) != 5:
                        raise ValueError(f"Line {line_num}: Expected 5 values, got {len(parts)}")

                    # First value should be an integer class ID
                    try:
                        int(parts[0])
                    except ValueError:
                        raise ValueError(f"Line {line_num}: Class ID must be an integer, got '{parts[0]}'")

                    # The remaining values should be floats between 0 and 1
                    for name, value in zip(['x', 'y', 'w', 'h'], parts[1:5]):
                        try:
                            val = float(value)
                        except (ValueError, TypeError) as e:
                            raise ValueError(f"Line {line_num}: {name.upper()} value '{value}' is not a valid float: {str(e)}")
                        # The bounds check sits outside the try block above;
                        # otherwise its ValueError would be caught there and
                        # re-reported as "is not a valid float".
                        if not (0 <= val <= 1):
                            raise ValueError(f"Line {line_num}: {name.upper()} value '{val}' is out of bounds (0-1)")

        except Exception as e:
            bad_labels.append(f"{label_file.name}: {str(e)}")
            # Limit the number of errors we collect to avoid memory issues
            if len(bad_labels) >= max_errors:
                bad_labels.append(f"... and potentially more (stopped after {max_errors} errors)")
                break

    progress_bar.close()

    # Return only a summary if there are too many issues to display
    if len(bad_labels) > 50:
        return [f"Found {len(bad_labels)} label issues. First 10 examples:", *bad_labels[:10], "..."]

    return bad_labels

def validate_label_formats(train_labels_dir, val_labels_dir, verbose=False):
    """
    Run verify_labels on the training and validation label folders.

    Only a per-dataset issue count is printed; the full list of problems is
    written to 'label_validation_errors.log' in the current working
    directory.

    Args:
        train_labels_dir (str): Directory containing training labels
        val_labels_dir (str): Directory containing validation labels
        verbose (bool): Forwarded to verify_labels

    Returns:
        bool: True if no issues were found in either folder, False otherwise
    """
    print("\nValidating label formats...")

    train_problems = verify_labels(train_labels_dir, verbose=verbose)
    val_problems = verify_labels(val_labels_dir, verbose=verbose)

    # Nothing to report: finish early
    if not (train_problems or val_problems):
        print("All label formats validated successfully!")
        return True

    print("Label validation failed:")
    if train_problems:
        print(f"- Training labels: {len(train_problems)} issues")
    if val_problems:
        print(f"- Validation labels: {len(val_problems)} issues")

    # Both section headers are always written, even when a section is empty
    with open("label_validation_errors.log", "w") as log:
        report = (
            "TRAINING LABEL ISSUES:\n"
            + "\n".join(train_problems) + "\n\n"
            + "VALIDATION LABEL ISSUES:\n"
            + "\n".join(val_problems)
        )
        log.write(report)

    print("Detailed error log written to 'label_validation_errors.log'")
    return False

def validate_label_ranges(label_dir, max_errors=100):
    """
    Check the numeric ranges of every label line in a directory.

    For each `*.txt` file in *label_dir*, every line must have five fields
    that all parse as floats. The centre (x, y) must lie in [0, 1] and the
    size (w, h) in (0, 1]. Problems are collected per line; the error cap is
    checked after each file, so the last file processed may push the list
    past *max_errors*.

    Args:
        label_dir (str): Directory containing label files
        max_errors (int): Error count after which scanning stops

    Returns:
        list: Problem descriptions, each prefixed with file name and line number
    """
    from pathlib import Path
    from tqdm.notebook import tqdm
    import os

    problems = []
    candidates = list(Path(label_dir).glob('*.txt'))

    bar = tqdm(candidates, desc=f"Validating ranges ({os.path.basename(label_dir)})")

    for path in bar:
        try:
            with open(path) as handle:
                for number, raw in enumerate(handle, 1):
                    where = f"{path.name}:{number}"
                    fields = raw.strip().split()
                    if len(fields) != 5:
                        problems.append(f"{where}: Bad field count")
                        continue

                    try:
                        # The class field is parsed too, only to confirm it is numeric
                        _cls, x, y, w, h = [float(field) for field in fields]
                    except ValueError as err:
                        problems.append(f"{where}: Value error ({str(err)})")
                        continue
                    except Exception as err:
                        problems.append(f"{where}: {str(err)}")
                        continue

                    centre_ok = 0 <= x <= 1 and 0 <= y <= 1
                    size_ok = 0 < w <= 1 and 0 < h <= 1
                    if not centre_ok:
                        problems.append(f"{where}: Center out of bounds (x={x}, y={y})")
                    if not size_ok:
                        problems.append(f"{where}: Dimensions out of bounds (w={w}, h={h})")
        except Exception as err:
            problems.append(f"{path.name}: File error ({str(err)})")

        # Stop scanning further files once enough problems were collected
        if len(problems) >= max_errors:
            problems.append(f"... and potentially more (stopped after {max_errors} errors)")
            break

    bar.close()

    return problems

def validate_all_label_ranges(train_labels, valid_labels):
    """
    Run validate_label_ranges on the training and validation label folders.

    When problems are found they are written to 'label_range_errors.log' in
    the current working directory, a per-dataset count is printed, and up to
    five sample issues per dataset are shown.

    Args:
        train_labels (str): Path to training labels directory
        valid_labels (str): Path to validation labels directory

    Returns:
        bool: True if no issues were found in either folder, False otherwise
    """
    print("Validating label value ranges...")

    train_problems = validate_label_ranges(train_labels)
    val_problems = validate_label_ranges(valid_labels)

    if not train_problems and not val_problems:
        print("All label value ranges validated successfully!")
        return True

    # Full details go to a log file; only sections with content are written
    with open("label_range_errors.log", "w") as log:
        if train_problems:
            log.write("TRAINING LABEL ISSUES:\n" + "\n".join(train_problems) + "\n\n")
        if val_problems:
            log.write("VALIDATION LABEL ISSUES:\n" + "\n".join(val_problems))

    # Short summary for the notebook output
    print("Label range validation failed:")
    for title, problems in (("Training", train_problems), ("Validation", val_problems)):
        if problems:
            print(f"- {title} labels: {len(problems)} issues")
    print("Detailed error log written to 'label_range_errors.log'")

    # A handful of examples for immediate viewing (at most five per dataset)
    for title, problems in (("training", train_problems), ("validation", val_problems)):
        if problems:
            print(f"\nSample {title} issues:")
            for issue in problems[:5]:
                print(f"  - {issue}")

    return False

def reset_ultralytics_paths(path_var):
    """
    Point the stored Ultralytics configuration at *path_var*.

    Steps, each of which only prints a message if it fails:
      1. In ~/AppData/Roaming/Ultralytics/settings.json (if present), set
         'datasets_dir' to *path_var*, keeping a 'settings.json.backup' copy.
      2. Back up and then delete persistent_cache.json in the same folder
         (if present).
      3. Remove ~/.cache/torch/ultralytics and ~/.cache/torch/hub/ultralytics
         (if present).
      4. Set the ULTRALYTICS_DATASETS_DIR environment variable for this
         process.

    Always returns True.
    """
    target_dir = path_var
    config_dir = Path(os.path.expanduser("~")) / "AppData" / "Roaming" / "Ultralytics"

    # 1. settings.json
    settings_path = config_dir / "settings.json"
    if settings_path.exists():
        print(f"Found settings file at: {settings_path}")
        try:
            with open(settings_path, 'r') as src:
                stored = json.load(src)

            print("Current settings:")
            for name, current in stored.items():
                print(f"  {name}: {current}")

            previous_dir = stored.get('datasets_dir', 'Not set')
            stored['datasets_dir'] = target_dir

            # Keep a copy of the untouched file before overwriting it
            settings_backup = config_dir / "settings.json.backup"
            shutil.copy2(settings_path, settings_backup)
            print(f"Created backup at: {settings_backup}")

            with open(settings_path, 'w') as dst:
                json.dump(stored, dst, indent=4)

            print(f"Updated datasets_dir from '{previous_dir}' to '{target_dir}'")
        except Exception as err:
            print(f"Error updating settings.json: {err}")

    # 2. persistent_cache.json: back it up, then delete it
    persistent_cache = config_dir / "persistent_cache.json"
    if persistent_cache.exists():
        print(f"Found persistent cache at: {persistent_cache}")
        try:
            cache_backup = config_dir / "persistent_cache.json.backup"
            shutil.copy2(persistent_cache, cache_backup)
            print(f"Created backup at: {cache_backup}")

            os.remove(persistent_cache)
            print("Deleted persistent cache to force fresh creation")
        except Exception as err:
            print(f"Error handling persistent_cache.json: {err}")

    # 3. Cache folders under ~/.cache/torch
    torch_cache = Path.home() / ".cache" / "torch"
    for stale_dir in (torch_cache / "ultralytics", torch_cache / "hub" / "ultralytics"):
        if not stale_dir.exists():
            continue
        try:
            shutil.rmtree(stale_dir)
            print(f"Cleared cache directory: {stale_dir}")
        except Exception as err:
            print(f"Could not clear {stale_dir}: {err}")

    # 4. Environment variable for the current session
    os.environ["ULTRALYTICS_DATASETS_DIR"] = target_dir
    print(f"Set ULTRALYTICS_DATASETS_DIR environment variable to: {target_dir}")

    print("\nUltralytics paths have been reset successfully!")
    return True

def clear_ultralytics_cache(path_var):
    """
    Completely removes all Ultralytics cache files and settings.
    This ensures no path conflicts between training runs.

    Steps, each of which only prints a message if it fails:
      1. Delete ~/AppData/Roaming/Ultralytics (falling back to deleting
         settings.json, persistent_cache.json and Arial.ttf individually).
      2. Delete the cache folders under ~/.cache/torch and ~/.config/Ultralytics.
      3. Write a fresh settings.json whose 'datasets_dir' is *path_var*.
      4. Set the ULTRALYTICS_DATASETS_DIR environment variable.
      5. Delete the train/valid/test 'cache' folders under *path_var*.

    Args:
        path_var (str): Dataset root directory to store in the new settings.

    Always returns True.
    """
    import os
    import shutil
    import json
    from pathlib import Path

    # Your correct dataset path
    correct_path = path_var

    # 1. Delete Ultralytics directory in AppData completely
    ultralytics_dir = Path(os.path.expanduser("~")) / "AppData" / "Roaming" / "Ultralytics"
    if ultralytics_dir.exists():
        print(f"Removing Ultralytics settings directory: {ultralytics_dir}")
        try:
            shutil.rmtree(ultralytics_dir)
            print("✓ Removed successfully")
        except Exception as e:
            print(f"! Could not remove directory: {e}")

            # If we can't remove the directory, try to at least remove individual files
            for file in ["settings.json", "persistent_cache.json", "Arial.ttf"]:
                file_path = ultralytics_dir / file
                if file_path.exists():
                    try:
                        os.remove(file_path)
                        print(f"✓ Removed {file}")
                    except OSError:
                        # Only file-system failures are tolerated here; a
                        # bare except would also swallow KeyboardInterrupt.
                        print(f"! Could not remove {file}")

    # 2. Delete all torch cache directories related to Ultralytics
    # NOTE(review): 'hub/checkpoints' is presumably shared with other
    # torch.hub downloads - confirm that wiping it is intended.
    cache_locations = [
        Path.home() / ".cache" / "torch" / "ultralytics",
        Path.home() / ".cache" / "torch" / "hub" / "ultralytics",
        Path.home() / ".cache" / "torch" / "hub" / "checkpoints",
        Path.home() / ".config" / "Ultralytics",
    ]

    for location in cache_locations:
        if location.exists():
            print(f"Removing cache: {location}")
            try:
                shutil.rmtree(location)
                print("✓ Removed successfully")
            except Exception as e:
                print(f"! Could not remove: {e}")

    # 3. Create a fresh settings.json with correct path
    os.makedirs(ultralytics_dir, exist_ok=True)
    settings = {
        "datasets_dir": correct_path,
        "weights_dir": str(Path.home() / ".config" / "Ultralytics" / "weights"),
        "runs_dir": str(Path(correct_path) / "runs"),
        # NOTE(review): the original note says setting this to None disables
        # tracking - confirm against the Ultralytics settings documentation.
        "uuid": "track"
    }

    try:
        with open(ultralytics_dir / "settings.json", "w") as f:
            json.dump(settings, f, indent=4)
        print(f"✓ Created fresh settings with correct path: {correct_path}")
    except Exception as e:
        print(f"! Could not create settings file: {e}")

    # 4. Set environment variables (belt and suspenders approach)
    os.environ["ULTRALYTICS_DATASETS_DIR"] = correct_path

    # 5. Clear project-specific cache directories
    project_caches = [
        Path(correct_path) / "train" / "cache",
        Path(correct_path) / "valid" / "cache",
        Path(correct_path) / "test" / "cache"
    ]

    for cache_dir in project_caches:
        if cache_dir.exists():
            print(f"Removing project cache: {cache_dir}")
            try:
                shutil.rmtree(cache_dir)
                print("✓ Removed successfully")
            except Exception as e:
                print(f"! Could not remove: {e}")

    print("\n✓ Cache clearing completed. Ultralytics will use fresh paths on next run.")
    return True

def clear_memory():
    """Free memory between major operations.

    Runs a garbage-collection pass and, when CUDA is available, empties
    PyTorch's CUDA cache and resets the peak-memory statistics.
    """
    import gc

    gc.collect()

    # Nothing GPU-related to do without CUDA.
    if not torch.cuda.is_available():
        return

    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

# Announce that every helper defined in this cell is now available.
print("\n! ~ All helper functions are ready ~ !\n")


In [None]:
# Main execution block with comprehensive error handling.
# Flow: device setup -> cache reset -> dataset validation -> short sanity-check
# training -> full training -> test-set evaluation.  Every exception is reported
# with print() and swallowed, so the cell always reaches the `finally` clause.
original_umask = None  # previous process umask; restored in `finally` once changed
try:
    # Speed-oriented backend switches: cuDNN autotuner plus TF32 matmul/conv kernels
    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

    # For extreme cases, limit CUDA memory use
    # torch.cuda.set_per_process_memory_fraction(0.7)  # Use only 70% of GPU memory

    # Start from a clean slate: release cached GPU blocks, then collect garbage
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # Clear GPU cache
    gc.collect()  # Force garbage collection

    # Device configuration
    device, batch_size, device_info = get_device_config()
    print("\nDevice Configuration:")
    for key, value in device_info.items():
        print(f"{key}: {value}")
    print(f"Suggested batch size: {batch_size}")

    # Clear the global Ultralytics cache early, before any YOLO operation
    shutil.rmtree(os.path.join(os.path.expanduser('~'), '.cache', 'torch', 'ultralytics'), ignore_errors=True)
    # Reset YOLO checks (also early in the script)
    check_yolo(verbose=False)

    # Define paths - Update this to your dataset path
    # Replace with your actual path ~ default : 'path_to_your_dataset_folder'
    # default : '/Users/gustavszviedris/Desktop/vet_images_sliced_split' ;
    # example 00 : 'E:\\-_EDI_-\\notes\\havetai+vetcyto\\vet_images_sliced_split' ;
    # notes : "example 01" = “attempt-02” (experiment-step-05) [ManualAnnotations] ~ 100%
    # example 01 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_500' ;
    # notes : "example 02" = “attempt-03” (experiment-step-06) [ManualAnnotations] ~ 100%
    # example 02 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640'
    # notes : "example 03" = “attempt-04” (experiment-step-07) [AutomatedAnnotations] ~ 100%
    # example 03 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640'
    # notes : "example 04" =  “attempt-05” (experiment-step-08) [ManualAnnotations] ~ 125%
    # example 04 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_125'
    # notes : example 05 = “attempt-06” (experiment-step-09) [AutomatedAnnotations] ~ 125%
    # example 05 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_125'
    # notes : "example 06" =  “attempt-07” (experiment-step-10) [ManualAnnotations] ~ 150%
    # example 06 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_150'
    # notes : "example 07" =  “attempt-08” (experiment-step-11) [AutomatedAnnotations] ~ 150%
    # example 07 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_automated_labels_T_640_pc_150'
    # notes : "example 08" =  “attempt-09” (experiment-step-12) [ManualAnnotations] ~ 175%
    # example 08 : 'C:\\Users\\praam\\Desktop\\havetai+vetcyto\\04th_task-04_new-work-03\\February15.v1i.yolov8\\splitting_output_manual_labels_T_640_pc_175'
    # notes : ?
    # example 09 : ''
    # notes : synthetic + original + mixed + 640 ;
    # example : 'C:\Users\praam\Desktop\havetai+vetcyto\task-05_dataset\splitting_output_automated_labels_T_640_pc_150_mixed_01'
    base_path = r"C:\Users\praam\Desktop\havetai+vetcyto\task-05_dataset\T_640_real-synthetic-mixed-03_ratio-1.5-units"
    data_yaml_path = os.path.join(base_path, "data.yaml")

    reset_ultralytics_paths(base_path)
    clear_ultralytics_cache(base_path)

    # Call this function before any YOLO operations ~ useful only for automated labels
    #force_correct_paths(base_path)

    # Clear Ultralytics cache directories
    # NOTE(review): this also removes ~/.config/Ultralytics, which
    # clear_ultralytics_cache() may have just repopulated - confirm this is intended.
    cache_dirs = [
        Path.home() / '.cache' / 'torch' / 'ultralytics',
        Path.home() / '.cache' / 'torch' / 'hub',
        Path.home() / '.config' / 'Ultralytics'
    ]
    for d in cache_dirs:
        if d.exists():
            print(f"Clearing cache: {d}")
            shutil.rmtree(d, ignore_errors=True)

    # Point Ultralytics at the dataset root before the model is initialized
    settings.update({'datasets_dir': base_path})
    settings['datasets_dir'] = base_path  # Force update internal settings

    os.chdir(base_path)  # Change working directory to dataset root

    # List the dataset root with os.listdir instead of the shell `dir` command,
    # so the check also works outside IPython and on non-Windows systems.
    print("\n=== Directory Structure Check ===")
    print(f"Checking: {base_path}")
    print("results for 'dir' :")
    for entry in sorted(os.listdir(base_path)):
        print(f"  {entry}")

    # Validate YAML content
    print("\nValidating YAML content...")
    yaml_valid, yaml_message = validate_yaml_content(data_yaml_path)
    if not yaml_valid:
        raise ValueError(yaml_message)
    print(yaml_message)

    # Validate image-label pairs
    print("\nValidating image-label pairs...")
    pairs_valid, pairs_message = validate_image_label_pairs(base_path)
    if not pairs_valid:
        raise ValueError(pairs_message)
    print(pairs_message)

    # Create versioned project directory for results
    project_path = os.path.join(base_path, "results")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    name = f"yolov8s_training_{timestamp}"

    # Make sure the project directory exists
    create_directory(project_path) # use custom function instead of "os.makedirs" to avoid creating read-only folders

    print(f"\nResults will be saved to: {os.path.join(project_path, name)}")

    # Initialize model
    try:
        model_name = 'yolov8s.pt'
        # Delete existing model if present
        model_path = Path(model_name)
        if model_path.exists():
            model_path.unlink()
        # Download fresh model
        model = YOLO(model_name)  # Load pretrained YOLOv8s model
        # Override any stored paths
        if hasattr(model, 'args'):
            if hasattr(model.args, 'data'):
                model.args.data = None  # Reset data path
        print("\nModel initialized successfully")
    except Exception as e:
        # Chain the cause so the original traceback is not lost
        raise RuntimeError(f"Error initializing YOLO model: {str(e)}") from e

    # Troubleshooting aid: show the directory relative paths are resolved against
    print("\n!!! result for 'os.getcwd()' = ", os.getcwd())

    # Call this before model training
    debug_paths(data_yaml_path)

    # Add cache clearing before training
    shutil.rmtree(os.path.join(base_path, "train", "cache"), ignore_errors=True)
    shutil.rmtree(os.path.join(base_path, "valid", "cache"), ignore_errors=True)

    # Check critical directories
    train_images = os.path.join(base_path, "train", "images")
    train_labels = os.path.join(base_path, "train", "labels")
    check_directory_contents(train_images)
    check_directory_contents(train_labels)
    valid_images = os.path.join(base_path, "valid", "images")
    valid_labels = os.path.join(base_path, "valid", "labels")
    check_directory_contents(valid_images)
    check_directory_contents(valid_labels)

    # Validate label file formats (reuses the directories checked above)
    print("\nValidating label formats...")
    if not validate_label_formats(train_labels, valid_labels, verbose=True):
        raise ValueError("Label validation failed. Check the error log for details.")

    # Remove unreadable images before training
    print("\nChecking for corrupt media files...")
    corrupt_train = remove_corrupted_files(train_images)
    corrupt_val = remove_corrupted_files(valid_images)
    print(f"Removed {len(corrupt_train)+len(corrupt_val)} corrupt files")

    # Validate label value ranges before training
    print("Validating label value ranges...")
    if not validate_all_label_ranges(train_labels, valid_labels):
        raise ValueError("Label range validation failed. Check the error log for details.")

    print("ULTRALYTICS_DATASETS_DIR:", os.getenv('ULTRALYTICS_DATASETS_DIR'))
    print("Current working directory:", os.getcwd())
    print("Data YAML path being used:", data_yaml_path)

    # Test model sanity with a short training run
    # """
    print("\nRunning sanity check...")
    tmp_model = YOLO('yolov8s.pt')
    tmp_results = tmp_model.train(
        data=data_yaml_path,
        epochs=3,
        imgsz=512,
        batch=4,
        device=device, # 'cpu',
        workers=4,
        cache=False,
        name='sanity_check', #, #_break='train' # not a valid yolo argument
    )
    #assert tmp_results.box.loss > 0.5, "Sanity check failed - model not learning" # removed "[0]" & replaced "boxes" with "box"
    # mean_results() returns mean precision, recall, AP@0.5, and AP@0.5:0.95.
    # Underscore-prefixed names avoid shadowing the builtin `map` in the notebook.
    _mean_p, mr, _map50, _map50_95 = tmp_results.mean_results()
    # Explicit raise instead of `assert`, so the check survives `python -O`.
    # Lower the recall threshold to 0.05 -> 0.025 if necessary.
    if not mr > 0.05:
        raise RuntimeError("Sanity check failed - model not learning")
    # Drop the throw-away model so clear_memory() below can actually free it
    del tmp_model, tmp_results
    # """

    # Train the model
    print("\nStarting training...")

    original_umask = os.umask(0)  # Set umask to 0 for full permissions (restored in `finally`)

    # Call this between major operations
    clear_memory()

    results = model.train(
        data=data_yaml_path,
        task='detect',  # Add this line to ensure paths are resolved correctly
        project=project_path,
        name=name,
        epochs=200,
        patience=25,
        batch=4,  # manual annotations ~ batch size = "batch_size" (8) ; automated annotations ~ batch size = "4" ;
        nbs=8,    # manual annotations ~ commented out ; automated annotations ~ effective/nominal batch size = "8" ;
        imgsz=640,  # either using 512 as the closest multiple of 32 to 500 or using 640 as the default for yolo ;
        device=device,
        workers=min(8, os.cpu_count() or 1),
        degrees=45,
        flipud=0.5,
        mixup=0.1,
        cache='disk',  # Use disk cache instead of RAM # formerly : cache=True,
        amp=True,
        exist_ok=False,  # Prevent overwriting
        val=True,  # Enable validation during training
    )

    print("\nTraining completed successfully")

    # Ensure all result directories are writable
    training_output_dir = os.path.join(project_path, name)
    print(f"\nSetting permissions for {training_output_dir}...")
    set_permissions_recursive(training_output_dir, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)

    # Plot results
    try:
        # plot_results() looks for results*.csv next to `file`, so it must be
        # given the CSV inside the run directory, not the run directory itself.
        plot_results(file=os.path.join(training_output_dir, "results.csv"))
        print("Training results plotted successfully")
    except Exception as e:
        print(f"Warning: Could not plot results: {str(e)}")

    # Evaluate on test set
    print("\nStarting evaluation on test set...")
    test_results = model.val(
        data=data_yaml_path,
        project=project_path,
        name=f"{name}_test",
        split='test'  # Specifically use test set for final evaluation
    )

    print("\nTest Set Evaluation Results:")
    #print(test_results)
    print("...")

except FileNotFoundError as e:
    print(f"\nError: Required file not found: {str(e)}")
except PermissionError as e:
    print(f"\nError: Permission denied when accessing files: {str(e)}")
except ValueError as e:
    print(f"\nError: Validation failed: {str(e)}")
except RuntimeError as e:
    print(f"\nError: Runtime error occurred: {str(e)}")
except Exception as e:
    print(f"\nUnexpected error occurred: {str(e)}")
finally:
    # Undo the permissive umask so it does not leak into later notebook cells
    if original_umask is not None:
        os.umask(original_umask)
    print("\nScript execution completed")

## Notes: The end.