#Imports and Global Variables

In [1]:
import os
import yaml #type:ignore # For reading/writing YAML files if needed for data.yaml manipulation
from ultralytics import YOLO #type:ignore
from IPython.display import display, Image #type:ignore # For displaying results in Jupyter
import glob # For finding files like best.pt using patterns
import shutil # For file operations like copying
import torch 

In [2]:
print(torch.cuda.is_available())
print(torch.__version__)
import torchvision
print(torchvision.__version__)

True
2.5.1+cu121
0.20.1+cu121


User-Defined Paths and Configurations - Fill In!!!

In [None]:
# --- User-Defined Paths and Configuration ---
# IMPORTANT: User MUST change this path to point to their dataset root directory.
# This directory should contain 'images/', 'labels/', and 'data.yaml' as outputted from colab or Roboflow (the unzipped folder).
DATASET_ROOT_PATH = r"C:\Users\weiss\Downloads\larvae_yolo_dataset\larvae_yolo_dataset" # EXAMPLE, NEEDS CHANGE

# Path to the dataset configuration YAML file.
DATASET_YAML_PATH = os.path.join(DATASET_ROOT_PATH, 'dataset.yaml')

# --- Model Configuration ---
# Choose model size: 'n' (nano), 's' (small), 'm' (medium), 'l' (large), 'x' (extra-large)
MODEL_SIZE = 'l'
# -- Pretrained - use the best current model or base one ---
# BASE_PRETRAINED_MODEL_NAME = r"C:\Users\JellyTracker\Desktop\JellyFishTrackingPC-main\Models\larvae_best.pt" # current best larvae segmentation model
BASE_PRETRAINED_MODEL_NAME = r"C:\Users\weiss\Desktop\JT101\Models\larvae_best.pt" # current best larvae segmentation model

# If using a pre-trained YOLOv8 model, it should be in the format below, just change the size (n, s, m, l, x).:
# this may or may not work, more testing needed
# BASE_PRETRAINED_MODEL_NAME = r"C:\Users\JellyTracker\Desktop\BaseModels\yolov8l-seg.pt" # "C:\Users\weiss\Desktop\BaseModels\yolov8l-seg.pt"
# BASE_PRETRAINED_MODEL_NAME = r"C:\Users\weiss\Desktop\BaseModels\yolov8l-seg.pt"

# --- Training Run Configuration ---
# 'project': Top-level directory for saving runs (e.g., 'runs/s
# egment/YOLOv8_Custom_Seg_Training/')
# 'name': Specific subdirectory for this run (e.g., 'runs/segment/YOLOv8_Custom_Seg_Training/run_yolov8m-seg_custom/')
# These determine where weights (best.pt, last.pt) and logs are saved.[12, 13]
PROJECT_NAME = 'Larvae_fresh_training' 
RUN_NAME = f'train_{MODEL_SIZE}_Larvae_seg' # A descriptive name for the training run

# --- Incremental Training: Path to find previous best.pt ---
# Ultralytics saves runs in: runs/<task>/<project_name>/<run_name>/
# The <run_name> might be appended with a number if it's not unique (e.g., run_name2, run_name3)
# This logic will try to find the latest run matching the RUN_NAME pattern.
# The path to the actual 'best.pt' will be determined programmatically later.

# --- Training Hyperparameters ---
EPOCHS = 150  # Number of training epochs - normal 150
IMAGE_SIZE = 640 # Target image size for training (e.g., 640 pixels) [2, 8, 13]
BATCH_SIZE = 8   # Number of images per batch; adjust based on GPU memory.
                 # Use -1 for auto-batch to utilize ~60% GPU memory.[12, 13]
PATIENCE = 10    # Epochs to wait for improvement before early stopping.[12, 13]
DEVICE = '0'     # Computational device: '0' for GPU 0, 'cpu', or None for auto-selection.[12, 13]
WORKERS = 4      # Number of worker threads for data loading (per RANK if multi-GPU).[12, 13]

# This flag is used by model.train(). If True, it ensures weights are loaded.
# If YOLO() loads a specific.pt, pretrained=True in train() uses those.
# If YOLO() loads a.yaml, pretrained=True in train() loads default weights for that.yaml.
# For our logic of loading either base or custom best.pt, this should be True.
PRETRAINED_ARG_FOR_TRAIN = True # [12, 13]

# --- Output Directories ---
# Base directory where Ultralytics saves training runs
ULTRALYTICS_RUNS_DIR = 'runs'
SEGMENTATION_TASK_DIR = 'segment' # YOLOv8 saves segmentation tasks under 'segment'

print("Configuration parameters set.")
print(f"  Dataset Root Path: {DATASET_ROOT_PATH}")
print(f"  Dataset YAML Path: {DATASET_YAML_PATH}")
print(f"  Base Pretrained Model: {BASE_PRETRAINED_MODEL_NAME}")
print(f"  Project Name: {PROJECT_NAME}")
print(f"  Run Name: {RUN_NAME}")

Configuration parameters set.
  Dataset Root Path: C:\Users\weiss\Downloads\larvae_yolo_dataset\larvae_yolo_dataset
  Dataset YAML Path: C:\Users\weiss\Downloads\larvae_yolo_dataset\larvae_yolo_dataset\dataset.yaml
  Base Pretrained Model: C:\Users\weiss\Desktop\JT101\Models\larvae_best.pt
  Project Name: Larvae_fresh_training
  Run Name: train_l_Larvae_seg


Verifying Dataset Accessibility

In [5]:
print(f"--- Verifying Dataset Accessibility ---")
print(f"Attempting to locate data.yaml at: {DATASET_YAML_PATH}")

if not os.path.exists(DATASET_ROOT_PATH):
    print(f"ERROR: The DATASET_ROOT_PATH does not exist: {DATASET_ROOT_PATH}")
    print(f"Please ensure the path is correct and the dataset directory is available.")
elif not os.path.exists(DATASET_YAML_PATH):
    print(f"ERROR: data.yaml not found at the specified path: {DATASET_YAML_PATH}")
    print(f"Please ensure '{os.path.basename(DATASET_YAML_PATH)}' exists in '{DATASET_ROOT_PATH}'.")
else:
    print(f"Successfully found data.yaml: {DATASET_YAML_PATH}")
    try:
        with open(DATASET_YAML_PATH, 'r') as f:
            data_config = yaml.safe_load(f)
        print("\nContents of data.yaml:")
        print(yaml.dump(data_config, indent=2, sort_keys=False))

        # Verify essential keys in data.yaml
        required_keys = ['path', 'train', 'val', 'names']
        missing_keys = [key for key in required_keys if key not in data_config]

        if missing_keys:
            print(f"\nWARNING: data.yaml is missing the following required keys: {', '.join(missing_keys)}")
        else:
            print("\ndata.yaml contains all required keys (path, train, val, names).")

            # Further check: ensure the 'path' in data.yaml matches DATASET_ROOT_PATH if it's used for relative sub-paths
            # or that paths are resolvable.
            # For simplicity, we assume 'train' and 'val' are relative to data_config['path']
            # and data_config['path'] is either absolute or correctly relative to the execution context.
            
            # Construct absolute paths for train/val images based on data.yaml content
            # This handles cases where data_config['path'] might be relative itself.
            # If data_config['path'] is absolute, os.path.join behaves as expected.
            # If data_config['path'] is relative, it's joined with DATASET_ROOT_PATH's directory.
            
            # Path specified inside data.yaml
            yaml_path_field = data_config.get('path', '')
            if not os.path.isabs(yaml_path_field):
                # If path in YAML is relative, assume it's relative to the YAML file's location (DATASET_ROOT_PATH)
                effective_dataset_path_from_yaml = os.path.abspath(os.path.join(DATASET_ROOT_PATH, yaml_path_field))
            else:
                effective_dataset_path_from_yaml = yaml_path_field

            print(f"\nEffective dataset path from data.yaml ('path' field): {effective_dataset_path_from_yaml}")

            train_images_path = os.path.join(effective_dataset_path_from_yaml, data_config.get('train', ''))
            val_images_path = os.path.join(effective_dataset_path_from_yaml, data_config.get('val', ''))

            print(f"Expected training images directory: {train_images_path}")
            print(f"Expected validation images directory: {val_images_path}")

            if not os.path.isdir(train_images_path):
                print(f"WARNING: Training images directory not found: {train_images_path}")
            else:
                print(f"Training images directory found.")
            
            if not os.path.isdir(val_images_path):
                print(f"WARNING: Validation images directory not found: {val_images_path}")
            else:
                print(f"Validation images directory found.")

    except Exception as e:
        print(f"ERROR: Could not read or parse data.yaml: {e}")
        data_config = None # Ensure data_config is defined for potential later use

--- Verifying Dataset Accessibility ---
Attempting to locate data.yaml at: C:\Users\weiss\Downloads\larvae_yolo_dataset\larvae_yolo_dataset\dataset.yaml
Successfully found data.yaml: C:\Users\weiss\Downloads\larvae_yolo_dataset\larvae_yolo_dataset\dataset.yaml

Contents of data.yaml:
path: C:/Users/weiss/Downloads/larvae_yolo_dataset/larvae_yolo_dataset
train: images
val: images
names:
  0: larvae


data.yaml contains all required keys (path, train, val, names).

Effective dataset path from data.yaml ('path' field): C:/Users/weiss/Downloads/larvae_yolo_dataset/larvae_yolo_dataset
Expected training images directory: C:/Users/weiss/Downloads/larvae_yolo_dataset/larvae_yolo_dataset\images
Expected validation images directory: C:/Users/weiss/Downloads/larvae_yolo_dataset/larvae_yolo_dataset\images
Training images directory found.
Validation images directory found.


Loading the Model(train from scratch or use pre-trained)

In [6]:
print(f"\n--- Determining Model Weights for Training ---")
model_weights_to_load = BASE_PRETRAINED_MODEL_NAME  # Default to the base official pretrained model

# Construct the expected path pattern for previous runs
# runs/segment/PROJECT_NAME/RUN_NAME* (glob pattern to catch RUN_NAME, RUN_NAME2, etc.)
# This is important because if RUN_NAME is reused, Ultralytics appends numbers to avoid overwriting,
# unless 'exist_ok=True' is used in training.
# We want to find the *latest* such run if multiple exist.
potential_run_dirs_pattern = os.path.join(ULTRALYTICS_RUNS_DIR, SEGMENTATION_TASK_DIR, PROJECT_NAME, RUN_NAME + '*')
print(f"Searching for existing run directories matching pattern: {potential_run_dirs_pattern}")

list_of_potential_run_dirs = sorted(glob.glob(potential_run_dirs_pattern))

path_to_existing_best_pt = None
if list_of_potential_run_dirs:
    latest_run_dir = list_of_potential_run_dirs[-1] # Get the alphabetically last one, usually the latest
    print(f"Found potential latest run directory: {latest_run_dir}")
    
    candidate_best_pt_path = os.path.join(latest_run_dir, 'weights', 'best.pt')
    if os.path.exists(candidate_best_pt_path):
        path_to_existing_best_pt = candidate_best_pt_path
        print(f"Found existing 'best.pt' in the latest run: {path_to_existing_best_pt}")
    else:
        print(f"'best.pt' not found in weights directory of: {latest_run_dir}")
        # Optionally, could check for 'last.pt' as a fallback
        # candidate_last_pt_path = os.path.join(latest_run_dir, 'weights', 'last.pt')
        # if os.path.exists(candidate_last_pt_path):
        #     path_to_existing_best_pt = candidate_last_pt_path # Using last.pt as fallback
        #     print(f"Found existing 'last.pt' as fallback: {path_to_existing_best_pt}")
elif os.path.isfile(model_weights_to_load) and model_weights_to_load.endswith(".pt"):
    print(f"Using specified model weights file directly: {model_weights_to_load}")
    path_to_existing_best_pt = model_weights_to_load
else:
    print(f"No existing run directories found matching the pattern for PROJECT_NAME='{PROJECT_NAME}' and RUN_NAME='{RUN_NAME}'.")

if path_to_existing_best_pt:
    model_weights_to_load = path_to_existing_best_pt
    print(f"Decision: Will start training FROM or RESUME using: {model_weights_to_load}")
else:
    print(f"Decision: No suitable existing 'best.pt' (or 'last.pt') found.")
    print(f"           Will start training from base official pretrained model: {BASE_PRETRAINED_MODEL_NAME}")

# PRETRAINED_ARG_FOR_TRAIN should be True if we are loading any weights,
# be it the base model or a custom best.pt/last.pt.
# The YOLO() constructor handles loading the weights specified in model_weights_to_load.
# The model.train(pretrained=True) then uses these loaded weights.


--- Determining Model Weights for Training ---
Searching for existing run directories matching pattern: runs\segment\Larvae_fresh_training\train_l_Larvae_seg*
Using specified model weights file directly: C:\Users\weiss\Desktop\JT101\Models\larvae_best.pt
Decision: Will start training FROM or RESUME using: C:\Users\weiss\Desktop\JT101\Models\larvae_best.pt


Intalize YOLO Model

In [7]:
print(f"\n--- Initializing YOLO Model ---")
print(f"Loading model with weights from: {model_weights_to_load}")

try:
    model = YOLO(model_weights_to_load)
    print(f"Successfully initialized YOLO model.")
    if model_weights_to_load == BASE_PRETRAINED_MODEL_NAME:
        print("Model loaded with official pretrained weights.")
    else:
        print("Model loaded with custom weights from a previous training run (best.pt or last.pt).")
except Exception as e:
    print(f"ERROR: Failed to initialize YOLO model with weights from {model_weights_to_load}.")
    print(f"Error details: {e}")
    model = None # Ensure model is None if loading failed


--- Initializing YOLO Model ---
Loading model with weights from: C:\Users\weiss\Desktop\JT101\Models\larvae_best.pt
Successfully initialized YOLO model.
Model loaded with official pretrained weights.


Train the Model

In [8]:
if model and data_config: # Proceed only if model is loaded and data_config is available
    print(f"\n--- Starting YOLOv8 Segmentation Training ---")
    print(f"  Dataset YAML: {DATASET_YAML_PATH}")
    print(f"  Model being trained: Initialized from {model_weights_to_load}")
    print(f"  Epochs: {EPOCHS}")
    print(f"  Image Size: {IMAGE_SIZE}x{IMAGE_SIZE}")
    print(f"  Batch Size: {BATCH_SIZE if BATCH_SIZE!= -1 else 'Auto (approx. 60% GPU Mem)'}")
    print(f"  Device: {DEVICE if DEVICE is not None else 'Auto (GPU if available, else CPU)'}")
    print(f"  Project Directory: {PROJECT_NAME}")
    print(f"  Run Name: {RUN_NAME}")
    print(f"  Patience for Early Stopping: {PATIENCE}")
    print(f"  Number of Data Loader Workers: {WORKERS}")
    print(f"  Using pretrained weights for training (model.train pretrained={PRETRAINED_ARG_FOR_TRAIN}): {PRETRAINED_ARG_FOR_TRAIN}")
    print(f"  Allow overwrite if run exists (exist_ok=True): True") # Set based on desired behavior

    try:
        results = model.train(
            data=DATASET_YAML_PATH,
            epochs=EPOCHS,
            imgsz=IMAGE_SIZE,
            batch=BATCH_SIZE,
            device=DEVICE,
            project=PROJECT_NAME,
            name=RUN_NAME,
            exist_ok=True,  # If True, overwrites the existing 'RUN_NAME' directory if it exists.
                            # Set to False if each execution should create a new numbered run (e.g., RUN_NAME2).
                            # For the logic of finding and continuing from best.pt of a specific RUN_NAME,
                            # exist_ok=True is generally suitable if you intend to refine that *exact* run.
                            # If RUN_NAME is a generic name and you expect multiple iterations,
                            # the globbing logic handles finding the latest.
            patience=PATIENCE,
            workers=WORKERS,
            pretrained=PRETRAINED_ARG_FOR_TRAIN # This ensures that the weights loaded into the 'model' object
                                               # (either base pretrained or custom best.pt) are used as the
                                               # starting point for this training session. [12, 13]
        )
        print("\n--- Training Complete ---")
        print(f"Training results and artifacts saved in: {os.path.join(ULTRALYTICS_RUNS_DIR, SEGMENTATION_TASK_DIR, PROJECT_NAME, model.trainer.save_dir.name)}")

    except Exception as e:
        print(f"ERROR: An error occurred during model training.")
        print(f"Error details: {e}")
else:
    if not model:
        print("\nSkipping training because the model was not loaded successfully.")
    if not data_config:
        print("\nSkipping training because the dataset configuration (data.yaml) was not loaded successfully.")


--- Starting YOLOv8 Segmentation Training ---
  Dataset YAML: C:\Users\weiss\Downloads\larvae_yolo_dataset\larvae_yolo_dataset\dataset.yaml
  Model being trained: Initialized from C:\Users\weiss\Desktop\JT101\Models\larvae_best.pt
  Epochs: 10
  Image Size: 640x640
  Batch Size: 8
  Device: 0
  Project Directory: Larvae_fresh_training
  Run Name: train_l_Larvae_seg
  Patience for Early Stopping: 10
  Number of Data Loader Workers: 4
  Using pretrained weights for training (model.train pretrained=True): True
  Allow overwrite if run exists (exist_ok=True): True
New https://pypi.org/project/ultralytics/8.3.179 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.137  Python-3.9.21 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 3090, 24575MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=fl

[34m[1mtrain: [0mScanning C:\Users\weiss\Downloads\larvae_yolo_dataset\larvae_yolo_dataset\labels.cache... 299 images, 0 backgrounds, 0 corrupt: 100%|██████████| 299/299 [00:00<?, ?it/s]


[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 98.022.3 MB/s, size: 7.4 KB)


[34m[1mval: [0mScanning C:\Users\weiss\Downloads\larvae_yolo_dataset\larvae_yolo_dataset\labels.cache... 299 images, 0 backgrounds, 0 corrupt: 100%|██████████| 299/299 [00:00<?, ?it/s]


Plotting labels to Larvae_fresh_training\train_l_Larvae_seg\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 86 weight(decay=0.0), 97 weight(decay=0.0005), 96 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mLarvae_fresh_training\train_l_Larvae_seg[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/38 [00:05<?, ?it/s]

ERROR: An error occurred during model training.
Error details: DataLoader worker (pid(s) 21144, 19112, 22388, 24224) exited unexpectedly





Using the Trained Model (Optional / not helpful, use compareModels.py)

In [23]:
print(f"\n--- Loading the Best Trained Model for Inference ---")

# The training process saves the run in a directory that might be RUN_NAME or RUN_NAME<number>.
# We need to find the actual save directory used by the trainer.
# If model.train() completed successfully, model.trainer.save_dir should point to the correct run directory.

final_best_model_path = None
actual_run_save_dir = None

if 'results' in locals() and hasattr(results, 'save_dir'): # 'results' is the output of model.train()
    actual_run_save_dir = results.save_dir # This is the most reliable path if training just completed
    candidate_final_best_pt = os.path.join(actual_run_save_dir, 'weights', 'best.pt')
    if os.path.exists(candidate_final_best_pt):
        final_best_model_path = candidate_final_best_pt
        print(f"Located best model from the recent training session: {final_best_model_path}")
    else:
        print(f"WARNING: 'best.pt' not found in the expected training output directory: {os.path.join(actual_run_save_dir, 'weights')}")
        # Fallback: try to glob again, in case 'results' object is not available or run was from a previous session
        potential_run_dirs_after_train_pattern = os.path.join(ULTRALYTICS_RUNS_DIR, SEGMENTATION_TASK_DIR, PROJECT_NAME, RUN_NAME + '*')
        list_of_potential_run_dirs_after_train = sorted(glob.glob(potential_run_dirs_after_train_pattern))
        if list_of_potential_run_dirs_after_train:
            latest_run_dir_after_train = list_of_potential_run_dirs_after_train[-1]
            candidate_final_best_pt_glob = os.path.join(latest_run_dir_after_train, 'weights', 'best.pt')
            if os.path.exists(candidate_final_best_pt_glob):
                final_best_model_path = candidate_final_best_pt_glob
                print(f"Located best model via globbing: {final_best_model_path}")
            else:
                 print(f"WARNING: 'best.pt' also not found via globbing in {latest_run_dir_after_train}/weights/")
elif model and hasattr(model, 'trainer') and model.trainer and hasattr(model.trainer, 'save_dir'):
    # Alternative if 'results' is not available but 'model' object from training is
    actual_run_save_dir = model.trainer.save_dir
    candidate_final_best_pt = os.path.join(actual_run_save_dir, 'weights', 'best.pt')
    if os.path.exists(candidate_final_best_pt):
        final_best_model_path = candidate_final_best_pt
        print(f"Located best model from model.trainer.save_dir: {final_best_model_path}")


if final_best_model_path:
    print(f"Loading best performing model from: {final_best_model_path}")
    try:
        trained_model = YOLO(final_best_model_path)
        print("Successfully loaded the trained model.")
    except Exception as e:
        print(f"ERROR: Failed to load the trained model from {final_best_model_path}. Error: {e}")
        trained_model = None
else:
    print(f"CRITICAL WARNING: Could not find 'best.pt' from the training run. Please check the path: "
          f"{os.path.join(ULTRALYTICS_RUNS_DIR, SEGMENTATION_TASK_DIR, PROJECT_NAME, RUN_NAME, 'weights', 'best.pt')} "
          f"or similar if RUN_NAME was suffixed.")
    print("Prediction cannot proceed without a loaded model.")
    trained_model = None


--- Loading the Best Trained Model for Inference ---
Located best model from the recent training session: Larvae_additional_training\train_l_Larvae_seg\weights\best.pt
Loading best performing model from: Larvae_additional_training\train_l_Larvae_seg\weights\best.pt
Successfully loaded the trained model.
