In [None]:
from ultralytics import YOLO

# Start from the pre-trained YOLOv8x (extra-large) detection weights.
model = YOLO("yolov8x.pt")

# Every training setting lives in one mapping, grouped by concern, so the
# configuration can be inspected or tweaked without touching the call itself.
train_settings = {
    # --- dataset and schedule ---
    "data": "filtered_coco/dataset.yaml",  # dataset config file
    "epochs": 100,                         # upper bound on training epochs
    "patience": 10,                        # early stopping: epochs without improvement
    # --- data loading and hardware ---
    "batch": 16,                           # images per batch
    "workers": 28,                         # DataLoader workers (tune to available CPU cores)
    "imgsz": 640,                          # training image size
    "device": 0,                           # first (single) GPU
    # --- optimization ---
    "optimizer": "AdamW",
    "lr0": 0.001,                          # initial learning rate
    "weight_decay": 0.0005,                # regularization strength
    # --- checkpoints, output location and logging ---
    "save": True,                          # write checkpoints
    "save_period": 10,                     # additionally checkpoint every 10 epochs
    "project": "runs/train",               # base output directory
    "name": "exp",                         # run name inside the project directory
    "exist_ok": True,                      # reuse the run directory if it already exists
    "verbose": True,                       # detailed logging
}

# Launch training; kept as the last expression so the notebook shows the result.
model.train(**train_settings)

Ultralytics 8.3.98 🚀 Python-3.11.11 torch-2.3.1.post300 CUDA:0 (Tesla T4, 14918MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8x.pt, data=filtered_coco/dataset.yaml, epochs=100, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=10, cache=False, device=0, workers=28, project=runs/train, name=exp, exist_ok=True, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf

[34m[1mtrain: [0mScanning /home/sagemaker-user/src/filtered_coco/labels/train... 14187 images, 0 backgrounds, 0 corrupt: 100%|██████████| 14187/14187 [00:08<00:00, 1606.02it/s]


[34m[1mtrain: [0mNew cache created: /home/sagemaker-user/src/filtered_coco/labels/train.cache


[34m[1mval: [0mScanning /home/sagemaker-user/src/filtered_coco/labels/val... 1577 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1577/1577 [00:01<00:00, 1108.08it/s]


[34m[1mval: [0mNew cache created: /home/sagemaker-user/src/filtered_coco/labels/val.cache
Plotting labels to runs/train/exp/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 97 weight(decay=0.0), 104 weight(decay=0.0005), 103 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 28 dataloader workers
Logging results to [1mruns/train/exp[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      12.9G      1.276      1.996      1.434        194        640: 100%|██████████| 887/887 [16:54<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.43it/s]


                   all       1577      12086      0.385      0.192      0.161     0.0939

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100      12.7G      1.426       2.18      1.565        127        640: 100%|██████████| 887/887 [16:42<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.44it/s]


                   all       1577      12086      0.348       0.25      0.214      0.128

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/100      12.7G      1.369      2.028      1.524        129        640: 100%|██████████| 887/887 [16:38<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.44it/s]


                   all       1577      12086      0.401      0.273      0.258      0.162

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/100      12.6G      1.297      1.873      1.471        182        640: 100%|██████████| 887/887 [16:37<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086      0.415      0.319      0.297      0.189

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/100      13.1G      1.248      1.733      1.434        133        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086      0.491      0.332      0.333      0.218

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/100      12.4G      1.208      1.652      1.406        164        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.483      0.356      0.353      0.237

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      7/100      12.4G      1.177      1.582      1.381        120        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.482      0.367      0.367      0.244

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      8/100      12.6G      1.155      1.512      1.364        198        640: 100%|██████████| 887/887 [16:37<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.499      0.398      0.402      0.271

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      9/100        13G      1.134      1.468      1.349        170        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.537        0.4      0.422      0.285

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     10/100      12.4G      1.113       1.42      1.333        142        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.47it/s]


                   all       1577      12086      0.516      0.417      0.423      0.288

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     11/100      12.4G      1.102      1.387      1.324        210        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.548      0.405       0.43      0.295

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     12/100      12.3G      1.089      1.364      1.316        179        640: 100%|██████████| 887/887 [16:37<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.47it/s]


                   all       1577      12086      0.566      0.425       0.45      0.311

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     13/100      12.7G      1.075      1.329      1.305        136        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.519      0.432      0.446      0.312

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     14/100      12.8G      1.061      1.298      1.295        182        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:33<00:00,  1.47it/s]


                   all       1577      12086      0.546      0.441      0.464      0.327

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     15/100      12.8G      1.055      1.272      1.289        215        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:33<00:00,  1.47it/s]


                   all       1577      12086      0.535      0.441      0.466      0.324

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     16/100      13.1G      1.044      1.249      1.282         81        640: 100%|██████████| 887/887 [16:36<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.569      0.449      0.469      0.327

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     17/100      12.4G      1.031      1.221      1.271        129        640: 100%|██████████| 887/887 [16:38<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.47it/s]


                   all       1577      12086      0.564      0.461      0.474      0.331

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     18/100      12.4G      1.023      1.204      1.269        165        640: 100%|██████████| 887/887 [16:37<00:00,  1.12s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.47it/s]


                   all       1577      12086      0.559      0.471      0.492      0.345

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     19/100      12.4G      1.015      1.184       1.26        128        640: 100%|██████████| 887/887 [16:39<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.584      0.463      0.492      0.345

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     20/100      13.1G      1.008      1.167      1.257        119        640: 100%|██████████| 887/887 [16:42<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086      0.568      0.471      0.498      0.352

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     21/100      12.2G      1.002      1.145      1.248        144        640: 100%|██████████| 887/887 [16:43<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.584      0.467      0.503      0.354

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     22/100      12.2G     0.9919      1.117      1.239        121        640: 100%|██████████| 887/887 [16:44<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.563      0.475      0.498      0.353

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     23/100      12.2G     0.9834      1.107      1.235        138        640: 100%|██████████| 887/887 [16:45<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.553      0.492      0.504      0.355

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     24/100      12.2G     0.9798       1.09      1.231        203        640: 100%|██████████| 887/887 [16:45<00:00,  1.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.558      0.491      0.505      0.357

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     25/100      12.2G     0.9651      1.062       1.22        167        640: 100%|██████████| 887/887 [16:48<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.585      0.483      0.509      0.359

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     26/100      12.2G     0.9613       1.05      1.213        155        640: 100%|██████████| 887/887 [16:51<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.589      0.506       0.52      0.366

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     27/100      12.2G     0.9554       1.04       1.21        155        640: 100%|██████████| 887/887 [16:51<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.602      0.497      0.523      0.367

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     28/100      12.2G      0.951      1.021      1.207        183        640: 100%|██████████| 887/887 [16:53<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086      0.585      0.505      0.526      0.378

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     29/100      12.2G      0.939          1      1.201        115        640: 100%|██████████| 887/887 [16:54<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.595      0.502      0.526      0.377

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     30/100      12.2G     0.9347     0.9909      1.198        147        640: 100%|██████████| 887/887 [16:54<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.46it/s]


                   all       1577      12086      0.607      0.507       0.53      0.376

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     31/100      12.2G     0.9243     0.9661       1.19        246        640:  99%|█████████▉| 876/887 [16:41<00:12,  1.15s/it]

In [None]:
from ultralytics import YOLO

# Reload the periodic checkpoint written by the interrupted run so that
# training can continue from it instead of starting over.
model = YOLO("runs/train/exp/weights/epoch30.pt")

# Resume training in the same run directory (project/name below).
# The recorded output of this cell reports
# "Resuming training runs/train/exp/weights/epoch30.pt from epoch 32 to 100 total epochs".
#
# NOTE(review): per the Ultralytics docs, resume=True restores the training
# configuration stored in the checkpoint, so the hyperparameters repeated here
# (optimizer, lr0, weight_decay, ...) are likely informational only — confirm
# before relying on changing any of them during a resume.
#
# `save_dir` and `save_conf` are intentionally not passed: the output location
# is controlled by project/name, `save_dir` is not a documented train setting,
# and `save_conf` only affects prediction/validation text output.
results = model.train(
    data="filtered_coco/dataset.yaml",  # Same dataset config as the original run
    epochs=100,                         # Total epoch budget of the run (not additional epochs)
    patience=10,                        # Early stopping after 10 epochs without improvement
    batch=16,                           # Batch size
    workers=28,                         # DataLoader workers
    imgsz=640,                          # Image size (same as the original run)
    device=0,                           # GPU device
    optimizer="AdamW",                  # Same optimizer as the original run
    lr0=0.001,                          # Initial learning rate of the original run
    weight_decay=0.0005,                # Regularization
    save=True,                          # Keep saving checkpoints
    save_period=10,                     # Save every 10 epochs
    project="runs/train",               # Base output directory
    name="exp",                         # Experiment name (same run as before)
    exist_ok=True,                      # Reuse the existing run directory
    verbose=True,                       # Detailed logging
    resume=True,                        # Continue from the loaded checkpoint
)

# Persist the weights held by `model` once training returns.
# NOTE(review): confirm whether these are the best or the last-epoch weights.
model.save("runs/train/exp/final_model.pt")

# Optional: export the trained model to ONNX at the training image size.
model.export(format="onnx", imgsz=640)

New https://pypi.org/project/ultralytics/8.3.99 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.98 🚀 Python-3.11.11 torch-2.3.1.post300 CUDA:0 (Tesla T4, 14918MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=runs/train/exp/weights/epoch30.pt, data=filtered_coco/dataset.yaml, epochs=100, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=10, cache=False, device=0, workers=28, project=runs/train, name=exp, exist_ok=True, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=runs/train/exp/weights/epoch30.pt, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, cl

[34m[1mtrain: [0mScanning /home/sagemaker-user/src/filtered_coco/labels/train.cache... 14187 images, 0 backgrounds, 0 corrupt: 100%|██████████| 14187/14187 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /home/sagemaker-user/src/filtered_coco/labels/val.cache... 1577 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1577/1577 [00:00<?, ?it/s]


Plotting labels to runs/train/exp/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 97 weight(decay=0.0), 104 weight(decay=0.0005), 103 bias(decay=0.0)
Resuming training runs/train/exp/weights/epoch30.pt from epoch 32 to 100 total epochs
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 28 dataloader workers
Logging results to [1mruns/train/exp[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     32/100      13.3G     0.9539      1.047      1.218        194        640: 100%|██████████| 887/887 [16:55<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.44it/s]


                   all       1577      12086      0.592      0.485       0.52      0.372

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     33/100      12.7G     0.9336     0.9877        1.2        127        640: 100%|██████████| 887/887 [16:56<00:00,  1.15s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086      0.594      0.502      0.528      0.376

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     34/100      12.7G     0.9226     0.9625       1.19        129        640: 100%|██████████| 887/887 [16:55<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086      0.619      0.496      0.532      0.383

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     35/100      12.7G     0.9083     0.9367       1.18        182        640: 100%|██████████| 887/887 [16:54<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086      0.583      0.509      0.531      0.381

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     36/100      12.7G     0.9057     0.9252      1.176        133        640: 100%|██████████| 887/887 [16:54<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086      0.587      0.523      0.536      0.386

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     37/100      12.7G      0.898     0.9087      1.172        164        640: 100%|██████████| 887/887 [16:55<00:00,  1.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 50/50 [00:34<00:00,  1.45it/s]


                   all       1577      12086       0.63      0.498      0.537      0.385

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     38/100      12.7G     0.8946      0.902      1.168        216        640:  13%|█▎        | 115/887 [02:11<14:48,  1.15s/it]