# Finetuning Yolov11m

In [14]:
from ultralytics import YOLO
import os

# load the pretrained medium model
model = YOLO("yolo11l.pt")

# Use absolute path to YAML
yaml_path = "/home/joey/Projects/chatbrain/backend/vision_model/chat_dataset.yaml"

# Verify paths
assert os.path.exists(yaml_path), "YAML file not found!"
assert os.path.exists(os.path.join(os.path.dirname(yaml_path), "dataset/train/images")), "Train images missing!"
assert os.path.exists(os.path.join(os.path.dirname(yaml_path), "dataset/val/images")), "Val images missing!"

results = model.train(
    data=yaml_path,
    epochs=100,
    time=0.05, # maxium training time in hours
    patience=10, # early stopping patience
    batch=-1, # auto batch size
    imgsz=640, # image size
    multi_scale=True,  # Vary imgsz ±50% during training
    cache=True, # cache images for faster training
)

Ultralytics 8.3.67 🚀 Python-3.10.12 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce GTX 1060 3GB, 3004MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11l.pt, data=/home/joey/Projects/chatbrain/backend/vision_model/chat_dataset.yaml, epochs=100, time=0.05, patience=10, batch=-1, imgsz=640, save=True, save_period=-1, cache=True, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_c

100%|██████████| 755k/755k [00:00<00:00, 18.7MB/s]

Overriding model.yaml nc=80 with nc=3

                   from  n    params  module                                       arguments                     
  0                  -1  1      1856  ultralytics.nn.modules.conv.Conv             [3, 64, 3, 2]                 
  1                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  2                  -1  2    173824  ultralytics.nn.modules.block.C3k2            [128, 256, 2, True, 0.25]     
  3                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
  4                  -1  2    691712  ultralytics.nn.modules.block.C3k2            [256, 512, 2, True, 0.25]     
  5                  -1  1   2360320  ultralytics.nn.modules.conv.Conv             [512, 512, 3, 2]              
  6                  -1  2   2234368  ultralytics.nn.modules.block.C3k2            [512, 512, 2, True]           
  7                  -1  1   2360320  ultralytics




 13                  -1  2   2496512  ultralytics.nn.modules.block.C3k2            [1024, 512, 2, True]          
 14                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 15             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 16                  -1  2    756736  ultralytics.nn.modules.block.C3k2            [1024, 256, 2, True]          
 17                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
 18            [-1, 13]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 19                  -1  2   2365440  ultralytics.nn.modules.block.C3k2            [768, 512, 2, True]           
 20                  -1  1   2360320  ultralytics.nn.modules.conv.Conv             [512, 512, 3, 2]              
 21            [-1, 10]  1         0  ultralytics.nn.modules.conv.Concat           [1]  

100%|██████████| 5.35M/5.35M [00:00<00:00, 44.0MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /home/joey/Projects/chatbrain/backend/vision_model/dataset/train/labels... 29 images, 4 backgrounds, 6 corrupt: 100%|██████████| 29/29 [00:00<00:00, 282.01it/s]

[34m[1mtrain: [0mNew cache created: /home/joey/Projects/chatbrain/backend/vision_model/dataset/train/labels.cache







[34m[1mtrain: [0mCaching images (0.0GB RAM): 100%|██████████| 23/23 [00:00<00:00, 207.37it/s]


[34m[1mAutoBatch: [0mComputing optimal batch size for imgsz=640 at 60.0% CUDA memory utilization.
[34m[1mAutoBatch: [0mCUDA:0 (NVIDIA GeForce GTX 1060 3GB) 2.93G total, 0.25G reserved, 0.23G allocated, 2.46G free
      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output
    25312793       87.28         1.363         97.12         163.5        (1, 3, 640, 640)                    list
    25312793       174.6         2.038         141.2         252.9        (2, 3, 640, 640)                    list
CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 2.93 GiB of which 74.44 MiB is free. Including non-PyTorch memory, this process has 1.95 GiB memory in use. Of the allocated memory 1.76 GiB is allocated by PyTorch, and 57.97 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See doc

[34m[1mtrain: [0mScanning /home/joey/Projects/chatbrain/backend/vision_model/dataset/train/labels.cache... 29 images, 4 backgrounds, 6 corrupt: 100%|██████████| 29/29 [00:00<?, ?it/s]








[34m[1mtrain: [0mCaching images (0.0GB RAM): 100%|██████████| 23/23 [00:00<00:00, 230.59it/s]
[34m[1mval: [0mScanning /home/joey/Projects/chatbrain/backend/vision_model/dataset/val/labels... 8 images, 2 backgrounds, 2 corrupt: 100%|██████████| 8/8 [00:00<00:00, 195.26it/s]


[34m[1mval: [0mNew cache created: /home/joey/Projects/chatbrain/backend/vision_model/dataset/val/labels.cache


[34m[1mval: [0mCaching images (0.0GB RAM): 100%|██████████| 6/6 [00:00<00:00, 64.79it/s]


Plotting labels to /home/joey/Projects/chatbrain/runs/detect/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 167 weight(decay=0.0), 174 weight(decay=0.0005), 173 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 6 dataloader workers
Logging results to [1m/home/joey/Projects/chatbrain/runs/detect/train3[0m
Starting training for 0.05 hours...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      1.77G      4.316      5.983       2.81          0        416: 100%|██████████| 23/23 [00:11<00:00,  2.00it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:01<00:00,  2.98it/s]

                   all          6         41          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/14      1.85G       4.27       6.93        2.9         11        800: 100%|██████████| 23/23 [00:11<00:00,  1.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  8.19it/s]

                   all          6         41          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/15      1.96G      4.603      5.512      3.059         25        640: 100%|██████████| 23/23 [00:09<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  9.02it/s]

                   all          6         41          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/15       1.9G      4.555      6.605      2.991         23        960: 100%|██████████| 23/23 [00:10<00:00,  2.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  8.07it/s]

                   all          6         41          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/16      1.52G      4.438      6.088      2.771         25        352: 100%|██████████| 23/23 [00:10<00:00,  2.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  7.96it/s]

                   all          6         41          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/16      1.88G      4.873      6.073      3.082          3        576: 100%|██████████| 23/23 [00:09<00:00,  2.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  7.99it/s]

                   all          6         41          0          0          0          0





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/16      1.89G      3.651      7.608      2.795         14        672: 100%|██████████| 23/23 [00:11<00:00,  1.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  7.88it/s]

                   all          6         41          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/16      1.94G       3.94      7.921      2.675          5        608: 100%|██████████| 23/23 [00:11<00:00,  1.94it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  8.00it/s]

                   all          6         41          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/16      1.97G      3.697      6.723      2.632          5        768: 100%|██████████| 23/23 [00:12<00:00,  1.91it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  8.09it/s]

                   all          6         41   0.000483     0.0185   0.000268   2.68e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/15      1.93G      4.295      7.353      2.962          8        704: 100%|██████████| 23/23 [00:11<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  8.04it/s]

                   all          6         41   0.000483     0.0185   0.000268   2.68e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/15      1.95G      3.897      7.045      2.655         10        320: 100%|██████████| 23/23 [00:10<00:00,  2.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00, 10.27it/s]

                   all          6         41   0.000501     0.0175   0.000278   2.78e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/16      1.97G      3.801      7.126      2.587          0        352: 100%|██████████| 23/23 [00:09<00:00,  2.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  8.56it/s]

                   all          6         41   0.000501     0.0175   0.000278   2.78e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/16      1.79G      3.786      7.386      2.736          0        640: 100%|██████████| 23/23 [00:09<00:00,  2.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00, 10.30it/s]

                   all          6         41   0.000501     0.0175   0.000278   2.78e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/16      1.78G      3.922      6.641        2.7          8        704: 100%|██████████| 23/23 [00:08<00:00,  2.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  9.06it/s]

                   all          6         41   0.000518     0.0175   0.000284   2.84e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/16      1.97G      3.933      7.879       2.64         14        704: 100%|██████████| 23/23 [00:09<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  9.70it/s]

                   all          6         41   0.000518     0.0175   0.000284   2.84e-05






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/16      1.76G      3.972      6.135      2.762          0        576: 100%|██████████| 23/23 [00:09<00:00,  2.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  7.38it/s]

                   all          6         41   0.000518     0.0175   0.000284   2.84e-05






16 epochs completed in 0.051 hours.
Optimizer stripped from /home/joey/Projects/chatbrain/runs/detect/train3/weights/last.pt, 51.2MB
Optimizer stripped from /home/joey/Projects/chatbrain/runs/detect/train3/weights/best.pt, 51.2MB

Validating /home/joey/Projects/chatbrain/runs/detect/train3/weights/best.pt...
Ultralytics 8.3.67 🚀 Python-3.10.12 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce GTX 1060 3GB, 3004MiB)
YOLO11l summary (fused): 464 layers, 25,281,625 parameters, 0 gradients, 86.6 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  7.83it/s]


                   all          6         41   0.000513     0.0175   0.000281   2.81e-05
          contact_name          4         19    0.00154     0.0526   0.000842   8.42e-05
               own_msg          4         18          0          0          0          0
            others_msg          4          4          0          0          0          0
Speed: 1.0ms preprocess, 57.9ms inference, 0.0ms loss, 1.9ms postprocess per image
Results saved to [1m/home/joey/Projects/chatbrain/runs/detect/train3[0m


#### Sort detected boxes vertically to get message order

In [6]:
def process_results(results):
    boxes = []
    for box in results[0].boxes:
        cls = int(box.cls)
        coords = box.xyxy[0].tolist()  # [ax, ay, bx, by]
        boxes.append((cls, coords))
    
    # Sort non-contact boxes by Y-coordinate (ay)
    contact_boxes = [b for b in boxes if b[0] == 0]
    message_boxes = sorted(
        [b for b in boxes if b[0] != 0],
        key=lambda x: x[1][1]  # Sort by ay (top Y-coordinate)
    )
    
    # Assign 'num' (0 for contacts, 1,2,3... for messages)
    final = []
    for cls, (ax, ay, bx, by) in contact_boxes:
        final.append([ax, ay, bx, by, 0, 0])  # Type 0, num=0
    
    for i, (cls, (ax, ay, bx, by)) in enumerate(message_boxes, 1):
        final.append([ax, ay, bx, by, cls, i])  # Type 1/2, num=i
    
    return final