## 📚 Libraries

In [1]:
import os
import csv
import cv2
import shutil
from datetime import timedelta
from ultralytics import YOLO
from pyzbar.pyzbar import decode
from sklearn.model_selection import train_test_split

The following code organizes a dataset for training and validation in a YOLOv8 project. It starts by defining the directory paths for training and validation images and labels. It then ensures that the validation directories exist, creating them if necessary. Next, it retrieves a list of all `.jpg` image files in the training images directory and splits them into training and validation sets using an 80-20 ratio. The images selected for the validation set, together with their associated label files (text files with annotations), are moved to their respective validation directories. Finally, a message confirms that the validation dataset has been successfully created.

In [2]:
# Dataset layout: images and their YOLO label files live in parallel folders.
data_dir = "../data/"
images_dir = os.path.join(data_dir, "train/images")
labels_dir = os.path.join(data_dir, "train/labels")
valid_images_dir = os.path.join(data_dir, "valid/images")
valid_labels_dir = os.path.join(data_dir, "valid/labels")

os.makedirs(valid_images_dir, exist_ok=True)
os.makedirs(valid_labels_dir, exist_ok=True)

# os.listdir returns names in arbitrary order; sorting makes the seeded split
# pick the same files on every machine.
image_files = sorted(f for f in os.listdir(images_dir) if f.endswith(".jpg"))
existing_valid_images = sorted(f for f in os.listdir(valid_images_dir) if f.endswith(".jpg"))

if existing_valid_images:
    # The split moves files, so running it again would shift another 20% of the
    # remaining training images into the validation set. Skip when it is already done.
    train_images, valid_images = image_files, existing_valid_images
    print("Validation images already present; skipping split.")
else:
    train_images, valid_images = train_test_split(image_files, test_size=0.2, random_state=42)

    for img_file in valid_images:
        # Swap only the extension; str.replace would also rewrite ".jpg" inside the stem.
        label_file = os.path.splitext(img_file)[0] + ".txt"
        label_src = os.path.join(labels_dir, label_file)

        shutil.move(os.path.join(images_dir, img_file), os.path.join(valid_images_dir, img_file))
        # An image without a label file is moved on its own instead of aborting
        # the loop halfway through and leaving the dataset half-split.
        if os.path.exists(label_src):
            shutil.move(label_src, os.path.join(valid_labels_dir, label_file))

    print("Validation dataset created successfully.")

Validation dataset created successfully.


This next code block initializes and trains a YOLOv8 model for object detection. Using the YOLO class from the Ultralytics library (imported in the Libraries cell above), it loads a pre-trained YOLOv8n model, which serves as the starting point for training. The train method is then called to fine-tune the model using a custom dataset specified in the data.yaml file. Key training parameters include 50 epochs, an image size of 640 pixels and a batch size of 16. These settings determine the training duration, input image resolution and the number of images processed in each training batch, respectively.

In [None]:
# Fine-tuning settings, gathered in one place so they are easy to adjust.
train_settings = {
    "data": "../data/data.yaml",  # dataset definition file
    "epochs": 50,
    "imgsz": 640,
    "batch": 16,
}

# Start from the pre-trained YOLOv8n checkpoint and fine-tune it.
model = YOLO("yolov8n.pt")
model.train(**train_settings)

New https://pypi.org/project/ultralytics/8.3.150 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.140  Python-3.11.5 torch-2.7.0+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=../data/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train9, nbs=64, nms=False, opset=None, opti

[34m[1mtrain: [0mScanning I:\year 2 fontys\sem 4\product-tracking\data\train\labels... 136 images, 0 backgrounds, 0 corrupt: 100%|██████████| 136/136 [00:00<00:00, 328.02it/s]

[34m[1mtrain: [0mNew cache created: I:\year 2 fontys\sem 4\product-tracking\data\train\labels.cache





[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 3.80.6 MB/s, size: 55.5 KB)


[34m[1mval: [0mScanning I:\year 2 fontys\sem 4\product-tracking\data\valid\labels... 933 images, 19 backgrounds, 0 corrupt: 100%|██████████| 933/933 [00:02<00:00, 353.59it/s]


[34m[1mval: [0mNew cache created: I:\year 2 fontys\sem 4\product-tracking\data\valid\labels.cache




Plotting labels to runs\detect\train9\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train9[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G      1.603      3.729      1.327         50        640: 100%|██████████| 9/9 [00:31<00:00,  3.47s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:30<00:00,  3.01s/it]


                   all        933       8304    0.00655      0.245     0.0422     0.0274

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50         0G      1.502      2.346      1.113        117        640: 100%|██████████| 9/9 [00:50<00:00,  5.56s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [02:02<00:00,  4.08s/it]

                   all        933       8304     0.0119      0.452      0.285      0.174






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50         0G      1.468      1.707      1.128         99        640: 100%|██████████| 9/9 [00:29<00:00,  3.26s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:35<00:00,  3.17s/it]


                   all        933       8304     0.0388       0.47      0.359      0.227

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50         0G      1.362      1.569      1.108        116        640: 100%|██████████| 9/9 [00:29<00:00,  3.31s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:27<00:00,  2.92s/it]


                   all        933       8304       0.98     0.0912       0.48      0.298

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50         0G      1.351      1.454      1.098         90        640: 100%|██████████| 9/9 [00:29<00:00,  3.23s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:27<00:00,  2.91s/it]

                   all        933       8304      0.935      0.168       0.45      0.259






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50         0G      1.372      1.379      1.104         63        640: 100%|██████████| 9/9 [00:29<00:00,  3.23s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:30<00:00,  3.02s/it]

                   all        933       8304      0.926      0.289      0.553      0.326






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50         0G       1.33      1.284      1.104        125        640: 100%|██████████| 9/9 [00:30<00:00,  3.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:30<00:00,  3.02s/it]

                   all        933       8304      0.946      0.253      0.509      0.316






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50         0G      1.274       1.25      1.086         71        640: 100%|██████████| 9/9 [00:28<00:00,  3.22s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:34<00:00,  3.14s/it]

                   all        933       8304      0.839      0.342      0.641      0.379






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50         0G      1.328      1.233      1.074        103        640: 100%|██████████| 9/9 [00:29<00:00,  3.26s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:30<00:00,  3.00s/it]

                   all        933       8304      0.801      0.512      0.702      0.393






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50         0G      1.294      1.166      1.086        131        640: 100%|██████████| 9/9 [00:28<00:00,  3.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:29<00:00,  2.98s/it]


                   all        933       8304      0.871      0.472      0.705      0.414

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50         0G      1.334      1.162      1.111        111        640: 100%|██████████| 9/9 [00:29<00:00,  3.23s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:29<00:00,  2.99s/it]


                   all        933       8304      0.864      0.423      0.681      0.398

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/50         0G      1.272      1.134      1.079         69        640: 100%|██████████| 9/9 [00:28<00:00,  3.22s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:29<00:00,  2.99s/it]


                   all        933       8304      0.696       0.64      0.668      0.377

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/50         0G      1.267      1.174      1.076        109        640: 100%|██████████| 9/9 [00:28<00:00,  3.22s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:27<00:00,  2.92s/it]


                   all        933       8304      0.711       0.63      0.682      0.398

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/50         0G      1.239      1.117      1.057        102        640: 100%|██████████| 9/9 [00:29<00:00,  3.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:26<00:00,  2.89s/it]


                   all        933       8304      0.713      0.625      0.686      0.402

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/50         0G      1.232      1.079      1.085         94        640: 100%|██████████| 9/9 [00:29<00:00,  3.24s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:26<00:00,  2.88s/it]

                   all        933       8304      0.665      0.637      0.689       0.39






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/50         0G      1.229      1.039      1.065        153        640: 100%|██████████| 9/9 [00:29<00:00,  3.22s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:25<00:00,  2.86s/it]

                   all        933       8304      0.692      0.689      0.741      0.428






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/50         0G      1.221     0.9993      1.064         94        640: 100%|██████████| 9/9 [00:28<00:00,  3.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:23<00:00,  2.80s/it]

                   all        933       8304      0.731      0.705      0.761      0.452






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/50         0G      1.156     0.9923      1.027         90        640: 100%|██████████| 9/9 [00:28<00:00,  3.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:23<00:00,  2.77s/it]


                   all        933       8304      0.721      0.695      0.753       0.45

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/50         0G      1.171     0.9535      1.048         79        640: 100%|██████████| 9/9 [00:28<00:00,  3.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:21<00:00,  2.72s/it]

                   all        933       8304      0.738      0.703       0.76      0.455






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/50         0G      1.134      0.972      1.039         96        640: 100%|██████████| 9/9 [00:28<00:00,  3.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:20<00:00,  2.68s/it]


                   all        933       8304       0.76      0.711      0.765      0.461

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/50         0G      1.211     0.9725      1.056         68        640: 100%|██████████| 9/9 [00:28<00:00,  3.19s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:22<00:00,  2.74s/it]


                   all        933       8304      0.763      0.695      0.749      0.451

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/50         0G      1.138     0.9037      1.003         85        640: 100%|██████████| 9/9 [00:28<00:00,  3.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:21<00:00,  2.72s/it]

                   all        933       8304      0.759      0.719      0.762       0.46






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/50         0G      1.169     0.9133      1.037        140        640: 100%|██████████| 9/9 [00:28<00:00,  3.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:23<00:00,  2.77s/it]

                   all        933       8304      0.768      0.749      0.787      0.477






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/50         0G      1.179     0.9264      1.026        111        640: 100%|██████████| 9/9 [00:29<00:00,  3.25s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:23<00:00,  2.77s/it]


                   all        933       8304      0.765      0.749       0.79      0.483

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/50         0G      1.125     0.9218      1.033         68        640: 100%|██████████| 9/9 [00:28<00:00,  3.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:21<00:00,  2.70s/it]


                   all        933       8304      0.732      0.726      0.777      0.474

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/50         0G      1.088     0.8883     0.9967         59        640: 100%|██████████| 9/9 [00:28<00:00,  3.20s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:21<00:00,  2.71s/it]

                   all        933       8304      0.774      0.745        0.8      0.497






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/50         0G      1.116     0.8459      1.013         72        640: 100%|██████████| 9/9 [00:28<00:00,  3.19s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:20<00:00,  2.69s/it]

                   all        933       8304      0.765      0.747      0.792      0.483






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/50         0G      1.087     0.8357      1.011        109        640: 100%|██████████| 9/9 [00:28<00:00,  3.19s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:19<00:00,  2.64s/it]

                   all        933       8304      0.744      0.763      0.796      0.493






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/50         0G      1.083     0.8292      1.019         68        640: 100%|██████████| 9/9 [00:28<00:00,  3.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:20<00:00,  2.69s/it]

                   all        933       8304      0.795      0.744      0.809      0.499






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/50         0G       1.06     0.8219     0.9966        102        640: 100%|██████████| 9/9 [00:28<00:00,  3.20s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:20<00:00,  2.68s/it]

                   all        933       8304      0.772      0.757      0.805      0.496






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      31/50         0G      1.018     0.7942     0.9726        120        640: 100%|██████████| 9/9 [00:28<00:00,  3.15s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:19<00:00,  2.64s/it]

                   all        933       8304      0.774      0.751       0.81      0.512






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      32/50         0G      1.069     0.8047     0.9978        114        640: 100%|██████████| 9/9 [00:28<00:00,  3.19s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.63s/it]

                   all        933       8304      0.785      0.751      0.813      0.506






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      33/50         0G      1.041     0.7842     0.9947        112        640: 100%|██████████| 9/9 [00:28<00:00,  3.16s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:17<00:00,  2.60s/it]

                   all        933       8304      0.797      0.745       0.81      0.516






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      34/50         0G      1.066      0.818      0.977        118        640: 100%|██████████| 9/9 [00:28<00:00,  3.16s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:17<00:00,  2.60s/it]

                   all        933       8304      0.786      0.751      0.805      0.513






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      35/50         0G      1.062     0.7826      0.996         79        640: 100%|██████████| 9/9 [00:28<00:00,  3.21s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.60s/it]


                   all        933       8304      0.782      0.763      0.807      0.506

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      36/50         0G      1.014     0.7593       0.98         56        640: 100%|██████████| 9/9 [00:28<00:00,  3.19s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.62s/it]


                   all        933       8304      0.783      0.771       0.82      0.529

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      37/50         0G      1.023     0.7717     0.9768         89        640: 100%|██████████| 9/9 [00:28<00:00,  3.18s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:19<00:00,  2.64s/it]


                   all        933       8304      0.784      0.779      0.824      0.525

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      38/50         0G       1.02     0.7681     0.9725        128        640: 100%|██████████| 9/9 [00:28<00:00,  3.15s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.62s/it]


                   all        933       8304      0.794      0.783       0.83       0.53

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      39/50         0G      1.014     0.7671     0.9818         48        640: 100%|██████████| 9/9 [00:28<00:00,  3.14s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.63s/it]

                   all        933       8304      0.789      0.788      0.829      0.527






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      40/50         0G      1.023     0.7319     0.9613        166        640: 100%|██████████| 9/9 [00:29<00:00,  3.23s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.61s/it]

                   all        933       8304       0.79      0.773      0.821       0.52





Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      41/50         0G     0.9472     0.7802     0.9651         28        640: 100%|██████████| 9/9 [00:27<00:00,  3.09s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.63s/it]

                   all        933       8304      0.795       0.78      0.826      0.526






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      42/50         0G     0.9239     0.7288     0.9453         65        640: 100%|██████████| 9/9 [00:27<00:00,  3.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.63s/it]


                   all        933       8304      0.812      0.767      0.823      0.526

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      43/50         0G     0.9161     0.7287     0.9449         61        640: 100%|██████████| 9/9 [00:27<00:00,  3.04s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:15<00:00,  2.53s/it]


                   all        933       8304      0.802      0.773      0.822      0.528

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      44/50         0G     0.9326     0.7086     0.9511         88        640: 100%|██████████| 9/9 [00:27<00:00,  3.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:17<00:00,  2.60s/it]

                   all        933       8304      0.786      0.777       0.82      0.527






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      45/50         0G     0.8953     0.6867     0.9379         64        640: 100%|██████████| 9/9 [00:27<00:00,  3.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.63s/it]


                   all        933       8304      0.784       0.78      0.823      0.531

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      46/50         0G     0.8782     0.6696     0.9296         48        640: 100%|██████████| 9/9 [00:27<00:00,  3.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:21<00:00,  2.72s/it]


                   all        933       8304      0.786      0.786      0.827      0.534

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      47/50         0G     0.8986     0.6645     0.9355         59        640: 100%|██████████| 9/9 [00:27<00:00,  3.05s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.63s/it]


                   all        933       8304      0.791      0.783      0.828      0.534

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      48/50         0G     0.8649     0.6612     0.9287         65        640: 100%|██████████| 9/9 [00:27<00:00,  3.07s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.62s/it]


                   all        933       8304      0.797       0.78       0.83      0.538

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      49/50         0G     0.8518     0.6542      0.928         39        640: 100%|██████████| 9/9 [00:27<00:00,  3.05s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.62s/it]

                   all        933       8304      0.796      0.784       0.83      0.539






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      50/50         0G     0.8898     0.6807     0.9469         30        640: 100%|██████████| 9/9 [00:27<00:00,  3.09s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:18<00:00,  2.62s/it]

                   all        933       8304        0.8      0.785      0.832       0.54






50 epochs completed in 1.570 hours.
Optimizer stripped from runs\detect\train9\weights\last.pt, 6.2MB
Optimizer stripped from runs\detect\train9\weights\best.pt, 6.2MB

Validating runs\detect\train9\weights\best.pt...
Ultralytics 8.3.140  Python-3.11.5 torch-2.7.0+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
Model summary (fused): 72 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 30/30 [01:04<00:00,  2.15s/it]


                   all        933       8304      0.799      0.785      0.832       0.54
                   box        898       3628      0.799      0.785      0.844      0.599
               product        764       4676      0.799      0.786      0.819      0.482
Speed: 1.0ms preprocess, 57.1ms inference, 0.0ms loss, 4.2ms postprocess per image
Results saved to [1mruns\detect\train9[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x000002079DBCE0D0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.0

# Model Evaluation (Company Provided Data Only)

The model shows consistent improvement over the course of training. Loss values for bounding boxes, classification, and distribution focal loss steadily decrease, indicating that the model is learning effectively. Precision, recall, and mAP metrics improve as well, demonstrating better detection performance with more epochs. Early results start modestly, but as training continues, the model achieves higher accuracy and fewer false positives. In the training log shown above, the run is executed on the CPU (`device=cpu`), so the `GPU_mem` column stays at 0G throughout training. Overall, the model performs reliably and shows clear progress as training progresses.

# Model Evaluation (Data Collected on the Last Logicall Trip)

This updated model performs noticeably better than the previous one, largely thanks to being trained on a larger dataset. One of the main issues, however, was the time it took us to train the model: with the company-provided data plus the data from the two cameras from our latest Logicall trip, training took 10 hours and 45 minutes. With more examples to learn from, the model picks up patterns faster and more accurately. The loss values (covering bounding boxes, classification, and distribution focal loss) drop more smoothly and settle lower than before, showing that the model is learning efficiently. We also see solid improvements in key metrics like precision, recall, and mAP, meaning it's doing a better job of correctly identifying objects and reducing mistakes. Compared to earlier results, it's more accurate and consistent, even in trickier cases. Despite training on more data, GPU usage stayed stable, which is a nice bonus. Overall, adding more data has made the model stronger, smarter, and more reliable.



# 📦 Box/Product Detection Test

In [3]:
# CONFIGURATION
# NOTE(review): the training log reports results under "runs\detect\train9"
# relative to the working directory, while this path goes one level up.
# Confirm the weights really live at this location.
model_path = "../runs/detect/train9/weights/best.pt"
input_video_path = "../videos/before/_2025-05-28_11_44_21_572.mp4"
output_dir = "../videos/test/"
output_filename = "testtest.mp4"
output_video_path = os.path.join(output_dir, output_filename)
csv_output_path = os.path.join(output_dir, "detection_log.csv")

# A detection closer than this to a tracked object's last center is treated
# as the same object.
DISTANCE_THRESHOLD = 150  # pixels
DISAPPEAR_TIME_THRESHOLD = 2.0  # seconds

# SETUP
os.makedirs(output_dir, exist_ok=True)
model = YOLO(model_path)
cap = cv2.VideoCapture(input_video_path)
# VideoCapture does not raise on a bad path; without this check the frame
# size reads as 0 and the main loop exits immediately with no error.
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open input video: {input_video_path}")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# video writer
save_output = True
if save_output:
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    # VideoWriter also fails silently (e.g. codec unavailable); writes would
    # otherwise be dropped without any error.
    if not out.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open output video for writing: {output_video_path}")

# CSV file: (re)create the log and write the header row.
# NOTE(review): csv_file is closed as soon as this with-block exits, so
# csv_writer cannot be used for later rows; confirm that later code reopens
# the file (e.g. in append mode) before writing detections.
with open(csv_output_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Timestamp", "Object Type", "Object ID", "X1", "Y1", "X2", "Y2"])

# color map: one drawing color per class id
color_map = {
    0: (151, 86, 4),     # box
    1: (176, 42, 176),   # product
}

# Per-class tracking state: next ID to hand out, and the objects tracked so
# far keyed by their assigned ID.
id_counters = {0: 0, 1: 0}
tracked_objects = {0: {}, 1: {}}

def get_center(box):
    """Return the midpoint (cx, cy) of an (x1, y1, x2, y2) box, using floor division."""
    left, top, right, bottom = box
    center_x = (left + right) // 2
    center_y = (top + bottom) // 2
    return (center_x, center_y)

def euclidean_distance(p1, p2):
    """Return the straight-line distance between two (x, y) points."""
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    squared = delta_x ** 2 + delta_y ** 2
    return squared ** 0.5

# MAIN LOOP
# Reads the video frame by frame, runs the detector, assigns each box/product
# detection to the nearest existing track of the same class (or starts a new
# track), draws the result and logs one CSV row per detection.
#
# The CSV log is opened once in append mode (its header row is written during
# setup) instead of being reopened for every detection, and the try/finally
# guarantees the capture, writer and preview window are released even when the
# loop raises or the kernel is interrupted.
try:
    with open(csv_output_path, mode='a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            timestamp_sec = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            timestamp_str = str(timedelta(seconds=timestamp_sec)).split('.')[0]

            # timestamp overlay near the bottom-left corner
            cv2.putText(frame, f"Time: {timestamp_str}", (10, height - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

            # mark all tracked objects as unseen for this frame
            for cls_id in tracked_objects:
                for obj in tracked_objects[cls_id].values():
                    obj["seen"] = False

            # YOLOv8 detection
            results = model(frame)[0]
            boxes = results.boxes.xyxy.cpu().numpy()
            class_ids = results.boxes.cls.cpu().numpy().astype(int)

            for box, cls_id in zip(boxes, class_ids):
                if cls_id not in (0, 1):
                    continue

                x1, y1, x2, y2 = map(int, box)
                center = get_center((x1, y1, x2, y2))

                # nearest existing track of the same class within the threshold
                best_match_id = None
                min_distance = float('inf')
                for obj_id, obj_data in tracked_objects[cls_id].items():
                    dist = euclidean_distance(center, obj_data["center"])
                    if dist < DISTANCE_THRESHOLD and dist < min_distance:
                        best_match_id = obj_id
                        min_distance = dist

                if best_match_id is not None:
                    # update existing tracked object
                    tracked_objects[cls_id][best_match_id].update({
                        "center": center,
                        "last_seen": timestamp_sec,
                        "seen": True
                    })
                    assigned_id = best_match_id
                else:
                    # assign new ID
                    assigned_id = id_counters[cls_id]
                    tracked_objects[cls_id][assigned_id] = {
                        "center": center,
                        "first_seen": timestamp_sec,
                        "last_seen": timestamp_sec,
                        "seen": True
                    }
                    id_counters[cls_id] += 1

                # draw box and label (label shows when the track was first seen)
                object_type = "Box" if cls_id == 0 else "Product"
                first_seen = tracked_objects[cls_id][assigned_id]["first_seen"]
                detect_time_str = str(timedelta(seconds=first_seen)).split('.')[0]
                label = f"{object_type} ID {assigned_id} ({detect_time_str})"
                color = color_map.get(cls_id, (255, 255, 255))

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.55, color, 2)

                # write to CSV
                csv_writer.writerow([timestamp_str, object_type,
                                     assigned_id, x1, y1, x2, y2])

            # cleanup disappeared objects
            current_time = timestamp_sec
            for cls_id in tracked_objects:
                to_remove = []
                for obj_id, obj in tracked_objects[cls_id].items():
                    if not obj["seen"] and (current_time - obj["last_seen"]) > DISAPPEAR_TIME_THRESHOLD:
                        to_remove.append(obj_id)
                for obj_id in to_remove:
                    del tracked_objects[cls_id][obj_id]

            # show and save output
            cv2.imshow("YOLOv8 Product Detection", frame)
            if save_output:
                out.write(frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # CLEANUP
    cap.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()


0: 480x640 (no detections), 194.9ms
Speed: 9.3ms preprocess, 194.9ms inference, 10.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 91.8ms
Speed: 5.8ms preprocess, 91.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 82.1ms
Speed: 3.3ms preprocess, 82.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 64.6ms
Speed: 3.5ms preprocess, 64.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 70.4ms
Speed: 3.3ms preprocess, 70.4ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 71.9ms
Speed: 3.8ms preprocess, 71.9ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 73.6ms
Speed: 3.8ms preprocess, 73.6ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 81.5ms
Speed: 3.2ms preprocess, 81.5m

# Real-Time Box/Product Detection

In [3]:
import cv2
from ultralytics import YOLO
from datetime import timedelta
import time

# Load the fine-tuned YOLOv8 weights
model = YOLO("../runs/detect/train9/weights/best.pt")

# Class id -> display label, and class id -> colour used when drawing
class_names = dict(enumerate(("Box", "Product")))
color_map = dict(zip(class_names, ((151, 86, 4), (176, 42, 176))))

# Tracking parameters: matching radius in pixels, unseen timeout in seconds
DISTANCE_THRESHOLD, DISAPPEAR_TIME_THRESHOLD = 150, 2.0

# Object tracking state, one entry per class id
id_counters = dict.fromkeys(class_names, 0)
tracked_objects = {class_id: {} for class_id in class_names}

def get_center(box):
    """Return the (x, y) midpoint of an (x1, y1, x2, y2) box using floor division."""
    x_min, y_min, x_max, y_max = box
    mid_x = (x_min + x_max) // 2
    mid_y = (y_min + y_max) // 2
    return (mid_x, mid_y)

def euclidean_distance(p1, p2):
    """Return the straight-line distance between two (x, y) points."""
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return (dx ** 2 + dy ** 2) ** 0.5

def detect_realtime(camera_index=0, min_confidence=0.5):
    """Run live box/product detection with nearest-centre tracking on a camera feed.

    Every frame is passed through the module-level ``model``. Detections of
    class 0 or 1 whose confidence is at least ``min_confidence`` are matched to
    the nearest existing track of the same class within ``DISTANCE_THRESHOLD``
    pixels; unmatched detections start a new track. Tracks that stay unseen for
    more than ``DISAPPEAR_TIME_THRESHOLD`` seconds are dropped. The annotated
    frame is shown in a window until 'x' is pressed or a frame cannot be read.

    Args:
        camera_index: Device index passed to ``cv2.VideoCapture`` (default 0).
        min_confidence: Detections scoring below this value are ignored
            (default 0.5).

    Returns:
        None. Mutates the module-level ``tracked_objects`` and ``id_counters``.
    """
    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
        print("❌ Error: Webcam not accessible.")
        return

    # Timestamps below are measured from this call's start, so tracks left over
    # from an earlier call would be compared against the wrong clock. Start each
    # session with clean tracking state (mutated in place, names stay bound).
    for cls_id in tracked_objects:
        tracked_objects[cls_id].clear()
        id_counters[cls_id] = 0

    print("📸 Starting real-time detection with tracking. Press 'x' to exit.")
    start_time = time.time()

    # try/finally so the camera and window are released even if the loop raises
    # or the kernel is interrupted.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("⚠️ Error: Failed to capture frame.")
                break

            timestamp_sec = time.time() - start_time
            timestamp_text = f"Time: {str(timedelta(seconds=timestamp_sec)).split('.')[0]}"
            cv2.putText(frame, timestamp_text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            # Mark all tracked objects as unseen for this frame
            for cls_id in tracked_objects:
                for obj in tracked_objects[cls_id].values():
                    obj["seen"] = False

            # YOLO detection
            results = model(frame)[0]
            boxes = results.boxes.xyxy.cpu().numpy()
            class_ids = results.boxes.cls.cpu().numpy().astype(int)
            scores = results.boxes.conf.cpu().numpy()

            for box, cls_id, score in zip(boxes, class_ids, scores):
                if score < min_confidence or cls_id not in (0, 1):
                    continue

                x1, y1, x2, y2 = map(int, box)
                center = get_center((x1, y1, x2, y2))

                # Try to match the nearest existing object of the same class
                best_match_id = None
                min_distance = float('inf')
                for obj_id, obj_data in tracked_objects[cls_id].items():
                    dist = euclidean_distance(center, obj_data["center"])
                    if dist < DISTANCE_THRESHOLD and dist < min_distance:
                        best_match_id = obj_id
                        min_distance = dist

                if best_match_id is not None:
                    tracked_objects[cls_id][best_match_id].update({
                        "center": center,
                        "last_seen": timestamp_sec,
                        "seen": True
                    })
                    assigned_id = best_match_id
                else:
                    assigned_id = id_counters[cls_id]
                    tracked_objects[cls_id][assigned_id] = {
                        "center": center,
                        "first_seen": timestamp_sec,
                        "last_seen": timestamp_sec,
                        "seen": True
                    }
                    id_counters[cls_id] += 1

                # Draw box, ID and the time the track was first seen
                first_seen = tracked_objects[cls_id][assigned_id]["first_seen"]
                detect_time_str = str(timedelta(seconds=first_seen)).split('.')[0]
                label = f"{class_names[cls_id]} ID {assigned_id} ({detect_time_str})"
                color = color_map.get(cls_id, (255, 255, 255))

                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.55, color, 2)

            # Remove objects that have been unseen for too long
            current_time = timestamp_sec
            for cls_id in tracked_objects:
                to_remove = []
                for obj_id, obj in tracked_objects[cls_id].items():
                    if not obj["seen"] and (current_time - obj["last_seen"]) > DISAPPEAR_TIME_THRESHOLD:
                        to_remove.append(obj_id)
                for obj_id in to_remove:
                    del tracked_objects[cls_id][obj_id]

            # Show the frame
            cv2.imshow("Real-Time YOLOv8 Detection with Tracking", frame)

            # Exit loop on 'x' key
            if cv2.waitKey(1) & 0xFF == ord('x'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Run it
detect_realtime()


📸 Starting real-time detection with tracking. Press 'x' to exit.

0: 480x640 (no detections), 173.0ms
Speed: 7.7ms preprocess, 173.0ms inference, 9.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 53.6ms
Speed: 2.4ms preprocess, 53.6ms inference, 8.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 57.4ms
Speed: 1.6ms preprocess, 57.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 110.3ms
Speed: 1.2ms preprocess, 110.3ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 86.2ms
Speed: 1.9ms preprocess, 86.2ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 60.6ms
Speed: 1.3ms preprocess, 60.6ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 62.5ms
Speed: 1.5ms preprocess, 62.5ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 box, 57.8ms
Speed: 2.1ms preprocess, 57.8ms in

# Connection between Box and Product - Test

In [1]:
import os
import cv2
import csv
from datetime import timedelta
from ultralytics import YOLO

# CONFIGURATION
model_path = "../runs/detect/train9/weights/best.pt"
input_video_path = "../videos/before/_2025-05-28_11_44_21_572.mp4"
output_dir = "../videos/test/"
output_filename = "testtestbpc.mp4"
output_video_path = os.path.join(output_dir, output_filename)
csv_output_path = os.path.join(output_dir, "detection_log.csv")

DISTANCE_THRESHOLD = 150  # pixels for tracking
DISAPPEAR_TIME_THRESHOLD = 2.0  # seconds
BOX_ASSOCIATION_RADIUS = 200  # pixels: products within this distance of a box centre are linked to it
FALLBACK_FPS = 30.0  # used only when the video does not report a usable frame rate

# SETUP
os.makedirs(output_dir, exist_ok=True)
model = YOLO(model_path)
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Fail loudly: otherwise the main loop never runs and the cell silently
    # leaves behind an empty output video and a header-only CSV.
    raise RuntimeError(f"Could not open input video: {input_video_path}")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    # A frame rate of 0 would be passed straight to the writer below.
    fps = FALLBACK_FPS

# Video writer
save_output = True
if save_output:
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    if not out.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open output video for writing: {output_video_path}")

# CSV file: (re)create the log with its header row; detections are appended later
with open(csv_output_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Timestamp", "Object Type", "Object ID", "X1", "Y1", "X2", "Y2"])

# Color map: class id -> colour tuple used for the drawing calls
color_map = {
    0: (151, 86, 4),     # box
    1: (176, 42, 176),   # product
}

# Tracking state, keyed by class id (0 = box, 1 = product)
id_counters = {0: 0, 1: 0}
tracked_objects = {0: {}, 1: {}}
product_associations = {}  # box_id: set(product_ids)

# Helper functions
def get_center(box):
    """Return the (x, y) midpoint of an (x1, y1, x2, y2) box using floor division."""
    left, top, right, bottom = box
    horizontal_mid = (left + right) // 2
    vertical_mid = (top + bottom) // 2
    return (horizontal_mid, vertical_mid)

def euclidean_distance(p1, p2):
    """Return the straight-line distance between two (x, y) points."""
    x_gap = p1[0] - p2[0]
    y_gap = p1[1] - p2[1]
    sum_of_squares = x_gap ** 2 + y_gap ** 2
    return sum_of_squares ** 0.5

# MAIN LOOP
# Tracks boxes and products frame by frame and links every product that lies
# within BOX_ASSOCIATION_RADIUS pixels of a box centre to that box.
#
# The association runs after ALL detections of a frame have been processed, so
# the result no longer depends on the order in which the detector returns boxes
# and products (a box listed before a nearby product would otherwise miss it).
# The CSV log is opened once in append mode (its header row is written during
# setup), and the try/finally guarantees the capture, writer and preview window
# are released even when the loop raises or the kernel is interrupted.
try:
    with open(csv_output_path, mode='a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            timestamp_sec = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            timestamp_str = str(timedelta(seconds=timestamp_sec)).split('.')[0]

            # Put timestamp on frame
            cv2.putText(frame, f"Time: {timestamp_str}", (10, height - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

            # Reset seen status
            for cls_id in tracked_objects:
                for obj in tracked_objects[cls_id].values():
                    obj["seen"] = False

            # YOLO detection
            results = model(frame)[0]
            boxes = results.boxes.xyxy.cpu().numpy()
            class_ids = results.boxes.cls.cpu().numpy().astype(int)

            # (center, track_id) pairs collected for this frame only
            box_centers = []
            product_centers = []

            for box, cls_id in zip(boxes, class_ids):
                if cls_id not in (0, 1):
                    continue

                x1, y1, x2, y2 = map(int, box)
                center = get_center((x1, y1, x2, y2))

                # Object tracking: nearest existing track of the same class
                best_match_id = None
                min_distance = float('inf')
                for obj_id, obj_data in tracked_objects[cls_id].items():
                    dist = euclidean_distance(center, obj_data["center"])
                    if dist < DISTANCE_THRESHOLD and dist < min_distance:
                        best_match_id = obj_id
                        min_distance = dist

                if best_match_id is not None:
                    tracked_objects[cls_id][best_match_id].update({
                        "center": center,
                        "last_seen": timestamp_sec,
                        "seen": True
                    })
                    assigned_id = best_match_id
                else:
                    assigned_id = id_counters[cls_id]
                    tracked_objects[cls_id][assigned_id] = {
                        "center": center,
                        "first_seen": timestamp_sec,
                        "last_seen": timestamp_sec,
                        "seen": True
                    }
                    id_counters[cls_id] += 1

                object_type = "Box" if cls_id == 0 else "Product"
                color = color_map.get(cls_id, (255, 255, 255))
                first_seen = tracked_objects[cls_id][assigned_id]["first_seen"]
                detect_time_str = str(timedelta(seconds=first_seen)).split('.')[0]
                label = f"{object_type} ID {assigned_id} ({detect_time_str})"

                # Draw rectangle and label
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.55, color, 2)

                # Log to CSV
                csv_writer.writerow([timestamp_str, object_type,
                                     assigned_id, x1, y1, x2, y2])

                # Remember where each object is for the association pass below
                if cls_id == 1:
                    product_centers.append((center, assigned_id))
                else:
                    box_centers.append((center, assigned_id))

            # Associate products with boxes now that the whole frame is known
            box_color = color_map.get(0, (255, 255, 255))
            for box_center, box_id in box_centers:
                # Draw the association radius around the box centre
                cv2.circle(frame, box_center, BOX_ASSOCIATION_RADIUS, box_color, 2)
                for prod_center, prod_id in product_centers:
                    if euclidean_distance(box_center, prod_center) <= BOX_ASSOCIATION_RADIUS:
                        cv2.line(frame, box_center, prod_center, (0, 255, 255), 1)
                        product_associations.setdefault(box_id, set()).add(prod_id)

            # Cleanup disappeared objects
            current_time = timestamp_sec
            for cls_id in tracked_objects:
                to_remove = []
                for obj_id, obj in tracked_objects[cls_id].items():
                    if not obj["seen"] and (current_time - obj["last_seen"]) > DISAPPEAR_TIME_THRESHOLD:
                        to_remove.append(obj_id)
                for obj_id in to_remove:
                    del tracked_objects[cls_id][obj_id]

            # Show and save
            cv2.imshow("YOLOv8 Product Detection", frame)
            if save_output:
                out.write(frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # CLEANUP
    cap.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()

# Print associations
print("\n=== Box to Product Associations ===")
for box_id, prod_ids in product_associations.items():
    print(f"Box {box_id} -> Products: {sorted(prod_ids)}")



0: 480x640 (no detections), 176.7ms
Speed: 10.3ms preprocess, 176.7ms inference, 12.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 53.3ms
Speed: 3.0ms preprocess, 53.3ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 51.9ms
Speed: 2.6ms preprocess, 51.9ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 49.5ms
Speed: 2.8ms preprocess, 49.5ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 50.0ms
Speed: 3.0ms preprocess, 50.0ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 49.8ms
Speed: 2.3ms preprocess, 49.8ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 48.9ms
Speed: 2.3ms preprocess, 48.9ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 51.8ms
Speed: 3.2ms preprocess, 51.8

# Connection between Box and Product - Alternative

In [9]:
import os
import cv2
import csv
import numpy as np
from datetime import timedelta
from ultralytics import YOLO

# CONFIGURATION
model_path = "../runs/detect/train9/weights/best.pt"
input_video_path = "../videos/before/_2025-05-28_11_44_21_572.mp4"
output_dir = "../videos/test/"
output_filename = "testtestbpcalternative.mp4"
output_video_path = os.path.join(output_dir, output_filename)
csv_output_path = os.path.join(output_dir, "detection_log.csv")

DISTANCE_THRESHOLD = 150  # pixels for tracking
DISAPPEAR_TIME_THRESHOLD = 4.0  # Increased to reduce flicker
ASSOCIATION_RADIUS = 200  # for product-box and person-box
PERSON_RADIUS = 200  # New: visual indicator for each person
FALLBACK_FPS = 30.0  # used only when the video does not report a usable frame rate

# SETUP
os.makedirs(output_dir, exist_ok=True)
model = YOLO(model_path)          # custom box/product detector
coco_model = YOLO("yolov8n.pt")   # pretrained model, used for person detection
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Fail loudly: otherwise the main loop never runs and the cell silently
    # leaves behind an empty output video and a header-only CSV.
    raise RuntimeError(f"Could not open input video: {input_video_path}")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    # A frame rate of 0 would be passed straight to the writer below.
    fps = FALLBACK_FPS

# Video writer
save_output = True
if save_output:
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    if not out.isOpened():
        cap.release()
        raise RuntimeError(f"Could not open output video for writing: {output_video_path}")

# CSV file initialization: (re)create the log with its header row
with open(csv_output_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["Timestamp", "Object Type", "Object ID", "X1", "Y1", "X2", "Y2"])

# Color map for drawing boxes
color_map = {
    0: (151, 86, 4),     # box
    1: (176, 42, 176),   # product
    2: (0, 255, 0),      # person
}

id_counters = {0: 0, 1: 0, 2: 0}
tracked_objects = {0: {}, 1: {}, 2: {}}  # 0=box, 1=product, 2=person
associations = {}  # box_id: {"person_id": person_id, "products": set(product_ids)}

# Helper functions
def get_center(box):
    """Return the (x, y) midpoint of an (x1, y1, x2, y2) box using floor division."""
    xa, ya, xb, yb = box
    cx = (xa + xb) // 2
    cy = (ya + yb) // 2
    return (cx, cy)

def euclidean_distance(p1, p2):
    """Return the straight-line distance between two (x, y) points."""
    horizontal = p1[0] - p2[0]
    vertical = p1[1] - p2[1]
    return (horizontal ** 2 + vertical ** 2) ** 0.5

# MAIN LOOP
# Per frame: detect boxes/products with the custom model and people with the
# COCO model, track all three classes by nearest centre, then link each box to
# its nearest person within ASSOCIATION_RADIUS and that person to the products
# within ASSOCIATION_RADIUS of them.
#
# Every person detection now receives its own class id. Previously a single
# class id was appended regardless of how many people were found, so zip()
# truncated the lists and all but one person were dropped.
# The CSV log is opened once in append mode (its header row is written during
# setup), and the try/finally guarantees the capture, writer and preview window
# are released even when the loop raises or the kernel is interrupted.
CONFIDENCE_THRESHOLD = 0.6  # Increased for better person filtering
PERSON_CLASS_ID = 2  # class id given to people in the merged detection list
LABEL_NAMES = ("Box", "Product", "Person")  # indexed by class id

try:
    with open(csv_output_path, mode='a', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            timestamp_sec = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
            timestamp_str = str(timedelta(seconds=timestamp_sec)).split('.')[0]

            cv2.putText(frame, f"Time: {timestamp_str}", (10, height - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

            # Reset seen flags for all tracked objects
            for cls_id in tracked_objects:
                for obj in tracked_objects[cls_id].values():
                    obj["seen"] = False

            # Detect with custom model
            results = model(frame)[0]
            boxes = results.boxes.xyxy.cpu().numpy()
            class_ids = results.boxes.cls.cpu().numpy().astype(int)

            # Keep only box/product classes so a custom-model class id can never
            # collide with the person id added below.
            custom_mask = np.isin(class_ids, (0, 1))
            boxes = boxes[custom_mask]
            class_ids = class_ids[custom_mask]

            # Detect people with COCO pretrained model (class 0 there)
            coco_results = coco_model(frame)[0]
            coco_boxes = coco_results.boxes.xyxy.cpu().numpy()
            coco_scores = coco_results.boxes.conf.cpu().numpy()
            coco_class_ids = coco_results.boxes.cls.cpu().numpy().astype(int)
            people_mask = (coco_class_ids == 0) & (coco_scores > CONFIDENCE_THRESHOLD)
            person_boxes = coco_boxes[people_mask]

            # Append people detections as class 2 (person): one id per box
            if len(person_boxes):
                boxes = np.vstack([boxes, person_boxes])
                person_ids = np.full(len(person_boxes), PERSON_CLASS_ID, dtype=class_ids.dtype)
                class_ids = np.concatenate([class_ids, person_ids])

            detected_centers = {0: [], 1: [], 2: []}  # 0=box, 1=product, 2=person

            for box, cls_id in zip(boxes, class_ids):
                x1, y1, x2, y2 = map(int, box)
                center = get_center((x1, y1, x2, y2))

                # Object tracking: nearest existing track of the same class
                best_match_id = None
                min_distance = float('inf')
                for obj_id, obj_data in tracked_objects[cls_id].items():
                    dist = euclidean_distance(center, obj_data["center"])
                    if dist < DISTANCE_THRESHOLD and dist < min_distance:
                        best_match_id = obj_id
                        min_distance = dist

                if best_match_id is not None:
                    tracked_objects[cls_id][best_match_id].update({
                        "center": center,
                        "last_seen": timestamp_sec,
                        "seen": True
                    })
                    assigned_id = best_match_id
                else:
                    assigned_id = id_counters[cls_id]
                    tracked_objects[cls_id][assigned_id] = {
                        "center": center,
                        "first_seen": timestamp_sec,
                        "last_seen": timestamp_sec,
                        "seen": True
                    }
                    id_counters[cls_id] += 1

                # Draw bounding box and label
                color = color_map.get(cls_id, (255, 255, 255))
                first_seen = tracked_objects[cls_id][assigned_id]["first_seen"]
                detect_time_str = str(timedelta(seconds=first_seen)).split('.')[0]
                label_name = LABEL_NAMES[cls_id]
                label = f"{label_name} ID {assigned_id} ({detect_time_str})"
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.55, color, 2)

                # Draw radius circle around each person
                if cls_id == PERSON_CLASS_ID:
                    cv2.circle(frame, center, PERSON_RADIUS, (0, 180, 255), 2)

                # Log detection to CSV
                csv_writer.writerow([timestamp_str, label_name, assigned_id, x1, y1, x2, y2])

                detected_centers[cls_id].append((center, assigned_id))

            # Draw associations
            for box_center, box_id in detected_centers[0]:
                cv2.circle(frame, box_center, ASSOCIATION_RADIUS, color_map[0], 2)

                # Nearest person within the association radius of this box
                nearest_person_id = None
                nearest_person_center = None
                nearest_person_dist = float('inf')
                for person_center, person_id in detected_centers[2]:
                    dist = euclidean_distance(box_center, person_center)
                    if dist <= ASSOCIATION_RADIUS and dist < nearest_person_dist:
                        nearest_person_id = person_id
                        nearest_person_center = person_center
                        nearest_person_dist = dist

                if nearest_person_id is not None:
                    cv2.line(frame, box_center, nearest_person_center, (255, 255, 0), 2)
                    # Products are linked through the person: distance is measured
                    # from the person's centre, not from the box.
                    for prod_center, prod_id in detected_centers[1]:
                        dist = euclidean_distance(nearest_person_center, prod_center)
                        if dist <= ASSOCIATION_RADIUS:
                            cv2.line(frame, nearest_person_center, prod_center, (0, 255, 255), 2)

                            if box_id not in associations:
                                associations[box_id] = {"person_id": nearest_person_id, "products": set()}
                            associations[box_id]["products"].add(prod_id)
                            associations[box_id]["person_id"] = nearest_person_id

            # Cleanup disappeared objects
            current_time = timestamp_sec
            for cls_id in tracked_objects:
                to_remove = []
                for obj_id, obj in tracked_objects[cls_id].items():
                    if not obj["seen"] and (current_time - obj["last_seen"]) > DISAPPEAR_TIME_THRESHOLD:
                        to_remove.append(obj_id)
                for obj_id in to_remove:
                    del tracked_objects[cls_id][obj_id]

            # Show and save frame
            cv2.imshow("YOLOv8 Product Detection", frame)
            if save_output:
                out.write(frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # CLEANUP
    cap.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()



0: 480x640 (no detections), 95.5ms
Speed: 5.1ms preprocess, 95.5ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 9 persons, 86.5ms
Speed: 3.7ms preprocess, 86.5ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 85.5ms
Speed: 4.1ms preprocess, 85.5ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 9 persons, 89.8ms
Speed: 4.2ms preprocess, 89.8ms inference, 1.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 118.1ms
Speed: 16.9ms preprocess, 118.1ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 9 persons, 117.1ms
Speed: 4.2ms preprocess, 117.1ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 113.0ms
Speed: 4.3ms preprocess, 113.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 9 persons, 111.3ms
Speed: 3.9ms preprocess, 111.3ms inference, 1.5ms

## 🎯 Barcode Reading

In [1]:
import cv2
from pyzbar.pyzbar import decode
from datetime import datetime
import openpyxl
import os

# === File paths ===
box_file = "scanned_barcodes.xlsx"
product_file = "products.xlsx"

def init_excel(file_path):
    """Open (creating if needed) a barcode workbook and index its contents.

    If ``file_path`` does not exist, a workbook with the header row
    ``ID, Barcode, Timestamp`` is created and saved there first. The data rows
    of the active sheet are then read to build a barcode -> ID lookup.

    Blank rows and rows without a barcode are skipped, cells beyond the first
    two columns are ignored, and only numeric IDs are considered when working
    out the next free ID, so a hand-edited sheet no longer crashes the scan.

    Args:
        file_path: Path of the ``.xlsx`` file to open or create.

    Returns:
        Tuple ``(wb, ws, existing, next_id)``: the workbook, its active sheet,
        a dict mapping each stored barcode (as ``str``) to its ID, and the
        next unused integer ID (1 for an empty sheet).
    """
    if not os.path.exists(file_path):
        wb = openpyxl.Workbook()
        ws = wb.active
        ws.append(["ID", "Barcode", "Timestamp"])
        wb.save(file_path)
    wb = openpyxl.load_workbook(file_path)
    ws = wb.active
    existing = {}
    next_id = 1
    # min_row=2 skips the header row
    for row in ws.iter_rows(min_row=2, values_only=True):
        if len(row) < 2:
            continue
        id_, barcode = row[0], row[1]
        if barcode is None:
            continue  # blank or partially filled row
        # Scanned barcodes are compared as strings, so normalise the key.
        existing[str(barcode)] = id_
        is_number = isinstance(id_, (int, float)) and not isinstance(id_, bool)
        if is_number and id_ >= next_id:
            next_id = int(id_) + 1
    return wb, ws, existing, next_id

# === Initialize Excel files ===
box_wb, box_ws, box_scanned, box_next_id = init_excel(box_file)
prod_wb, prod_ws, prod_scanned, prod_next_id = init_excel(product_file)

# === Webcam Start ===
# Scans barcodes from the default camera. Codes starting with "3S" are stored
# in the box workbook, all others in the product workbook; a code already
# present in its workbook is not stored again.
#
# The try/finally releases the camera and closes the preview window even when
# the cell is interrupted, and the scan loop is skipped when the camera could
# not be opened.
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        print("Cannot open camera")
    else:
        print("Scanning... '3S' barcodes go to boxes file, others to products. Press 'q' to quit.")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            barcodes = decode(frame)
            for barcode in barcodes:
                x, y, w, h = barcode.rect
                barcode_data = barcode.data.decode('utf-8')
                barcode_type = barcode.type
                display_text = f'{barcode_type}: {barcode_data}'

                # Outline the barcode and print its type/content above it
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
                cv2.putText(frame, display_text, (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

                timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

                if barcode_data.startswith("3S"):
                    if barcode_data not in box_scanned:
                        box_ws.append([box_next_id, barcode_data, timestamp])
                        # Saved immediately so a scan survives an abrupt stop
                        box_wb.save(box_file)
                        print(f"📦 Box saved: ID={box_next_id}, {barcode_data}")
                        box_scanned[barcode_data] = box_next_id
                        box_next_id += 1
                else:
                    if barcode_data not in prod_scanned:
                        prod_ws.append([prod_next_id, barcode_data, timestamp])
                        prod_wb.save(product_file)
                        print(f"🧃 Product saved: ID={prod_next_id}, {barcode_data}")
                        prod_scanned[barcode_data] = prod_next_id
                        prod_next_id += 1

            cv2.imshow("Barcode Scanner", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

Scanning... '3S' barcodes go to boxes file, others to products. Press 'q' to quit.


KeyboardInterrupt: 