## Setup libraries

In [2]:
import glob
import json
import os
import random
import time
import zipfile
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import requests
import ultralytics
import yaml
from ultralytics import YOLO

## Define paths

In [3]:
# Root of the dataset workspace; every directory below sits directly beneath it.
root_dir = "Tensorflow/workspace/datasets"

# Sub-directories are kept as plain strings because later cells build file
# paths with "+" concatenation (e.g. yolo_dir + "/custom.yaml").
images_dir, annotations_dir, xml_dir, model_dir, log_dir, yolo_dir = (
    os.path.join(root_dir, sub_dir)
    for sub_dir in ("images", "annotations", "xml_dir", "model_dir", "log", "yolo")
)

## Label images

In [None]:
# Shell escape: runs the `labelImg` command from the notebook.
# NOTE(review): presumably this launches the LabelImg annotation GUI and blocks
# the cell until the tool is closed — confirm it is installed and on PATH.
!labelImg

## Test yolo

In [None]:
# Smoke-test the yolov8n.pt weights stored under yolo_dir by running them on a
# video file and showing the annotated frames in an OpenCV window.
model = YOLO(yolo_dir + "/yolov8n.pt")

# Video to run inference on. Other clips used previously:
# "output.avi", "traffic.mp4", "street.mp4"
video_path = "oak-d.avi"
print(video_path)

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this message the loop below is skipped silently when the file
    # is missing or cannot be decoded.
    print(f"Could not open video: {video_path}")

try:
    # Loop through the video frames
    while cap.isOpened():
        # Read a frame from the video
        success, frame = cap.read()
        if not success:
            # End of the video (or a read failure): stop.
            break

        # Run YOLOv8 inference on the frame
        results = model(frame)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Display the annotated frame
        cv2.imshow("YOLOv8 Inference", annotated_frame)

        # Wait 50 ms between frames; stop if 'q' is pressed
        if cv2.waitKey(50) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture and close the display window, even when
    # inference raises or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()


# Train on custom data

## Part 1: View custom.yaml

In [3]:
# settings.yaml (/home/avncalst/.config/Ultralytics/settings.yaml) defines datasets_dir and runs_dir:
#   datasets_dir: /home/avncalst/Dropbox/deeplearning/SSD/RealTimeObjectDetection/Tensorflow/workspace/datasets/yolo
#   runs_dir:     /home/avncalst/Dropbox/deeplearning/SSD/RealTimeObjectDetection/Tensorflow/workspace/datasets/yolo/runs

# Parse the dataset description first, then list its top-level entries
# one per line as "key : value".
with open(yolo_dir +'/custom.yaml', 'r') as cfg_file:
    data = yaml.safe_load(cfg_file)

for item, doc in data.items():
    print(f"{item} : {doc}")


train : /home/avncalst/Dropbox/deeplearning/SSD/RealTimeObjectDetection/Tensorflow/workspace/datasets/yolo/Training
val : /home/avncalst/Dropbox/deeplearning/SSD/RealTimeObjectDetection/Tensorflow/workspace/datasets/yolo/Validation
nc : 2
names : {0: 'apple', 1: 'pear'}


## Train on custom data

In [None]:
# Train on the dataset described by custom.yaml, starting from the
# yolov8n.pt weights stored under yolo_dir.
#
# Training parameters:
#   epochs=150
#   batch=16 (default); -1 automatically adjusts the batch size for approximately 60% CUDA memory
#   patience=0 (no early stopping)
#   imgsz=640 (default); 416 increases speed
#
# The weights are loaded here instead of reusing `model` from the "Test yolo"
# cell, so this cell also runs on a fresh kernel (same pattern as the VOC
# training cell).
# Alternative for a full custom model: model = YOLO()
model = YOLO(yolo_dir + "/yolov8n.pt")

# Original note: results are shown in yolo_dir/runs/detect/train.
model.train(data=yolo_dir + "/custom.yaml", epochs=150, patience=0, batch=12)

## Prediction

In [None]:
# Run the custom-trained weights on a single validation image.
# (Equivalent call style: model_cust.predict(source=yolo_dir+"/Validation/pear14.jpg"))
sample_image = yolo_dir+"/Validation/apple12.jpg"
model_cust = YOLO(yolo_dir+"/yolo_cust.pt")
results = model_cust([sample_image])
print(results)

# Process the results list.
for result in results:
    # Bounding-box outputs; a result also exposes masks, keypoints, probs and
    # obb for the segmentation, pose, classification and oriented-box tasks.
    boxes = result.boxes
    result.show()  # display to screen
    result.save(filename="result.jpg")  # save to disk


## Part 2: View voc.yaml

In [3]:
# `path` in VOC.yaml is relative to datasets_dir defined in settings.yaml.

# Locate VOC.yaml inside the installed ultralytics package instead of
# hard-coding a user- and Python-version-specific site-packages path.
voc_yaml_path = Path(ultralytics.__file__).parent / "cfg" / "datasets" / "VOC.yaml"

with open(voc_yaml_path, 'r') as f:
    data = yaml.safe_load(f)

# List the top-level entries one per line as "key : value".
for item, doc in data.items():
    print(item, ":", doc)

path : ../datasets/VOC
train : ['images/train2012', 'images/train2007', 'images/val2012', 'images/val2007']
val : ['images/test2007']
test : ['images/test2007']
names : {0: 'aeroplane', 1: 'bicycle', 2: 'bird', 3: 'boat', 4: 'bottle', 5: 'bus', 6: 'car', 7: 'cat', 8: 'chair', 9: 'cow', 10: 'diningtable', 11: 'dog', 12: 'horse', 13: 'motorbike', 14: 'person', 15: 'pottedplant', 16: 'sheep', 17: 'sofa', 18: 'train', 19: 'tvmonitor'}
download : import xml.etree.ElementTree as ET

from tqdm import tqdm
from ultralytics.utils.downloads import download
from pathlib import Path

def convert_label(path, lb_path, year, image_id):
    def convert_box(size, box):
        dw, dh = 1. / size[0], 1. / size[1]
        x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
        return x * dw, y * dh, w * dw, h * dh

    in_file = open(path / f'VOC{year}/Annotations/{image_id}.xml')
    out_file = open(lb_path, 'w')
    tree = ET.parse(in_file)
    ro

## Train on VOC

In [None]:
# Train on Pascal VOC, starting from the yolov8n.pt weights under yolo_dir.
model = YOLO(yolo_dir + "/yolov8n.pt")

# "VOC.yaml" is the dataset config shipped with ultralytics; passing the bare
# name lets the library resolve it, avoiding a hard-coded site-packages path.
# patience=0 disables early stopping; imgsz=416 trades accuracy for speed.
model.train(data="VOC.yaml", epochs=150, patience=0, imgsz=416)

## Test inference yolo_voc (416)

Training results are normally saved in the folder "runs" in the same directory as the Jupyter notebook.

In [None]:
# Run the VOC-trained weights (yolov8n_voc.pt under yolo_dir) on a video file
# and show the annotated frames in an OpenCV window.
model = YOLO(yolo_dir + "/yolov8n_voc.pt")

# Video to run inference on. Other clips used previously:
# "output.avi", "traffic.mp4", "street.mp4"
video_path = "oak-d.avi"
print(video_path)

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this message the loop below is skipped silently when the file
    # is missing or cannot be decoded.
    print(f"Could not open video: {video_path}")

try:
    # Loop through the video frames
    while cap.isOpened():
        # Read a frame from the video
        success, frame = cap.read()
        if not success:
            # End of the video (or a read failure): stop.
            break

        # Run YOLOv8 inference on the frame
        results = model(frame)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Display the annotated frame
        cv2.imshow("YOLOv8 Inference", annotated_frame)

        # Wait 50 ms between frames; stop if 'q' is pressed
        if cv2.waitKey(50) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture and close the display window, even when
    # inference raises or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()

## Convert to onnx

In [4]:
# Load the VOC-trained weights and export them to ONNX. The export call is
# left as the cell's last expression so its return value is displayed.
voc_weights = yolo_dir + "/yolov8n_voc.pt"
model_voc = YOLO(voc_weights)
model_voc.export(format="onnx")

Ultralytics 8.3.49 🚀 Python-3.10.12 torch-2.5.1+cu124 CPU (13th Gen Intel Core(TM) i7-13700HX)
Model summary (fused): 168 layers, 3,009,548 parameters, 0 gradients, 8.1 GFLOPs

[34m[1mPyTorch:[0m starting from 'Tensorflow/workspace/datasets/yolo/yolov8n_voc.pt' with input shape (1, 3, 416, 416) BCHW and output shape(s) (1, 24, 3549) (5.9 MB)

[34m[1mONNX:[0m starting export with onnx 1.16.1 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.45...
[34m[1mONNX:[0m export success ✅ 0.7s, saved as 'Tensorflow/workspace/datasets/yolo/yolov8n_voc.onnx' (11.6 MB)

Export complete (0.9s)
Results saved to [1m/home/avncalst/Dropbox/deeplearning/SSD/RealTimeObjectDetection/Tensorflow/workspace/datasets/yolo[0m
Predict:         yolo predict task=detect model=Tensorflow/workspace/datasets/yolo/yolov8n_voc.onnx imgsz=416  
Validate:        yolo val task=detect model=Tensorflow/workspace/datasets/yolo/yolov8n_voc.onnx imgsz=416 data=/home/avncalst/.local/lib/python3.10/site-packages

'Tensorflow/workspace/datasets/yolo/yolov8n_voc.onnx'