# YOLOv5

## Model

In [None]:
import torch
import os
import numpy as np
import cv2 as cv
import time

# Load the 'yolov5s' entry point of the ultralytics/yolov5 hub repository with
# pretrained weights. trust_repo=True is passed so torch.hub does not ask for
# confirmation before running the repository's code.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, trust_repo=True)

In [None]:
def put_bboxs(img, result, threshold):
    """Draw boxes and labels on ``img`` for detections meeting ``threshold``.

    Only the first image of ``result`` is read (``result.xyxy[0]`` and
    ``result.pred[0]``). ``img`` is modified in place by the OpenCV drawing
    calls.

    Args:
        img: Image the boxes and labels are drawn onto.
        result: Detection result providing ``xyxy``, ``pred`` and ``names``.
        threshold: Minimum confidence (inclusive) required to draw a detection.

    Returns:
        A list with one entry per detection, in detection order: a running
        1-based id for every drawn detection and -1 for every detection whose
        confidence is below ``threshold``.
    """
    object_ids = []
    id_counter = 1
    detections = zip(result.xyxy[0][:, :4], result.pred[0][:, 5], result.pred[0][:, 4])
    for bbox, label, conf in detections:
        if conf >= threshold:
            x1, y1, x2, y2 = map(int, bbox[:4])
            class_name = result.names[int(label)]

            cv.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 7)

            label_text = f"{id_counter}-{class_name}: {float(conf):.2f}"
            font, font_scale, thickness = cv.FONT_HERSHEY_SIMPLEX, 1.5, 6
            (_, text_height), _ = cv.getTextSize(label_text, font, font_scale, thickness)
            # putText anchors the bottom-left corner of the text. The label
            # normally sits 10 px above the box; when the box is too close to
            # the top of the image for that, draw it just inside the box so it
            # stays visible instead of being clipped.
            text_y = y1 - 10
            if text_y < text_height:
                text_y = max(y1, 0) + text_height + 10
            cv.putText(img, label_text, (x1, text_y), font, font_scale, (0, 0, 0), thickness)

            object_ids.append(id_counter)
            id_counter += 1
        else:
            # -1 id means model is not confident enough
            object_ids.append(-1)

    return object_ids

In [None]:
def calculate_angles(img, result):
    """Return a coarse (up-down, left-right) angle pair for every detection.

    For each box in ``result.xyxy[0]`` the box centre is compared with the
    image centre. The offset is divided by half the image size, multiplied by
    5, rounded, and multiplied by 10, so results are multiples of 10 degrees.
    A centre on the image border maps to -50 or +50.

    Sign convention: negative is up / left, positive is down / right.

    Args:
        img: Image whose ``shape[0]`` (rows) and ``shape[1]`` (columns) give
            the frame size.
        result: Detection result; only ``result.xyxy[0][:, :4]`` is read.

    Returns:
        A list of ``(up_down_angle, left_right_angle)`` integer tuples, one
        per detection, in detection order.
    """
    rows, cols = img.shape[0], img.shape[1]

    def to_angle(low_edge, high_edge, extent):
        # centre offset relative to half the extent, scaled to 5 steps of 10 degrees
        steps = ((low_edge + high_edge) / 2 - extent / 2) / (extent / 2) * 5
        return round(steps.item()) * 10

    object_angles = []
    for box in result.xyxy[0][:, :4]:
        up_down = to_angle(box[1], box[3], rows)
        left_right = to_angle(box[0], box[2], cols)
        object_angles.append((up_down, left_right))

    return object_angles

In [None]:
def display_img(img):
    """Show ``img`` in the 'result' window and block until a key is pressed.

    All OpenCV windows are destroyed once the key press is received.
    """
    window_name = 'result'
    cv.namedWindow(window_name, cv.WINDOW_NORMAL)
    cv.imshow(window_name, img)
    # a delay of 0 makes waitKey block until any key is pressed
    cv.waitKey(0)
    cv.destroyAllWindows()

## Local Images

In [None]:
folder_path = "./YOLO_test_images"
# os.listdir returns entries in arbitrary order; sorting makes the run (and the
# results left over from the last image) reproducible.
images = sorted(os.listdir(folder_path))

for img_name in images:
    img = cv.imread(os.path.join(folder_path, img_name))
    if img is None:
        # cv.imread returns None instead of raising when a file cannot be
        # decoded (non-image files, sub-directories, ...)
        print(f"Skipping unreadable file: {img_name}")
        continue

    # cv.imread yields BGR, while the YOLOv5 hub model expects RGB for numpy
    # input; the original BGR image is kept for drawing and display.
    result = model(cv.cvtColor(img, cv.COLOR_BGR2RGB))

    object_ids = put_bboxs(img, result, 0.60)
    object_angles = calculate_angles(img, result)
    object_labels = [result.names[int(cls)] for cls in result.pred[0][:, 5]]

    display_img(img)

In [None]:
# Prints the detections of the last image processed by the loop above.
# Each entry of object_angles is an (up-down, left-right) tuple, so the header
# names both components.
print("ID\tCLASS\t\tUP-DOWN & LEFT-RIGHT ANGLE")
for obj_id, label, angles in zip(object_ids, object_labels, object_angles):
    print(obj_id,"\t",label,"   ","\t",angles)

## Local Video

In [None]:
video_path = './YOLO_test_videos/video1.webm'
cap = cv.VideoCapture(video_path)

if not cap.isOpened():
    cap.release()
    # raise instead of exit(): exit() is meant for interactive shells and, in a
    # notebook, acts on the kernel rather than just stopping this cell
    raise RuntimeError("Error: Could not open video file.")

# upper bound on processed frames per second
fps = 5
frame_interval = 1 / fps

cv.namedWindow('result', cv.WINDOW_NORMAL)
try:
    while True:
        start_time = time.time()
        ret, img = cap.read()
        if not ret:
            # end of stream or read failure
            break

        # frames are decoded as BGR, while the YOLOv5 hub model expects RGB
        # for numpy input; the BGR frame is kept for drawing and display
        result = model(cv.cvtColor(img, cv.COLOR_BGR2RGB))

        object_ids = put_bboxs(img, result, 0.60)
        object_angles = calculate_angles(img, result)
        object_labels = [result.names[int(cls)] for cls in result.pred[0][:, 5]]

        cv.imshow('result', img)

        if cv.waitKey(1) & 0xFF == ord('q'):
            break

        # sleep for whatever is left of this frame's time budget
        elapsed_time = time.time() - start_time
        time_to_wait = max(0, frame_interval - elapsed_time)
        time.sleep(time_to_wait)
finally:
    # release the capture and close the window even if inference fails
    cap.release()
    cv.destroyAllWindows()

## Gstreamer

In [None]:
video_path = './YOLO_test_videos/video1.mp4'
# decodebin selects the demuxer and decoder that match the file, instead of
# hard-coding matroskademux/vp8dec (WebM/VP8 only), which does not fit an
# .mp4 path.
pipeline = f'filesrc location={video_path} ! decodebin ! videoconvert ! appsink'

cap = cv.VideoCapture(pipeline, cv.CAP_GSTREAMER)

if not cap.isOpened():
    cap.release()
    # explicit error instead of the former division-by-zero used to abort the cell
    raise RuntimeError("Error: Could not open video file.")

# upper bound on processed frames per second
frame_rate = 5
frame_interval = 1 / frame_rate

cv.namedWindow('result', cv.WINDOW_NORMAL)
try:
    while True:
        start_time = time.time()
        ret, img = cap.read()
        if not ret:
            # end of stream or read failure
            break

        # frames are delivered as BGR, while the YOLOv5 hub model expects RGB
        # for numpy input; the BGR frame is kept for drawing and display
        result = model(cv.cvtColor(img, cv.COLOR_BGR2RGB))

        object_ids = put_bboxs(img, result, 0.60)
        object_angles = calculate_angles(img, result)
        object_labels = [result.names[int(cls)] for cls in result.pred[0][:, 5]]

        cv.imshow('result', img)

        if cv.waitKey(1) & 0xFF == ord('q'):
            break

        # sleep for whatever is left of this frame's time budget
        elapsed_time = time.time() - start_time
        time_to_wait = max(0, frame_interval - elapsed_time)
        time.sleep(time_to_wait)
finally:
    # release the capture and close the window even if inference fails
    cap.release()
    cv.destroyAllWindows()


# YOLOv8

## Model

In [None]:
# Load a model
# model = YOLO("yolov8n.yaml")  # build a new model from scratch
# model = YOLO("yolov8s-seg.pt")  # load a pretrained model (recommended for training)

# Use the model
# model.train(data="coco128.yaml", epochs=3)  # train the model
# metrics = model.val()  # evaluate model performance on the validation set
# results = model("https://ultralytics.com/images/bus.jpg")  # predict on an image
# path = model.export(format="onnx")  # export the model to ONNX format

In [None]:
from ultralytics import YOLO
import cv2 as cv
import torch
import os
import numpy as np
import time

# Segmentation ("-seg") variant of the small YOLOv8 checkpoint; the cells
# below read its predictions through result[0].boxes and result[0].plot().
model = YOLO("yolov8s-seg.pt") # small YOLOv8 for segmentation

In [None]:
# result[0].boxes.data = one row per detection: [x1, y1, x2, y2, confidence, class_id], e.g.:
"""
tensor([[5.1427e+01, 3.9802e+02, 2.4646e+02, 9.0317e+02, 9.0121e-01, 0.0000e+00],
        [6.6840e+02, 3.9336e+02, 8.0955e+02, 8.7632e+02, 8.8721e-01, 0.0000e+00],
        [2.2226e+02, 4.0550e+02, 3.4572e+02, 8.6009e+02, 8.8701e-01, 0.0000e+00],
        [2.1634e+01, 2.2947e+02, 8.0414e+02, 7.4868e+02, 8.7066e-01, 5.0000e+00],
        [3.1326e-01, 5.4515e+02, 7.6508e+01, 8.7436e+02, 7.3195e-01, 0.0000e+00],
        [2.8495e+02, 4.8044e+02, 3.0199e+02, 5.2454e+02, 3.9280e-01, 2.7000e+01],
        [6.6887e+02, 8.2729e+02, 8.0940e+02, 8.8368e+02, 3.0749e-01, 3.6000e+01]])
"""

# result[0].masks.data = segmentation mask values as a 3-D tensor, e.g. (output truncated):
"""
tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

"""

In [None]:
def draw_bbox_id_text_confidance(image, result, draw_rectangle=True, draw_label_text=True, threshold=0.8):
    """Draw boxes and/or labels on ``image`` for detections above ``threshold``.

    Only ``result[0]`` is read. Each row of ``result[0].boxes.data`` is
    unpacked as ``x1, y1, x2, y2, confidence, class_id``. ``image`` is
    modified in place by the OpenCV drawing calls.

    Args:
        image: Image the boxes and labels are drawn onto.
        result: Indexable prediction result whose first element provides
            ``boxes.data`` and ``names``.
        draw_rectangle: Draw the bounding box of each accepted detection.
        draw_label_text: Draw an "<id>-<class>: <confidence>" label for each
            accepted detection.
        threshold: A detection is accepted only when its confidence is
            strictly greater than this value.

    Returns:
        A list with one entry per detection, in detection order: a running
        1-based id for every accepted detection and -1 for the others.
    """
    prediction = result[0]
    object_ids = []
    id_counter = 1

    for x1, y1, x2, y2, conf, class_id in prediction.boxes.data.tolist():
        if conf > threshold:
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

            if draw_rectangle:
                cv.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 7)
            if draw_label_text:
                # class ids are stored as floats in the box tensor; use an
                # int for the name lookup
                class_name = prediction.names[int(class_id)]
                label_text = f"{id_counter}-{class_name}: {conf:.2f}"
                font, font_scale, thickness = cv.FONT_HERSHEY_SIMPLEX, 1.5, 6
                (_, text_height), _ = cv.getTextSize(label_text, font, font_scale, thickness)
                # putText anchors the bottom-left corner of the text. The
                # label normally sits 10 px above the box; when the box is too
                # close to the top of the image for that, draw it just inside
                # the box so it stays visible instead of being clipped.
                text_y = y1 - 10
                if text_y < text_height:
                    text_y = max(y1, 0) + text_height + 10
                cv.putText(image, label_text, (x1, text_y), font, font_scale, (0, 0, 0), thickness)

            object_ids.append(id_counter)
            id_counter += 1
        else:
            # -1 id means model is not confident enough
            object_ids.append(-1)

    return object_ids

In [None]:
def calculate_angles(image, result):
    """Return a coarse (up-down, left-right) angle pair for every detection.

    The centre of each box in ``result[0].boxes.data`` is compared with the
    image centre. The offset is divided by half the image size, multiplied by
    5, rounded, and multiplied by 10, so results are multiples of 10 degrees.
    A centre on the image border maps to -50 or +50.

    Sign convention: negative is up / left, positive is down / right.

    Args:
        image: Image whose ``shape[0]`` (rows) and ``shape[1]`` (columns) give
            the frame size.
        result: Indexable prediction result; only the first four columns of
            ``result[0].boxes.data`` are read.

    Returns:
        A list of ``(up_down_angle, left_right_angle)`` integer tuples, one
        per detection, in detection order.
    """
    n_rows = image.shape[0]
    n_cols = image.shape[1]

    def quantise(first_edge, second_edge, size):
        # centre offset relative to half the size, scaled to 5 steps of 10 degrees
        scaled = ((first_edge + second_edge) / 2 - size / 2) / (size / 2) * 5
        return round(scaled.item()) * 10

    corner_rows = result[0].boxes.data[:, :4]
    return [
        (quantise(corners[1], corners[3], n_rows), quantise(corners[0], corners[2], n_cols))
        for corners in corner_rows
    ]

In [None]:
def display_img(img):
    """Display ``img`` in the 'result' window until any key is pressed.

    Every OpenCV window is closed after the key press.
    """
    title = 'result'
    cv.namedWindow(title, cv.WINDOW_NORMAL)
    cv.imshow(title, img)
    # delay 0: block until a key is pressed
    cv.waitKey(0)
    cv.destroyAllWindows()

In [None]:
image_path = "./bus.jpg"
image = cv.imread(image_path)
if image is None:
    # cv.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here rather than handing None to the model
    raise FileNotFoundError(f"Could not read image: {image_path}")
result = model(image)

## Local Images

In [None]:
folder_path = "./YOLO_test_images"
# os.listdir returns entries in arbitrary order; sorting makes the run reproducible
images = sorted(os.listdir(folder_path))

threshold = 0.8
for img_name in images:
    img = cv.imread(os.path.join(folder_path, img_name))
    if img is None:
        # cv.imread returns None instead of raising when a file cannot be
        # decoded (non-image files, sub-directories, ...)
        print(f"Skipping unreadable file: {img_name}")
        continue

    result = model(img)

    # boolean mask over detections: keep those with confidence above the threshold
    wanted_objects = result[0].boxes.data[:, 4] > threshold
    annotated_frame = result[0][wanted_objects].plot()

    # Alternative: calculating angle, label etc. (disabled)
    # object_ids = draw_bbox_id_text_confidance(img, result, threshold=0.8)
    # object_angles = calculate_angles(img, result)
    # object_labels = [result[0].names[x.item()] for x in result[0].boxes.data[:, 5]]

    display_img(annotated_frame)

In [None]:
folder_path = "./YOLO_test_images"
# os.listdir returns entries in arbitrary order; sorting makes the run (and the
# results left over from the last image) reproducible.
images = sorted(os.listdir(folder_path))

for img_name in images:
    img = cv.imread(os.path.join(folder_path, img_name))
    if img is None:
        # cv.imread returns None instead of raising when a file cannot be
        # decoded; without this check the drawing and display steps below
        # would fail on None
        print(f"Skipping unreadable file: {img_name}")
        continue

    result = model(img)

    object_ids = draw_bbox_id_text_confidance(img, result, threshold=0.8)
    object_angles = calculate_angles(img, result)
    # class ids are stored as floats in the box tensor; use an int for the name lookup
    object_labels = [result[0].names[int(class_id)] for class_id in result[0].boxes.data[:, 5]]

    display_img(img)

In [None]:
# Prints the detections of the last image processed by the loop above.
# Loop names avoid shadowing the builtin id() for the rest of the notebook session.
print("ID\tCLASS\t\tUP-DOWN & LEFT-RIGHT ANGLE")
for obj_id, label, angles in zip(object_ids, object_labels, object_angles):
    print(obj_id,"\t",label,"   ","\t",angles)

## Local Video

In [None]:
video_path = './YOLO_test_videos/video1.webm'
cap = cv.VideoCapture(video_path)

if not cap.isOpened():
    cap.release()
    # raise instead of exit(): exit() is meant for interactive shells and, in a
    # notebook, acts on the kernel rather than just stopping this cell
    raise RuntimeError("Error: Could not open video file.")

# upper bound on processed frames per second
fps = 5
threshold = 0.8
frame_interval = 1 / fps

cv.namedWindow('result', cv.WINDOW_NORMAL)
try:
    while True:
        start_time = time.time()
        ret, img = cap.read()
        if not ret:
            # end of stream or read failure
            break

        result = model(img)

        # boolean mask over detections: keep those with confidence above the threshold
        wanted_objects = result[0].boxes.data[:, 4] > threshold
        annotated_frame = result[0][wanted_objects].plot()

        # Alternative: calculating angle, label etc. (disabled)
        # object_ids = draw_bbox_id_text_confidance(img, result, threshold=0.8)
        # object_angles = calculate_angles(img, result)
        # object_labels = [result[0].names[x.item()] for x in result[0].boxes.data[:, 5]]

        cv.imshow('result', annotated_frame)

        if cv.waitKey(1) & 0xFF == ord('q'):
            break

        # sleep for whatever is left of this frame's time budget
        elapsed_time = time.time() - start_time
        time_to_wait = max(0, frame_interval - elapsed_time)
        time.sleep(time_to_wait)
finally:
    # release the capture and close the window even if inference fails
    cap.release()
    cv.destroyAllWindows()