## VisionEye Mapping with YOLOv8 (nano and small models)

In [None]:
###############
# PIP Install #
###############

!pip install ultralytics

In [3]:
###########
# Imports #
###########

import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import colors, Annotator
import math

### Vision Eye Pinpoint code

In [39]:
######################
# Parameters & model #
######################


## Initialize the YOLOv8 nano model (used below through model.track) ##
model = YOLO("yolov8n.pt")

## Create VideoCapture object ##
video_path = "/home/valerio/code/ValerioCann/Modèles/Data/Videos/People_walking_0.mp4"
cap = cv2.VideoCapture(video_path)

## Fail early if the video cannot be opened: otherwise the loop below would ##
## exit on the first read and report a misleading "completed" message       ##
if not cap.isOpened():
    raise IOError(f"Could not open input video: {video_path}")

## Retrieve width, height & frames per second ##
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

## Create VideoWriter object ##
out = cv2.VideoWriter('visioneye-pinpoint.avi', cv2.VideoWriter_fourcc(*'MJPG'), fps, (w, h))

## Define the origin point (x = -10, y = frame height) ##
center_point = (-10, h)


##########################
# Tracking & Annotations #
##########################


## try/finally guarantees that the capture, the writer and the display window ##
## are released even if tracking raises or the kernel is interrupted          ##
try:
    while True:
        ret, im0 = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        ## Instantiates annotator (draws directly on im0) ##
        annotator = Annotator(im0, line_width=2)

        ## Track objects; persist=True keeps tracker state between frames ##
        results = model.track(im0, persist=True)
        boxes = results[0].boxes.xyxy.cpu()

        ## boxes.id is None when no track id is available for this frame ##
        if results[0].boxes.id is not None:
            track_ids = results[0].boxes.id.int().cpu().tolist()

            for box, track_id in zip(boxes, track_ids):
                annotator.box_label(box, label=str(track_id), color=colors(int(track_id)))
                annotator.visioneye(box, center_point)

        ## Adds a new frame to the output video ##
        out.write(im0)

        ## Displays the frame ##
        cv2.imshow("visioneye-pinpoint", im0)

        ## Press 'q' in the display window to stop early ##
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    out.release()
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 13 persons, 2 birds, 70.7ms
Speed: 1.5ms preprocess, 70.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 persons, 2 birds, 111.3ms
Speed: 1.7ms preprocess, 111.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 persons, 2 birds, 56.8ms
Speed: 1.5ms preprocess, 56.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 persons, 2 birds, 52.6ms
Speed: 1.3ms preprocess, 52.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 persons, 2 birds, 55.9ms
Speed: 1.4ms preprocess, 55.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 2 birds, 50.2ms
Speed: 1.3ms preprocess, 50.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 2 birds, 62.2ms
Speed: 2.2ms preprocess, 62.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 2 birds, 52.9ms


0: 384x640 27 persons, 1 bird, 1 dog, 44.5ms
Speed: 1.7ms preprocess, 44.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 28 persons, 1 bird, 46.3ms
Speed: 1.8ms preprocess, 46.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 29 persons, 45.8ms
Speed: 1.9ms preprocess, 45.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 30 persons, 47.7ms
Speed: 1.4ms preprocess, 47.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 29 persons, 48.7ms
Speed: 2.9ms preprocess, 48.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 29 persons, 51.8ms
Speed: 1.5ms preprocess, 51.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 28 persons, 1 bird, 49.1ms
Speed: 1.3ms preprocess, 49.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 28 persons, 1 bird, 1 dog, 38.8ms
Speed: 1.2ms preprocess, 38


0: 384x640 25 persons, 1 bird, 51.1ms
Speed: 1.3ms preprocess, 51.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 51.2ms
Speed: 1.3ms preprocess, 51.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 45.4ms
Speed: 1.3ms preprocess, 45.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 27 persons, 41.9ms
Speed: 1.7ms preprocess, 41.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 28 persons, 1 dog, 48.8ms
Speed: 1.3ms preprocess, 48.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 27 persons, 1 bird, 46.8ms
Speed: 1.6ms preprocess, 46.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 1 bird, 50.3ms
Speed: 1.4ms preprocess, 50.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 1 bird, 44.0ms
Speed: 1.4ms preprocess, 44.0ms in


0: 384x640 26 persons, 45.9ms
Speed: 1.3ms preprocess, 45.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 44.9ms
Speed: 1.3ms preprocess, 44.9ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 44.3ms
Speed: 1.6ms preprocess, 44.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 38.8ms
Speed: 1.3ms preprocess, 38.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 50.3ms
Speed: 1.3ms preprocess, 50.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 45.5ms
Speed: 1.5ms preprocess, 45.5ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 43.3ms
Speed: 1.4ms preprocess, 43.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 27 persons, 59.7ms
Speed: 2.0ms preprocess, 59.7ms inference, 0.8ms postprocess per image at


0: 384x640 21 persons, 46.6ms
Speed: 1.4ms preprocess, 46.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 1 motorcycle, 1 bird, 1 dog, 46.4ms
Speed: 1.2ms preprocess, 46.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 1 motorcycle, 1 bird, 48.9ms
Speed: 1.7ms preprocess, 48.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 19 persons, 1 bird, 46.0ms
Speed: 1.3ms preprocess, 46.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 44.5ms
Speed: 1.8ms preprocess, 44.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 44.2ms
Speed: 1.3ms preprocess, 44.2ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 20 persons, 48.4ms
Speed: 1.3ms preprocess, 48.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 51.2ms
Speed: 1.6ms p


0: 384x640 21 persons, 44.3ms
Speed: 1.2ms preprocess, 44.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 37.8ms
Speed: 1.4ms preprocess, 37.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 46.4ms
Speed: 1.3ms preprocess, 46.4ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 47.3ms
Speed: 1.3ms preprocess, 47.3ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 43.2ms
Speed: 1.4ms preprocess, 43.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 46.5ms
Speed: 1.7ms preprocess, 46.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 42.0ms
Speed: 2.0ms preprocess, 42.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 37.7ms
Speed: 1.8ms preprocess, 37.7ms inference, 0.8ms postprocess per image at

### Vision Eye Distance Calculation code

In [35]:
def adjust_pixel_per_meter(y, h, base_ppm=150):
    """
    Scale the pixel-per-meter factor linearly with the vertical position y.

    The factor equals base_ppm at y = 0 and grows to 2 * base_ppm at y = h.

         :param y: vertical position of the object in the image
         :param h: height of the image in pixels (must be non-zero, since y is divided by it)
         :param base_ppm: pixel-per-meter value returned for y = 0
         :return: base_ppm multiplied by (1 + y / h)
    """
    # Relative vertical position: 0 at y = 0, 1 at y = h
    relative_y = y / h

    # Simple linear fit; replace with another fitting function if needed
    scale = 1 + relative_y
    return base_ppm * scale

In [38]:
######################
# Parameters & model #
######################


## Initialize the YOLOv8 small model (used below through model.track) ##
model = YOLO("yolov8s.pt")

## Create VideoCapture object ##
video_path = "/home/valerio/code/ValerioCann/Modèles/Data/Videos/People_walking_0.mp4"
cap = cv2.VideoCapture(video_path)

## Fail early if the video cannot be opened: otherwise the loop below would ##
## exit on the first read and report a misleading "completed" message       ##
if not cap.isOpened():
    raise IOError(f"Could not open input video: {video_path}")

## Retrieve width, height & frames per second ##
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))

## Create VideoWriter object ##
out = cv2.VideoWriter('visioneye-distance-calculation.avi', cv2.VideoWriter_fourcc(*'MJPG'), fps, (w, h))

## Define the origin point (x = 0, y = frame height) ##
center_point = (0, h)

## Base pixel-per-meter factor, rescaled per object by adjust_pixel_per_meter ##
pixel_per_meter = 50

txt_color, txt_background, bbox_clr = ((0, 0, 0), (255, 255, 255), (255, 0, 255))

## Text parameters shared by the size measurement and the drawing call ##
font, font_scale, font_thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.8, 1
text_offset_x = 10


#######################################
# Tracking, Annotations & Calculation #
#######################################


## try/finally guarantees that the capture, the writer and the display window ##
## are released even if tracking raises or the kernel is interrupted          ##
try:
    while True:
        ret, im0 = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        ## Instantiates annotator (draws directly on im0) ##
        annotator = Annotator(im0, line_width=2)

        ## Track objects; persist=True keeps tracker state between frames ##
        results = model.track(im0, persist=True)
        boxes = results[0].boxes.xyxy.cpu()

        ## boxes.id is None when no track id is available for this frame ##
        if results[0].boxes.id is not None:
            track_ids = results[0].boxes.id.int().cpu().tolist()

            for box, track_id in zip(boxes, track_ids):
                annotator.box_label(box, label=str(track_id), color=bbox_clr)
                annotator.visioneye(box, center_point)

                ## Bounding box centroid ##
                x1, y1 = int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)

                ## Adjust pixel_per_meter according to the y height of the bounding box ##
                adjusted_ppm = adjust_pixel_per_meter(y1, h, pixel_per_meter)

                ## Pixel distance from the origin point to the centroid, converted with the adjusted factor ##
                distance = math.hypot(x1 - center_point[0], y1 - center_point[1]) / adjusted_ppm

                ###################
                # Text parameters #
                ###################

                ## Text size calculation ##
                text = f"Distance: {distance:.2f} m"
                (text_width, text_height), baseline = cv2.getTextSize(text, font, font_scale, font_thickness)
                text_offset_y = text_height + baseline + 5  # added baseline height and some space

                ## Calculation of the coordinates of the background rectangle ##
                start_point = (x1 + text_offset_x, y1 - text_offset_y)
                end_point = (x1 + text_offset_x + text_width, y1)

                ## Draw rectangle ##
                cv2.rectangle(im0, start_point, end_point, txt_background, cv2.FILLED)

                ## Adjusted text position to fit inside rectangle ##
                cv2.putText(im0, text, (x1 + text_offset_x, y1 - baseline), font, font_scale, txt_color, font_thickness)

        ## Adds a new frame to the output video ##
        out.write(im0)

        ## Displays the frame ##
        cv2.imshow("visioneye-distance-calculation", im0)

        ## Press 'q' in the display window to stop early ##
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    out.release()
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 16 persons, 151.3ms
Speed: 1.6ms preprocess, 151.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 persons, 171.4ms
Speed: 1.9ms preprocess, 171.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 persons, 138.5ms
Speed: 1.6ms preprocess, 138.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 16 persons, 135.4ms
Speed: 2.4ms preprocess, 135.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 17 persons, 140.6ms
Speed: 1.8ms preprocess, 140.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 145.9ms
Speed: 1.7ms preprocess, 145.9ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 147.9ms
Speed: 2.1ms preprocess, 147.9ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 persons, 141.4ms
Speed: 2.1ms preprocess, 141.4ms inference, 0.9ms postproc


0: 384x640 27 persons, 142.4ms
Speed: 2.5ms preprocess, 142.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 27 persons, 135.9ms
Speed: 1.9ms preprocess, 135.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 141.4ms
Speed: 3.8ms preprocess, 141.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 144.7ms
Speed: 1.5ms preprocess, 144.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 142.9ms
Speed: 1.9ms preprocess, 142.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 142.2ms
Speed: 1.5ms preprocess, 142.2ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 143.1ms
Speed: 1.4ms preprocess, 143.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 139.3ms
Speed: 1.7ms preprocess, 139.3ms inference, 0.9ms postproc


0: 384x640 26 persons, 134.1ms
Speed: 1.8ms preprocess, 134.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 138.1ms
Speed: 1.7ms preprocess, 138.1ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 138.1ms
Speed: 1.6ms preprocess, 138.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 189.7ms
Speed: 2.0ms preprocess, 189.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 24 persons, 1 dog, 147.0ms
Speed: 1.6ms preprocess, 147.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 24 persons, 1 dog, 133.3ms
Speed: 1.9ms preprocess, 133.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 124.6ms
Speed: 1.6ms preprocess, 124.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 139.9ms
Speed: 1.6ms preprocess, 139.9ms inference, 

Speed: 2.0ms preprocess, 124.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 1 handbag, 140.5ms
Speed: 2.5ms preprocess, 140.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 125.0ms
Speed: 2.0ms preprocess, 125.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 140.9ms
Speed: 2.2ms preprocess, 140.9ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 137.1ms
Speed: 1.3ms preprocess, 137.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 141.0ms
Speed: 1.8ms preprocess, 141.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 140.7ms
Speed: 1.6ms preprocess, 140.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 142.4ms
Speed: 2.4ms preprocess, 142.4ms inference, 0.9ms postprocess per image at shap


0: 384x640 23 persons, 1 handbag, 143.6ms
Speed: 1.9ms preprocess, 143.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 23 persons, 1 handbag, 140.6ms
Speed: 1.6ms preprocess, 140.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 21 persons, 1 handbag, 139.8ms
Speed: 2.4ms preprocess, 139.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 1 handbag, 135.6ms
Speed: 2.5ms preprocess, 135.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 1 handbag, 140.8ms
Speed: 1.7ms preprocess, 140.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 1 handbag, 153.8ms
Speed: 6.7ms preprocess, 153.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 persons, 1 handbag, 139.6ms
Speed: 1.7ms preprocess, 139.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2

0: 384x640 27 persons, 140.8ms
Speed: 1.4ms preprocess, 140.8ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 26 persons, 143.5ms
Speed: 3.4ms preprocess, 143.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 145.5ms
Speed: 2.1ms preprocess, 145.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 139.7ms
Speed: 1.8ms preprocess, 139.7ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 136.1ms
Speed: 1.5ms preprocess, 136.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 24 persons, 117.0ms
Speed: 1.7ms preprocess, 117.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 121.5ms
Speed: 1.5ms preprocess, 121.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 25 persons, 139.2ms
Speed: 2.2ms preprocess, 139.2ms inference, 1.0ms postproce