# Install necessary package

In [1]:
!pip install ultralytics opencv-python

Collecting ultralytics
  Downloading ultralytics-8.0.235-py3-none-any.whl (677 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m677.8/677.8 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.0.235


# **Initial demo**

In [None]:
# @title
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import colors, Annotator

model = YOLO("yolov8n.pt")
names = model.model.names
cap = cv2.VideoCapture("horse.mp4")

out = cv2.VideoWriter('horse_object_detection.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                      30, (int(cap.get(3)), int(cap.get(4))))

center_point = (-10, int(cap.get(4)))

while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    results = model.predict(im0)
    boxes = results[0].boxes.xyxy.cpu()
    clss = results[0].boxes.cls.cpu().tolist()

    annotator = Annotator(im0, line_width=2)

    for box, cls in zip(boxes, clss):
        annotator.box_label(box, label=names[int(cls)], color=colors(int(cls)))
        annotator.visioneye(box, center_point,color=(255,0,0),pin_color=(255,0,0))

    out.write(im0)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

out.release()
cap.release()
cv2.destroyAllWindows()

# **General Vision Eye object mapping**

In [6]:
# @title
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import colors, Annotator

model = YOLO("yolov8n.pt")
names = model.model.names
cap = cv2.VideoCapture("lion.mp4")

out = cv2.VideoWriter('visioneye-pinpoint.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                      30, (int(cap.get(3)), int(cap.get(4))))

center_point = (-10, int(cap.get(4)))

while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    results = model.predict(im0)
    boxes = results[0].boxes.xyxy.cpu()
    clss = results[0].boxes.cls.cpu().tolist()

    annotator = Annotator(im0, line_width=2)

    for box, cls in zip(boxes, clss):
        annotator.box_label(box, label=names[int(cls)], color=colors(int(cls)))
        annotator.visioneye(box, center_point)

    out.write(im0)
    # cv2.imshow("visioneye-pinpoint", im0)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

out.release()
cap.release()
cv2.destroyAllWindows()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

0: 384x640 5 horses, 1 cow, 9.1ms
Speed: 1.1ms preprocess, 9.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 8.2ms
Speed: 2.2ms preprocess, 8.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 8.2ms
Speed: 1.7ms preprocess, 8.2ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 8.6ms
Speed: 1.6ms preprocess, 8.6ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 7.9ms
Speed: 1.1ms preprocess, 7.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 9.1ms
Speed: 2.9ms preprocess, 9.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 8.7ms
Speed: 1.0ms preprocess, 8.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 

# **Now my idea play the role: Auto label part**

1. Detect Horse using opensource yolov8 object mapping
2. I am plan to develop automated annotation
3. crop the images and xml file generated
4. Xml file contain croping object image size (Height and weight)

In [19]:
#annotation and store one folder
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import colors, Annotator
import os
import xml.etree.ElementTree as ET

model = YOLO("yolov8n.pt")
names = model.model.names
cap = cv2.VideoCapture("horse.mp4")

out = cv2.VideoWriter('visioneye-pinpoint_anno.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                      30, (int(cap.get(3)), int(cap.get(4))))

center_point = (-10, int(cap.get(4)))
output_folder = "output_folder_annotation"
os.makedirs(output_folder, exist_ok=True)

object_count = 0

while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    results = model.predict(im0)
    boxes = results[0].boxes.xyxy.cpu()
    clss = results[0].boxes.cls.cpu().tolist()

    annotator = Annotator(im0, line_width=2)

    for box, cls in zip(boxes, clss):
        annotator.box_label(box, label=names[int(cls)], color=colors(int(cls)))
        annotator.visioneye(box, center_point)

        if names[int(cls)] == "horse":
            # Crop and save the detected horse
            x, y, w, h = [int(val) for val in box]
            horse_crop = im0[y:h, x:w]
            cv2.imwrite(os.path.join(output_folder, f"horse_{object_count}.jpg"), horse_crop)

            # Generate XML file for annotation
            root = ET.Element("annotation")
            ET.SubElement(root, "filename").text = f"horse_{object_count}.jpg"
            size = ET.SubElement(root, "size")
            ET.SubElement(size, "width").text = str(w - x)
            ET.SubElement(size, "height").text = str(h - y)
            ET.SubElement(size, "depth").text = str(im0.shape[2])
            object_elem = ET.SubElement(root, "object")
            ET.SubElement(object_elem, "name").text = "horse"
            bndbox = ET.SubElement(object_elem, "bndbox")
            ET.SubElement(bndbox, "xmin").text = str(x)
            ET.SubElement(bndbox, "ymin").text = str(y)
            ET.SubElement(bndbox, "xmax").text = str(w)
            ET.SubElement(bndbox, "ymax").text = str(h)

            tree = ET.ElementTree(root)
            tree.write(os.path.join(output_folder, f"horse_{object_count}.xml"))

            object_count += 1

    out.write(im0)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

out.release()
cap.release()
cv2.destroyAllWindows()

# Store object counts in a text file
with open(os.path.join(output_folder, "object_counts.txt"), "w") as count_file:
    count_file.write(f"Total horse objects: {object_count}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Speed: 1.0ms preprocess, 9.9ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 horses, 1 cow, 10.2ms
Speed: 1.2ms preprocess, 10.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 9.0ms
Speed: 1.3ms preprocess, 9.0ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 11.0ms
Speed: 1.1ms preprocess, 11.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 11.1ms
Speed: 1.1ms preprocess, 11.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 8.2ms
Speed: 1.1ms preprocess, 8.2ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 10.2ms
Speed: 1.1ms preprocess, 10.2ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 8.2ms
Spee

# Autoannotation with detection count

In [20]:
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import colors, Annotator
import os
import xml.etree.ElementTree as ET

model = YOLO("yolov8n.pt")
names = model.model.names
cap = cv2.VideoCapture("horse.mp4")

out = cv2.VideoWriter('visioneye-pinpoint_anno_count.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                      30, (int(cap.get(3)), int(cap.get(4))))

center_point = (-10, int(cap.get(4)))
output_folder = "output_folder_annotation"
os.makedirs(output_folder, exist_ok=True)

object_count = 0

while True:
    ret, im0 = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    results = model.predict(im0)
    boxes = results[0].boxes.xyxy.cpu()
    clss = results[0].boxes.cls.cpu().tolist()

    annotator = Annotator(im0, line_width=2)

    for box, cls in zip(boxes, clss):
        annotator.box_label(box, label=names[int(cls)], color=colors(int(cls)))
        annotator.visioneye(box, center_point)

        if names[int(cls)] == "horse":
            # Crop and save the detected horse
            x, y, w, h = [int(val) for val in box]
            horse_crop = im0[y:h, x:w]
            cv2.imwrite(os.path.join(output_folder, f"horse_{object_count}.jpg"), horse_crop)

            # Generate XML file for annotation
            root = ET.Element("annotation")
            ET.SubElement(root, "filename").text = f"horse_{object_count}.jpg"
            size = ET.SubElement(root, "size")
            ET.SubElement(size, "width").text = str(w - x)
            ET.SubElement(size, "height").text = str(h - y)
            ET.SubElement(size, "depth").text = str(im0.shape[2])
            object_elem = ET.SubElement(root, "object")
            ET.SubElement(object_elem, "name").text = "horse"
            bndbox = ET.SubElement(object_elem, "bndbox")
            ET.SubElement(bndbox, "xmin").text = str(x)
            ET.SubElement(bndbox, "ymin").text = str(y)
            ET.SubElement(bndbox, "xmax").text = str(w)
            ET.SubElement(bndbox, "ymax").text = str(h)

            tree = ET.ElementTree(root)
            tree.write(os.path.join(output_folder, f"horse_{object_count}.xml"))

            object_count += 1

    # Display horse count on the video output
    cv2.putText(im0, f'Horse Count: {object_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    out.write(im0)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

out.release()
cap.release()
cv2.destroyAllWindows()

# Store object counts in a text file
with open(os.path.join(output_folder, "object_counts.txt"), "w") as count_file:
    count_file.write(f"Total horse objects: {object_count}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Speed: 1.1ms preprocess, 12.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 horses, 1 cow, 10.1ms
Speed: 1.4ms preprocess, 10.1ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 8.7ms
Speed: 1.2ms preprocess, 8.7ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 8.8ms
Speed: 1.1ms preprocess, 8.8ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 9.3ms
Speed: 1.0ms preprocess, 9.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 14.5ms
Speed: 1.1ms preprocess, 14.5ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 9.8ms
Speed: 1.2ms preprocess, 9.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 horses, 2 cows, 15.0ms
Speed: