In [1]:
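# Uncomment to install the dependencies into the kernel's own environment
# (%pip targets the running kernel, unlike `! pip`):
# %pip install ultralytics
# %pip install fastprogress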

https://arxiv.org/abs/1506.02640 (yolo paper) <br/>
https://github.com/ultralytics/ultralytics (official lib, yolov8) <br/>
https://docs.ultralytics.com/quickstart/ (official doc) <br/>
https://www.youtube.com/@Ultralytics (official youtube channel) <br/>

https://docs.ultralytics.com/modes/track/ (live object tracking)

In [11]:
import os
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Video, HTML
from tqdm import tqdm
from ultralytics import YOLO

In [2]:
# Instantiate the detector from the pre-trained YOLOv8 weights file (yolov8n.pt).
model = YOLO("yolov8n.pt")

In [17]:
# Location of the source clip that the tracker will be run on.
video_path = './data/Amazing_Indoor_Koi_Pond.mp4'

In [35]:
# cv2.VideoCapture opens the video file; frames are then pulled one at a time
# with cap.read() (it is a stateful reader object, not a Python generator).
desired_fps = 500  # NOTE(review): not referenced by any visible cell - confirm it is still needed
cap = cv2.VideoCapture(video_path)

# VideoCapture does not raise on a missing/unreadable file; it just stays
# closed, which would make the tracking loop silently process zero frames.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

# Container metadata: the frame count is used for the progress bar, the fps is
# reused when the annotated video is written back out.
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
original_fps = cap.get(cv2.CAP_PROP_FPS)
print('total frames: ', total_frames)
print('orignal fps: ', original_fps)

total frames:  500
orignal fps:  30.0


In [21]:
# Run the tracker over every frame of `cap` and collect the annotated frames.
#
# NOTE: every annotated frame is kept in memory in `predicted_frames`; for long
# or high-resolution clips consider writing frames to disk as they are produced.
predicted_frames = []

# This cell consumes and releases `cap`. If it is re-run without re-creating
# the capture, the loop below would be skipped without any message and leave
# `predicted_frames` empty, so fail loudly instead.
if not cap.isOpened():
    raise RuntimeError(
        "Video capture is not open - re-run the cell that creates `cap` first."
    )

# setup the progress bar
# `total` must be passed by keyword: the first positional argument of tqdm is
# the iterable, so a bare integer there leaves the bar without a total.
# Fall back to an open-ended bar when the container reports no frame count.
progress_bar = tqdm(
    total=total_frames if total_frames > 0 else None,
    desc="prediction",
    unit=" frames",
)

try:
    while cap.isOpened():
        res, frame = cap.read()

        # res is False once no further frame can be read (end of the video).
        if not res:
            print('done!')
            break

        # persist=True keeps the tracker state between calls so that object
        # identities carry over from one frame to the next.
        predictions = model.track(frame, persist=True, verbose=False)

        # To preview live, show the annotated frame in a window:
        # cv2.imshow("YOLOv8 Tracking", predictions[0].plot())

        # store the annotated frame
        predicted_frames.append(predictions[0].plot())

        # update the progress bar
        progress_bar.update(1)

        # Stop early when 'q' is pressed. cv2.waitKey(1) polls for a key for
        # 1 ms; `& 0xFF` keeps only the low byte of the returned code so it can
        # be compared with ord('q'). Key presses are only delivered while an
        # OpenCV window (see the cv2.imshow line above) is open.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always clean up, even if prediction raises part-way through.

    # close the progress bar
    progress_bar.close()

    # release the resources held by the VideoCapture object
    cap.release()

    # close any OpenCV image/video display windows
    cv2.destroyAllWindows()


prediction: 500frames [01:14,  6.70frames/s]

done!





In [38]:
# Encode the annotated frames into an mp4 file.
# NOTE: `video_path` is reassigned here from the input clip to the output file;
# the HTML embedding cell reads this new value.
video_path = "./output/object_tracking_v1.mp4"

frames = predicted_frames
if not frames:
    print("Error: No frames to create video.")
else:
    # Frames are (height, width, channels) arrays; VideoWriter expects the
    # frame size as (width, height).
    height, width = frames[0].shape[:2]

    # VideoWriter does not create missing directories - it just fails to open.
    output_dir = os.path.dirname(video_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # codec with videowriter
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    speed = 1  # increase to make the output play faster
    fps = speed * original_fps  # keep the source frame rate for smooth playback
    video_writer = cv2.VideoWriter(video_path, fourcc, fps, (width, height))

    # A writer that failed to open (unsupported codec, unwritable path, ...)
    # silently ignores write() calls, so check explicitly.
    if not video_writer.isOpened():
        video_writer.release()
        raise RuntimeError(
            f"Could not open video writer for {video_path} "
            "(is the 'avc1' codec available and the path writable?)"
        )

    try:
        # write each frame
        for frame in frames:
            video_writer.write(frame)
    finally:
        # finalize the file on disk, even if writing fails part-way
        video_writer.release()

In [39]:
# Build an HTML5 <video> element whose source is the file at `video_path`
# (by this point that name refers to the rendered output video).
html_video = """
<video width="640" height="480" controls>
  <source src="{video_path}" type="video/mp4">
  Your browser does not support the video tag.
</video>
""".format(video_path=video_path)

# Leaving the HTML object as the cell's last expression renders it inline.
HTML(html_video)