<a href="https://colab.research.google.com/github/antoronson/Vision-Based-Tracking/blob/dev/Vision_Based_Tracking/Object_Tracking_From_Video_Stream.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ultralytics
from ultralytics import YOLO
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import shutil
import imageio



In [None]:
from IPython.display import HTML

from base64 import b64encode

def play_mp4(file_path):
  """Return an inline HTML5 video player for an MP4 file.

  The whole file is read into memory and embedded in the page as a base64
  ``data:`` URL, so the returned object is self-contained.

  Args:
    file_path: Path of the MP4 file to embed.

  Returns:
    An ``IPython.display.HTML`` object wrapping a 640x480 ``<video>`` tag.
    It must be the last expression of a cell (or be passed to ``display``)
    for the notebook to render it.

  Raises:
    OSError: If the file cannot be opened or read.
  """
  # Use a context manager so the file handle is closed deterministically
  # instead of being left to the garbage collector.
  with open(file_path, 'rb') as video_file:
    mp4 = video_file.read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

  # The HTML object has to be returned; merely constructing it shows nothing.
  return HTML(f"""
  <video width="640" height="480" controls>
      <source src="{data_url}" type="video/mp4">
  </video>
  """)

In [None]:
# Load the YOLO model from the 'yolov8n.pt' weights file.
model = YOLO('yolov8n.pt')

# Folder holding the input video and receiving the generated outputs.
# Named `video_dir` (not `dir`) so the builtin dir() is not shadowed in the
# notebook session.
# NOTE(review): the path assumes Google Drive is mounted at /content/drive;
# no mount call is visible in this notebook - confirm it is done beforehand.
video_dir = "/content/drive/MyDrive/datasets/yolo_video"

input_file = "input_file.mp4"
output_file = "output_file.mp4"
out_gif_file = "output_gif_file.gif"

# Full paths used by the processing cell.
input_path = os.path.join(video_dir, input_file)
output_path = os.path.join(video_dir, output_file)
output_gif_path = os.path.join(video_dir, out_gif_file)

# Uncomment to preview the input video inline:
#play_mp4(input_path)

In [None]:
from google.colab.patches import cv2_imshow
from collections import defaultdict

# Run the tracker over every frame of the input video, record the centre of
# each tracked box per frame, and export the annotated frames as MP4 and GIF.
cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
  # Fail early with a clear message instead of silently processing no frames.
  raise IOError(f"Could not open input video: {input_path}")

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
print (f"Width of video: {width}")
print (f"Height of video: {height}")
print (f"FPS of video: {fps}")

# The MP4 writer is created lazily on the first annotated frame so that its
# frame size is taken from the image that is actually written.
out_write = None
frame_count = 0
# Downscaled RGB frames for the GIF; kept in memory until the loop ends, so
# memory use grows with the length of the video.
gif_frames = []
# track_id -> list of {"frame_id", "pos_x", "pos_y"} samples.
tracks = defaultdict(list)

try:
  while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
      break
    # persist=True keeps tracker state between calls so IDs carry over from
    # frame to frame. classes=[2] restricts detection to class index 2
    # (presumably "car", matching the "car_" labels used when plotting).
    results = model.track(frame, persist=True, classes=[2], conf=0.4)
    frame_count += 1
    ########################
    # Recording trajectory
    ########################
    for box in results[0].boxes:
      # Detections that the tracker has not assigned an ID to are skipped.
      if box.id is None:
        continue
      track_id = int(box.id)
      # xyxy is ordered (x1, y1, x2, y2): top-left corner, then bottom-right.
      x1, y1, x2, y2 = box.xyxy[0].tolist()
      cx = (x1 + x2) / 2
      cy = (y1 + y2) / 2
      tracks[track_id].append({
          "frame_id": frame_count,
          "pos_x": cx,
          "pos_y": cy
      })

    marked_frame = results[0].plot()

    print(f"Processing the frame {frame_count}")
    #######################
    # Write to Mp4 file
    #######################
    if out_write is None:
      h, w, _ = marked_frame.shape
      fps = cap.get(cv2.CAP_PROP_FPS)
      fourcc = cv2.VideoWriter_fourcc(*'mp4v')
      out_write = cv2.VideoWriter(output_path, fourcc, fps, (w, h))

    # Each frame is written exactly once; writing it twice would double the
    # duration of the output video.
    out_write.write(marked_frame)

    #######################
    # Write to Gif file
    #######################
    # The annotated frame is converted from BGR to RGB before it is stored.
    marked_frame_rgb = cv2.cvtColor(marked_frame, cv2.COLOR_BGR2RGB)
    small_frame = cv2.resize(marked_frame_rgb, (640, 360))
    #if frame_count%5 == 0: Trying to write all gif frames (greedy)
    gif_frames.append(small_frame)
finally:
  # Always release the capture and the writer, even if tracking fails midway;
  # releasing the writer is what finalizes the MP4 file on disk.
  cap.release()
  if out_write is not None:
    out_write.release()
  cv2.destroyAllWindows()


# Converting to Gif
if gif_frames:
  imageio.mimsave(output_gif_path, gif_frames, fps=5)
else:
  print("No frames were read from the video; skipping GIF export")


In [None]:
import matplotlib.pyplot as plt

# Plot the recorded centre positions of every tracked object as one line per
# track, using explicit figure/axes objects.
fig, ax = plt.subplots(figsize=(8, 6))

for track_id, points in tracks.items():
    # A track needs at least two samples to form a line; shorter ones are
    # left out of the plot.
    if len(points) >= 2:
        xs = []
        ys = []
        for sample in points:
            xs.append(sample["pos_x"])
            ys.append(sample["pos_y"])
        ax.plot(xs, ys, marker='o', linewidth=2, label=f"car_{track_id}")

ax.set_xlabel("X position (pixels)")
ax.set_ylabel("Y position (pixels)")
ax.set_title("Object trajectories")
# Image coordinates have y growing downwards, so flip the axis to match.
ax.invert_yaxis()
ax.legend()
ax.grid(True)
plt.show()

As we can see, the above model is not sophisticated. Although it draws the box around each object and stores the result as an image, the output is not what was expected. Furthermore, the algorithm cannot re-detect the van from frame to frame, and the buses remain undetected. Thus the algorithm has to be improved, and the data presentation format should also be changed.

# 07.01.2026:
During the test the model kept crashing. On inspection, the cause turned out to be a buffer overflowing. Therefore, each image is now written back to the mp4 file on the fly, and the video with the embedded frames is played back afterwards. A second gif file with compressed frames will also be tried.

