In [None]:
!pip install ultralytics
from ultralytics import YOLO
import cv2 as cv
import numpy as np
import math
import time

In [6]:
# Find the center of the image
# Detect all the objects in the frame
# Get the x, y, w, h and class name of each object
# Draw boxes around the objects (optional)
# Get the area of each object
# Find out which one has the highest area and its associated class
# Calculate the offset X, Y from the center of the image to the center of the object

from google.colab.patches import cv2_imshow

# Loaded YOLO models keyed by weights path. detection() is typically called once
# per video frame; without this cache the weights would be re-read on every call.
_MODEL_CACHE = {}


def _resolve_model(model):
  """Return a callable detector for `model` (a weights path or a loaded model)."""
  # An already-loaded model is callable; a path (str / Path) is not.
  if callable(model):
    return model
  key = str(model)
  if key not in _MODEL_CACHE:
    _MODEL_CACHE[key] = YOLO(model)
  return _MODEL_CACHE[key]


def detection(model, img, conf_threshold=0.5):
  """Detect objects in `img` and report the largest one relative to the image centre.

  Args:
    model: Path to YOLO weights, or an already-loaded (callable) model. Paths are
      loaded once and cached by their string form, so a weights file that is
      replaced on disk under the same path is not picked up until the cache is
      cleared (re-running this cell resets it).
    img: Image array; `img.shape[:2]` is read as (height, width).
    conf_threshold: Minimum confidence passed to the model as `conf`.

  Returns:
    An 8-tuple:
      object_attribute: (class_id, x_offset, y_offset, "HH:MM:SS") for the
        detection with the largest w * h; the first three are None when nothing
        was detected.
      frame_x, frame_y: Centre of the image (width // 2, height // 2).
      object_position_x, object_position_y: The x, y of the largest detection as
        taken from `boxes.xywh` (box centre, per the Ultralytics Boxes API), or
        None when nothing was detected.
      cor_list: One [x1, y1, x2, y2] entry per detection, from `boxes.xyxy`.
      conf_score: One confidence (float) per detection.
      class_list: One class id (int) per detection.
  """
  detector = _resolve_model(model)

  # shape[:2] is (rows, cols), i.e. (height, width) -- x runs along the width.
  height, width = img.shape[:2]
  frame_x, frame_y = width // 2, height // 2

  # Per-detection outputs, index-aligned with each other.
  cor_list = []
  conf_score = []
  class_list = []

  # Running "largest detection so far".
  prominent_object_area = 0
  object_position_x = None
  object_position_y = None
  object_id = None

  for result in detector(img, conf = conf_threshold, save = False):
    # Iterating a result yields one single-detection entry at a time, which is
    # why every field below is read at index 0.
    for r in result:
      boxes = r.boxes
      class_conf = float(boxes.conf[0])
      class_id = int(boxes.cls[0])

      corners = boxes.xyxy[0]
      centre = boxes.xywh[0]
      x, y, w, h = int(centre[0]), int(centre[1]), int(centre[2]), int(centre[3])

      cor_list.append([corners[0], corners[1], corners[2], corners[3]])
      conf_score.append(class_conf)
      class_list.append(class_id)

      # `>=` means that on equal areas the later detection wins.
      area = w * h
      if area >= prominent_object_area:
        prominent_object_area = area
        object_position_x = x
        object_position_y = y
        object_id = class_id

  if object_position_x is None or object_position_y is None:
    x_offset = None
    y_offset = None
  else:
    x_offset = object_position_x - frame_x
    # Sign is flipped relative to x: a smaller y than the frame centre gives a
    # positive offset.
    y_offset = frame_y - object_position_y

  current_time = time.strftime("%H:%M:%S", time.localtime())
  object_attribute = (object_id, x_offset, y_offset, current_time)

  return object_attribute, frame_x, frame_y, object_position_x, object_position_y, cor_list, conf_score, class_list

In [24]:
# function to create a video using the model
def video(model, video_link, output_path='output1.mp4', fps=None):
  """Run detection on every frame of a video and write an annotated copy.

  For each frame, every detection gets a box, its class name and its confidence
  drawn on it; the frame centre, the largest detection's position and a line
  joining them are drawn once on top. The offsets of the largest detection are
  printed per frame.

  Args:
    model: Path to the YOLO weights. Used here to look up class names and handed
      unchanged to detection().
    video_link: Source passed to cv.VideoCapture.
    output_path: Where the annotated video is written.
    fps: Frame rate of the output. When None, the source's reported frame rate is
      used, falling back to 30.0 if the source does not report a positive one.
  """
  # Loaded here only for the class-name table used when labelling boxes.
  detect_model = YOLO(model)
  classes = detect_model.names

  # One random colour per class.
  color = np.random.uniform(0, 255, size = (len(classes), 3))

  cap = cv.VideoCapture(video_link)
  out = None
  try:
    frame_size = (int(cap.get(cv.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)))

    if fps is None:
      fps = cap.get(cv.CAP_PROP_FPS)
    # Covers 0 / negative / NaN reports from the capture backend.
    if not (fps and fps > 0):
      fps = 30.0

    # 'mp4v' is the FourCC that goes with an .mp4 container; 'DIVX' is kept for
    # any other extension (e.g. .avi).
    codec = 'mp4v' if str(output_path).lower().endswith('.mp4') else 'DIVX'
    out = cv.VideoWriter(output_path, cv.VideoWriter_fourcc(*codec), fps, frame_size)

    while True:
      ok, img = cap.read()

      if not ok:
        print("Can't receive frame (stream end?). Exiting ...")
        break

      # detection() is given the weights path rather than detect_model because it
      # constructs YOLO(...) from its first argument itself.
      object_attributes, frame_x, frame_y, object_position_x, object_position_y, cor_list, conf_score, class_list = detection(model, img)

      has_target = object_position_x is not None and object_position_y is not None

      if has_target:
        for corners, score, class_id in zip(cor_list, conf_score, class_list):
          x1, y1, x2, y2 = int(corners[0]), int(corners[1]), int(corners[2]), int(corners[3])
          box_color = tuple(color[class_id].tolist())

          cv.rectangle(img, (x1, y1), (x2, y2), box_color, thickness=2)
          cv.putText(img, classes[class_id], (x1, y1 - 5), cv.FONT_HERSHEY_SIMPLEX, 1, box_color, 2)
          cv.putText(img, f'{score:.2f}', ((x1 + x2) // 2, y1 - 5), cv.FONT_HERSHEY_SIMPLEX, 2, box_color)

        # Drawn once, after all boxes, so the markers end up on top of them.
        cv.circle(img, center = (frame_x, frame_y), radius = 10, color = (255, 255, 0), thickness = -1)
        cv.circle(img, center = (object_position_x, object_position_y), radius = 3, color = (255, 255, 0), thickness = -1)
        cv.line(img, (object_position_x, object_position_y), (frame_x, frame_y), color = (0, 0, 0), thickness = 1)

      # print out the offsets.
      print(f'X offset: {object_attributes[1]} | y offset: {object_attributes[2]} | current time: {object_attributes[3]}')

      # No cv.waitKey() poll here: no window is ever shown by this function, so
      # there is no key press to receive.
      out.write(img)
  finally:
    # Release even if detection or drawing raises, so the output file is closed.
    cap.release()
    if out is not None:
      out.release()

In [None]:
# Process the test recording with the weights stored at /content/model.pt.
video(
  model='/content/model.pt',
  video_link='/content/testing_record.mp4',
)