<a href="https://colab.research.google.com/github/jewoolee0502/ComputerVision/blob/main/ECSE415_A5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ECSE 415: Introduction to Computer Vision
###### Jewoo Lee - 260910789
###### Anthony Bonta - 261053688

## Assignment 5: Video Analysis

### Library Requirements

In [9]:
# Mount Google Drive (Colab only) so files under /content/drive can be read and written
from google.colab import drive
drive.mount('/content/drive')

# Installs
!pip install -q ultralytics
!pip install -q ultralytics deep-sort-realtime
!pip install -q kaggle

# Imports
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob
import os
import math
import torch

from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25h

#### Path

In [2]:
# Assignment root folder on the mounted Drive; keep exactly one `path` line active.
path = '/content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/' # jay's path
# path = '' # anthony's path

# Base folder from which the Task 1 input and output paths are built.
object_tracking_root = os.path.join(path, "Object_Tracking")

### 1. Data Preparation

In [3]:
# Directory that is searched for the Task 1 frames (.jpg images).
task1_images_dir = os.path.join(object_tracking_root, "Task1", "images")
# Video assembled from those frames; it is later opened as the tracker's input video.
task1_video_path = os.path.join(object_tracking_root, "task1_input.mp4")

print("Task1 images dir:", task1_images_dir)
print("Output video path:", task1_video_path)

Task1 images dir: /content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/Object_Tracking/Task1/images
Output video path: /content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/Object_Tracking/task1_input.mp4


In [13]:
# Frame rate (frames per second) used to encode the image sequence as a video, and
# the fallback rate when the input video reports no usable FPS.
FPS = 14 # given fps value

def images_to_video(images_dir, output_path, fps):
  """Encode the .jpg images found in a directory as an mp4v video.

  Images are written in sorted filename order. The frame size is taken from the
  first image that can be read; images of any other size are resized to it.
  Images that cannot be read are skipped with a message.

  Args:
    images_dir: directory searched (non-recursively) for "*.jpg" files.
    output_path: path of the video file to write.
    fps: frame rate passed to the video writer.

  Raises:
    RuntimeError: if no .jpg file is found, if none of them can be read, or if
      the video writer cannot be opened for output_path.
  """
  # all images are .jpg
  image_files = sorted(glob.glob(os.path.join(images_dir, "*.jpg")))
  print(f"Found {len(image_files)} images in {images_dir}")

  if len(image_files) == 0:
    raise RuntimeError(f"No .jpg image files found in {images_dir}")

  # take the frame size from the first READABLE image: cv2.imread returns None
  # for a corrupt file, which must not crash the size lookup
  frame_size = None
  for img_path in image_files:
    probe = cv2.imread(img_path)
    if probe is not None:
      height, width = probe.shape[:2]
      frame_size = (width, height)
      break

  if frame_size is None:
    raise RuntimeError(f"None of the .jpg image files in {images_dir} could be read")
  print(f"Target frame size: {frame_size}")

  # set up the video writer
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
  writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

  if not writer.isOpened():
    raise RuntimeError(f"VideoWriter could not be opened for {output_path}")

  # write all images as frames; the writer is released even if a frame fails,
  # so the output file is always finalized
  try:
    for img_path in image_files:
      frame = cv2.imread(img_path)

      if frame is None:
        print(f"Skipping unreadable image: {img_path}")
        continue

      # make every frame match the target size (the writer expects a fixed size)
      if frame.shape[:2] != (height, width):
        frame = cv2.resize(frame, frame_size)
      writer.write(frame)
  finally:
    writer.release()

  print(f"Video saved to: {output_path}")

# Stitch the Task 1 frames into the video that the tracking step reads as input.
images_to_video(task1_images_dir, task1_video_path, FPS)

Found 429 images in /content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/Object_Tracking/Task1/images
Target frame size: (1920, 1080)
Video saved to: /content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/Object_Tracking/task1_input.mp4


### 2. Model Implementation

In [16]:
# Minimum detection confidence passed to YOLO (`conf=`) in the tracking loop.
conf_threshold = 0.3

# Task 1 outputs: the annotated video and the text log of tracked boxes.
output_video_path = os.path.join(object_tracking_root, "task1.mp4")
output_txt_path   = os.path.join(object_tracking_root, "task1.txt")

print("Output video path:", output_video_path)
print("Output text path:", output_txt_path)

def draw_and_log_box(frame, frame_idx, track, txt_handle, color=(0, 0, 255)):
    """Annotate one track on a frame and append its box to the tracking log.

    Args:
        frame: image the rectangle and "ID: <n>" label are drawn on (in place).
        frame_idx: frame number written as the first field of the log line.
        track: object exposing `track_id` and `to_ltrb()` (left, top, right, bottom).
        txt_handle: writable text handle receiving one line per call, formatted
            as "frame,id,x,y,w,h" with integer pixel values.
        color: colour tuple used for both the rectangle and the label.
    """
    ident = int(track.track_id)

    # corner coordinates are truncated to integers before drawing and logging
    left, top, right, bottom = (int(v) for v in track.to_ltrb())
    box_w, box_h = right - left, bottom - top

    # bounding box
    cv2.rectangle(frame, (left, top), (right, bottom), color, 2)

    # label sits 8 px above the box, clamped so its y never goes below 0
    anchor = (left, max(0, top - 8))
    cv2.putText(frame, f"ID: {ident}", anchor, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # one log line per box: top-left corner plus width and height
    txt_handle.write(f"{frame_idx},{ident},{left},{top},{box_w},{box_h}\n")

Output video path: /content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/Object_Tracking/task1.mp4
Output text path: /content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/Object_Tracking/task1.txt


In [18]:
# Detector (YOLO weights loaded from "yolov8s.pt") and DeepSORT tracker; both are
# created in this cell, so re-running the cell starts from fresh objects.
yolo_model = YOLO("yolov8s.pt")
tracker = DeepSort(max_age=30, n_init=3, max_iou_distance=0.7)

cap = cv2.VideoCapture(task1_video_path) # input video
if not cap.isOpened():
  raise RuntimeError("Cannot open video")

# the output video keeps the resolution reported by the input video
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# fall back to the given FPS when the reported value is unusable
# (`not > 0` also catches NaN, which `<= 0` would let through)
fps_val = cap.get(cv2.CAP_PROP_FPS)
if not fps_val > 0:
  fps_val = FPS

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
writer = cv2.VideoWriter(output_video_path, fourcc, fps_val, (w, h))
if not writer.isOpened():
  # fail loudly instead of silently producing no output video
  cap.release()
  raise RuntimeError(f"VideoWriter could not be opened for {output_video_path}")

tracks_memory = []  # (frame_idx, track_id, x, y, w, h) for every logged box
frame_idx = 1       # frame numbers in the log start at 1

# try/finally + with: the capture, the writer and the text file are closed even
# if detection or tracking raises part-way through the video
try:
  with open(output_txt_path, "w") as txt_file:
    while True:
      ok, frame = cap.read()
      if not ok:
        break

      # run the YOLO model
      det = yolo_model(frame, conf=conf_threshold, verbose=False)[0]

      # convert YOLO detections into DeepSORT format:
      # ([left, top, width, height], confidence, class name)
      det_list = []
      if det.boxes is not None:
        for b in det.boxes:
          cls_id = int(b.cls[0])
          conf   = float(b.conf[0])
          # keep class 0 only; it is passed to the tracker labelled "person"
          if cls_id != 0:
            continue

          x1, y1, x2, y2 = b.xyxy[0].tolist()
          det_list.append(([x1, y1, x2 - x1, y2 - y1], conf, "person"))

      # track
      tracks = tracker.update_tracks(det_list, frame=frame)

      # draw & log only confirmed tracks
      # NOTE(review): confirmed tracks that were not matched to a detection in
      # this frame are presumably still returned and would be drawn/logged at a
      # predicted position; consider also checking trk.time_since_update.
      # Confirm against the deep-sort-realtime documentation.
      for trk in tracks:
        if not trk.is_confirmed():
          continue

        draw_and_log_box(frame, frame_idx, trk, txt_file)

        # saving in memory (same integer box that was written to the text file)
        x1, y1, x2, y2 = trk.to_ltrb()
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        tracks_memory.append((frame_idx, trk.track_id, x1, y1, x2 - x1, y2 - y1))

      writer.write(frame)

      if frame_idx % 50 == 0:
        print("Processed:", frame_idx)

      frame_idx += 1
finally:
  cap.release()
  writer.release()

print("\nCompleted!")
print("Video saved to:", output_video_path)
print("Text file saved to:", output_txt_path)

Processed: 50
Processed: 100
Processed: 150
Processed: 200
Processed: 250
Processed: 300
Processed: 350
Processed: 400
Completed!
Video saved to: /content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/Object_Tracking/task1.mp4
Text file saved to: /content/drive/MyDrive/McGill/2025/Fall 2025/ECSE 415/A5/Object_Tracking/task1.txt


### 3. Model Evaluation

### 4. Prediction & Kaggle Competition