In [None]:
# Part 3: Bounding Box Tracking with SIFT

# Importing necessary libraries for tasks
import zipfile # For extracting 'data.zip'
import os # For image file handling
import cv2 # for SIFT and keypoint matching
import numpy as np # For calculations
import matplotlib.pyplot as plt  # For visualisations

# 'data.zip' has to be uploaded next to this notebook before this cell runs
# (presumably via the Colab "Files" panel - confirm for your environment).
# Unpack the whole archive into the current working directory.
with zipfile.ZipFile('data.zip', mode='r') as archive:
    archive.extractall()

In [None]:
# Initialising SIFT detector
sift = cv2.SIFT_create()

# Using the BFMatcher from the Week 6 tutorial.
# cv2.NORM_L2: Euclidean distance, the metric used for SIFT descriptors.
# crossCheck=False: the tracking loop calls knnMatch(k=2) and applies Lowe's
# ratio test, which needs the two nearest neighbours of every descriptor;
# cross-checking (mutual best match) is meant for k=1 matching only.
# These are also BFMatcher's defaults, spelled out here so the intent is explicit.
bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)

# Output folder to store every 5th frame
bbsift_results = 'BoundingBox_Results_SIFT/'
os.makedirs(bbsift_results, exist_ok=True)  # creating folder if it does not yet exist

og_box = (20, 353, 322, 215)  # Initial bounding box (x, y, w, h)
box_x, box_y, box_w, box_h = og_box
prev_box = og_box  # starting box for the tracking loop, which updates it frame by frame

# Drawing the original bounding box on the first frame
frame0_path = 'data/frame_0.jpg'
frame0_BGR = cv2.imread(frame0_path)
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than inside cv2.rectangle.
if frame0_BGR is None:
    raise ValueError("Error loading frame. Check that the file path exists.")
cv2.rectangle(frame0_BGR, (box_x, box_y), (box_x + box_w, box_y + box_h), (0, 255, 0), 2)

# Displaying the first frame with its box (matplotlib expects RGB, OpenCV loads BGR)
plt.figure(figsize=(12, 6))
plt.imshow(cv2.cvtColor(frame0_BGR, cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.title("Bounding Box Tracking with SIFT: frame_0")
plt.show()

# Going through frames 1-69, using the previous frame to update the box for the next frame.
# For each frame: match the SIFT keypoints that lie inside the previous box against the
# new frame, keep the matches that pass Lowe's ratio test, and shift the box by the
# average displacement of those matches (width and height stay fixed).
LAST_FRAME = 69    # index of the last frame to track
LOWE_RATIO = 0.75  # Lowe's ratio-test threshold: best match must beat the runner-up by this factor
SAVE_EVERY = 5     # every fifth annotated frame is written to bbsift_results


def _load_frame(index):
    """Read data/frame_<index>.jpg and return (BGR image, grayscale image).

    Raises:
        ValueError: if cv2.imread could not read the file (it returns None
            rather than raising, so this must be checked before cvtColor).
    """
    frame_bgr = cv2.imread(f'data/frame_{index}.jpg')
    if frame_bgr is None:
        raise ValueError("Error loading frame. Check that the file path exists.")
    return frame_bgr, cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)


# Always start from the original box so re-running this cell gives the same result
# instead of continuing from wherever the previous run left the box.
prev_box = og_box

# Sub-pixel position of the box's top-left corner. The integer box is derived from
# it each frame; accumulating here avoids throwing away the fractional part of the
# displacement on every frame, which would make the box drift over time.
exact_x, exact_y = float(prev_box[0]), float(prev_box[1])

# Frame i-1 in one iteration is frame i of the previous one, so its keypoints are
# carried over instead of running SIFT twice on every image.
_, frame1_gray = _load_frame(0)
f1_kp, f1_des = sift.detectAndCompute(frame1_gray, None)

for i in range(1, LAST_FRAME + 1):
    frame2_BGR, frame2_gray = _load_frame(i)
    f2_kp, f2_des = sift.detectAndCompute(frame2_gray, None)

    # Filtering to get just the previous frame's keypoints within the bounding box
    box_x, box_y, box_w, box_h = prev_box
    box_idx = [
        index for index, kp in enumerate(f1_kp)
        if box_x <= kp.pt[0] <= box_x + box_w and box_y <= kp.pt[1] <= box_y + box_h
    ]
    box_kp = [f1_kp[index] for index in box_idx]

    # Matching descriptors with knnMatch and applying Lowe's ratio test.
    # Skipped when there is nothing to match (no keypoints in the box, or no
    # descriptors in the new frame), because knnMatch cannot take empty input.
    good_matches = []
    if box_idx and f2_des is not None:
        box_des = f1_des[box_idx]
        for pair in bf.knnMatch(box_des, f2_des, k=2):
            # Fewer than two neighbours can come back when the new frame has
            # a single descriptor; the ratio test is undefined in that case.
            if len(pair) == 2 and pair[0].distance < pair[1].distance * LOWE_RATIO:
                good_matches.append(pair[0])

    # Average displacement of the matched keypoints between the two frames
    if good_matches:
        displacements = np.array([
            np.subtract(f2_kp[gm.trainIdx].pt, box_kp[gm.queryIdx].pt)
            for gm in good_matches
        ])
        avg_x_disp, avg_y_disp = (float(v) for v in displacements.mean(axis=0))
    else:
        # No reliable match: keep the box where it was rather than dividing by zero
        avg_x_disp = avg_y_disp = 0.0
        print(f"frame_{i}: no good matches inside the box; keeping the previous box position.")

    # Shifting the box by the average displacement. Moving the centre and converting
    # back to a corner is the same as moving the corner, since width/height are fixed.
    exact_x += avg_x_disp
    exact_y += avg_y_disp
    box_x = int(round(exact_x))
    box_y = int(round(exact_y))
    prev_box = (box_x, box_y, box_w, box_h)  # Saving the updated box for the next iteration

    # Drawing the updated bounding box on frame2_BGR
    cv2.rectangle(frame2_BGR, (box_x, box_y), (box_x + box_w, box_y + box_h), (0, 255, 0), 2)

    # Visualising the updated frame
    fig = plt.figure(figsize=(12, 6))
    plt.imshow(cv2.cvtColor(frame2_BGR, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f"Bounding Box Tracking with SIFT: frame_{i}")
    plt.show()
    plt.close(fig)  # release the figure so dozens of frames do not pile up in memory

    # Saving every fifth frame's resulting image
    if i % SAVE_EVERY == 0:
        filename = os.path.join(bbsift_results, f'frame_{i}.jpg')
        cv2.imwrite(filename, frame2_BGR)

    # The current frame becomes the "previous" frame of the next iteration
    f1_kp, f1_des = f2_kp, f2_des

## The cell below is only needed when running this notebook in Google Colab rather than in a local Jupyter Notebook.

In [None]:
# Downloading the saved 'BoundingBox_Results_SIFT' folder from Google Colab to the local machine.
# Outside Colab the results are already on disk, so this cell does nothing there
# instead of failing on the Colab-only import (keeps "Restart & Run All" working).

import shutil

try:
    from google.colab import files
except ImportError:
    files = None  # google.colab is only importable inside a Colab runtime

if files is None:
    print("Not running in Google Colab; skipping the download of 'BoundingBox_Results_SIFT'.")
else:
    # Packing the results folder into 'BoundingBox_Results_SIFT.zip'
    shutil.make_archive('BoundingBox_Results_SIFT', 'zip', 'BoundingBox_Results_SIFT')

    # Downloading the zip file to the local machine
    files.download('BoundingBox_Results_SIFT.zip')