## ME5413: Autonomous Mobile Robot  

### Homework 1: Perception  


### 0. Install Packages
Please use these package versions so that the code can be run during evaluation.

In [None]:
# !pip install numpy==1.23
# !pip install torch==2.1.0
# !pip install transformers==4.48.1
# !pip install timm==1.0.14
# !pip install matplotlib

In [None]:
from transformers import AutoImageProcessor, DetrForObjectDetection
from PIL import Image, ImageDraw
from deep_sort_realtime.deepsort_tracker import DeepSort
from yolov5 import YOLOv5
from scipy.optimize import linear_sum_assignment

## Task 1.1 Single-Object Tracking 


In [None]:
## put your code here
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import warnings


### Using Template Matching


#### Task 1.1.1 Different Template Matching methods for different sequences
This script performs Template Matching (TM) on 5 sequences using 6 different matching methods.
The results for each method and sequence are saved in separate text files for comparison.

In [None]:
def template_matching(input_dir, firsttrack_filename, output_dir):
    """Track one object through a sequence with six OpenCV template-matching methods.

    The template is cropped from the first frame (``img/00000001.jpg``) using
    the ``x,y,w,h`` box on the first line of the first-track file. Every
    ``.jpg`` frame in ``<input_dir>/img/`` is then matched against that fixed
    template with each method, and one result file per method
    (``trackresults_<METHOD>.txt``, one ``x,y,w,h`` line per frame) is written
    to ``output_dir``.

    Args:
        input_dir: Sequence directory containing ``img/`` and the first-track file.
        firsttrack_filename: Name of the file holding the initial bounding box.
        output_dir: Directory that receives the per-method result files
            (created if missing).

    Raises:
        FileNotFoundError: If the first frame or any later frame cannot be
            decoded by ``cv2.imread``.
    """
    img_folder = os.path.join(input_dir, "img/")
    first_image_path = os.path.join(img_folder, "00000001.jpg")
    firsttrack_path = os.path.join(input_dir, firsttrack_filename)

    # Initial bounding box: x, y, width, height
    with open(firsttrack_path, "r") as f:
        bbox = [int(value) for value in f.readline().strip().split(",")]
    x0, y0, w0, h0 = bbox[:4]

    # cv2.imread returns None instead of raising when the file is unreadable.
    first_image = cv2.imread(first_image_path)
    if first_image is None:
        raise FileNotFoundError(f"Could not read first frame: {first_image_path}")
    template = first_image[y0 : y0 + h0, x0 : x0 + w0]
    tmpl_h, tmpl_w = template.shape[:2]

    methods = {
        "TM_CCOEFF": cv2.TM_CCOEFF,
        "TM_CCOEFF_NORMED": cv2.TM_CCOEFF_NORMED,
        "TM_CCORR": cv2.TM_CCORR,
        "TM_CCORR_NORMED": cv2.TM_CCORR_NORMED,
        "TM_SQDIFF": cv2.TM_SQDIFF,
        "TM_SQDIFF_NORMED": cv2.TM_SQDIFF_NORMED,
    }
    # For the squared-difference methods the best match is the minimum.
    lower_is_better = (cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED)

    image_files = sorted(f for f in os.listdir(img_folder) if f.endswith(".jpg"))
    os.makedirs(output_dir, exist_ok=True)

    # Decode each frame once and run all methods on it, rather than
    # re-reading the whole sequence from disk for every method.
    results_per_method = {method_name: [] for method_name in methods}
    for img_file in image_files:
        img_path = os.path.join(img_folder, img_file)
        frame = cv2.imread(img_path)
        if frame is None:
            raise FileNotFoundError(f"Could not read frame: {img_path}")

        for method_name, method in methods.items():
            res = cv2.matchTemplate(frame, template, method)
            _, _, min_loc, max_loc = cv2.minMaxLoc(res)
            top_left = min_loc if method in lower_is_better else max_loc
            results_per_method[method_name].append(
                f"{top_left[0]},{top_left[1]},{tmpl_w},{tmpl_h}"
            )

    # Save results for each method
    for method_name, result_lines in results_per_method.items():
        method_output_file = os.path.join(output_dir, f"trackresults_{method_name}.txt")
        with open(method_output_file, "w") as f:
            f.write("\n".join(result_lines))

        print(f"Results saved to: {method_output_file}")


if __name__ == "__main__":
    # Run every template-matching method on each of the five sequences.
    seq_list = [1, 2, 3, 4, 5]
    firsttrack_filename = "firsttrack.txt"
    base_input_directory = "data/"
    output_base_directory = "results/1_template_matching/"

    for seq_num in seq_list:
        seq_folder = f"seq{seq_num}"
        input_directory = os.path.join(base_input_directory, seq_folder)
        output_directory = os.path.join(output_base_directory, seq_folder)
        template_matching(
            input_dir=input_directory,
            firsttrack_filename=firsttrack_filename,
            output_dir=output_directory,
        )


#### Task 1.1.2 Template Matching and Adaptive Template Update

This script implements a single-object tracker using TM with Adaptive Template Updating (ATU). TM is used to locate the object in each frame, while ATU updates the template dynamically based on a confidence threshold to adapt to appearance changes, improving tracking robustness.

In [None]:
class SingleObjectTracker:
    """Single-object tracker: template matching with adaptive template update.

    The template is cropped from the first frame using the initial bounding
    box, located in every frame with ``cv2.matchTemplate``, and blended with
    the newly matched patch whenever the match score exceeds
    ``confidence_threshold``.

    Args:
        input_dir: Sequence directory containing ``img/`` and the first-track file.
        firsttrack_filename: Name of the file whose first line is ``x,y,w,h``.
        output_filename: Path of the text file that receives one ``x,y,w,h``
            line per frame.
        matching_method: One of the ``cv2.TM_*`` matching modes.
        alpha: Blend weight of the new patch in the template update.
        confidence_threshold: Minimum score required to update the template.
    """

    def __init__(
        self,
        input_dir,
        firsttrack_filename,
        output_filename,
        matching_method,
        alpha=0.1,
        confidence_threshold=0.6,
    ):
        self.img_folder = os.path.join(input_dir, "img/")
        self.first_image_path = os.path.join(self.img_folder, "00000001.jpg")
        self.firsttrack_path = os.path.join(input_dir, firsttrack_filename)
        self.output_filename = output_filename
        self.alpha = alpha
        self.confidence_threshold = confidence_threshold
        self.matching_method = matching_method

        # Read initial bounding box and crop the initial template
        self.template, self.bbox = self._initialize_template()

    def _initialize_template(self):
        """Return ``(template, bbox)`` cropped from the first frame.

        Raises:
            FileNotFoundError: If the first frame cannot be decoded.
        """
        with open(self.firsttrack_path, "r") as f:
            # [x, y, width, height]
            bbox = [int(value) for value in f.readline().strip().split(",")]

        # cv2.imread returns None instead of raising on an unreadable file.
        first_image = cv2.imread(self.first_image_path)
        if first_image is None:
            raise FileNotFoundError(
                f"Could not read first frame: {self.first_image_path}"
            )
        x, y, w, h = bbox[:4]
        template = first_image[y : y + h, x : x + w]
        return template, bbox

    def _update_template(self, current_frame, bbox, score):
        """Blend the matched patch into the template when the score is high enough.

        The update is skipped when ``score`` does not exceed the threshold or
        when the patch at ``bbox`` does not have the template's shape (for
        example when the box is clipped by the image border).
        """
        if score <= self.confidence_threshold:
            return

        x, y, w, h = bbox
        new_template = current_frame[y : y + h, x : x + w]
        if new_template.shape != self.template.shape:
            return

        self.template = cv2.addWeighted(
            self.template, 1 - self.alpha, new_template, self.alpha, 0
        )

    def track(self):
        """Track the object through all frames and write the result file.

        Raises:
            FileNotFoundError: If a frame cannot be decoded.
        """
        result_lines = []
        image_files = sorted(
            f for f in os.listdir(self.img_folder) if f.endswith(".jpg")
        )
        lower_is_better = self.matching_method in (
            cv2.TM_SQDIFF,
            cv2.TM_SQDIFF_NORMED,
        )

        for img_file in image_files:
            img_path = os.path.join(self.img_folder, img_file)
            frame = cv2.imread(img_path)
            if frame is None:
                raise FileNotFoundError(f"Could not read frame: {img_path}")

            # Template matching using the selected method
            res = cv2.matchTemplate(frame, self.template, self.matching_method)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

            # NOTE: only the *_NORMED modes yield bounded scores. With the
            # unnormalised modes the value compared against
            # confidence_threshold is not a true confidence.
            if lower_is_better:
                top_left = min_loc  # For SQDIFF methods, lower values are better
                score = 1.0 - min_val
            else:
                top_left = max_loc  # For other methods, higher values are better
                score = max_val

            # Bounding box of the matched region (template size is constant)
            h, w = self.template.shape[:2]
            bbox = (top_left[0], top_left[1], w, h)

            # Update template if confidence is high
            self._update_template(frame, bbox, score)

            result_lines.append(f"{bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]}")

        # os.makedirs("") raises, so only create a directory when the output
        # path actually has a directory component.
        output_dir = os.path.dirname(self.output_filename)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(self.output_filename, "w") as f:
            f.write("\n".join(result_lines))

        print(f"Results saved to: {self.output_filename}")


if __name__ == "__main__":
    # Run the TM + ATU tracker on every sequence with its chosen matching mode.
    firsttrack_filename = "firsttrack.txt"
    base_input_directory = "data/"
    output_directory = "results/1_template_matching/"

    # Matching mode used for each sequence number
    seq_matching_methods = {
        1: cv2.TM_CCOEFF,
        2: cv2.TM_CCOEFF,
        3: cv2.TM_CCOEFF_NORMED,
        4: cv2.TM_SQDIFF_NORMED,
        5: cv2.TM_CCOEFF_NORMED,
    }
    seq_list = [1, 2, 3, 4, 5]

    for seq_num in seq_list:
        matching_method = seq_matching_methods[seq_num]
        input_directory = os.path.join(base_input_directory, f"seq{seq_num}")
        output_filename = os.path.join(
            output_directory, f"trackresults_TM_ATU_seq{seq_num}.txt"
        )

        tracker = SingleObjectTracker(
            input_dir=input_directory,
            firsttrack_filename=firsttrack_filename,
            output_filename=output_filename,
            matching_method=matching_method,
            alpha=0.05,
        )
        tracker.track()


#### Task 1.1.3 Template matching, Adaptive Template Update, and Kalman Filter

This script extends the TM + ATU tracker with a Kalman filter (KF) that smooths the tracked position, reducing noise and improving robustness across sequential images.

In [None]:
class KalmanFilter:
    """Constant-velocity Kalman filter over the box's top-left corner.

    State is ``(x, y, dx, dy)`` and the measurement is ``(x, y)``; this is a
    thin wrapper around ``cv2.KalmanFilter``.

    Args:
        init_bbox: Initial box; only ``init_bbox[0]`` (x) and ``init_bbox[1]``
            (y) are used, and the initial velocity is zero.
    """

    def __init__(self, init_bbox):
        # 4 state variables (x, y, dx, dy), 2 measurement variables (x, y)
        self.kf = cv2.KalmanFilter(4, 2)
        self.kf.measurementMatrix = np.array([[1, 0, 0, 0], [0, 1, 0, 0]], np.float32)
        self.kf.transitionMatrix = np.array(
            [[1, 0, 1, 0], [0, 1, 0, 1], [0, 0, 1, 0], [0, 0, 0, 1]], np.float32
        )
        self.kf.processNoiseCov = np.eye(4, dtype=np.float32) * 1e-2
        self.kf.measurementNoiseCov = np.eye(2, dtype=np.float32) * 1e-1
        self.kf.errorCovPost = np.eye(4, dtype=np.float32) * 0.1
        # Explicit 4x1 column vector, matching the shape of the filter state.
        self.kf.statePost = np.array(
            [[init_bbox[0]], [init_bbox[1]], [0], [0]], np.float32
        )

    @staticmethod
    def _xy(state):
        """Return the integer (x, y) components of a state vector.

        Elements are extracted as scalars before ``int()`` because converting
        a 1-element array with ``int()`` is deprecated in recent NumPy releases.
        """
        flat = np.asarray(state).ravel()
        return int(flat[0]), int(flat[1])

    def predict(self):
        """Advance the filter one step and return the predicted ``(x, y)``."""
        return self._xy(self.kf.predict())

    def correct(self, measured_x, measured_y):
        """Fuse a measured ``(x, y)`` and return the corrected ``(x, y)``."""
        measurement = np.array([[measured_x], [measured_y]], dtype=np.float32)
        return self._xy(self.kf.correct(measurement))


class SingleObjectTracker:
    """Single-object tracker: template matching + adaptive update + Kalman filter.

    The template-matching position of each frame is fed into a ``KalmanFilter``
    and the corrected (smoothed) position is what gets recorded and used for
    the adaptive template update.

    Args:
        input_dir: Sequence directory containing ``img/`` and the first-track file.
        firsttrack_filename: Name of the file whose first line is ``x,y,w,h``.
        output_filename: Path of the text file that receives one ``x,y,w,h``
            line per frame.
        matching_method: One of the ``cv2.TM_*`` matching modes.
        alpha: Blend weight of the new patch in the template update.
        confidence_threshold: Minimum score required to update the template.
    """

    def __init__(
        self,
        input_dir,
        firsttrack_filename,
        output_filename,
        matching_method,
        alpha=0.1,
        confidence_threshold=0.6,
    ):
        self.img_folder = os.path.join(input_dir, "img/")
        self.first_image_path = os.path.join(self.img_folder, "00000001.jpg")
        self.firsttrack_path = os.path.join(input_dir, firsttrack_filename)
        self.output_filename = output_filename
        self.alpha = alpha
        self.confidence_threshold = confidence_threshold
        self.matching_method = matching_method

        # Read the initial bounding box and seed the Kalman filter with it
        self.template, self.bbox = self._initialize_template()
        self.kalman_filter = KalmanFilter(self.bbox)

    def _initialize_template(self):
        """Return ``(template, bbox)`` cropped from the first frame.

        Raises:
            FileNotFoundError: If the first frame cannot be decoded.
        """
        with open(self.firsttrack_path, "r") as f:
            # [x, y, width, height]
            bbox = [int(value) for value in f.readline().strip().split(",")]

        # cv2.imread returns None instead of raising on an unreadable file.
        first_image = cv2.imread(self.first_image_path)
        if first_image is None:
            raise FileNotFoundError(
                f"Could not read first frame: {self.first_image_path}"
            )
        x, y, w, h = bbox[:4]
        template = first_image[y : y + h, x : x + w]
        return template, bbox

    def _update_template(self, current_frame, bbox, score):
        """Blend the patch at ``bbox`` into the template when the score is high enough.

        The update is skipped when ``score`` does not exceed the threshold,
        when the (smoothed) box starts outside the image, or when the cropped
        patch does not have the template's shape.
        """
        if score <= self.confidence_threshold:
            return

        x, y, w, h = bbox
        # A negative start index would make the slice wrap around to the far
        # side of the frame rather than crop the intended region.
        if x < 0 or y < 0:
            return

        new_template = current_frame[y : y + h, x : x + w]
        if new_template.shape != self.template.shape:
            return

        self.template = cv2.addWeighted(
            self.template, 1 - self.alpha, new_template, self.alpha, 0
        )

    def track(self):
        """Track the object through all frames and write the smoothed results.

        Raises:
            FileNotFoundError: If a frame cannot be decoded.
        """
        result_lines = []
        image_files = sorted(
            f for f in os.listdir(self.img_folder) if f.endswith(".jpg")
        )
        lower_is_better = self.matching_method in (
            cv2.TM_SQDIFF,
            cv2.TM_SQDIFF_NORMED,
        )

        for img_file in image_files:
            img_path = os.path.join(self.img_folder, img_file)
            frame = cv2.imread(img_path)
            if frame is None:
                raise FileNotFoundError(f"Could not read frame: {img_path}")

            # Template matching
            res = cv2.matchTemplate(frame, self.template, self.matching_method)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

            # NOTE: only the *_NORMED modes yield bounded scores. With the
            # unnormalised modes the value compared against
            # confidence_threshold is not a true confidence.
            if lower_is_better:
                top_left = min_loc
                score = 1.0 - min_val
            else:
                top_left = max_loc
                score = max_val

            h, w = self.template.shape[:2]

            # The predict step must run before correct() to advance the filter
            # state; only the corrected position is used below.
            self.kalman_filter.predict()
            corrected_x, corrected_y = self.kalman_filter.correct(
                top_left[0], top_left[1]
            )
            smoothed_bbox = (corrected_x, corrected_y, w, h)

            # Update template
            self._update_template(frame, smoothed_bbox, score)

            # Record results
            result_lines.append(
                f"{smoothed_bbox[0]},{smoothed_bbox[1]},{smoothed_bbox[2]},{smoothed_bbox[3]}"
            )

        # os.makedirs("") raises, so only create a directory when the output
        # path actually has a directory component.
        output_dir = os.path.dirname(self.output_filename)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(self.output_filename, "w") as f:
            f.write("\n".join(result_lines))

        print(f"Results saved to: {self.output_filename}")


if __name__ == "__main__":
    # Run the TM + ATU + KF tracker on every sequence with its chosen matching mode.
    firsttrack_filename = "firsttrack.txt"
    base_input_directory = "data/"
    output_directory = "results/1_template_matching/"

    # Matching mode used for each sequence number
    seq_matching_methods = {
        1: cv2.TM_CCOEFF,
        2: cv2.TM_CCOEFF,
        3: cv2.TM_CCOEFF_NORMED,
        4: cv2.TM_SQDIFF_NORMED,
        5: cv2.TM_CCOEFF_NORMED,
    }
    seq_list = [1, 2, 3, 4, 5]

    for seq_num in seq_list:
        matching_method = seq_matching_methods[seq_num]
        input_directory = os.path.join(base_input_directory, f"seq{seq_num}")
        output_filename = os.path.join(
            output_directory, f"trackresults_TM_ATU_KF_seq{seq_num}.txt"
        )

        tracker = SingleObjectTracker(
            input_dir=input_directory,
            firsttrack_filename=firsttrack_filename,
            output_filename=output_filename,
            matching_method=matching_method,
            alpha=0.05,
        )
        tracker.track()


### Using Object Detection algorithm and Association


#### Task 1.1.4 End-to-End Object Detection with Transformers

This script processes multiple image sequences, performs object detection using a pretrained End-to-End Object Detection with Transformers (DETR) model, tracks the detected objects across frames, and saves the tracking results in text files.

In [None]:
class ObjectDetectionTracker:
    """Track one object per sequence with DETR detections and nearest-centre association.

    For every frame the detector's boxes are compared with the previous target
    box; the detection whose centre is closest is taken as the new target,
    provided it lies within ``MAX_ASSOCIATION_DISTANCE`` pixels. Otherwise the
    previous box is carried over.

    Args:
        base_input_directory: Directory containing the ``seq<N>`` folders.
        base_output_directory: Directory that receives the result files.
        seq_list: Sequence numbers to process.
    """

    # Largest centre-to-centre distance (pixels) accepted as the same target.
    MAX_ASSOCIATION_DISTANCE = 50
    # Minimum detection score kept by the post-processing step.
    DETECTION_THRESHOLD = 0.75

    def __init__(self, base_input_directory, base_output_directory, seq_list):
        self.base_input_directory = base_input_directory
        self.base_output_directory = base_output_directory
        self.seq_list = seq_list

        # Load image processor and model from the local checkpoint directory
        self.image_processor = AutoImageProcessor.from_pretrained("./detr-resnet-50")
        self.model = DetrForObjectDetection.from_pretrained("./detr-resnet-50")

    def process_seq(self, seq_num):
        """Track the target through sequence ``seq_num`` and write its result file.

        The result file ``trackresults_ODA_seq<N>.txt`` holds one ``x,y,w,h``
        line per frame and is written once, after all frames are processed.
        """
        seq_dir = os.path.join(self.base_input_directory, f"seq{seq_num}")
        image_dir = os.path.join(seq_dir, "img")
        image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))

        # The first-frame box (x, y, w, h) seeds the association.
        firsttrack_path = os.path.join(seq_dir, "firsttrack.txt")
        with open(firsttrack_path, "r") as f:
            prev_x, prev_y, prev_w, prev_h = map(int, f.read().strip().split(','))

        os.makedirs(self.base_output_directory, exist_ok=True)
        result_lines = []

        for image_file in image_files:
            image_path = os.path.join(image_dir, image_file)
            # Close the file handle promptly; convert() loads the pixel data
            # and guarantees three channels for the detector.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")

            inputs = self.image_processor(images=image, return_tensors="pt")

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                outputs = self.model(**inputs)

            target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
            results = self.image_processor.post_process_object_detection(
                outputs, target_sizes=target_sizes, threshold=self.DETECTION_THRESHOLD
            )[0]

            prev_center = (prev_x + prev_w / 2, prev_y + prev_h / 2)

            # Find the detection whose centre is closest to the previous target.
            min_distance = float('inf')
            selected_box = None
            for box in results["boxes"]:
                min_x, min_y, max_x, max_y = box.tolist()
                center = ((max_x + min_x) / 2, (max_y + min_y) / 2)
                distance = (
                    (center[0] - prev_center[0]) ** 2
                    + (center[1] - prev_center[1]) ** 2
                ) ** 0.5
                if distance < min_distance:
                    min_distance = distance
                    selected_box = (min_x, min_y, max_x, max_y)

            # Accept the match only if it is close enough; otherwise keep the
            # previous box as this frame's estimate.
            if selected_box is not None and min_distance <= self.MAX_ASSOCIATION_DISTANCE:
                min_x, min_y, max_x, max_y = selected_box
                prev_x, prev_y = min_x, min_y
                prev_w, prev_h = max_x - min_x, max_y - min_y

            result_lines.append(
                f"{int(prev_x)},{int(prev_y)},{int(prev_w)},{int(prev_h)}"
            )

        result_file = os.path.join(
            self.base_output_directory, f"trackresults_ODA_seq{seq_num}.txt"
        )
        with open(result_file, "w") as f:
            f.write("\n".join(result_lines))

    def process_all_seqs(self):
        """Process every sequence in ``seq_list`` in order."""
        for seq_num in self.seq_list:
            self.process_seq(seq_num)
            print(f"Results of {seq_num} are saved")

if __name__ == "__main__":
    # Sequences to run the DETR-based tracker on
    seq_list = [1, 2, 3, 4, 5]

    # Where the sequences live and where the results go
    base_input_directory = "data"
    base_output_directory = "results/2_objectdetection_withassociation"

    # Build the tracker, then run it over every sequence
    tracker = ObjectDetectionTracker(
        base_input_directory=base_input_directory,
        base_output_directory=base_output_directory,
        seq_list=seq_list,
    )
    tracker.process_all_seqs()


### Improvement

#### Task 1.1.5 YOLO5 + Deep SORT

Propose Improvements to the work if possible:

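This script uses YOLOv5 for object detection to process multiple image sequences, track the target object, and save the tracking results to text files. DeepSORT objects are instantiated, but in the code below detections are associated across frames by nearest-centre distance to the previous bounding box.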

In [None]:
## put your code here: This function should be able to visualise this image and their tracking results
# NOTE(review): placeholder cell — it only sets a path and prints a stub message;
# no visualisation is implemented here.
# NOTE(review): this path uses 'data/Task 1/...' while the other cells read from
# 'data/seq<N>/...' — confirm which directory layout applies.
img_file = 'data/Task 1/seq2/img/00000002.jpg'

print('Tracking Results for this image using Improved Method is (in xmin, ymin, width, height): ???') 
## show image of visualised result of ground truth and track results

In [None]:
class ObjectTracking:
    """Track one object per sequence with YOLOv5 detections and nearest-centre association.

    For each frame the detection whose centre is closest to the previous box
    (or to the first-track box on the first frame) is selected, provided it is
    within ``MAX_ASSOCIATION_DISTANCE`` pixels; otherwise the previous box is
    repeated.

    Args:
        yolo_model_path: Path to the YOLOv5 weights.
        deepsort_model_path: Path passed to the ``DeepSort`` constructor.
        base_input_directory: Directory containing the ``seq<N>`` folders.
        output_directory: Directory that receives the result files.
        seq_list: Sequence numbers to process.
        firsttrack_filename: Name of the per-sequence initial-box file.
    """

    # Largest centre-to-centre distance (pixels) accepted as the same target.
    MAX_ASSOCIATION_DISTANCE = 50

    def __init__(self, yolo_model_path, deepsort_model_path, base_input_directory, output_directory, seq_list, firsttrack_filename):
        # Initialize YOLOv5 and DeepSORT models
        self.yolo_model = YOLOv5(yolo_model_path)
        # NOTE(review): neither DeepSort instance is used by the methods of
        # this class, and the path is passed as the first positional argument
        # of DeepSort — confirm against the deep_sort_realtime API.
        self.deepsort_model = DeepSort(deepsort_model_path)
        self.deepsort = DeepSort(max_age=30, n_init=3)

        self.base_input_directory = base_input_directory
        self.output_directory = output_directory
        self.seq_list = seq_list
        self.firsttrack_filename = firsttrack_filename

    @staticmethod
    def _read_boxes(path):
        """Parse a file of comma-separated integer boxes, skipping blank lines."""
        with open(path, 'r') as f:
            return [
                [int(value) for value in line.split(',')]
                for line in f
                if line.strip()
            ]

    def load_firsttrack(self, seq_num):
        """Return the boxes listed in the first-track file of sequence ``seq_num``."""
        # os.path.join works whether or not the base directory ends with a
        # path separator.
        path = os.path.join(
            self.base_input_directory, f"seq{seq_num}", self.firsttrack_filename
        )
        return self._read_boxes(path)

    def calculate_distance(self, bbox1, bbox2):
        """Return the distance between box centres.

        ``bbox1`` is ``(x_min, y_min, x_max, y_max)``; ``bbox2`` is
        ``(x, y, width, height)``.
        """
        center1 = ((bbox1[2] - bbox1[0]) / 2 + bbox1[0], (bbox1[3] - bbox1[1]) / 2 + bbox1[1])
        center2 = (bbox2[0] + bbox2[2] / 2, bbox2[1] + bbox2[3] / 2)
        return np.linalg.norm(np.array(center1) - np.array(center2))

    def process_sequence(self, seq_num):
        """Track the target through sequence ``seq_num`` and write one ``x,y,w,h`` line per frame."""
        input_directory = os.path.join(self.base_input_directory, f"seq{seq_num}/img/")
        output_filename = os.path.join(self.output_directory, f"trackresults_improved_seq{seq_num}.txt")

        firsttrack = self.load_firsttrack(seq_num)
        image_files = sorted(f for f in os.listdir(input_directory) if f.endswith(".jpg"))

        # open(..., 'w') does not create missing parent directories.
        os.makedirs(self.output_directory, exist_ok=True)

        prev_bbox = None  # last accepted box as (x, y, w, h)
        with open(output_filename, 'w') as output_file:
            for image_file in image_files:
                image_path = os.path.join(input_directory, image_file)
                image = cv2.imread(image_path)
                results = self.yolo_model.predict(image)

                # Each row starts with x_min, y_min, x_max, y_max. The rows are
                # iterated directly; packing (box, score) pairs into one
                # np.array would build a ragged array.
                boxes = results.xyxy[0].cpu().numpy()

                # Select the bbox closest to the previous one or the initial box
                reference_bbox = prev_bbox if prev_bbox is not None else firsttrack[0]
                best_bbox = None
                min_dist = float('inf')
                for box in boxes:
                    candidate = box[:4]
                    dist = self.calculate_distance(candidate, reference_bbox)
                    if dist < min_dist:
                        min_dist = dist
                        best_bbox = candidate

                if best_bbox is not None and min_dist <= self.MAX_ASSOCIATION_DISTANCE:
                    prev_bbox = [
                        best_bbox[0],
                        best_bbox[1],
                        best_bbox[2] - best_bbox[0],
                        best_bbox[3] - best_bbox[1],
                    ]

                # No accepted detection so far: fall back to the initial box.
                if prev_bbox is None:
                    prev_bbox = firsttrack[0]

                output_file.write(f"{int(prev_bbox[0])},{int(prev_bbox[1])},{int(prev_bbox[2])},{int(prev_bbox[3])}\n")

        print(f"Processing of seq{seq_num} is complete. Results saved to {output_filename}")

    def load_groundtruth(self, groundtruth_filename):
        """Return the boxes listed in ``groundtruth_filename``."""
        return self._read_boxes(groundtruth_filename)

    def run(self):
        """Process every sequence in ``seq_list`` in order."""
        for seq_num in self.seq_list:
            self.process_sequence(seq_num)

if __name__ == "__main__":
    # Model checkpoints
    yolo_model_path = "yolov5s.pt"
    deepsort_model_path = "deep_sort/deep/checkpoint/ckpt.t7"

    # Data locations and the sequences to process
    base_input_directory = "data/"
    output_directory = "results/3_improved/"
    firsttrack_filename = "firsttrack.txt"
    seq_list = [1, 2, 3, 4, 5]

    tracker = ObjectTracking(
        yolo_model_path=yolo_model_path,
        deepsort_model_path=deepsort_model_path,
        base_input_directory=base_input_directory,
        output_directory=output_directory,
        seq_list=seq_list,
        firsttrack_filename=firsttrack_filename,
    )
    tracker.run()


### Task 1.2 Evaluation

Evaluate the performance of the single-object tracking algorithm for both **Template Matching** and **Detection Algorithm with Association**, using the **Success** and **Precision** metrics.

$$
Success = \frac{BB_{tr} \cap BB_{gt}}{BB_{tr} \cup BB_{gt}} ;    
Precision = || C_{tr} - C_{gt} ||_2
$$

#### Task 1.2.1 Find the best template matching method
This evaluation script assesses the tracking results of all five sequences under each template matching method, to determine which method is most suitable for each sequence.

The precision criterion was set to 25 pixels and the IoU criterion was set to 0.5.

In [None]:
def calculate_iou(boxA, boxB):
    """Compute the Intersection over Union (IoU) of two bounding boxes.

    Args:
        boxA: First box as ``(x, y, width, height)``.
        boxB: Second box as ``(x, y, width, height)``.

    Returns:
        The IoU as a float in ``[0, 1]``; ``0.0`` when the union has no area
        (for example when both boxes are degenerate).
    """
    # Corners of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
    yB = min(boxA[1] + boxA[3], boxB[1] + boxB[3])

    # Clamp at zero so disjoint boxes yield an empty intersection
    interArea = max(0, xB - xA) * max(0, yB - yA)

    unionArea = boxA[2] * boxA[3] + boxB[2] * boxB[3] - interArea
    # Two zero-area boxes would otherwise divide by zero.
    if unionArea <= 0:
        return 0.0

    return interArea / float(unionArea)


def calculate_precision(gt_center, tr_center):
    """Return the Euclidean distance between the ground-truth and tracker centres."""
    offset = np.array(gt_center) - np.array(tr_center)
    return np.linalg.norm(offset)


def read_bounding_boxes(file_path):
    """Read bounding boxes from a file.

    Each non-blank line must hold comma-separated integers (``x,y,w,h``).
    Blank lines, such as a trailing empty line at the end of the file, are
    skipped instead of raising ``ValueError``.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one list of ints per non-blank line.
    """
    with open(file_path, "r") as file:
        return [
            [int(value) for value in line.split(",")]
            for line in file
            if line.strip()
        ]


def evaluate_tracking(gt_file, tr_file, precision_threshold=20, iou_threshold=0.5):
    """Evaluate tracking results using precision and success metrics.

    Args:
        gt_file: Path of the ground-truth box file (one ``x,y,w,h`` per frame).
        tr_file: Path of the tracker result file in the same format.
        precision_threshold: A frame counts as precise when its centre
            distance is at most this many pixels.
        iou_threshold: A frame counts as successful when its IoU is at least
            this value.

    Returns:
        ``(precision, success, iou_list, precision_list)`` where the first two
        are fractions of frames meeting the respective threshold and the lists
        hold the per-frame IoU and centre distance. For empty inputs the
        fractions are ``0.0`` and the lists are empty.

    Raises:
        ValueError: If the two files contain a different number of frames.
    """
    gt_boxes = read_bounding_boxes(gt_file)
    tr_boxes = read_bounding_boxes(tr_file)

    # Raised explicitly because assert statements are stripped under `python -O`.
    if len(gt_boxes) != len(tr_boxes):
        raise ValueError(
            "Ground truth and tracking files must have the same number of frames."
        )

    total_frames = len(gt_boxes)
    precision_count = 0
    success_count = 0
    iou_list = []
    precision_list = []

    for gt_box, tr_box in zip(gt_boxes, tr_boxes):
        # Box centres (integer division, as the boxes hold integer pixels)
        gt_center = (gt_box[0] + gt_box[2] // 2, gt_box[1] + gt_box[3] // 2)
        tr_center = (tr_box[0] + tr_box[2] // 2, tr_box[1] + tr_box[3] // 2)

        # Compute Precision
        distance = calculate_precision(gt_center, tr_center)
        precision_list.append(distance)
        if distance <= precision_threshold:
            precision_count += 1

        # Compute IoU Success
        iou = calculate_iou(gt_box, tr_box)
        iou_list.append(iou)
        if iou >= iou_threshold:
            success_count += 1

    # Nothing to average over: avoid dividing by zero.
    if total_frames == 0:
        return 0.0, 0.0, iou_list, precision_list

    precision = precision_count / total_frames
    success = success_count / total_frames

    return precision, success, iou_list, precision_list


if __name__ == "__main__":
    # Compare all six template-matching methods on every sequence.
    base_gt_directory = "data/"
    gt_filename = "groundtruth.txt"
    tracking_result_directory = "results/1_template_matching/"

    iou_threshold = 0.5  # IoU threshold
    precision_threshold = 25  # Pixels
    seq_list = [1, 2, 3, 4, 5]
    methods = [
        "TM_CCOEFF",
        "TM_CCOEFF_NORMED",
        "TM_CCORR",
        "TM_CCORR_NORMED",
        "TM_SQDIFF",
        "TM_SQDIFF_NORMED",
    ]

    # One row per sequence, one column per method
    iou_results = pd.DataFrame(columns=methods, index=seq_list)
    precision_results = pd.DataFrame(columns=methods, index=seq_list)

    for seq_num in seq_list:
        ground_truth_file = os.path.join(
            base_gt_directory, f"seq{seq_num}/", gt_filename
        )
        for method in methods:
            tracking_result_file = os.path.join(
                tracking_result_directory, f"seq{seq_num}/trackresults_{method}.txt"
            )

            # Missing result file: leave an empty cell and move on
            if not os.path.exists(tracking_result_file):
                iou_results.loc[seq_num, method] = None
                precision_results.loc[seq_num, method] = None
                continue

            precision, success, _, _ = evaluate_tracking(
                ground_truth_file,
                tracking_result_file,
                precision_threshold,
                iou_threshold,
            )
            iou_results.loc[seq_num, method] = success * 100
            precision_results.loc[seq_num, method] = precision * 100

    iou_results.to_csv(f"iou_results_with_threshold{iou_threshold}.csv")
    precision_results.to_csv(
        f"precision_results_with_threshold{precision_threshold}.csv"
    )
    print(
        f"Results saved to 'iou_results_with_threshold{iou_threshold}.csv' and 'precision_results_with_threshold{precision_threshold}.csv'"
    )

    # Show both tables as markdown
    print("IoU Results:")
    print(iou_results.to_markdown())

    print("\nPrecision Results:")
    print(precision_results.to_markdown())


#### Task 1.2.2 Evaluation of Template Matching with Adaptive Template Update and Kalman Filtering

This script evaluates and compares three object tracking methods— TM, TM + ATU, and TM + ATU + KF —by measuring precision and IoU success rates across different sequences.

In [None]:
%matplotlib inline

def plot_results(
    iou_list,
    precision_list,
    only_tm_iou_list,
    only_tm_precision_list,
    kf_iou_list,
    kf_precision_list,
    precision_threshold,
    iou_threshold,
    seq_num,
    tracking_result_directory
):
    """Plot per-frame IoU and centre distance for TM, TM + ATU and TM + ATU + KF.

    Draws two side-by-side panels (IoU on the left, Euclidean distance on the
    right), each with a dashed threshold line, saves the figure into
    ``tracking_result_directory`` and shows it.
    """
    frames = range(len(iou_list))

    # Label and colour of the three curves, in drawing order
    curve_styles = (
        ("TM", "green"),
        ("TM + ATU", "blue"),
        ("TM + ATU + KF", "orange"),
    )
    # (subplot index, curves, threshold, threshold label, y label, title)
    panels = (
        (
            1,
            (only_tm_iou_list, iou_list, kf_iou_list),
            iou_threshold,
            f"IoU Threshold = {iou_threshold}",
            "IoU",
            f"IoU per frame for Sequence {seq_num}",
        ),
        (
            2,
            (only_tm_precision_list, precision_list, kf_precision_list),
            precision_threshold,
            f"Precision Threshold = {precision_threshold}",
            "Euclidean Distance",
            f"Precision per frame for Sequence {seq_num}",
        ),
    )

    plt.figure(figsize=(12, 6))

    for position, curves, threshold, threshold_label, y_label, title in panels:
        plt.subplot(1, 2, position)
        for values, (curve_label, curve_color) in zip(curves, curve_styles):
            plt.plot(frames, values, label=curve_label, color=curve_color)
        plt.axhline(
            y=threshold,
            color="red",
            linestyle="--",
            label=threshold_label,
        )
        plt.xlabel("Frame")
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.grid()

    plt.tight_layout()
    plt.savefig(f"{tracking_result_directory}/Precision per frame for Sequence {seq_num}", dpi=300, bbox_inches='tight')
    plt.show()


if __name__ == "__main__":
    # Compare TM, TM + ATU and TM + ATU + KF on every sequence.
    base_gt_directory = "data/"
    gt_filename = "groundtruth.txt"
    tracking_result_directory = "results/1_template_matching/"

    precision_threshold = 25  # Pixels
    iou_threshold = 0.5  # IoU threshold
    seq_list = [1, 2, 3, 4, 5]
    # Template-matching method whose plain-TM results are used per sequence
    seq_matching_methods = {
        1: "TM_CCOEFF",
        2: "TM_CCOEFF",
        3: "TM_CCOEFF_NORMED",
        4: "TM_SQDIFF_NORMED",
        5: "TM_CCOEFF_NORMED",
    }

    for seq_num in seq_list:
        ground_truth_file = (
            os.path.join(base_gt_directory, f"seq{seq_num}/") + gt_filename
        )

        # Only TM
        matching_method = seq_matching_methods[seq_num]
        only_tm_result_file = os.path.join(
            tracking_result_directory,
            f"seq{seq_num}/trackresults_{matching_method}.txt",
        )
        only_tm_precision, only_tm_success, only_tm_iou_list, only_tm_precision_list = (
            evaluate_tracking(
                ground_truth_file,
                only_tm_result_file,
                precision_threshold,
                iou_threshold,
            )
        )
        print(
            f"TM Success (IoU threshold = {iou_threshold}):                  {only_tm_success * 100:.2f}%"
        )
        print(
            f"TM Precision (threshold = {precision_threshold} pixels):              {only_tm_precision * 100:.2f}%"
        )

        # TM + ATU
        tracking_result_file = os.path.join(
            tracking_result_directory, f"trackresults_TM_ATU_seq{seq_num}.txt"
        )
        precision, success, iou_list, precision_list = evaluate_tracking(
            ground_truth_file, tracking_result_file, precision_threshold, iou_threshold
        )
        print(
            f"TM + ATU Success (IoU threshold = {iou_threshold}):            {success * 100:.2f}%"
        )
        print(
            f"TM + ATU Precision (threshold = {precision_threshold} pixels):        {precision * 100:.2f}%"
        )

        # TM + ATU + KF
        kf_tracking_result_file = os.path.join(
            tracking_result_directory, f"trackresults_TM_ATU_KF_seq{seq_num}.txt"
        )
        kf_precision, kf_success, kf_iou_list, kf_precision_list = evaluate_tracking(
            ground_truth_file,
            kf_tracking_result_file,
            precision_threshold,
            iou_threshold,
        )
        print(
            f"TM + ATU + KF Success (IoU threshold = {iou_threshold}):       {kf_success * 100:.2f}%"
        )
        print(
            f"TM + ATU + KF Precision (threshold = {precision_threshold} pixels):   {kf_precision * 100:.2f}%"
        )

        # Mean IoU is a ratio, so it is shown as a percentage.
        avg_kf_success = sum(kf_iou_list) / len(kf_iou_list)
        print(f"TM + ATU + KF Average Success:   {avg_kf_success * 100:.2f}%")
        # Mean centre distance is already in pixels and must not be scaled by 100.
        avg_kf_precision = sum(kf_precision_list) / len(kf_precision_list)
        print(f"TM + ATU + KF Average Precision: {avg_kf_precision:.2f} pixels")

        plot_results(
            iou_list,
            precision_list,
            only_tm_iou_list,
            only_tm_precision_list,
            kf_iou_list,
            kf_precision_list,
            precision_threshold,
            iou_threshold,
            seq_num,
            tracking_result_directory
        )



#### Task 1.2.3 Evaluation of Object Detection Algorithm with Association
This script evaluates the Object Detection Algorithm with Association (ODA) by measuring precision and IoU success rates across different sequences.

In [None]:
%matplotlib inline

def plot_results(
    iou_list,
    precision_list,
    precision_threshold,
    iou_threshold,
    seq_num,
    tracking_result_directory,
):
    """Draw the per-frame IoU and precision curves of the ODA tracker.

    The figure has two side-by-side panels, each with a dashed red line at
    its threshold. It is saved inside ``tracking_result_directory`` and then
    shown.

    Args:
        iou_list: Per-frame IoU values; its length defines the frame axis.
        precision_list: Per-frame values plotted on the "Euclidean Distance"
            axis.
        precision_threshold: Height of the threshold line in the right panel.
        iou_threshold: Height of the threshold line in the left panel.
        seq_num: Sequence number used in the titles and the file name.
        tracking_result_directory: Directory the figure is saved into.
    """
    frame_axis = range(len(iou_list))

    # One row per panel:
    # (subplot slot, series, curve color, threshold, threshold label, y label, title)
    panel_specs = (
        (
            1,
            iou_list,
            "blue",
            iou_threshold,
            f"IoU Threshold = {iou_threshold}",
            "IoU",
            f"IoU per frame for Sequence {seq_num}",
        ),
        (
            2,
            precision_list,
            "green",
            precision_threshold,
            f"Precision Threshold = {precision_threshold}",
            "Euclidean Distance",
            f"Precision per frame for Sequence {seq_num}",
        ),
    )

    plt.figure(figsize=(12, 6))

    for slot, series, curve_color, limit, limit_label, y_label, heading in panel_specs:
        plt.subplot(1, 2, slot)
        plt.plot(frame_axis, series, label="ODA", color=curve_color)
        plt.axhline(y=limit, color="r", linestyle="--", label=limit_label)
        plt.xlabel("Frame")
        plt.ylabel(y_label)
        plt.title(heading)
        plt.legend()
        plt.grid()

    plt.tight_layout()
    figure_path = f"{tracking_result_directory}/Precision per frame for Sequence {seq_num}"
    plt.savefig(figure_path, dpi=300, bbox_inches="tight")
    plt.show()


if __name__ == "__main__":
    # Evaluate the object-detection-with-association (ODA) tracking results
    # of every sequence: print the thresholded success/precision rates and
    # the per-frame averages, then plot the per-frame curves.
    base_gt_directory = "data/"
    gt_filename = "groundtruth.txt"
    tracking_result_directory = "results/2_objectdetection_withassociation/"

    precision_threshold = 25  # Pixels
    iou_threshold = 0.5  # IoU threshold
    seq_list = [1, 2, 3, 4, 5]

    for seq_num in seq_list:
        ground_truth_file = os.path.join(
            base_gt_directory, f"seq{seq_num}", gt_filename
        )

        # ODA
        tracking_result_file = os.path.join(
            tracking_result_directory, f"trackresults_ODA_seq{seq_num}.txt"
        )
        precision, success, iou_list, precision_list = evaluate_tracking(
            ground_truth_file, tracking_result_file, precision_threshold, iou_threshold
        )
        print(
            f"ODA Success (IoU threshold = {iou_threshold}):            {success * 100:.2f}%"
        )
        print(
            f"ODA Precision (threshold = {precision_threshold} pixels):        {precision * 100:.2f}%"
        )

        if not iou_list or not precision_list:
            # Without per-frame values the averages would divide by zero and
            # there is nothing to plot.
            print(
                f"ODA: no per-frame results for sequence {seq_num}; skipping averages and plot."
            )
            continue

        avg_oda_success = sum(iou_list) / len(iou_list)
        print(f"ODA Average Success:   {avg_oda_success * 100:.2f}%")
        avg_oda_precision = sum(precision_list) / len(precision_list)
        # precision_list is plotted by plot_results as a Euclidean distance
        # against the pixel threshold, so its mean is already in pixels and
        # must not be scaled by 100 like the percentage values above.
        print(f"ODA Average Precision: {avg_oda_precision:.2f} pixels")

        plot_results(
            iou_list,
            precision_list,
            precision_threshold,
            iou_threshold,
            seq_num,
            tracking_result_directory,
        )


#### Task 1.2.4 Evaluation of YOLO and Deep SORT
This script evaluates YOLO and Deep SORT by measuring precision and IoU success rates across different sequences.

In [None]:
%matplotlib inline

def plot_results(
    iou_list,
    precision_list,
    precision_threshold,
    iou_threshold,
    seq_num,
    tracking_result_directory,
):
    """Draw the per-frame IoU and precision curves of YOLO + Deep SORT.

    The figure has two side-by-side panels, each with a dashed red line at
    its threshold. It is saved inside ``tracking_result_directory`` and then
    shown.

    Args:
        iou_list: Per-frame IoU values; its length defines the frame axis.
        precision_list: Per-frame values plotted on the "Euclidean Distance"
            axis.
        precision_threshold: Height of the threshold line in the right panel.
        iou_threshold: Height of the threshold line in the left panel.
        seq_num: Sequence number used in the titles and the file name.
        tracking_result_directory: Directory the figure is saved into.
    """
    frame_ids = range(len(iou_list))

    def render_panel(slot, values, curve_color, cutoff, cutoff_caption, y_caption, heading):
        # Fill one of the two panels: the curve plus its dashed threshold line.
        plt.subplot(1, 2, slot)
        plt.plot(frame_ids, values, label="YOLO + Deep SORT", color=curve_color)
        plt.axhline(y=cutoff, color="r", linestyle="--", label=cutoff_caption)
        plt.xlabel("Frame")
        plt.ylabel(y_caption)
        plt.title(heading)
        plt.legend()
        plt.grid()

    plt.figure(figsize=(12, 6))

    # Left panel: IoU
    render_panel(
        1,
        iou_list,
        "blue",
        iou_threshold,
        f"IoU Threshold = {iou_threshold}",
        "IoU",
        f"IoU per frame for Sequence {seq_num}",
    )

    # Right panel: precision
    render_panel(
        2,
        precision_list,
        "green",
        precision_threshold,
        f"Precision Threshold = {precision_threshold}",
        "Euclidean Distance",
        f"Precision per frame for Sequence {seq_num}",
    )

    plt.tight_layout()
    output_path = f"{tracking_result_directory}/Precision per frame for Sequence {seq_num}"
    plt.savefig(output_path, dpi=300, bbox_inches="tight")
    plt.show()


if __name__ == "__main__":
    # Evaluate the improved (YOLO + Deep SORT) tracking results of every
    # sequence: print the thresholded success/precision rates and the
    # per-frame averages, then plot the per-frame curves.
    base_gt_directory = "data/"
    gt_filename = "groundtruth.txt"
    tracking_result_directory = "results/3_improved/"

    precision_threshold = 25  # Pixels
    iou_threshold = 0.5  # IoU threshold
    seq_list = [1, 2, 3, 4, 5]

    for seq_num in seq_list:
        ground_truth_file = os.path.join(
            base_gt_directory, f"seq{seq_num}", gt_filename
        )

        # YOLO
        tracking_result_file = os.path.join(
            tracking_result_directory, f"trackresults_improved_seq{seq_num}.txt"
        )
        precision, success, iou_list, precision_list = evaluate_tracking(
            ground_truth_file, tracking_result_file, precision_threshold, iou_threshold
        )
        print(
            f"YOLO + Deep SORT Success (IoU threshold = {iou_threshold}):            {success * 100:.2f}%"
        )
        print(
            f"YOLO + Deep SORT Precision (threshold = {precision_threshold} pixels):        {precision * 100:.2f}%"
        )

        if not iou_list or not precision_list:
            # Without per-frame values the averages would divide by zero and
            # there is nothing to plot.
            print(
                f"YOLO + Deep SORT: no per-frame results for sequence {seq_num}; skipping averages and plot."
            )
            continue

        avg_yolo_success = sum(iou_list) / len(iou_list)
        print(f"YOLO + Deep SORT Average Success:   {avg_yolo_success * 100:.2f}%")
        avg_yolo_precision = sum(precision_list) / len(precision_list)
        # precision_list is plotted by plot_results as a Euclidean distance
        # against the pixel threshold, so its mean is already in pixels and
        # must not be scaled by 100 like the percentage values above.
        print(f"YOLO + Deep SORT Average Precision: {avg_yolo_precision:.2f} pixels")

        plot_results(
            iou_list,
            precision_list,
            precision_threshold,
            iou_threshold,
            seq_num,
            tracking_result_directory,
        )


### Task 1.3 Visualization
Visualise the results as well. 

In [None]:
## Assignment template placeholder: this cell should visualise the image below
## together with its tracking results.
img_file = "data/Task 1/seq2/img/00000002.jpg"

## Template matching: show the ground truth and the tracking result on the image
print("Tracking Results for this image using Template matching is (in xmin, ymin, width, height): ???")

## Object detection: show the ground truth and the tracking result on the image
print("Tracking Results for this image using Object Detection is(in xmin, ymin, width, height): ???")

Display the GT (red), TM (green), and ODA (blue) bbox results for the second frame of every sequence.
__Please press any key to view the next sample image.__

In [None]:
import ast


def read_bounding_box(file_path):
    """Return the bounding box stored on the second line (index 1) of a file.

    The line is parsed as a Python literal such as ``x,y,width,height`` or
    ``[x, y, width, height]``. ``ast.literal_eval`` is used instead of
    ``eval`` so that text read from disk can never execute code.
    """
    with open(file_path, 'r') as f:
        lines = f.readlines()
    return ast.literal_eval(lines[1].strip())


# Define the base directories for input and output
base_input_directory = 'data/'  # Path to the image directory
base_result_directory = 'results/1_template_matching'  # Path to the result directory for template matching
object_detection_result_directory = 'results/2_objectdetection_withassociation'  # Path to the result directory for object detection
groundtruth_directory = 'data/'  # Path to the ground truth directory

# List of sequence numbers
seq_list = [1, 2, 3, 4, 5]

# Loop through each sequence
for seq_num in seq_list:
    # Define file paths based on the sequence number
    img_file = os.path.join(base_input_directory, f"seq{seq_num}/img/00000002.jpg")
    print(img_file)
    template_matching_result_file = os.path.join(base_result_directory, f"trackresults_TM_ATU_KF_seq{seq_num}.txt")
    object_detection_result_file = os.path.join(object_detection_result_directory, f"trackresults_ODA_seq{seq_num}.txt")
    groundtruth_file = os.path.join(groundtruth_directory, f"seq{seq_num}/groundtruth.txt")

    # Read the image; cv2.imread returns None instead of raising on failure
    img = cv2.imread(img_file)
    if img is None:
        print(f"Warning: Unable to read {img_file}")
        continue

    # Read the tracking results from Template Matching and Object Detection
    template_matching_bbox = read_bounding_box(template_matching_result_file)
    object_detection_bbox = read_bounding_box(object_detection_result_file)

    # Read the ground truth bounding box
    groundtruth_bbox = read_bounding_box(groundtruth_file)

    # Draw each bounding box with its label, in this order (colors are BGR):
    #   Ground truth      -> red   (0, 0, 255)
    #   Template Matching -> green (0, 255, 0)
    #   Object Detection  -> blue  (255, 0, 0)
    # The leading spaces in the labels shift them sideways so that they stay
    # readable when the boxes share the same top-left corner.
    overlays = (
        (groundtruth_bbox, (0, 0, 255), "GT"),
        (template_matching_bbox, (0, 255, 0), "   TM"),
        (object_detection_bbox, (255, 0, 0), "      ODA"),
    )
    for box, box_color, box_label in overlays:
        box_x, box_y, box_w, box_h = box[:4]
        cv2.rectangle(img, (box_x, box_y), (box_x + box_w, box_y + box_h), box_color, 1)
        cv2.putText(img, box_label, (box_x, box_y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)

    # Display the image in a new window
    cv2.imshow(f"Tracking Results - Seq{seq_num}", img)

    # Wait until a key is pressed, then close the window
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Print out the results
    print(f"Tracking Results for sequence {seq_num}:")
    print(f"Template matching: {template_matching_bbox}")
    print(f"Object Detection: {object_detection_bbox}")
    print(f"Ground truth: {groundtruth_bbox}")


Generate comparison videos for all sequences, each overlaying the GT bboxes with the bboxes produced by one tracking scheme (TM, ODA, or the improved tracker).

In [None]:
def draw_bbox_on_image(image, bbox, r, g, b, label, thickness=1):
    """Draw a labelled bounding box and its center point on the image.

    Args:
        image: Image to draw on; the OpenCV drawing calls modify it in place.
        bbox: Four values ``(x, y, width, height)``. Each may be a number or
            a numeric string such as ``"12"`` or ``"12.0"``; fractional parts
            are truncated.
        r, g, b: Color components, handed to OpenCV as the tuple
            ``(r, g, b)`` in exactly that order.
            NOTE(review): OpenCV reads color tuples on ``cv2.imread`` images
            as (blue, green, red), so despite its name the first value drives
            the blue channel. The order is left untouched because existing
            callers were written against it.
        label: Text drawn at the top-left corner of the box.
        thickness: Line thickness of the rectangle and of the label text.

    Returns:
        The same image object that was passed in.
    """
    # Go through float() so that values written as "12.0" are accepted too.
    x, y, w, h = (int(float(value)) for value in bbox)
    center_x, center_y = x + w // 2, y + h // 2

    # Draw bbox
    color = (r, g, b)
    cv2.rectangle(image, (x, y), (x + w, y + h), color, thickness)

    # Draw the label at the top-left corner of the box
    cv2.putText(image, label, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness)

    # Draw the center point as a filled dot in the same color as the box
    cv2.circle(image, (center_x, center_y), radius=4, color=color, thickness=-1)

    return image

def _read_bbox_rows(file_path):
    """Return the comma-separated fields of every non-blank line in a file."""
    with open(file_path, 'r') as f:
        return [line.strip().split(',') for line in f if line.strip()]


def process_images(seq_num, base_input_directory, base_output_directory):
    """Write the GT-vs-tracker comparison videos for one sequence.

    Three videos are produced, one each for template matching (TM), object
    detection with association (ODA) and the improved tracker. Every frame
    shows the ground-truth box together with that tracker's box.

    Args:
        seq_num: Sequence number; selects ``seq{seq_num}`` and the result files.
        base_input_directory: Directory that contains ``seq{seq_num}/img`` and
            ``seq{seq_num}/groundtruth.txt``.
        base_output_directory: Directory whose ``1_template_matching``,
            ``2_objectdetection_withassociation`` and ``3_improved``
            sub-directories receive the videos (created when missing).

    Raises:
        ValueError: If no ``.jpg`` image is found, if the first image cannot
            be read, or if a bbox file does not have one line per image.
    """
    seq_dir = os.path.join(base_input_directory, f"seq{seq_num}")
    img_dir = os.path.join(seq_dir, "img")
    gt_file = os.path.join(seq_dir, "groundtruth.txt")
    # The tracking results are always read from the fixed "results/" tree,
    # independently of base_output_directory.
    tm_file = os.path.join("results/1_template_matching", f"trackresults_TM_ATU_KF_seq{seq_num}.txt")
    oda_file = os.path.join("results/2_objectdetection_withassociation", f"trackresults_ODA_seq{seq_num}.txt")
    imp_file = os.path.join("results/3_improved", f"trackresults_improved_seq{seq_num}.txt")

    output_video_tm = os.path.join(base_output_directory, "1_template_matching", f"comparison_video_TM_seq{seq_num}.avi")
    output_video_oda = os.path.join(base_output_directory, "2_objectdetection_withassociation", f"comparison_video_ODA_seq{seq_num}.avi")
    output_video_imp = os.path.join(base_output_directory, "3_improved", f"comparison_video_improved_seq{seq_num}.avi")

    # One list of string fields per frame for the ground truth and each tracker
    gt_bboxes = _read_bbox_rows(gt_file)
    tm_bboxes = _read_bbox_rows(tm_file)
    oda_bboxes = _read_bbox_rows(oda_file)
    imp_bboxes = _read_bbox_rows(imp_file)

    # Get all image files
    img_files = sorted(f for f in os.listdir(img_dir) if f.endswith('.jpg'))
    if not img_files:
        raise ValueError(f"No .jpg images found in {img_dir}")

    if len(img_files) != len(gt_bboxes):
        raise ValueError("The number of images does not match the number of lines in groundtruth.txt")
    if len(img_files) != len(tm_bboxes):
        raise ValueError(f"The number of images does not match the number of lines in trackresults_TM_ATU_KF_seq{seq_num}.txt")
    if len(img_files) != len(oda_bboxes):
        raise ValueError(f"The number of images does not match the number of lines in trackresults_ODA_seq{seq_num}.txt")
    if len(img_files) != len(imp_bboxes):
        raise ValueError(f"The number of images does not match the number of lines in trackresults_improved_seq{seq_num}.txt")

    # Read the first image to get the frame size used by all three videos
    first_img_path = os.path.join(img_dir, img_files[0])
    first_img = cv2.imread(first_img_path)
    if first_img is None:
        raise ValueError(f"Unable to read {first_img_path}")
    height, width, _ = first_img.shape

    # FFV1 is a lossless codec, so the drawn overlays are not degraded
    fourcc = cv2.VideoWriter_fourcc(*'FFV1')

    writers = []
    try:
        for output_path in (output_video_tm, output_video_oda, output_video_imp):
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            writer = cv2.VideoWriter(output_path, fourcc, 30, (width, height))
            writers.append(writer)
            if not writer.isOpened():
                # A writer that failed to open drops frames silently, so
                # report it instead of failing without any trace.
                print(f"Warning: Unable to open video writer for {output_path}")
        video_writer_tm, video_writer_oda, video_writer_imp = writers

        for img_file, gt_bbox, tm_bbox, oda_bbox, imp_bbox in zip(img_files, gt_bboxes, tm_bboxes, oda_bboxes, imp_bboxes):
            img_path = os.path.join(img_dir, img_file)
            img = cv2.imread(img_path)

            if img is None:
                print(f"Warning: Unable to read {img_path}")
                continue

            # Draw bounding boxes and labels for Template Matching (TM)
            img_tm = img.copy()
            img_tm = draw_bbox_on_image(img_tm, gt_bbox, 0, 255, 255, "GT")
            img_tm = draw_bbox_on_image(img_tm, tm_bbox, 255, 255, 0, "TM")

            # Draw bounding boxes and labels for Object Detection (ODA)
            img_oda = img.copy()
            img_oda = draw_bbox_on_image(img_oda, gt_bbox, 0, 255, 255, "GT")
            img_oda = draw_bbox_on_image(img_oda, oda_bbox, 255, 0, 0, "ODA")

            # Draw bounding boxes and labels for the improved tracker (YOLO)
            img_imp = img.copy()
            img_imp = draw_bbox_on_image(img_imp, gt_bbox, 0, 255, 255, "GT")
            img_imp = draw_bbox_on_image(img_imp, imp_bbox, 255, 0, 0, "YOLO")

            # Write the images to their respective video files
            video_writer_tm.write(img_tm)
            video_writer_oda.write(img_oda)
            video_writer_imp.write(img_imp)
    finally:
        # Release the writers even when a frame fails, so that the files are
        # finalized and the handles are not leaked.
        for writer in writers:
            writer.release()

    print(f"Videos for seq{seq_num} have been saved:")
    print(f"  Template Matching Video: {output_video_tm}")
    print(f"  Object Detection Video: {output_video_oda}")
    print(f"  Improved Tracking Video: {output_video_imp}")

if __name__ == "__main__":
    # Render the comparison videos of every sequence, reading the frames and
    # ground truth from "data" and writing the videos below "results".
    base_input_directory, base_output_directory = "data", "results"
    seq_list = [1, 2, 3, 4, 5]

    for seq_num in seq_list:
        process_images(
            seq_num,
            base_input_directory,
            base_output_directory,
        )
