### Executive Summary: Integration Test for Bytetrack and Yolo model

This notebook generates the ground-truth tracking results that serve as the reference for the integration tests.

1. **Video Frame Extraction**: 
   Each of the 2 videos is decomposed into frames, to which object detection and tracking are applied.

2. **Output Storage**: 
   The tracked objects of every frame are stored in a text file in the `expected_output` folder.


In [None]:
%load_ext autoreload
%autoreload 2
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# YOLO and video packages 
from ultralytics import YOLO
from bytetracker import BYTETracker
from bytetracker.basetrack import BaseTrack
from IPython.display import Video

#### Reading the 2 videos

In [None]:
# Download the two sample videos into videos/ unless they are already on disk.
VIDEO_PATH_1 = 'videos/car_town.mp4'
VIDEO_PATH_2 = 'videos/market.mp4'
# Each shell line only runs `mkdir -p videos` + `wget` when the target file does
# not exist yet; $VIDEO_PATH_n is expanded by IPython from the Python variable.
!if [ ! -f $VIDEO_PATH_1 ]; then mkdir -p videos && wget https://storage.googleapis.com/bytetrack-data-public/car_town.mp4 -O $VIDEO_PATH_1; fi
!if [ ! -f $VIDEO_PATH_2 ]; then mkdir -p videos && wget https://storage.googleapis.com/bytetrack-data-public/market.mp4 -O $VIDEO_PATH_2; fi

In [None]:
# Embed the first video (car_town) in the notebook for a quick visual check.
Video(VIDEO_PATH_1, embed=True, width=800)

In [None]:
# Embed the second video (market) in the notebook for a quick visual check.
Video(VIDEO_PATH_2, embed=True, width=800)

#### Yolo model and bytetrack preparation

In [None]:
# Class ids handed to the detector through `classes=[...]` to restrict what gets
# detected: cars for the first video, "fruit" for the second one.
# NOTE(review): presumably these index the pretrained model's label map
# (`model.names`) — confirm that 52 really is the intended "fruit" label there.
PEOPLE_CLASS_ID = 0
CAR_CLASS_ID = 2
FRUIT_CLASS_ID = 52

In [None]:
# Load the detector from its weights file and pin it to the detection task.
MODEL_WEIGHTS = "yolov8m.pt"
model = YOLO(model=MODEL_WEIGHTS, task="detect")

In [None]:
# Tracker instance used by the tracking loops below. frame_rate=12 matches the
# fps=12 used when the videos are split into frames with ffmpeg.
tracker = BYTETracker(track_thresh= 0.15, track_buffer = 3, match_thresh = 0.85, frame_rate= 12)
# Reset the class-level counter held on BaseTrack.
# NOTE(review): presumably this makes track ids restart from the beginning —
# confirm against bytetracker.basetrack.
BaseTrack._count = 0

#### Prediction, tracking and storage of results for first video

In [None]:
# VIDEO 1 to frames: sample the video at 12 frames per second into
# frames/video_1_<n>.png (ffmpeg banner and log output are silenced).
!mkdir -p frames && ffmpeg -i $VIDEO_PATH_1 -vf fps=12 frames/video_1_%d.png -hide_banner -loglevel panic

In [None]:
# List the extracted frames of video 1, ordered by the integer frame number in
# the file name (a plain string sort would put "video_1_10.png" before
# "video_1_2.png").
available_frames_1 = sorted(
    glob.glob("frames/video_1_*.png"),
    key=lambda path: int(path.rsplit("_", 1)[-1].partition(".")[0]),
)

In [None]:
# Run detection + tracking frame by frame on video 1. For every frame in which
# the tracker returns at least one object, keep the tracker output with the
# frame index inserted as the first column.
# NOTE(review): `yolo_results_to_bytetrack_format` is neither defined nor
# imported in the cells visible in this notebook — it has to be brought into
# scope for a fresh "Restart & Run All" to succeed.
all_tracked_objects_1 = []
for frame_id, image_filename in enumerate(available_frames_1):
    img = cv2.imread(image_filename)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; stop here so an unreadable frame cannot silently end up in the
        # ground truth.
        raise OSError(f"Could not read frame image: {image_filename}")
    detections = model.predict(img, classes=[CAR_CLASS_ID], conf=0.15, verbose=False)[0]
    detections_bytetrack_format = yolo_results_to_bytetrack_format(detections)
    tracked_objects = tracker.update(detections_bytetrack_format, frame_id)
    if len(tracked_objects) > 0:
        # Prefix every tracked-object row with the frame it belongs to.
        tracked_objects = np.insert(tracked_objects, 0, frame_id, axis=1)
        all_tracked_objects_1.append(tracked_objects)

In [None]:
# Persist the tracking results of video 1 as expected output for the
# integration tests. Layout: one tab-separated line per tracked object, and a
# blank line after each frame.
file_path_1 = "expected_output/tracking_results_video1.txt"

# open(..., "w") does not create missing parent folders, so make sure the
# output directory exists before writing.
os.makedirs(os.path.dirname(file_path_1), exist_ok=True)

with open(file_path_1, "w") as output_file:
    for frame in all_tracked_objects_1:
        for row in frame:
            output_file.write("\t".join(map(str, row)) + "\n")
        # Blank line marks the end of a frame.
        output_file.write("\n")

In [None]:
# VIDEO 2 to frames: sample the video at 12 frames per second into
# frames/video_2_<n>.png (ffmpeg banner and log output are silenced).
# NOTE(review): the exact same command is run again in the cell under the
# "second video" heading further down; one of the two cells is redundant.
!mkdir -p frames && ffmpeg -i $VIDEO_PATH_2 -vf fps=12 frames/video_2_%d.png -hide_banner -loglevel panic

In [None]:
# List the extracted frames of video 2, ordered numerically by the frame number
# that follows the last underscore in the file name.
video_2_frame_paths = glob.glob("frames/video_2_*.png")
available_frames_2 = sorted(
    video_2_frame_paths,
    key=lambda path: int(path.rsplit("_", 1)[-1].partition(".")[0]),
)

#### Prediction, tracking and storage of results for second video

In [None]:
# VIDEO 2 to frames: sample the video at 12 frames per second into
# frames/video_2_<n>.png (ffmpeg banner and log output are silenced).
!mkdir -p frames && ffmpeg -i $VIDEO_PATH_2 -vf fps=12 frames/video_2_%d.png -hide_banner -loglevel panic

In [None]:
# Gather the frame images of video 2 and sort them by their integer frame
# number rather than alphabetically, so frame 10 comes after frame 2.
available_frames_2 = sorted(
    glob.glob("frames/video_2_*.png"),
    key=lambda path: int(path.rsplit("_", 1)[-1].partition(".")[0]),
)

In [None]:
# Run detection + tracking frame by frame on video 2. For every frame in which
# the tracker returns at least one object, keep the tracker output with the
# frame index inserted as the first column.
#
# Start from a fresh tracker: the instance used for video 1 still holds that
# video's tracks and the shared BaseTrack counter, while frame_id restarts at 0
# here. Reusing it would make the ground truth of video 2 depend on video 1.
# This rebinds the notebook-level `tracker` with the same settings as before.
tracker = BYTETracker(track_thresh=0.15, track_buffer=3, match_thresh=0.85, frame_rate=12)
BaseTrack._count = 0

# NOTE(review): `yolo_results_to_bytetrack_format` is neither defined nor
# imported in the cells visible in this notebook — it has to be brought into
# scope for a fresh "Restart & Run All" to succeed.
all_tracked_objects_2 = []
for frame_id, image_filename in enumerate(available_frames_2):
    img = cv2.imread(image_filename)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; stop here so an unreadable frame cannot silently end up in the
        # ground truth.
        raise OSError(f"Could not read frame image: {image_filename}")
    detections = model.predict(img, classes=[FRUIT_CLASS_ID], conf=0.15, verbose=False)[0]
    detections_bytetrack_format = yolo_results_to_bytetrack_format(detections)
    tracked_objects = tracker.update(detections_bytetrack_format, frame_id)
    if len(tracked_objects) > 0:
        # Prefix every tracked-object row with the frame it belongs to.
        tracked_objects = np.insert(tracked_objects, 0, frame_id, axis=1)
        all_tracked_objects_2.append(tracked_objects)

In [None]:
# Persist the tracking results of video 2 as expected output for the
# integration tests. Layout: one tab-separated line per tracked object, and a
# blank line after each frame.
file_path_2 = "expected_output/tracking_results_video2.txt"

# open(..., "w") does not create missing parent folders, so make sure the
# output directory exists before writing.
os.makedirs(os.path.dirname(file_path_2), exist_ok=True)

with open(file_path_2, "w") as output_file:
    for frame in all_tracked_objects_2:
        for row in frame:
            output_file.write("\t".join(map(str, row)) + "\n")
        # Blank line marks the end of a frame.
        output_file.write("\n")