### Executive Summary: Integration Test for Bytetrack and Yolo model

This notebook generates the ground truth and the inputs that serve as references for the integration tests.

1. **Preparation of expected output for the test**: 
   The 2 videos are decomposed into frames, to which we apply object detection and tracking. We store the tracking results for these frames under the **expected_output** folder. This will be the source of ground truth for the test.

2. **Preparation of inputs for the test**: 
   The 2 videos are decomposed into frames, to which we apply only object detection. We store the detections for these frames under the **test_input** folder. These will be the input of the bytetracker update method for the test in the _test_model_regression.py_ script.


In [None]:
%load_ext autoreload
%autoreload 2
import glob
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# YOLO and video packages 
from bytetracker import BYTETracker
from bytetracker.basetrack import BaseTrack
from IPython.display import Video
from ultralytics import YOLO

In [None]:
def yolo_results_to_bytetrack_format(detections):
    """Convert one YOLO result into the array layout passed to the tracker.

    Args:
        detections: A single YOLO result for one image (for example
            ``model.predict(img)[0]``). It must provide ``numpy()``, whose
            return value has a ``boxes`` attribute exposing the ``xyxyn``,
            ``conf`` and ``cls`` arrays.

    Returns:
        A 2-D array with one row per detection and six columns: the first
        four ``xyxyn`` box values, then the confidence score, then the
        class id. With no detections the result has zero rows.
    """
    # Convert the result once and reuse it, instead of repeating the
    # conversion for each of the three fields.
    boxes = detections.numpy().boxes
    # column_stack turns the 1-D score/class arrays into columns next to the box columns.
    return np.column_stack((boxes.xyxyn[:, :4], boxes.conf, boxes.cls))

#### reading 2 videos

In [None]:
# Download the two sample videos.
# Each shell line fetches its file only when it is not already on disk,
# creating the local `videos` folder first (`mkdir -p`).
VIDEO_PATH_1 = 'videos/traffic.mp4'
VIDEO_PATH_2 = 'videos/fruit.mp4'
!if [ ! -f $VIDEO_PATH_1 ]; then mkdir -p videos && wget https://storage.googleapis.com/bytetrack-data-public/traffic.mp4 -O $VIDEO_PATH_1; fi
!if [ ! -f $VIDEO_PATH_2 ]; then mkdir -p videos && wget https://storage.googleapis.com/bytetrack-data-public/fruit.mp4 -O $VIDEO_PATH_2; fi

In [None]:
# Preview the first video inline, embedded in the notebook output.
Video(VIDEO_PATH_1, embed=True, width=800)

In [None]:
# Preview the second video inline, embedded in the notebook output.
Video(VIDEO_PATH_2, embed=True, width=800)

#### Yolo model and bytetrack preparation

In [None]:
# Class ids passed to `model.predict(..., classes=[...])` to restrict detections.
# NOTE(review): presumably indices in the label map of the pretrained weights — confirm.
CAR_CLASS_ID = 2  # used for the first video
FRUIT_CLASS_ID = 49  # used for the second video
PEOPLE_CLASS_ID = 0  # not used by the cells of this notebook

In [None]:
# Weights file handed to YOLO; the model is created for the "detect" task.
MODEL_WEIGHTS = "yolov8m.pt"
model = YOLO(MODEL_WEIGHTS, task="detect")

In [None]:
# Tracker configuration. `frame_rate` matches the 12 fps used when splitting the
# videos into frames, and `track_thresh` matches the `conf` passed to the detector.
tracker = BYTETracker(
    track_thresh=0.15,
    track_buffer=3,
    match_thresh=0.85,
    frame_rate=12,
)
# Reset the class-level track counter of BaseTrack.
BaseTrack._count = 0

#### First video

In [None]:
# VIDEO 1 to frames
# Split video 1 into PNG frames at 12 fps, written as frames/video_1_<n>.png.
# The `frames` folder is created if needed; ffmpeg's banner and log output are silenced.
!mkdir -p frames && ffmpeg -i $VIDEO_PATH_1 -vf fps=12 frames/video_1_%d.png -hide_banner -loglevel panic

In [None]:
# List the frames of video 1 and order them by the integer that follows the last
# underscore in the file name (numeric order, not string order).
available_frames_1 = sorted(
    glob.glob("frames/video_1_*.png"),
    key=lambda frame_path: int(frame_path.rsplit("_", 1)[-1].split(".")[0]),
)

#### Generating the expected output for the first video

In [None]:
# Run detection + tracking on every frame of video 1 and keep the tracker output
# of each frame that produced at least one tracked object.
all_tracked_objects_1 = []
for frame_id, image_filename in enumerate(available_frames_1):
    frame_result = model.predict(
        cv2.imread(image_filename), classes=[CAR_CLASS_ID], conf=0.15, verbose=False
    )[0]
    tracks = tracker.update(yolo_results_to_bytetrack_format(frame_result), frame_id)
    if len(tracks) == 0:
        continue
    # Prepend the frame index as column 0 so every row records its frame.
    all_tracked_objects_1.append(np.insert(tracks, 0, frame_id, axis=1))

In [None]:
output_file_path = "expected_output/objects_detected_and_tracked_video1.txt"

# open(..., "w") does not create missing parent folders, so make sure the
# destination directory exists before writing.
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

# One line per tracked object: the row values (frame index first) separated by spaces.
with open(output_file_path, "w") as file:
    for array in all_tracked_objects_1:
        file.writelines(" ".join(map(str, row)) + "\n" for row in array)


#### Generating the per-frame object detections of the first video, used as input for the integration test

In [None]:
# Detection only (no tracking): pair each frame index of video 1 with its
# detections in tracker format. Frames without detections are kept as well.
all_detections_by_frame_1 = []

for frame_id, image_filename in enumerate(available_frames_1):
    frame_result = model.predict(
        cv2.imread(image_filename), classes=[CAR_CLASS_ID], conf=0.15, verbose=False
    )[0]
    all_detections_by_frame_1.append(
        (frame_id, yolo_results_to_bytetrack_format(frame_result))
    )


In [None]:
detections_file_path = "test_input/objects_detected_video1.txt"

# open(..., "w") does not create missing parent folders, so make sure the
# destination directory exists before writing.
os.makedirs(os.path.dirname(detections_file_path), exist_ok=True)

# One line per detection: the frame index followed by the detection values,
# separated by spaces. Frames with no detection contribute no line.
with open(detections_file_path, "w") as file:
    for frame_id, detections_bytetrack_format in all_detections_by_frame_1:
        for detection in detections_bytetrack_format:
            file.write(" ".join(map(str, [frame_id, *detection])) + "\n")

#### Second video

In [None]:
# VIDEO 2 to frames
# Split video 2 into PNG frames at 12 fps, written as frames/video_2_<n>.png.
# The `frames` folder is created if needed; ffmpeg's banner and log output are silenced.
!mkdir -p frames && ffmpeg -i $VIDEO_PATH_2 -vf fps=12 frames/video_2_%d.png -hide_banner -loglevel panic

In [None]:
# List the frames of video 2 and order them by the integer that follows the last
# underscore in the file name (numeric order, not string order).
available_frames_2 = sorted(
    glob.glob("frames/video_2_*.png"),
    key=lambda frame_path: int(frame_path.rsplit("_", 1)[-1].split(".")[0]),
)

#### Generating the expected output for the second video

In [None]:
# Give video 2 its own tracker, set up the same way as before video 1 (new
# BYTETracker, BaseTrack._count back to 0), so these results do not depend on
# video 1 having been pushed through the same tracker instance earlier.
tracker = BYTETracker(track_thresh=0.15, track_buffer=3, match_thresh=0.85, frame_rate=12)
BaseTrack._count = 0

# Run detection + tracking on every frame of video 2 and keep the tracker output
# of each frame that produced at least one tracked object.
all_tracked_objects_2 = []
for frame_id, image_filename in enumerate(available_frames_2):
    img = cv2.imread(image_filename)
    detections = model.predict(img, classes=[FRUIT_CLASS_ID], conf=0.15, verbose=False)[0]
    detections_bytetrack_format = yolo_results_to_bytetrack_format(detections)
    tracked_objects = tracker.update(detections_bytetrack_format, frame_id)
    if len(tracked_objects) > 0:
        # Prepend the frame index as column 0 so every row records its frame.
        all_tracked_objects_2.append(np.insert(tracked_objects, 0, frame_id, axis=1))

In [None]:
output_file_path = "expected_output/objects_detected_and_tracked_video2.txt"

# open(..., "w") does not create missing parent folders, so make sure the
# destination directory exists before writing.
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

# One line per tracked object: the row values (frame index first) separated by spaces.
with open(output_file_path, "w") as file:
    for array in all_tracked_objects_2:
        file.writelines(" ".join(map(str, row)) + "\n" for row in array)

#### Generating the per-frame object detections of the second video, used as input for the integration test

In [None]:
# Detection only (no tracking): pair each frame index of video 2 with its
# detections in tracker format, skipping frames where nothing was detected.
all_detections_by_frame_2 = []

for frame_id, image_filename in enumerate(available_frames_2):
    frame_result = model.predict(
        cv2.imread(image_filename), classes=[FRUIT_CLASS_ID], conf=0.15, verbose=False
    )[0]
    frame_detections = yolo_results_to_bytetrack_format(frame_result)
    if len(frame_detections) == 0:
        continue
    all_detections_by_frame_2.append((frame_id, frame_detections))

In [None]:
detections_file_path = "test_input/objects_detected_video2.txt"

# open(..., "w") does not create missing parent folders, so make sure the
# destination directory exists before writing.
os.makedirs(os.path.dirname(detections_file_path), exist_ok=True)

# One line per detection: the frame index followed by the detection values,
# separated by spaces.
with open(detections_file_path, "w") as file:
    for frame_id, detections_bytetrack_format in all_detections_by_frame_2:
        for detection in detections_bytetrack_format:
            file.write(" ".join(map(str, [frame_id, *detection])) + "\n")