# 📚 Import Libraries

In [None]:
import numpy as np
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import glob
import shutil
import sys
sys.path.append('../input/tensorflow-great-barrier-reef')
import torch
from PIL import Image
import ast

## Please upvote if you find this helpful

In [None]:
# Dataset root used to build image paths, and the directory holding the first checkpoint.
ROOT_DIR  = '/kaggle/input/tensorflow-great-barrier-reef/'
MODEL_DIR = '/kaggle/input/barrier-models'

# One checkpoint per ensemble member; the per-model lists below are indexed in the same order.
CKPT_PATHS = [f'{MODEL_DIR}/yolov5l6_epoch19_video1_r_708_p_868_m_38_s_642_size_3100_all_data.pt',
              f'../input/asnorkin-gbreef-yolov5/yolov5l6_2s1_gbr_video_v0_fold2_obj8.0_yanc_strong_noise_enhance_fixedv6_1280_4b10e.pt',
            ]

# Per-model settings (index 0 -> first checkpoint, index 1 -> second checkpoint).
AUGMENT    = [True, False]   # handed to predict() as its `augment` argument
IMG_SIZES  = [3100, 1920]    # handed to predict() as its `size` argument
CONFS      = [0.15, 0.2]     # handed to load_model() as the NMS confidence threshold
IOUS       = [0.3, 0.3]      # handed to load_model() as the NMS IoU threshold
WEIGHTS    = [0.55, 0.45]    # handed to run_wbf() as `weights`
# Boxes whose final score is below this value are left out of the submission string.
PRED_CONF  = 0.15

# Enables the norfair installation/definitions and the tracking step of the test loop.
TRACKING = True

In [None]:
def get_path(row):
    """Store the frame's JPEG location in ``row['image_path']`` and return the row.

    The path is built from the global ``ROOT_DIR`` plus the row's ``video_id``
    and ``video_frame`` attributes.
    """
    image_path = f'{ROOT_DIR}/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    row['image_path'] = image_path
    return row

In [None]:
def voc2yolo(bboxes, image_height=720, image_width=1280):
    """
    Convert Pascal-VOC corner boxes to normalized YOLO boxes.

    voc  => [x1, y1, x2, y2] (pixels)
    yolo => [xmid, ymid, w, h] (normalized)

    The input array is left untouched; a float copy is returned.
    """
    # Work on a float copy: with an integer dtype the normalized values would truncate.
    yolo = bboxes.copy().astype(float)

    # Scale x coordinates by the image width and y coordinates by the image height.
    yolo[..., [0, 2]] /= image_width
    yolo[..., [1, 3]] /= image_height

    box_w = yolo[..., 2] - yolo[..., 0]
    box_h = yolo[..., 3] - yolo[..., 1]

    # Move the top-left corner to the box centre, then store the extents.
    yolo[..., 0] += box_w / 2
    yolo[..., 1] += box_h / 2
    yolo[..., 2] = box_w
    yolo[..., 3] = box_h

    return yolo

def yolo2voc(bboxes, image_height=720, image_width=1280):
    """
    Convert normalized YOLO boxes to Pascal-VOC corner boxes in pixels.

    yolo => [xmid, ymid, w, h] (normalized)
    voc  => [x1, y1, x2, y2]

    The input array is left untouched; a float copy is returned.
    """
    # Float copy so the scaling below does not truncate on integer input.
    voc = bboxes.copy().astype(float)

    # Back to pixel units.
    voc[..., [0, 2]] *= image_width
    voc[..., [1, 3]] *= image_height

    # centre - half extent => top-left corner; top-left + extent => bottom-right corner.
    top_left = voc[..., [0, 1]] - voc[..., [2, 3]] / 2
    voc[..., [2, 3]] = top_left + voc[..., [2, 3]]
    voc[..., [0, 1]] = top_left

    return voc

def coco2yolo(bboxes, image_height=720, image_width=1280):
    """
    Convert COCO pixel boxes to normalized YOLO boxes.

    coco => [xmin, ymin, w, h]
    yolo => [xmid, ymid, w, h] (normalized)

    The input array is left untouched; a float copy is returned.
    """
    # Float copy so the division below does not truncate on integer input.
    yolo = bboxes.copy().astype(float)

    # Normalize: x and w by the image width, y and h by the image height.
    yolo[..., [0, 2]] /= image_width
    yolo[..., [1, 3]] /= image_height

    # (xmin, ymin) => (xmid, ymid)
    yolo[..., [0, 1]] += yolo[..., [2, 3]] / 2

    return yolo

def yolo2coco(bboxes, image_height=720, image_width=1280):
    """
    Convert normalized YOLO boxes to COCO pixel boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    coco => [xmin, ymin, w, h]

    The input array is left untouched; a float copy is returned.
    """
    # Float copy so the scaling below does not truncate on integer input.
    coco = bboxes.copy().astype(float)

    # Denormalize: x and w by the image width, y and h by the image height.
    coco[..., [0, 2]] *= image_width
    coco[..., [1, 3]] *= image_height

    # (xmid, ymid) => (xmin, ymin)
    coco[..., [0, 1]] -= coco[..., [2, 3]] / 2

    return coco

def voc2coco(bboxes, image_height=720, image_width=1280):
    """
    Convert Pascal-VOC corner boxes to COCO boxes.

    voc  => [x1, y1, x2, y2]
    coco => [xmin, ymin, w, h]

    The conversion is done directly (w = x2 - x1, h = y2 - y1) rather than
    by normalizing to YOLO format and back. The round trip multiplies and
    divides by the image size, which can leave values such as 41.999999 that
    a later integer cast truncates to the wrong pixel.

    ``image_height`` and ``image_width`` are accepted for backward
    compatibility; the result does not depend on them.

    The input array is left untouched; a float copy is returned.
    """
    coco = bboxes.copy().astype(float)
    coco[..., [2, 3]] -= coco[..., [0, 1]]
    return coco


def load_image(image_path):
    """Read an image from disk and return it as an RGB array.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``image_path``
            (``cv2.imread`` signals this by returning ``None``).
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        # Fail with the offending path instead of an opaque cvtColor assertion.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one bounding box, and optionally a text label, on ``img`` in place.

    Args:
        x: box corners as (x1, y1, x2, y2); each value is cast to int.
        img: image array that is drawn on directly.
        color: colour passed to OpenCV; a random one is picked when falsy.
        label: optional text drawn in a filled tag above the top-left corner.
        line_thickness: line thickness; derived from the image size when falsy.
    """
    # Imported here because the notebook's import cell does not import `random`;
    # without it the random-colour fallback raised NameError.
    import random

    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # Label background spans the text size, anchored at the box's top-left corner.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def _bbox_to_voc_corners(bbox, bbox_format, image_shape):
    """Return integer (x1, y1, x2, y2) pixel corners for one box in ``bbox_format``."""
    if bbox_format == 'yolo':
        # Normalized centre/size -> pixel centre and half extents.
        xc = round(float(bbox[0]) * image_shape[1])
        yc = round(float(bbox[1]) * image_shape[0])
        half_w = round(float(bbox[2]) * image_shape[1] / 2)
        half_h = round(float(bbox[3]) * image_shape[0] / 2)
        return (xc - half_w, yc - half_h, xc + half_w, yc + half_h)

    v0, v1, v2, v3 = (int(round(bbox[i])) for i in range(4))
    if bbox_format == 'coco':
        # (xmin, ymin, w, h) -> corners
        return (v0, v1, v0 + v2, v1 + v3)
    # 'voc_pascal': already corners
    return (v0, v1, v2, v3)


def draw_bboxes(img, bboxes, classes, class_ids, colors = None, show_classes = None, bbox_format = 'yolo', class_name = False, line_thickness = 2):
    """Return a copy of ``img`` with the given boxes drawn on it.

    Args:
        img: source image array; it is copied, not modified.
        bboxes: sequence of boxes in ``bbox_format``.
        classes: class name for each box.
        class_ids: integer class id for each box.
        colors: a single colour, or a list indexed by class id;
            defaults to (0, 255, 0).
        show_classes: only boxes whose class name is in this collection are
            drawn; defaults to all of ``classes``.
        bbox_format: 'yolo', 'coco' or 'voc_pascal'.
        class_name: label with the class name when true, otherwise with the
            class id.
        line_thickness: forwarded to ``plot_one_box``.

    Raises:
        ValueError: if ``bbox_format`` is not one of the supported formats.
    """
    if bbox_format not in ('yolo', 'coco', 'voc_pascal'):
        raise ValueError('wrong bbox format')

    image = img.copy()
    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255 ,0) if colors is None else colors

    for idx, bbox in enumerate(bboxes):
        cls = classes[idx]
        cls_id = class_ids[idx]
        color = colors[cls_id] if isinstance(colors, list) else colors

        if cls not in show_classes:
            continue

        # The yolo branch used to call an undefined get_label(); every format
        # now labels with the class id, as the coco/voc branches already did.
        plot_one_box(_bbox_to_voc_corners(bbox, bbox_format, image.shape),
                     image,
                     color = color,
                     label = cls if class_name else str(cls_id),
                     line_thickness = line_thickness)

    return image

def get_bbox(annots):
    """Turn a list of annotation dicts into a list of value lists.

    Each dict contributes ``list(dict.values())`` in the dict's own order.
    """
    return [[*annot.values()] for annot in annots]

def get_imgsize(row):
    """Store the image's pixel ``width`` and ``height`` on ``row`` and return it.

    The size is read with PIL (imported at the top of the notebook), because
    the ``imagesize`` package the original code referenced is never imported.
    """
    with Image.open(row['image_path']) as im:
        # PIL reports size as (width, height).
        row['width'], row['height'] = im.size
    return row

# Seed NumPy's global RNG so the drawing colour is the same on every run.
np.random.seed(32)
# One random colour tuple per class (a single class here).
colors = [tuple(np.random.randint(255) for _ in range(3)) for _ in range(1)]

In [None]:
def to_uint8(img):
    """Clip values to the range [0, 255] and cast the result to ``np.uint8``."""
    clipped = np.clip(img, 0, 255)
    return clipped.astype(np.uint8)


def channel_stretching(img):
    """Shift a channel so its minimum is 0 and scale it by 1 / (max - min).

    The divisor is floored at 1, so a constant channel maps to all zeros
    instead of dividing by zero.
    """
    lowest = np.min(img)
    highest = np.max(img)
    span = max(1, (highest - lowest))
    return (img - lowest) * (1 / span)


def enchance(img):
    """Return a gamma-corrected uint8 version of an RGB image.

    NOTE(review): the HSV steps below (histogram equalisation, CLAHE, channel
    stretching) produce ``out_img``, but the gamma-correction line reads the
    function argument ``img`` instead, so the returned image is computed from
    the unmodified input and the HSV result is overwritten. Confirm whether
    that is intentional (e.g. matches how the checkpoints were trained)
    before changing it.
    """
    # TO HSV
    hsv_img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)

    # Histogram equalisation on the V-channel
    hsv_img[:, :, 2] = cv2.equalizeHist(hsv_img[:, :, 2])

    # CLAHE
    h, s, v = hsv_img[:, :, 0], hsv_img[:, :, 1], hsv_img[:, :, 2]
    clahe = cv2.createCLAHE(clipLimit=15.0, tileGridSize=(20, 20))
    v = clahe.apply(v)

    # HSVStretching
    # NOTE(review): channel_stretching scales by 1 / (max - min), so s and v
    # appear to lie in [0, 1] before the uint8 cast below -- confirm.
    s = channel_stretching(s)
    v = channel_stretching(v)

    # TO RGB
    hsv_img = np.dstack((h, s, v))
    hsv_img = to_uint8(hsv_img)
    out_img = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2RGB)
    out_img = to_uint8(out_img)

    # Gamma correction
    # Uses `img` (the input), not `out_img`; see the NOTE in the docstring.
    R = 255.0
    out_img = (R * np.power(img.astype(np.uint32) / R, 1.5))

    return to_uint8(out_img)

In [None]:
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/

In [None]:
def load_model(ckpt_path, conf=0.25, iou=0.50):
    """Load a custom YOLOv5 checkpoint from the local repo copy and configure it.

    Args:
        ckpt_path: path of the ``.pt`` checkpoint.
        conf: NMS confidence threshold set on the model.
        iou: NMS IoU threshold set on the model.

    Returns:
        The model returned by ``torch.hub.load`` with its inference attributes set.
    """
    model = torch.hub.load(
        '/kaggle/input/yolov5-lib-ds',  # local repo
        'custom',
        path=ckpt_path,
        source='local',
        force_reload=True,
    )

    inference_settings = {
        'conf': conf,          # NMS confidence threshold
        'iou': iou,            # NMS IoU threshold
        'classes': None,       # (optional list) filter by class
        'multi_label': False,  # NMS multiple labels per box
        'max_det': 1000,       # maximum number of detections per image
    }
    for attr_name, attr_value in inference_settings.items():
        setattr(model, attr_name, attr_value)

    return model

# 🔭 Inference

## Helper

In [None]:
def predict(model, img, size=768, augment=False, idx=0):
    """Run one model on one image and return COCO boxes with confidences.

    Args:
        model: callable detection model returned by ``load_model``.
        img: image array; its first two dimensions are used as height and width.
        size: inference size forwarded to the model.
        augment: test-time-augmentation flag forwarded to the model.
            It was previously accepted but never passed on.
        idx: unused; kept for backward compatibility.

    Returns:
        ``(bboxes, confs)`` where ``bboxes`` is an integer array of
        [xmin, ymin, w, h] rows and ``confs`` the matching confidences,
        or ``([], [])`` when nothing is detected.
    """
    height, width = img.shape[:2]
    results = model(img, size=size, augment=augment)  # custom inference size / TTA
    preds   = results.pandas().xyxy[0]
    bboxes  = preds[['xmin','ymin','xmax','ymax']].values
    if len(bboxes) == 0:
        return [],[]

    bboxes = voc2coco(bboxes,height,width).astype(int)
    confs  = preds.confidence.values
    return bboxes, confs
    
def format_prediction(bboxes, confs):
    """Build a space-separated ``conf xmin ymin w h`` string for all boxes.

    Returns an empty string when there are no boxes.
    """
    if not len(bboxes) > 0:
        return ''

    pieces = []
    for idx, (xmin, ymin, w, h) in enumerate(bboxes):
        conf = confs[idx]
        pieces.append(f'{conf} {xmin} {ymin} {w} {h}')
    return ' '.join(pieces).strip(' ')

def show_img(img, bboxes, bbox_format='yolo'):
    """Draw all boxes as 'starfish' on a copy of ``img`` and return an 800x400 PIL image."""
    box_count = len(bboxes)
    annotated = draw_bboxes(
        img = img,
        bboxes = bboxes,
        classes = ['starfish'] * box_count,
        class_ids = [0] * box_count,
        class_name = True,
        colors = colors,
        bbox_format = bbox_format,
        line_thickness = 2,
    )
    preview = Image.fromarray(annotated)
    return preview.resize((800, 400))

## WBF

In [None]:
import sys; sys.path.append('/kaggle/input/weightedboxesfusion/')

In [None]:
# Replaces the ROOT_DIR assigned in the configuration cell with the relative
# dataset path (no trailing slash); the code below builds paths from this value.
ROOT_DIR = '../input/tensorflow-great-barrier-reef'

In [None]:
def prepare_boxes(bboxes_1):
    """Convert [xmin, ymin, w, h] rows into [xmin, ymin, xmax, ymax] rows.

    Returns a copy; an empty input is returned as an unchanged copy.
    """
    corners = bboxes_1.copy()
    if not len(bboxes_1) > 0:
        return corners

    # (xmax, ymax) = (xmin, ymin) + (w, h)
    corners[:, 2:4] = bboxes_1[:, 0:2] + bboxes_1[:, 2:4]
    return corners

In [None]:
def get_path(row):
    """Set ``row['image_path']`` to the frame's JPEG path under ``ROOT_DIR`` and return the row."""
    frame_path = f'{ROOT_DIR}/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    row['image_path'] = frame_path
    return row

# Train Data
df = pd.read_csv(f'{ROOT_DIR}/train.csv')
df = df.progress_apply(get_path, axis=1)
df['old_image_path'] = f'{ROOT_DIR}/train_images/video_'+df.video_id.astype(str)+'/'+df.video_frame.astype(str)+'.jpg'
# The annotations column holds Python-literal strings; ast.literal_eval parses
# them without executing arbitrary code the way eval() would.
df['annotations'] = df['annotations'].progress_apply(ast.literal_eval)
display(df.head(2))
# Number of annotated boxes per frame, and the share (in %) of frames with/without boxes.
df['num_bbox'] = df['annotations'].progress_apply(len)
data = (df.num_bbox>0).value_counts()/len(df)*100

In [None]:
from ensemble_boxes import *

def run_wbf(bboxes, confs, image_size=512, iou_thr=0.55, skip_box_thr=0.7, weights=None):
    """Fuse per-model detections with Weighted Boxes Fusion.

    Args:
        bboxes: list with one array of corner boxes (pixels) per model.
        confs: list with one array of confidences per model.
        image_size: boxes are divided by ``image_size - 1`` before fusion and
            multiplied back afterwards.
        iou_thr: forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: forwarded to ``weighted_boxes_fusion``.
        weights: optional per-model weights. They were previously ignored
            because ``weights=None`` was hard-coded in the call. If their
            count does not match the number of box lists (for example, the
            caller dropped a model without detections), they are not used.

    Returns:
        ``(boxes, scores, labels)`` with boxes scaled back to pixels.
    """
    scale = image_size - 1
    boxes = [bbox / scale for bbox in bboxes]
    scores = list(confs)
    # Single-class problem: every detection gets label 1.
    labels = [np.ones(conf.shape[0]) for conf in confs]

    if weights is not None and len(weights) != len(boxes):
        # Weights can no longer be matched to models; use equal weighting.
        weights = None

    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes * scale
    return boxes, scores, labels

In [None]:


# Load both ensemble members with their own NMS thresholds.
model_1 = load_model(CKPT_PATHS[0], conf=CONFS[0], iou=IOUS[0])
model_2 = load_model(CKPT_PATHS[1], conf=CONFS[1], iou=IOUS[1])

# Preview the ensemble on a few training frames that contain more than one box.
image_paths = df[df.num_bbox>1].sample(100).image_path.tolist()
for idx, path in enumerate(image_paths):
    image_np = cv2.imread(path)[...,::-1]  # BGR -> RGB
    
    # Pass each model its own augmentation flag (the whole AUGMENT list was
    # passed before), matching the test loop.
    bboxes_1, confis_1 = predict(model_1, image_np, size=IMG_SIZES[0], augment=AUGMENT[0])
    bboxes_2, confis_2 = predict(model_2, image_np, size=IMG_SIZES[1], augment=AUGMENT[1])
    # coco -> corner format, then drop models that returned no detections
    boxes = [prepare_boxes(bb) for bb in [bboxes_1, bboxes_2]]
    confs = [c for i,c in enumerate([confis_1, confis_2]) if len(boxes[i])>0]
    boxes = [b for b in boxes if len(b)>0]
    
    bboxes = []
    scores = []
    
    if len(boxes)>0:
        bboxes, scores, labels = run_wbf(boxes, confs, 
                                             image_size = 1280, iou_thr=0.6, skip_box_thr=0.05, weights=WEIGHTS)
        if len(bboxes)>0:
            # corner format -> coco (w, h)
            bboxes[:,3] = bboxes[:,3] - bboxes[:,1]
            bboxes[:,2] = bboxes[:,2] - bboxes[:,0]

            bboxes = bboxes.round().astype(int).tolist()
    
    annot          = format_prediction(bboxes, scores)
    print('\n\nEnsemble (WBF) Predictions: ')
    display(show_img(image_np, bboxes, bbox_format='coco'))
    
    # Only the first few sampled frames are shown.
    if idx>5:
        break

## Tracking

In [None]:
if TRACKING:
    # norfair dependencies
    %cd /kaggle/input/norfair031py3/
    !pip install commonmark-0.9.1-py2.py3-none-any.whl -f ./ --no-index
    !pip install rich-9.13.0-py3-none-any.whl

    !mkdir /kaggle/working/tmp
    !cp -r /kaggle/input/norfair031py3/filterpy-1.4.5/filterpy-1.4.5/ /kaggle/working/tmp/
    %cd /kaggle/working/tmp/filterpy-1.4.5/
    !pip install .
    !rm -rf /kaggle/working/tmp

    # norfair
    %cd /kaggle/input/norfair031py3/
    !pip install norfair-0.3.1-py3-none-any.whl -f ./ --no-index
    %cd /kaggle/working
    
    import cv2
    import numpy as np
    from norfair import Detection, Tracker
    from norfair.tracker import TrackedObject

    # Helper to convert bbox in format [x_min, y_min, x_max, y_max, score] to norfair.Detection class
    def to_norfair(detects, frame_id):
        """Wrap [x_min, y_min, x_max, y_max, score] rows as norfair ``Detection`` objects.

        Each detection's point is the box centre, its score is the box score,
        and its ``data`` holds (width, height, frame_id).
        """
        detections = []
        for x_min, y_min, x_max, y_max, score in detects:
            centre = np.array([(x_min + x_max) / 2, (y_min + y_max) / 2])
            size_and_frame = np.array([x_max - x_min, y_max - y_min, frame_id])
            detections.append(
                Detection(points=centre, scores=np.array([score]), data=size_and_frame)
            )
        return detections

    # Euclidean distance function to match detections on this frame with tracked_objects from previous frames
    def euclidean_distance(detection, tracked_object):
        """Return the norm of the difference between a detection's points and a track's estimate."""
        offset = detection.points - tracked_object.estimate
        return np.linalg.norm(offset)


    def calculate_homography(image, next_image, min_matches=21, topk=100, alpha=0.7, algo="orb", debug=False):
        """Estimate the homography mapping ``image`` onto ``next_image``.

        Keypoints are matched with FLANN and filtered by the ratio of the
        best to the second-best match distance (must be below ``alpha``);
        at most ``topk`` of the best-ratio matches are kept. If fewer than
        ``min_matches`` survive with ORB, the function retries once with SIFT
        and ``min_matches=11``.

        Compared with the previous version, the following no longer raise:
        frames without descriptors, knn results with fewer than two
        neighbours, a zero second-neighbour distance, and ``findHomography``
        returning no mask. Each is treated as "no usable match".

        Args:
            image: previous frame.
            next_image: current frame.
            min_matches: minimum number of good matches needed to fit a homography.
            topk: maximum number of best-ratio matches considered.
            alpha: ratio-test threshold.
            algo: "orb" or "sift".
            debug: when true, also return keypoints, matches and the inlier mask.

        Returns:
            ``M`` (or ``None`` when no homography was found), or the tuple
            ``(M, kp1, kp2, good, matchesMask)`` when ``debug`` is true.

        Raises:
            ValueError: if ``algo`` is not "orb" or "sift".
        """
        if algo == "sift":
            descriptor = cv2.SIFT_create()
        elif algo == "orb":
            descriptor = cv2.ORB_create()
        else:
            raise ValueError(f"Unexpected algorithm: {algo}")

        # find the keypoints and descriptors
        kp1, des1 = descriptor.detectAndCompute(image, None)
        kp2, des2 = descriptor.detectAndCompute(next_image, None)

        good = []
        # Descriptors may be missing when a frame yields no keypoints.
        if des1 is not None and des2 is not None and len(des1) > 0 and len(des2) > 0:
            # Cast to float32 for the FLANN KD-tree matcher.
            des1 = np.float32(des1)
            des2 = np.float32(des2)

            FLANN_INDEX_KDTREE = 1
            index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
            search_params = dict(checks=50)
            flann = cv2.FlannBasedMatcher(index_params, search_params)
            matches = flann.knnMatch(des1, des2, k=2)

            # Keep only queries that really got two neighbours, so the ratio is defined.
            pairs = [pair for pair in matches if len(pair) == 2]
            scores = [
                m.distance / n.distance if n.distance > 0 else float("inf")
                for m, n in pairs
            ]
            indices = np.argsort(scores)
            good = [pairs[i][0] for i in indices[:topk] if scores[i] < alpha]

        M, matchesMask = None, None
        if len(good) >= min_matches:
            src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
            dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
            M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
            if mask is not None:
                matchesMask = mask.ravel().tolist()
        else:
            print(f"Not enough matches: {len(good)} for {algo}")

            # Try to improve using SIFT
            if algo == "orb":
                M, kp1, kp2, good, matchesMask = calculate_homography(
                    image, next_image, min_matches=11, topk=topk, alpha=alpha, algo="sift", debug=True)

        if debug:
            return M, kp1, kp2, good, matchesMask

        return M


    def transform(points, H):
        """Apply the perspective transform ``H`` to 2-D points and return the transformed points."""
        # perspectiveTransform is given a (1, N, 2) float array; strip the leading axis afterwards.
        pts = np.asarray(points, dtype=float).reshape(1, -1, 2)
        warped = cv2.perspectiveTransform(pts, H)
        return warped[0]


    def split(bboxes, confs, confthresh):
        """Separate confident detections from the full detection list.

        Returns:
            ``(bboxes, confs, weak_detects)`` where the first two are arrays of
            the boxes/confidences with ``conf >= confthresh`` and
            ``weak_detects`` lists every input box as
            [x, y, x + w, y + h, conf].
        """
        weak_detects = []
        for (x, y, w, h), conf in zip(bboxes, confs):
            weak_detects.append([x, y, x + w, y + h, conf])

        confident = [(bb, conf) for bb, conf in zip(bboxes, confs) if conf >= confthresh]
        kept_boxes = np.asarray([bb for bb, _ in confident])
        kept_confs = np.asarray([conf for _, conf in confident])
        return kept_boxes, kept_confs, weak_detects


    class HomographyEstimator:
        """Estimates the homography between each frame and the one passed before it."""

        def __init__(self, min_matches=21, topk=100, alpha=0.7):
            # Matching parameters forwarded to calculate_homography on every call.
            self.min_matches, self.topk, self.alpha = min_matches, topk, alpha
            # No previous frame until estimate() has been called once.
            self.prev_frame = None

        def estimate(self, frame):
            """Return the homography from the previous frame to ``frame``.

            Returns ``None`` for the first frame or when the computation
            raises; the failure is printed. ``frame`` is remembered as the
            previous frame for the next call.
            """
            previous = self.prev_frame
            result = None

            if previous is not None:
                matcher_args = {
                    "min_matches": self.min_matches,
                    "topk": self.topk,
                    "alpha": self.alpha,
                }
                try:
                    result = calculate_homography(previous, frame, **matcher_args)
                except Exception as e:
                    print(f"Failed to calculate homography: {e}")

            self.prev_frame = frame
            return result


    class MatchingCOTSTracker(Tracker):
        """norfair ``Tracker`` subclass with a confidence-aware update step.

        ``update`` optionally shifts existing tracks with a homography, feeds
        the new detections to ``weak_update`` and returns boxes for selected
        tracked objects.
        NOTE(review): relies on ``Tracker`` attributes and on
        ``update_objects_in_place`` / the ``TrackedObject`` constructor as
        provided by the installed norfair version -- confirm when upgrading.
        """

        def weak_update(self, detections=None, period=1, confthresh=0.0):
            """Advance the tracker by one step and return the initialized tracked objects.

            Detections are first matched against already-initialized objects.
            Those left unmatched are kept only if ``scores[0] >= confthresh``;
            they are then matched against initializing objects, and whatever
            remains starts a new ``TrackedObject``.
            """
            self.period = period

            # Remove stale trackers and make candidate object real if it has hit inertia
            self.tracked_objects = [o for o in self.tracked_objects if o.has_inertia]

            # Update tracker
            for obj in self.tracked_objects:
                obj.tracker_step()

            # Update initialized tracked objects with detections
            unmatched_detections = self.update_objects_in_place(
                [o for o in self.tracked_objects if not o.is_initializing], detections
            )

            # Filter out detections with confidence < confthresh
            unmatched_detections = [
                d for d in unmatched_detections if d.scores[0] >= confthresh
            ]

            # Update not yet initialized tracked objects with yet unmatched detections
            unmatched_detections = self.update_objects_in_place(
                [o for o in self.tracked_objects if o.is_initializing], unmatched_detections
            )

            # Create new tracked objects from remaining unmatched detections
            for detection in unmatched_detections:
                self.tracked_objects.append(
                    TrackedObject(
                        detection,
                        self.hit_inertia_min,
                        self.hit_inertia_max,
                        self.initialization_delay,
                        self.detection_threshold,
                        self.period,
                        self.point_transience,
                        self.filter_setup,
                    )
                )

            return [p for p in self.tracked_objects if not p.is_initializing]

        def update(self, detections, frame_id, H=None, period=1, frame_width=1280, frame_height=720, confthresh=0.0):
            """Update tracks with this frame's detections and return boxes from tracked objects.

            Args:
                detections: rows of [x_min, y_min, x_max, y_max, score].
                frame_id: index of the current frame; stored with each detection.
                H: optional homography applied to existing tracks before matching.
                period: forwarded to ``weak_update``.
                frame_width, frame_height: frame size used by the inside-frame check.
                confthresh: forwarded to ``weak_update`` and used by the
                    low-confidence case below.

            Returns:
                ``np.asarray`` of [x_min, y_min, w, h, conf] rows; empty when
                no tracked object qualifies.
            """
            if H is not None:
                self.shift_tracks(H)

            detections = to_norfair(detections, frame_id)
    #         tracked_objects = super().update(detections, period)
            tracked_objects = self.weak_update(detections, period, confthresh)

            def _inside(tobj):
                # True when the central `alpha` fraction of the box (around the
                # track's estimated centre) lies strictly inside the frame.
                w, h, _ = tobj.last_detection.data
                xc, yc = tobj.estimate[0]

                alpha = 0.5  # Part should be inside
                dw = alpha / 2 * w
                dh = alpha / 2 * h
                x_inside = 0 < xc - dw and xc + dw < frame_width
                y_inside = 0 < yc - dh and yc + dh < frame_height
                return x_inside and y_inside

            result = []
            for tobj in tracked_objects:
                # Detection data was packed as (w, h, frame_id) by to_norfair.
                w, h, last_detected_frame_id = tobj.last_detection.data
                conf = tobj.last_detection.scores[0]
                frames_diff = frame_id - last_detected_frame_id
                # Track was last detected 1 or 2 frames ago and is still inside the frame.
                old_case = (frames_diff in {1, 2} and _inside(tobj))
                # Track was detected on this frame but with confidence below confthresh.
                new_case = (conf < confthresh) and (frames_diff == 0)
                if old_case or new_case:
                    # Rebuild a [x_min, y_min, w, h, conf] box around the estimated centre.
                    xc, yc = tobj.estimate[0]
                    x_min, y_min = int(round(xc - w / 2)), int(round(yc - h / 2))
                    bbox = [x_min, y_min, int(w), int(h), conf]
                    result.append(bbox)

            return np.asarray(result)

        def shift_tracks(self, H):
            """Transform every track's estimate with ``H`` and write it into the first two filter-state entries."""
            for i, tobj in enumerate(self.tracked_objects):
                new_xc, new_yc = transform(tobj.estimate, H)[0]
                self.tracked_objects[i].filter.x[0] = new_xc
                self.tracked_objects[i].filter.x[1] = new_yc
else:
    print("No Tracking")

## Init `Env`

In [None]:
# Competition API module (found via the sys.path entry added in the import cell).
import greatbarrierreef
env = greatbarrierreef.make_env()# initialize the environment
iter_test = env.iter_test()      # an iterator which loops over the test set and sample submission

## Run Inference on **Test**

In [None]:
# Container with the submission column names (not filled in by the loop below).
submission_dict = {
    'id': [],
    'prediction_string': [],
}
# Load both ensemble members with their own NMS confidence / IoU thresholds.
model_1 = load_model(CKPT_PATHS[0], conf=CONFS[0], iou=IOUS[0])
model_2 = load_model(CKPT_PATHS[1], conf=CONFS[1], iou=IOUS[1])

if TRACKING:
    # Keyword arguments for the tracker constructor; kept in a dict because the
    # test loop rebuilds the tracker with the same settings when it resets it.
    tracker_params = {
        "distance_function": euclidean_distance,
        "distance_threshold": 20,
        "hit_inertia_min": 3,
        "hit_inertia_max": 6,
        "initialization_delay": 1,
    }

    homography = HomographyEstimator()
    tracker = MatchingCOTSTracker(**tracker_params)

# Main submission loop: detect with both models, fuse with WBF, optionally add
# boxes from the tracker, then hand the prediction string to the competition env.
for idx, (image_np, sample_prediction_df) in enumerate(iter_test):
    # Model 1 sees the raw frame; model 2 sees the frame processed by enchance().
    bboxes_1, confis_1 = predict(model_1, image_np, size=IMG_SIZES[0], augment=AUGMENT[0])   
    bboxes_2, confis_2 = predict(model_2, enchance(image_np), size=IMG_SIZES[1], augment=AUGMENT[1])
    # [x, y, w, h] -> [x1, y1, x2, y2], then drop models that returned no detections.
    boxes = [prepare_boxes(bb) for bb in [bboxes_1, bboxes_2]]
    confs = [c for i,c in enumerate([confis_1, confis_2]) if len(boxes[i])>0]
    boxes = [b for b in boxes if len(b)>0]
    
    bboxes = []
    scores = []
    
    if len(boxes)>0:
        bboxes, scores, labels = run_wbf(boxes, confs, 
                                             image_size = 1280, iou_thr=0.6, skip_box_thr=0.05, weights=WEIGHTS)
        if len(bboxes)>0:
            # Back to [x, y, w, h].
            bboxes[:,3] = bboxes[:,3] - bboxes[:,1]
            bboxes[:,2] = bboxes[:,2] - bboxes[:,0]

#             bboxes = bboxes.round().astype(int).tolist()   
            # Truncating cast (not rounding); plain lists so the tracker step can append.
            bboxes = bboxes.astype(int).tolist()
            scores = scores.tolist()
        else:
            bboxes, scores = [], []
            
    if TRACKING:
        # Tracking is best-effort: any failure is printed and the frame's
        # detections are submitted as they are at that point.
        try:
            H = homography.estimate(image_np)
            if H is None:  # Reset tracker if homography is broken or zero frame
                tracker = MatchingCOTSTracker(**tracker_params)

            imh, imw = image_np.shape[:2]
            # The tracker takes [x1, y1, x2, y2, score] rows.
            detects = [[x, y, x + w, y + h, score] for (x, y, w, h), score in zip(bboxes, scores)]
            tracked = tracker.update(detects, idx, H, frame_width=imw, frame_height=imh, confthresh=0.0)  # Init all detections
            # Boxes returned by the tracker are added to this frame's detections.
            for x, y, w, h, score in tracked:
                bboxes.append([x, y, w, h])
                scores.append(score)
        except Exception as e:
            print(f"[WARNING] TRACKING FAILED: {e}")

    predictions = []
    for i in range(len(bboxes)):
        box = bboxes[i]        
        score = scores[i]
        # Final confidence filter applied to detector and tracker boxes alike.
        if score < PRED_CONF:
            continue

        x_min = int(box[0])
        y_min = int(box[1])
        bbox_width = int(box[2])
        bbox_height = int(box[3])

        # One "score x y w h" group per box, score printed with two decimals.
        predictions.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, bbox_width, bbox_height))

    prediction_str = ' '.join(predictions)
    
    sample_prediction_df['annotations'] = prediction_str
    env.predict(sample_prediction_df)
    # Show the first three frames as a visual sanity check.
    if idx<3:
        display(show_img(image_np, bboxes, bbox_format='coco'))
        print('Prediction:', prediction_str)

# 👀 Check Submission

In [None]:
# Read submission.csv from the working directory and preview its first rows.
sub_df = pd.read_csv('submission.csv')
sub_df.head()

## Debug

In [None]:
# bboxes = [[0, 0, 20, 20], [50, 50, 70, 70]]
# scores = [0.5, 0.1]
# idx = 0

# detects = [[x, y, x + w, y + h, score] for (x, y, w, h), score in zip(bboxes, scores)]
# tracked = tracker.update(detects, idx, None, frame_width=imw, frame_height=imh, confthresh=0.0)
# for x, y, w, h, score in tracked:
#     bboxes.append([x, y, w, h])
#     scores.append(score)
    
# tracked

In [None]:
# bboxes = [[0, 0, 20, 20], [50, 50, 70, 70]]
# scores = [0.5, 0.35]
# idx = 1

# detects = [[x, y, x + w, y + h, score] for (x, y, w, h), score in zip(bboxes, scores)]
# tracked = tracker.update(detects, idx, None, frame_width=imw, frame_height=imh, confthresh=0.0)
# for x, y, w, h, score in tracked:
#     bboxes.append([x, y, w, h])
#     scores.append(score)
    
# tracked

In [None]:
# bboxes = [[0, 0, 20, 20], [50, 50, 70, 70]]
# scores = [0.2, 0.4]
# idx = 2

# detects = [[x, y, x + w, y + h, score] for (x, y, w, h), score in zip(bboxes, scores)]
# tracked = tracker.update(detects, idx, None, frame_width=imw, frame_height=imh, confthresh=0.0)
# for x, y, w, h, score in tracked:
#     bboxes.append([x, y, w, h])
#     scores.append(score)
    
# tracked

In [None]:
# bboxes = [[0, 0, 20, 20], [50, 50, 70, 70]]
# scores = [0.1, 0.6]
# idx = 3

# detects = [[x, y, x + w, y + h, score] for (x, y, w, h), score in zip(bboxes, scores)]
# tracked = tracker.update(detects, idx, None, frame_width=imw, frame_height=imh, confthresh=0.0)
# for x, y, w, h, score in tracked:
#     bboxes.append([x, y, w, h])
#     scores.append(score)
    
# tracked

In [None]:
# bboxes = [[50, 50, 70, 70]]
# scores = [0.6]
# idx = 4

# detects = [[x, y, x + w, y + h, score] for (x, y, w, h), score in zip(bboxes, scores)]
# tracked = tracker.update(detects, idx, None, frame_width=imw, frame_height=imh, confthresh=0.0)
# for x, y, w, h, score in tracked:
#     bboxes.append([x, y, w, h])
#     scores.append(score)
    
# tracked