In [1]:
%load_ext autoreload
%autoreload 2

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import sys
from itertools import groupby
import numpy as np
import cv2
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
import os
from glob import glob
from multiprocessing import Pool
import matplotlib.pyplot as plt
# import cupy as cp
import ast
from pathlib import Path
import pickle5 as pickle
import torch

import shutil
import sys
sys.path.append("../src/")
sys.path.append("../yolov5/")

import util

from joblib import Parallel, delayed

from IPython.display import display, HTML
from sahi.model import Yolov5DetectionModel
from sahi.utils.cv import read_image
from sahi.utils.file import download_from_url
from sahi.predict import get_prediction, get_sliced_prediction, predict
from ensemble_boxes import weighted_boxes_fusion

import albumentations as A

from IPython.display import Image
from matplotlib import animation, rc
rc('animation', html='jshtml')

In [3]:
from norfair import Detection, Tracker

# Helper to convert bbox in format [x_min, y_min, x_max, y_max, score] to norfair.Detection class
def to_norfair(detects, frame_id):
    result = []
    for x_min, y_min, x_max, y_max, score in detects:
        xc, yc = (x_min + x_max) / 2, (y_min + y_max) / 2
        w, h = x_max - x_min, y_max - y_min
        result.append(Detection(points=np.array([xc, yc]), scores=np.array([score]), data=np.array([w, h, frame_id])))
        
    return result

def euclidean_distance(detection, tracked_object):
    return np.linalg.norm(detection.points - tracked_object.estimate)

def tracking_function(tracker, frame_id, bboxes, scores, best_conf, num_prev_bbox = None):
    
    detects = []
    predictions = []
    
    if len(scores)>0:
        for i in range(len(bboxes)):
            # remember to check
            if scores[i] <= best_conf:
                continue
            box = bboxes[i]
            score = scores[i]
            x_min = int(box[0])
            y_min = int(box[1])
            bbox_width = int(box[2])
            bbox_height = int(box[3])
            detects.append([x_min, y_min, x_min+bbox_width, y_min+bbox_height, score])
            predictions.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, bbox_width, bbox_height))
    # Update tracks using detects from current frame
    tracked_objects = tracker.update(detections=to_norfair(detects, frame_id))
    to_add_preds = []
    for tobj in tracked_objects:
        bbox_width, bbox_height, last_detected_frame_id = tobj.last_detection.data
        if last_detected_frame_id == frame_id:  # Skip objects that were detected on current frame
            continue
        xc, yc = tobj.estimate[0]
        x_min, y_min = int(round(xc - bbox_width / 2)), int(round(yc - bbox_height / 2))

        #exclude those in the edge
        if (x_min + bbox_width >= 1279) or (y_min + bbox_height) >= 719 or (x_min <= 1) or (y_min <= 1):
            continue
        score = tobj.last_detection.scores[0]            
        to_add_preds.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, bbox_width, bbox_height))
        #predictions.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, bbox_width, bbox_height))            
        
    if (num_prev_bbox is None or (num_prev_bbox - len(predictions) + 1) >= len(to_add_preds)):
        predictions.extend(to_add_preds)
        
    return predictions

In [4]:
from ensemble_boxes import weighted_boxes_fusion
def run_wbf(bboxes, confs, iou_thr=0.5, skip_box_thr=0.00000001):
    if len(bboxes) == 1:
        return bboxes[0], confs[0], []   
    else:
        for i in range(len(bboxes)):
            sub_bboxes = bboxes[i]
            if len(sub_bboxes) > 0:
                sub_bboxes[:,2:] = sub_bboxes[:,2:] + sub_bboxes[:,:2]
                sub_bboxes[:,0] = sub_bboxes[:,0] / 1279.
                sub_bboxes[:,1] = sub_bboxes[:,1] / 719.
                sub_bboxes[:,2] = sub_bboxes[:,2] / 1279.
                sub_bboxes[:,3] = sub_bboxes[:,3] / 719.
            bboxes[i] = sub_bboxes    
    labels = [np.ones(len(conf)) for conf in confs]
    boxes, scores, labels = weighted_boxes_fusion(bboxes, confs, labels, iou_thr=iou_thr, skip_box_thr=0.001, allows_overflow=True, conf_type='avg')
    if len(boxes) > 0:
        boxes[:,0] = boxes[:,0] * 1279.
        boxes[:,1] = boxes[:,1] * 719.
        boxes[:,2] = boxes[:,2] * 1279.
        boxes[:,3] = boxes[:,3] * 719.
        boxes[:,2:] = boxes[:,2:] - boxes[:,:2]
        
    boxes = [box for i,box in enumerate(boxes) if scores[i] >= skip_box_thr]
    scores = [conf for conf in scores if conf >= skip_box_thr]

    return boxes, scores, labels

In [5]:
from IPython.display import Image
from PIL import Image as Img
import subprocess
#This code I found in: https://www.kaggle.com/bamps53/create-annotated-video Thank you for sharing.
out_dir = "/home/vincent/Kaggle/data/tensorflow-great-barrier-reef/video_check/"
if not os.path.exists(out_dir):
    os.makedirs(out_dir)

def load_image(image_dir):
    assert os.path.exists(image_dir), f'{image_dir} does not exist.'
    img = cv2.imread(str(image_dir))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

def load_image_with_annotations(image_dir, annotations):
    img = load_image(image_dir)
    if len(annotations) > 0:
        for ann in annotations:
            cv2.rectangle(img, (int(ann['x']), int(ann['y'])),
                (int(ann['x'] + ann['width']), int(ann['y'] + ann['height'])),
                (0, 255, 0), thickness=3)
    return img

def show_prediction(img, bboxes, scores, show = True):
    colors = [(0, 0, 255)]

    obj_names = ["s"]

    for box, score in zip(bboxes, scores):
        cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])), (255,0,0), 2)
        cv2.putText(img, f'{score:.2f}', (int(box[0]), int(box[1])-3), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2, cv2.LINE_AA)
    if show:
        img = Img.fromarray(img).resize((1280, 720))
    return img

def save_bulk_images(df, video_id, sequence_id, pred_col, out_dir, best_conf, name=None, s_f=None, e_f=None):
    width = 1280
    height = 720
    if name is None:
        name = "check_pics"
    
    save_dir = f'{out_dir}/{name}/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    
    # I just generate ony part of video
    if video_id is not None and sequence_id is not None:
        query = 'video_id == {} and sequence == {}'.format(video_id, sequence_id)
        if s_f is not None:
            query = query + 'and video_frame >= {} and video_frame <= {}'.format(s_f, e_f)
        print(query)
        video_df = df.query(query)
    else:
        video_df = df
    print(video_df.shape)
    for _, row in tqdm(video_df.iterrows(), total=len(video_df)):
        video_id = row.video_id
        video_frame = row.video_frame
        annotations = row.annotations
        img_file = row.image_path
        img = load_image_with_annotations(img_file, annotations)
        preds = row[pred_col]
        best_conf = 0
        bboxes = [preds[i][1:] for i in range(len(preds)) if preds[i][0] >= best_conf]
        confis = [preds[i][0] for i in range(len(preds)) if preds[i][0] >= best_conf]        
        img = show_prediction(img, bboxes, confis, show=False)
        cv2.putText(img, f'{video_id}-{video_frame}', (10,70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 1, cv2.LINE_AA)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        cv2.imwrite(save_dir + "{}.jpg".format(video_frame), img)    
    
def make_video(df, video_id, sequence_id, pred_col, out_dir, best_conf, name=None, s_f=None, e_f=None):
    fps = 15
    width = 1280
    height = 720
    if name is None:
        name = "video"
    
    save_path = f'{out_dir}/{name}.mp4'
    tmp_path =  f'{out_dir}/tmp-{name}.mp4'
    output_video = cv2.VideoWriter(tmp_path, cv2.VideoWriter_fourcc(*"MP4V"), fps, (width, height),)
    
    # I just generate ony part of video
    if video_id is not None and sequence_id is not None:
        query = 'video_id == {} and sequence == {}'.format(video_id, sequence_id)
        if s_f is not None:
            query = query + 'and video_frame >= {} and video_frame <= {}'.format(s_f, e_f)
        print(query)
        video_df = df.query(query)
    else:
        video_df = df
    print(video_df.shape)
    for _, row in tqdm(video_df.iterrows(), total=len(video_df)):
        video_id = row.video_id
        video_frame = row.video_frame
        annotations = row.annotations
        img_file = row.image_path
        img = load_image_with_annotations(img_file, annotations)
        preds = row[pred_col]
        best_conf = 0
        bboxes = [preds[i][1:] for i in range(len(preds)) if preds[i][0] >= best_conf]
        confis = [preds[i][0] for i in range(len(preds)) if preds[i][0] >= best_conf]        
        img = show_prediction(img, bboxes, confis, show=False)
        cv2.putText(img, f'{video_id}-{video_frame}', (10,70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 1, cv2.LINE_AA)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        output_video.write(img)
        
    output_video.release()

    if os.path.exists(save_path):
        os.remove(save_path)
    subprocess.run(
        ["ffmpeg", "-i", tmp_path, "-crf", "18", "-preset", "veryfast", "-vcodec", "libx264", save_path]
    )
    os.remove(tmp_path)

In [6]:
from base64 import b64encode
def play(filename):
    html = ''
    video = open(filename,'rb').read()
    src = 'data:video/mp4;base64,' + b64encode(video).decode()
    html += '<video width=1280 controls autoplay loop><source src="%s" type="video/mp4"></video>' % src 
    return HTML(html)

# Read Data

In [7]:
INPUT_DIR = Path("../../data/tensorflow-great-barrier-reef/")
df_origin = pd.read_csv(INPUT_DIR / "train.csv")

In [8]:

df = pd.read_csv(INPUT_DIR / "train.csv")
folds = util.load_pickle("../input/fold_test_2.pkl")
df["fold"] = df["sequence"].apply(lambda x: folds[x])
highFP_df = pd.read_csv('../input/df_highFPNoBB.csv')
df = pd.merge(df, highFP_df[['video_id',"video_frame","highFBNoBB"]], on=["video_id","video_frame"], how='left')
df["highFBNoBB"].fillna(False, inplace=True)

df.shape

(23501, 8)

In [9]:
data_param = {'root_dir':INPUT_DIR,'label_dir':INPUT_DIR / "labels"}
df = df.progress_apply(lambda x: util.get_path(x, data_param, infer=True), axis=1)
df['annotations'] = df['annotations'].progress_apply(lambda x: ast.literal_eval(x))
df["real_bbox"] = df["annotations"].apply(lambda annots: [list(annot.values()) for annot in annots])
df['num_bbox'] = df['annotations'].progress_apply(lambda x: len(x))
data = (df.num_bbox>0).value_counts(normalize=True)*100
print(f"No BBox: {data[0]:0.2f}% | With BBox: {data[1]:0.2f}%")

  0%|          | 0/23501 [00:00<?, ?it/s]

  0%|          | 0/23501 [00:00<?, ?it/s]

  0%|          | 0/23501 [00:00<?, ?it/s]

No BBox: 79.07% | With BBox: 20.93%


In [10]:
df['train'] = False
df.loc[df.query("fold == 0 and (num_bbox > 0 or highFBNoBB)").index, 'train'] = True
df['train'].value_counts()

False    19173
True      4328
Name: train, dtype: int64

In [11]:
# COTS per frame
df.groupby("fold").apply(lambda df: df["num_bbox"].sum() / df.shape[0])

fold
0    0.549803
1    0.332484
dtype: float64

In [12]:
df['fold'].value_counts(normalize=True)

0    0.799711
1    0.200289
Name: fold, dtype: float64

In [13]:
df.head()

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,fold,highFBNoBB,old_image_path,image_path,label_path,real_bbox,num_bbox,train
0,0,40258,0,0,0-0,[],0,False,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,False
1,0,40258,1,1,0-1,[],0,False,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,False
2,0,40258,2,2,0-2,[],0,False,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,False
3,0,40258,3,3,0-3,[],0,False,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,False
4,0,40258,4,4,0-4,[],0,False,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,False


# Add OOF prediction

In [14]:
df_oof = util.load_pickle("../input/wbf_notrack_pred.pkl")
df_oof.head()

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,fold,old_image_path,image_path,label_path,real_bbox,num_bbox,notrack_pred
480,0,45518,873,0,0-873,[],1,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,[]
481,0,45518,874,1,0-874,[],1,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,[]
482,0,45518,875,2,0-875,[],1,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,[]
483,0,45518,876,3,0-876,[],1,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,[]
484,0,45518,877,4,0-877,[],1,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/train...,../../data/tensorflow-great-barrier-reef/label...,[],0,"[[0.6129487156867981, 717.4990131855011, 692.2..."


In [15]:
df['wbf_pred'] = None
df.loc[df['fold']==1, 'wbf_pred'] = df_oof['notrack_pred']

In [16]:
df_pred1 = util.load_pickle("whole_pred_0204_yolov5s6_B.pkl")
df_pred2 = util.load_pickle("whole_pred_0205_yolov5m6_B_LS02.pkl")
df['pred_0204_yolov5s6_B'] = df_pred1['pred_0204_yolov5s6_B']
df['pred_0205_yolov5m6_B_LS02'] = df_pred2['pred_0205_yolov5m6_B_LS02']

In [17]:
best_conf_dict = {
    '0204_yolov5s6_B':0.2,
    '0205_yolov5m6_B_LS02':0.3,
}

In [18]:
iou_thr = 0.4
skip_box_thr = 0.22
wbf_models =["0204_yolov5s6_B", '0205_yolov5m6_B_LS02']
df["wbf_noTrack_pred"] = None
frame_id = 0
tracker = Tracker(
    distance_function=euclidean_distance, 
    distance_threshold=30,
    hit_inertia_min=3,
    hit_inertia_max=6,
    initialization_delay=2,
)              
num_prev_bbox = None
for i, idx in enumerate(df.query("fold!=1").index):
    row = df.loc[idx]
    bboxes_l = []
    confs_l = []    
    for model_version in wbf_models:
        BEST_CONF = best_conf_dict[model_version]
        pred_col = "pred_" + model_version 
        prd_bboxs = row[pred_col]
        prd_bboxs = [p for p in prd_bboxs if p[0] >= BEST_CONF]
        bboxes, confis = [p[1:] for p in prd_bboxs], [p[0] for p in prd_bboxs]    
        bboxes = np.array(bboxes,dtype=np.float64)
        bboxes_l.append(bboxes)
        confs_l.append(confis)
    bboxes_l = np.array(bboxes_l)
    wbf_boxes, wbf_confs, _ = run_wbf(bboxes_l, confs_l, iou_thr, skip_box_thr)
    df.at[idx, "wbf_noTrack_pred"] = [[wbf_confs[i]] + wbf_boxes[i].tolist() for i in range(len(wbf_boxes))]
    
    predictions = tracking_function(tracker, frame_id, wbf_boxes, wbf_confs, 0, num_prev_bbox)
    prd_bboxs_tracking =  [[float(p) for p in pred.split(" ")] for pred in predictions]
    num_prev_bbox = len(wbf_boxes)
    df.at[idx, "wbf_pred"] = prd_bboxs_tracking

    frame_id += 1

  bboxes_l = np.array(bboxes_l)


In [19]:
df.loc[df.query("fold==1").index, "wbf_noTrack_pred"] = df.query("fold==1")['wbf_pred']

## check score

In [20]:
all_gt = [np.array(x) for x in  df['real_bbox']]
all_gt_m = all_gt.copy()
all_pred = [np.array(x) if x is not None else np.array([]) for x in  df['wbf_pred']]
all_pred_notrack = [np.array(x) if x is not None else np.array([]) for x in  df['wbf_noTrack_pred']]

In [21]:
def check_score_gt(df, q):
    selected_gt = []
    selected_pred = []
    for idx in df.query(q).index:
        selected_gt.append(all_gt[idx])
        selected_pred.append(all_pred[idx])
    f2_dict = util.calc_f2_score(selected_gt, selected_pred, verbose=False) 
    return f2_dict['f2']

def check_score_gt_notrack(df, q):
    selected_gt = []
    selected_pred = []
    for idx in df.query(q).index:
        selected_gt.append(all_gt[idx])
        selected_pred.append(all_pred_notrack[idx])
    f2_dict = util.calc_f2_score(selected_gt, selected_pred, verbose=False) 
    return f2_dict['f2']

In [22]:
check_score_gt(df, "fold==1")

0.7484346011131726

In [23]:
check_score_gt(df, "train")

0.8636732955082255

In [24]:
check_score_gt(df, "not train")

0.6744514106583072

In [25]:
check_score_gt_notrack(df, "train")

0.887297161118435

## save video

In [26]:
# for video_id, sequence in df.query("fold!=1").groupby(["video_id","sequence"]).size().index:
#     name = f'check-{video_id}-{sequence}'
#     make_video(df, video_id, sequence, 'wbf_pred', out_dir, 0, name=name)

In [27]:
video_id=2
sequence=22643
s_f = 5661
e_f = 5691

name = f'checkFix2-{video_id}-{sequence}_{s_f}_{e_f}'
make_video(df, video_id, sequence, 'wbf_pred', out_dir, 0, name=name,s_f=s_f, e_f=e_f)

video_id == 2 and sequence == 22643and video_frame >= 5661 and video_frame <= 5691
(31, 18)


OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


  0%|          | 0/31 [00:00<?, ?it/s]

ffmpeg version 4.2.4-1ubuntu0.1 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 9 (Ubuntu 9.3.0-10ubuntu2)
  configuration: --prefix=/usr --extra-version=1ubuntu0.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-avresample --disable-filter=resample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librsvg --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-l

# modify GT

## Modify fold==1 GT

In [None]:
# 1-15827-2148-2205
video_id = 1
sequence = 15827
s_f = 2148
e_f = 2205
q = f"video_id == {video_id} and sequence == {sequence} and video_frame >= {s_f} and video_frame <= {e_f}"
df_check = df.query(q).copy()
for idx in df_check.index:
    pred = all_pred[idx]
    if len(all_gt_m[idx]) == 0:
        if len(pred) > 0:
            all_gt_m[idx] = pred[:,1:]

In [None]:
# 2-26651-4639-4748
video_id = 2
sequence = 26651
s_f = 4639
e_f = 4748
q = f"video_id == {video_id} and sequence == {sequence} and video_frame >= {s_f} and video_frame <= {e_f}"
df_check = df.query(q).copy()
for idx in df_check.index:
    pred = all_pred[idx]
    if len(all_gt_m[idx]) == 0:
        if len(pred) > 0:
            all_gt_m[idx] = pred[:,1:]

In [None]:
# modified f2
selected_gt = []
selected_pred = []
for idx in df.query("fold==1").index:
    selected_gt.append(all_gt_m[idx])
    selected_pred.append(all_pred[idx])
f2_dict = util.calc_f2_score(selected_gt, selected_pred, verbose=False) 
f2_dict['f2']

In [None]:
df.query("video_frame == 6845")

In [None]:
df.loc[[11893]]

## Modify fold != 1 GT

In [None]:
video_id = 1
sequence = 18048
s_f = 6709
e_f = 6780

name = f'InterestingFP_wbf-{video_id}-{sequence}-{s_f}-{e_f}'
make_video(df, video_id, sequence, 'wbf_pred', out_dir, 0, name=name,s_f=s_f, e_f=e_f)

In [None]:
video_id = 1
sequence = 8503
s_f =3891
e_f =3904

name = f'Error_TP_wbf-{video_id}-{sequence}-{s_f}-{e_f}'
make_video(df, video_id, sequence, 'wbf_pred', out_dir, 0, name=name,s_f=s_f, e_f=e_f)

# Add GT

In [121]:
video_dir = out_dir
check_videos = glob(video_dir + "*.mp4")
gt_candidates = []
for name in check_videos:
    name = name.split("/")[-1]
    if name in ['UnkownFP_wbf-2-26651-4154-4200.mp4', 'commonFP_wbf-0-996-11971-11994.mp4']:
        continue
    if "fp" in name.lower() and "error" not in name.lower():
        print(name)
        _, video_id, sequence, s_f, e_f_mp4 = name.split("-")
        e_f = e_f_mp4[:-4]
        gt_candidates.append([int(video_id), int(sequence), int(s_f), int(e_f)])

FP_wbf-0-996-12136-12325.mp4
FP_wbf-0-35305-8171-8292.mp4
FP_wbf-2-37114-2692-2765.mp4
FP_wbf-0-40258-300-380.mp4
FP_wbf-0-40258-0-200.mp4
FP_wbf-0-8399-4400-4885.mp4
InterestingFP&Tracking_wbf-1-8503-5778-5950.mp4
FP_wbf-0-53708-9570-9802.mp4
FP_wbf-0-996-11850-11935.mp4
FP_wbf-0-45015-6000-6065.mp4
FP_wbf-0-53708-9802-9850.mp4
FP_wbf-0-53708-8900-8911.mp4
InterestingFP&Tracking_wbf-1-8503-5180-5726.mp4
FP_wbf-1-15827-2150-2207.mp4
FP&Tracking_wbf-1-8503-4390-4555.mp4
InterestingFP_wbf-1-18048-6709-6780.mp4
MostInterestingFP_wbf-1-8503-4100-4278.mp4
FP_wbf-1-15827-2148-2205.mp4
FP_wbf-2-26651-4639-4748.mp4
InterestingFP_wbf-2-22643-5655-5968.mp4
InterestingFP_wbf-2-29859-10500-10637.mp4
FP_wbf-0-45015-5702-5730.mp4
FP_wbf-0-45518-950-990.mp4


In [122]:
gt_candidates

[[0, 996, 12136, 12325],
 [0, 35305, 8171, 8292],
 [2, 37114, 2692, 2765],
 [0, 40258, 300, 380],
 [0, 40258, 0, 200],
 [0, 8399, 4400, 4885],
 [1, 8503, 5778, 5950],
 [0, 53708, 9570, 9802],
 [0, 996, 11850, 11935],
 [0, 45015, 6000, 6065],
 [0, 53708, 9802, 9850],
 [0, 53708, 8900, 8911],
 [1, 8503, 5180, 5726],
 [1, 15827, 2150, 2207],
 [1, 8503, 4390, 4555],
 [1, 18048, 6709, 6780],
 [1, 8503, 4100, 4278],
 [1, 15827, 2148, 2205],
 [2, 26651, 4639, 4748],
 [2, 22643, 5655, 5968],
 [2, 29859, 10500, 10637],
 [0, 45015, 5702, 5730],
 [0, 45518, 950, 990]]

In [123]:
gt_candidates_dfs = []
cols = ["video_id","sequence","video_frame"]
for video_id, sequence, s_f, e_f in gt_candidates:
    l = e_f - s_f + 1
    df_ = pd.DataFrame([f for f in range(s_f, e_f+1)], columns=['video_frame'])
    df_['video_id'] = video_id
    df_['sequence'] = sequence
    gt_candidates_dfs.append(df_)
gt_candidates_df = pd.concat(gt_candidates_dfs)[cols]

In [124]:
df_new = pd.merge(gt_candidates_df, df[cols + ['annotations','image_path',"real_bbox", "wbf_noTrack_pred"]], on=cols)
df_new = df_new.drop_duplicates(subset=['video_id',"sequence","video_frame"]).sort_values(["video_id","video_frame"])

In [125]:
df_new["added_pred"] = [list([]) for _ in range(df_new.shape[0])]
df_new["new_real_bbox"] = [list([]) for _ in range(df_new.shape[0])]

In [126]:
for idx, row in df_new.iterrows():
    if len(row['wbf_noTrack_pred']) == 0:
        continue
    elif len(row['real_bbox']) == 0:
        df_new.at[idx, 'added_pred'] = df_new.loc[idx,'wbf_noTrack_pred']
    else:
        wbf_pred = np.array(row['wbf_noTrack_pred'])
        real_gt = np.array(row['real_bbox'])
        added_pred = []
        ious = util.calc_iou(np.array(wbf_pred)[:,1:], np.array(real_gt))
        ious_max = ious.max(axis=1)
        added_pred = wbf_pred[ious_max < 0.3].copy().tolist()
        df_new.at[idx,'added_pred'] = added_pred

In [127]:
df_new['added_pred'].apply(len).sum()

1069

In [128]:
df_new['new_real_bbox'] = df_new.apply(lambda x: x['real_bbox'] + [p[1:] for p in x['added_pred']], axis=1)
df_new['wbf_pred'] = df_new['new_real_bbox'].apply(lambda pp:  [[1] + p for p in pp])

In [129]:
#make_video(df_new, None, None, 'wbf_pred', out_dir, 0, name="new_GT_check",s_f=None, e_f=None)

In [130]:
df_new["new_real_bbox"] = [list([]) for _ in range(df_new.shape[0])]
df_new["wbf_pred"] = [list([]) for _ in range(df_new.shape[0])]

In [131]:
# remove those unnecessary
pairs = [
    [0, 40],
    [0, 69],
    [0, 101],
    [0, 103],
    [0, 107],
    [0, 140],
    [0, 153],
    [0, 153],    
]
pairs = np.concatenate(pairs).tolist()
pairs.extend(
[
    0,4596,0,9648,0,11860,
])
for i in range(11857, 11870):
    pairs.extend([0, i])
for i in range(4590, 4597):
    pairs.extend([0, i])
for i in range(4751, 4759):
    pairs.extend([0, i])
    
pairs.extend([
    0, 11900, 0, 12171, 1, 4177, 1, 4178, 1, 4184, 1, 4261, 1, 4262, 1, 4255, 1, 4400, 1, 5269, 1, 5335, 1, 5344, 1, 5348,
    1, 5436, 1, 5439,  1, 5882, 1, 6737, 2, 4712, 2, 4717, 2, 4721, 2,5704, 2,5741, 2, 5907, 0, 4525,
])
for i in range(5516, 5594):
    pairs.extend([1, i])
for i in range(5675, 5684):
    pairs.extend([1, i])
len(pairs)

300

In [132]:
appeared = set([])
removed_FP = 0
manual_process = []
for i in range(len(pairs)//2):
    video_id = pairs[i * 2]
    video_frame = pairs[i * 2 + 1]
    if (video_id, video_frame) in appeared:
        continue
    else:
        appeared.add((video_id, video_frame))
    idx = df_new.query("video_id == @video_id and video_frame == @video_frame").index[0]
    if len(df_new.loc[idx, 'added_pred']) <= 1:
        removed_FP += len(df_new.loc[idx,'added_pred'])
        df_new.at[idx, 'added_pred'] = []
    else:
        manual_process.extend([video_id, video_frame])
print(removed_FP)

45


In [133]:
len(manual_process)

40

In [134]:
df_new.index

Int64Index([ 467,  468,  469,  470,  471,  472,  473,  474,  475,  476,
            ...
            3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, 3412, 3413],
           dtype='int64', length=3427)

In [135]:
tmp = []
mannual_process_df = None
for i in range(len(manual_process)//2):
    video_id = manual_process[i * 2]
    video_frame = manual_process[i * 2 + 1]
    tmp.append(df_new.query("video_id == @video_id and video_frame == @video_frame").copy())
mannual_process_df = pd.concat(tmp)
mannual_process_df

Unnamed: 0,video_id,sequence,video_frame,annotations,image_path,real_bbox,wbf_noTrack_pred,added_pred,new_real_bbox,wbf_pred
1571,0,996,11860,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.6911746263504028, 780.2709791660309, 560.9...","[[0.6911746263504028, 780.2709791660309, 560.9...",[],[]
1568,0,996,11857,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.48151302337646484, 769.6262257099152, 86.3...","[[0.48151302337646484, 769.6262257099152, 86.3...",[],[]
1569,0,996,11858,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.5886034369468689, 769.5400809049606, 542.3...","[[0.5886034369468689, 769.5400809049606, 542.3...",[],[]
1570,0,996,11859,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.5662558674812317, 778.5100421309471, 550.9...","[[0.5662558674812317, 778.5100421309471, 550.9...",[],[]
1572,0,996,11861,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.7072060108184814, 790.9597960710526, 569.5...","[[0.7072060108184814, 790.9597960710526, 569.5...",[],[]
1573,0,996,11862,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.7071714401245117, 805.4812182188034, 46.48...","[[0.7071714401245117, 805.4812182188034, 46.48...",[],[]
1574,0,996,11863,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.6633244752883911, 807.2837029695511, 587.8...","[[0.6633244752883911, 807.2837029695511, 587.8...",[],[]
1575,0,996,11864,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.6915255784988403, 816.3384367823601, 601.7...","[[0.6915255784988403, 816.3384367823601, 601.7...",[],[]
1576,0,996,11865,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.7656852006912231, 824.5827995538712, 615.5...","[[0.7656852006912231, 824.5827995538712, 615.5...",[],[]
1577,0,996,11866,[],../../data/tensorflow-great-barrier-reef/train...,[],"[[0.7503771781921387, 836.1335981488228, 631.0...","[[0.7503771781921387, 836.1335981488228, 631.0...",[],[]


In [136]:
mannual_process_df['added_pred'].apply(lambda x: [p[0] for p in x])

1571            [0.6911746263504028, 0.22442416846752167]
1568            [0.48151302337646484, 0.2662089169025421]
1569            [0.5886034369468689, 0.42555445432662964]
1570             [0.5662558674812317, 0.3016071319580078]
1572            [0.7072060108184814, 0.28356847167015076]
1573             [0.7071714401245117, 0.6711126565933228]
1574             [0.6633244752883911, 0.5287286639213562]
1575              [0.6915255784988403, 0.632911205291748]
1576             [0.7656852006912231, 0.4901295006275177]
1577             [0.7503771781921387, 0.5626591444015503]
1578             [0.7381105422973633, 0.5699230432510376]
1579             [0.7550845146179199, 0.6722005605697632]
2833            [0.3045779764652252, 0.26051944494247437]
2826             [0.5695754289627075, 0.2420150190591812]
1862           [0.32363682985305786, 0.23518715798854828]
2628             [0.644077479839325, 0.27890917658805847]
793     [0.7297202348709106, 0.6788763999938965, 0.229...
2165          

In [137]:
for idx, row in mannual_process_df.iterrows():
    if row['sequence'] == 996:
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'added_pred'] if p[2] >= 360]
        removed_FP += 1
    elif row['sequence'] == 8503 and row['video_frame'] <=4255:
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'added_pred'] if p[2] <= 360]
        removed_FP += 1
    elif row['sequence'] == 8503:
        mannual_process_df.at[idx, 'added_pred'] = []
        removed_FP += 2
    else:
        origin_len = len(mannual_process_df.at[idx, 'added_pred'])
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'added_pred'] if p[0] >= 0.5]
        removed_FP += origin_len - len(mannual_process_df.at[idx, 'added_pred'])

In [138]:
removed_FP

70

In [139]:
df_new.loc[mannual_process_df.index, 'added_pred'] = mannual_process_df['added_pred']

In [140]:
df_new["added_pred"].apply(len).sum()

999

In [141]:
df_new['new_real_bbox'] = df_new.apply(lambda x: x['real_bbox'] + [p[1:] for p in x['added_pred']], axis=1)
df_new['wbf_pred'] = df_new['new_real_bbox'].apply(lambda pp:  [[1] + p for p in pp])

In [48]:
#make_video(df_new, None, None, 'wbf_pred', out_dir, 0, name="new_GT_check_clean",s_f=None, e_f=None)

## combine with old data

In [49]:
if 'mark' in df.columns:
    del df['mark']
df_new['mark'] = True
df = pd.merge(df, df_new[cols + ['mark']], on=cols, how='left')
df['mark'].fillna(False, inplace=True)

In [50]:
df["new_real_bbox"] = df['real_bbox']
df['added_pred'] = [list([]) for i in range(df.shape[0])]
df.loc[df.query('mark').index, "new_real_bbox"] = df_new['new_real_bbox'].values
df.loc[df.query('mark').index, "added_pred"] = df_new['added_pred'].values

In [51]:
df['num_bbox'].sum()

11898

In [52]:
1120/11898

0.09413346780971592

In [54]:
df.groupby("fold").apply(lambda df: np.sum(((df['added_pred'].apply(len)>0).astype(int)  + (df['num_bbox'] == 0)).astype(int) > 1 ))

fold
0    247
1     51
dtype: int64

In [55]:
353 / (df['num_bbox'] > 0).sum()

0.07176255336450497

In [56]:
df.groupby("fold").apply(lambda df: np.sum(df['added_pred'].apply(len)))

fold
0    880
1    119
dtype: int64

In [57]:
# save it
def from_bbox_to_str(bbox):
    if len(bbox) == 0:
        return '[]'
    else:
        d = []
        for p in bbox:
            d.append({"x":round(p[0],1), 
                      "y":round(p[1],1), 
                      "width":round(p[2],1), 
                      "height":round(p[3],1)})
        return str(d)
def from_pred_to_str(bbox):
    if len(bbox) == 0:
        return '[]'
    else:
        d = []
        for p in bbox:
            d.append({'conf':round(p[0],3),
                      "x":round(p[1],2), 
                      "y":round(p[2],3), 
                      "width":round(p[3],4), 
                      "height":round(p[4],5)})
        return str(d)
df['new_annotations'] = df['new_real_bbox'].apply(from_bbox_to_str)
df['new_added_pred'] = df['added_pred'].apply(from_pred_to_str)

In [58]:
ast.literal_eval(df.iloc[9316]["new_annotations"])

[{'x': 386, 'y': 531, 'width': 26, 'height': 25},
 {'x': 638, 'y': 228, 'width': 36, 'height': 42},
 {'x': 699, 'y': 314, 'width': 41, 'height': 40},
 {'x': 308, 'y': 246, 'width': 30, 'height': 25},
 {'x': 750, 'y': 362, 'width': 63, 'height': 67},
 {'x': 278, 'y': 315, 'width': 56, 'height': 38},
 {'x': 788, 'y': 339, 'width': 29, 'height': 52},
 {'x': 356, 'y': 274, 'width': 24, 'height': 24},
 {'x': 178, 'y': 418, 'width': 30, 'height': 37},
 {'x': 208, 'y': 459, 'width': 29, 'height': 34},
 {'x': 658, 'y': 541, 'width': 34, 'height': 34},
 {'x': 445.0, 'y': 195.5, 'width': 36.0, 'height': 36.0},
 {'x': 457.9, 'y': 185.4, 'width': 27.7, 'height': 25.1}]

In [59]:
ast.literal_eval(df.iloc[9316]["new_added_pred"])

[{'conf': 0.707,
  'x': 445.0,
  'y': 195.51,
  'width': 35.9944,
  'height': 35.98886},
 {'conf': 0.61, 'x': 457.87, 'y': 185.4, 'width': 27.6673, 'height': 25.13382}]

In [60]:
ast.literal_eval(df_origin.query("video_id == 1 and video_frame==4164").iloc[0]['annotations'])

[{'x': 386, 'y': 531, 'width': 26, 'height': 25},
 {'x': 638, 'y': 228, 'width': 36, 'height': 42},
 {'x': 699, 'y': 314, 'width': 41, 'height': 40},
 {'x': 308, 'y': 246, 'width': 30, 'height': 25},
 {'x': 750, 'y': 362, 'width': 63, 'height': 67},
 {'x': 278, 'y': 315, 'width': 56, 'height': 38},
 {'x': 788, 'y': 339, 'width': 29, 'height': 52},
 {'x': 356, 'y': 274, 'width': 24, 'height': 24},
 {'x': 178, 'y': 418, 'width': 30, 'height': 37},
 {'x': 208, 'y': 459, 'width': 29, 'height': 34},
 {'x': 658, 'y': 541, 'width': 34, 'height': 34}]

In [61]:
df_origin = pd.merge(df_origin, df[['video_id','video_frame','new_annotations','new_added_pred']], on=['video_id','video_frame'], how='left')
df_origin.fillna("[]", inplace=True)
df_origin

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,new_annotations,new_added_pred
0,0,40258,0,0,0-0,[],[],[]
1,0,40258,1,1,0-1,[],[],[]
2,0,40258,2,2,0-2,[],[],[]
3,0,40258,3,3,0-3,[],[],[]
4,0,40258,4,4,0-4,[],[],[]
...,...,...,...,...,...,...,...,...
23496,2,29859,10755,2983,2-10755,[],[],[]
23497,2,29859,10756,2984,2-10756,[],[],[]
23498,2,29859,10757,2985,2-10757,[],[],[]
23499,2,29859,10758,2986,2-10758,[],[],[]


In [62]:
#df_origin.to_csv("../../data/tensorflow-great-barrier-reef/train_with_added_GT.csv",index=False)

# checking FP from best LB

In [165]:
df_origin.head()

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,new_annotations,new_added_pred
0,0,40258,0,0,0-0,[],[],[]
1,0,40258,1,1,0-1,[],[],[]
2,0,40258,2,2,0-2,[],[],[]
3,0,40258,3,3,0-3,[],[],[]
4,0,40258,4,4,0-4,[],[],[]


In [166]:
LB_pred = pd.read_csv("../input/train_oof_v79.csv")

In [167]:
LB_pred['annotations'] = df['new_annotations'].apply(ast.literal_eval)
LB_pred['image_path'] = df['image_path']
LB_pred['real_bbox'] = df['new_real_bbox']
LB_pred['wbf_pred'] = LB_pred['pred'].apply(lambda x: [list(p.values()) for p in ast.literal_eval(x)])

In [168]:
LB_pred["added_pred"] = [list([]) for _ in range(LB_pred.shape[0])]
LB_pred["new_real_bbox"] = [list([]) for _ in range(LB_pred.shape[0])]

In [169]:
for idx, row in LB_pred.iterrows():
    if len(row['wbf_pred']) == 0 or len(row['real_bbox']) == 0:
        continue
    else:
        wbf_pred = np.array(row['wbf_pred'])
        real_gt = np.array(row['real_bbox'])
        added_pred = []
        ious = util.calc_iou(np.array(wbf_pred)[:,1:], np.array(real_gt))
        ious_max = ious.max(axis=1)
        added_pred = wbf_pred[ious_max < 0.3].copy().tolist()
        LB_pred.at[idx,'wbf_pred'] = added_pred

In [170]:
# for video_id, sequence in LB_pred.groupby(["video_id","sequence"]).size().index:
#     make_video(LB_pred, video_id, sequence, 'wbf_pred', out_dir[:-1] + "_2/", 0, name=f"check-{video_id}-{sequence}",s_f=None, e_f=None)

In [171]:
# video_id = 0
# sequence = 8399
# s_f = 4722
# e_f = 4821
# make_video(LB_pred, video_id, sequence, 'wbf_pred', out_dir[:-1] + "_2/", 0, name=f"checkBlackHole-{video_id}-{sequence}-s{s_f}-e{e_f}",s_f=s_f, e_f=e_f)

In [172]:
# pic_out_dir = '/home/vincent/Kaggle/data/tensorflow-great-barrier-reef/pic_check/'
# for video_id, sequence in LB_pred.groupby(["video_id","sequence"]).size().index:
#     save_bulk_images(LB_pred, video_id, sequence, 'wbf_pred', pic_out_dir, 0, name=f"v{video_id}-s{sequence}",s_f=None, e_f=None)

# Add GT from best LB

In [173]:
# remove those unnecessary
pairs = []
pairs.extend(
[
    0, 8900,1, 8946, 0, 5762, 0, 5781,2 ,4691,2,4695,2,4699, 0,9633,
])
for i in range(85, 133):
    pairs.extend([0, i])
for i in range(85, 133):
    pairs.extend([0, i])
for i in range(220, 243):
    pairs.extend([0, i])

for i in range(11843, 11890):
    pairs.extend([0, i])
for i in range(12187, 12241):
    pairs.extend([0, i])

for i in range(1877, 1945):
    pairs.extend([0, i])
for i in range(2032, 2039):
    pairs.extend([0, i])
for i in range(2230, 2272):
    pairs.extend([0, i])
for i in range(1496, 1543):
    pairs.extend([0, i])

for i in range(4448, 4488):
    pairs.extend([0, i])
for i in range(4523, 4548):
    pairs.extend([0, i])
    
for i in range(9376, 9460):
    # remove those fish in the ocean
    pairs.extend([0, i])
for i in range(9523, 9544):
    pairs.extend([0, i])
for i in range(9599, 9630 ):
    pairs.extend([0, i])
for i in range(9041, 9212):
    pairs.extend([1, i])
for i in range(9242, 9275):
    pairs.extend([1, i])
for i in range(9344, 9372):
    pairs.extend([1, i])
for i in range(4056, 4088):
    pairs.extend([1, i])
for i in range(4197, 4265):
    pairs.extend([1, i])
for i in range(5828, 5916):
    pairs.extend([1, i])

for i in range(4658, 4685):
    pairs.extend([2, i])
for i in range(4705, 4719):
    pairs.extend([2, i])
for i in range(4705, 4719):
    pairs.extend([2, i])
for i in range(5975, 6058):
    pairs.extend([0, i])

len(pairs)

2302

In [174]:
added_GT = 0

In [175]:
appeared = set([])
removed_FP = 0
manual_process = []
for i in range(len(pairs)//2):
    video_id = pairs[i * 2]
    video_frame = pairs[i * 2 + 1]
    if (video_id, video_frame) in appeared:
        continue
    else:
        appeared.add((video_id, video_frame))
    idx = LB_pred.query("video_id == @video_id and video_frame == @video_frame").index[0]
    if len(LB_pred.loc[idx, 'wbf_pred']) <= 1:
        added_GT += len(LB_pred.loc[idx,'wbf_pred'])
        LB_pred.at[idx, 'added_pred'] = LB_pred.at[idx, "wbf_pred"]
    else:
        manual_process.extend([video_id, video_frame])
added_GT, len(manual_process)

(243, 58)

In [176]:
LB_pred[LB_pred['added_pred'].apply(len) > 0].query("sequence == 45015")
remove = [6050, 6053, 6054, 6057]
for video_frame in remove:
    LB_pred.at[LB_pred.query('sequence == 45015 and video_frame == @video_frame').index[0], "added_pred"] = []
    added_GT -= 1
added_GT

239

In [177]:
tmp = []
mannual_process_df = None
for i in range(len(manual_process)//2):
    video_id = manual_process[i * 2]
    video_frame = manual_process[i * 2 + 1]
    tmp.append(LB_pred.query("video_id == @video_id and video_frame == @video_frame").copy())
mannual_process_df = pd.concat(tmp)
mannual_process_df.sort_values(["sequence","video_frame"])[["sequence","video_frame","wbf_pred"]]

Unnamed: 0,sequence,video_frame,wbf_pred
6227,996,11867,"[[0.517, 841.0, 18.0, 46.0, 36.0], [0.408, 429..."
11035,8503,5883,"[[0.514, 178.0, 283.0, 40.0, 50.0], [0.46, 333..."
18763,26651,4695,"[[0.541, 293, 333, 27, 28], [0.535, 965, 646, ..."
3587,45015,6051,"[[0.459, 874, 324, 38, 32], [0.436, 964, 220, ..."
3591,45015,6055,"[[0.604, 899, 348, 43, 39], [0.515, 989, 229, ..."
1140,45518,1533,"[[0.63, 311, 330, 28, 20], [0.629, 693, 215, 9..."
1141,45518,1534,"[[0.662, 299, 333, 34, 22], [0.509, 232, 666, ..."
5224,53708,9401,"[[0.45, 370.0, 135.0, 30.0, 25.0], [0.45, 806...."
5262,53708,9439,"[[0.615, 174.0, 280.0, 38.0, 25.0], [0.43, 435..."
5266,53708,9443,"[[0.694, 415.0, 308.0, 40.0, 22.0], [0.416, 13..."


In [180]:
for idx, row in mannual_process_df.iterrows():
    if row['sequence'] == 996:
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] >= 640]
    elif row['sequence'] == 8503:
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] <= 200]
    elif row['sequence'] == 26651:
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] <= 640]
    elif row['sequence'] == 45015:
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] >= 920]
    elif row['sequence'] == 45518:
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] <= 320 and p[2] <= 400 ]
    elif row['sequence'] == 53708:
        if row['video_frame'] in [9451, 9455]:
            mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] <= 640]
        elif row['video_frame'] in [9633]:
            mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] == 115]
        else:
            mannual_process_df.at[idx, 'added_pred'] = mannual_process_df.at[idx, 'wbf_pred']
            
            
    elif row["sequence"] == 59337:
        mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] >= 500]
    elif row['sequence'] == 60754:
        if row['video_frame'] in [9056]:
            mannual_process_df.at[idx, 'added_pred'] = []
        elif row['video_frame'] in [9064]:
            mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[1] <= 590]
        elif row['video_frame'] in [9081]:
            mannual_process_df.at[idx, 'added_pred'] = [p for p in mannual_process_df.at[idx, 'wbf_pred'] if p[0] >= 0.45]
        else:
            mannual_process_df.at[idx, 'added_pred'] = mannual_process_df.at[idx, 'wbf_pred']
            
    added_GT += len(mannual_process_df.at[idx, 'added_pred'])
added_GT

284

In [182]:
LB_pred.loc[mannual_process_df.index, 'added_pred'] = mannual_process_df['added_pred']

In [186]:
LB_pred['new_real_bbox'] = LB_pred.apply(lambda x: x['real_bbox'] + [p[1:] for p in x['added_pred']], axis=1)
LB_pred['combined_bbox'] = LB_pred['new_real_bbox'].apply(lambda pp:  [[1] + p for p in pp])

In [188]:
# make_video(LB_pred[LB_pred['added_pred'].apply(len)>0], None, None, 'combined_bbox', out_dir[:-1] + "_2/", 0, name="added_LB_part",s_f=None, e_f=None)

In [189]:
cols

['video_id', 'sequence', 'video_frame']

In [191]:
if 'mark' in df.columns:
    del df['mark']
LB_pred['mark'] = True
df = pd.merge(df, LB_pred[cols + ['mark']], on=cols, how='left')
df['mark'].fillna(False, inplace=True)

In [192]:
df["new_real_bbox_LB"] = df['new_real_bbox']
df['added_pred_LB'] = [list([]) for i in range(df.shape[0])]
df.loc[df.query('mark').index, "new_real_bbox_LB"] = LB_pred['new_real_bbox'].values
df.loc[df.query('mark').index, "added_pred_LB"] = LB_pred['added_pred'].values

In [193]:
df['new_annotations_LB'] = df['new_real_bbox_LB'].apply(from_bbox_to_str)
df['new_added_pred_LB'] = df['added_pred_LB'].apply(from_pred_to_str)

In [205]:
df_origin.to_csv("../../data/tensorflow-great-barrier-reef/train_with_added_GT.csv",index=False)

In [195]:
# sanity check

In [206]:
df_origin = pd.merge(df_origin, df[['video_id','video_frame','new_annotations_LB','new_added_pred_LB']], on=['video_id','video_frame'], how='left')
df_origin.fillna("[]", inplace=True)
df_origin.head(2)

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,new_annotations,new_added_pred,new_annotations_LB,new_added_pred_LB
0,0,40258,0,0,0-0,[],[],[],[],[]
1,0,40258,1,1,0-1,[],[],[],[],[]


In [201]:
# number of bbox
df_origin_check = df_origin.copy()
annot_cols = ['annotations', "new_annotations", "new_annotations_LB"]
for col in annot_cols:
    df_origin_check[col] = df_origin_check[col].apply(ast.literal_eval)
    print(col, df_origin_check[col].apply(len).sum())

annotations 11898
new_annotations 12897
new_annotations_LB 13181


In [203]:
(13181 / 11898 - 1)

0.10783324928559423

In [202]:
# number of image with bbox
for col in annot_cols:
    print(col, (df_origin_check[col].apply(len) > 0).sum())

annotations 4919
new_annotations 5217
new_annotations_LB 5302


In [204]:
(5302 / 4919 - 1)

0.07786135393372628

# Improve Tracking

In [None]:
# tracking needs to be improved! especially when the camera moves fast

In [None]:
??Tracker

In [None]:
iou_thr = 0.4
skip_box_thr = 0.22
wbf_models =["0204_yolov5s6_B", '0205_yolov5m6_B_LS02']
df["wbf_noTrack_pred"] = None
frame_id = 0
tracker = Tracker(
    distance_function=euclidean_distance, 
    distance_threshold=5,
    hit_inertia_min=3,
    hit_inertia_max=6,
    initialization_delay=2,
)                      
for i, idx in enumerate(df.query("sequence == 18048").index):
    row = df.loc[idx]
    bboxes_l = []
    confs_l = []    
    for model_version in wbf_models:
        BEST_CONF = best_conf_dict[model_version]
        pred_col = "pred_" + model_version 
        prd_bboxs = row[pred_col]
        prd_bboxs = [p for p in prd_bboxs if p[0] >= BEST_CONF]
        bboxes, confis = [p[1:] for p in prd_bboxs], [p[0] for p in prd_bboxs]    
        bboxes = np.array(bboxes,dtype=np.float64)
        bboxes_l.append(bboxes)
        confs_l.append(confis)
    bboxes_l = np.array(bboxes_l)
    wbf_boxes, wbf_confs, _ = run_wbf(bboxes_l, confs_l, iou_thr, skip_box_thr)
    df.at[idx, "wbf_noTrack_pred"] = [[wbf_confs[i]] + wbf_boxes[i].tolist() for i in range(len(wbf_boxes))]
    
    predictions = tracking_function(tracker, frame_id, wbf_boxes, wbf_confs, 0)
    prd_bboxs_tracking =  [[float(p) for p in pred.split(" ")] for pred in predictions]
    df.at[idx, "wbf_pred"] = prd_bboxs_tracking

    frame_id += 1

In [None]:
seq_check = 18048
df_check = df.query("sequence == @seq_check").copy()

In [None]:
df_check["pred_track_num"] = df_check["wbf_pred"].apply(len)
df_check["pred_notrack_num"] = df_check["wbf_noTrack_pred"].apply(len)

In [None]:
df_check[["video_frame","num_bbox","pred_track_num","pred_notrack_num"]].iloc[20:30]
## 11851 is where the things go wrong

# Similarity

In [None]:
import image_similarity_measures
from image_similarity_measures.quality_metrics import metric_functions

In [None]:
metric_functions

In [None]:
df.groupby("sequence").size().sort_values()

In [None]:
max_p = 255
seq_check = [18048,17665,44160,29424]
df_check = df.query("sequence in @seq_check").copy()

prev_image = cv2.imread(str(df.iloc[-1]['image_path']))
for idx, row in tqdm(df_check.iterrows()):
    curr_image = cv2.imread(str(row['image_path']))
    for name, func in metric_functions.items():
        if name in ['rmse']:
            sim = func(curr_image, prev_image, max_p)
        else:
            continue
        df_check.loc[idx, name] = sim
    prev_image = curr_image

In [None]:
fig, axes = plt.subplots(1,2,figsize=(18,9))
axes[0].plot(df_check['rmse'].values)
axes[0].twinx().plot(np.diff(df_check['sequence']) != 0, color='red' )
axes[1].plot(np.abs(np.diff(df_check['rmse'].values)))
#axes[1].twinx().plot(np.diff(df_check['sequence']) != 0, color='red' )
plt.show()

In [None]:
prev_image = cv2.imread(str(df.iloc[-1]['image_path']))
for idx, row in tqdm(df.iterrows()):
    curr_image = cv2.imread(str(row['image_path']))
    for name, func in metric_functions.items():
        if name in ['rmse']:
            sim = func(curr_image, prev_image, max_p)
        else:
            continue
        df.loc[idx, name] = sim
    prev_image = curr_image

In [None]:
fig, axes = plt.subplots(1,2,figsize=(18,9))
axes[0].plot(df['rmse'].values)
axes[0].twinx().plot(np.diff(df['sequence']) != 0, color='red' )
axes[1].plot(np.abs(np.diff(df['rmse'].values)))
#axes[1].twinx().plot(np.diff(df_check['sequence']) != 0, color='red' )
plt.show()

In [None]:
df['rmse_diff_abs'] = df['rmse'].diff().abs()
df_seq_place = df.loc[np.diff(df['sequence'], prepend=0) != 0].copy()

In [None]:
df_seq_place[['sequence','rmse_diff_abs']]#.min()

In [None]:
prev_img