## 📒 Inference Notebooks:
* Train: [Great-Barrier-Reef: YOLOv5 [train] 🌊](https://www.kaggle.com/awsaf49/great-barrier-reef-yolov5-train)
* Infer: [Great-Barrier-Reef: YOLOv5 [infer] 🌊](https://www.kaggle.com/awsaf49/great-barrier-reef-yolov5-infer)
* F2 score : [competition metric implementation](https://www.kaggle.com/bamps53/competition-metric-implementation)

## Import Library

In [None]:
from itertools import groupby
import numpy as np
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
import os
import pickle
import cv2
from multiprocessing import Pool
import matplotlib.pyplot as plt
# import cupy as cp
import ast
import glob

import shutil
import sys
sys.path.append('../input/tensorflow-great-barrier-reef')

from joblib import Parallel, delayed

from IPython.display import display, HTML

from matplotlib import animation, rc
rc('animation', html='jshtml')

%matplotlib inline

## Define model weight, validation images, labels

In [None]:
os.listdir('/kaggle/input/great-barrier-reef-yolov5-train/')

In [None]:
# Paths inside the great-barrier-reef-yolov5-train input: the train/validation
# image lists and the trained checkpoint.
VAL_TXT = '/kaggle/input/great-barrier-reef-yolov5-train/val.txt'
TRAIN_TXT = '/kaggle/input/great-barrier-reef-yolov5-train/train.txt'
BEST_PT = '/kaggle/input/great-barrier-reef-yolov5-train/yolov5/runs/train/exp/weights/best.pt' # best weights of the yolov5 model trained on great-barrier-reef


## Make images, labels directory on /kaggle
refer to https://www.kaggle.com/awsaf49/great-barrier-reef-yolov5-train

In [None]:
ROOT_DIR  = '/kaggle/input/tensorflow-great-barrier-reef/' # competition data: train.csv and train_images/ are read from here
IMAGE_DIR = '/kaggle/images/' # directory to save the copied images
LABEL_DIR = '/kaggle/labels/' # directory to save the YOLO label txt files
# -p: create the directories only if they do not exist yet
!mkdir -p {IMAGE_DIR}
!mkdir -p {LABEL_DIR}

In [None]:
def get_path(row):
    """Add the source-image, copied-image and label paths to one row.

    Reads ``row.video_id`` and ``row.video_frame`` and writes the keys
    ``old_image_path`` (under ROOT_DIR), ``image_path`` (under IMAGE_DIR)
    and ``label_path`` (under LABEL_DIR). Returns the same row.
    """
    video, frame = row.video_id, row.video_frame
    # Flat file name shared by the copied image and its label file.
    stem = f'video_{video}_{frame}'
    row['old_image_path'] = f'{ROOT_DIR}/train_images/video_{video}/{frame}.jpg'
    row['image_path'] = f'{IMAGE_DIR}/{stem}.jpg'
    row['label_path'] = f'{LABEL_DIR}/{stem}.txt'
    return row

In [None]:
# Train Data
# Train data: read the csv and attach the image/label paths to every row.
df = pd.read_csv(f'{ROOT_DIR}/train.csv').progress_apply(get_path, axis=1)
# The annotations column holds the string form of a Python list; parse it safely.
df['annotations'] = df['annotations'].progress_apply(ast.literal_eval)
display(df.head(2))

In [None]:
# Number of annotated boxes per frame.
df['num_bbox'] = df['annotations'].progress_apply(len)
# Percentage of frames with / without at least one box. The resulting Series is
# labelled by booleans, so look the values up by label and default to 0.0 when
# one of the two classes is absent.
data = (df.num_bbox>0).value_counts(normalize=True)*100
print(f"No BBox: {data.get(False, 0.0):0.2f}% | With BBox: {data.get(True, 0.0):0.2f}%")

In [None]:
# When True, keep only the frames that have at least one annotated box.
REMOVE_NOBBOX=True
if REMOVE_NOBBOX:
    df = df.query("num_bbox>0")

In [None]:
def make_copy(path):
    """Copy one source image into IMAGE_DIR under a flat name.

    The last two '/'-separated components of ``path`` (parent directory and
    file name) are joined with '_' to form the destination file name.
    """
    parts = path.split('/')
    file_name, parent_dir = parts[-1], parts[-2]
    shutil.copy(path, os.path.join(IMAGE_DIR, f'{parent_dir}_{file_name}'))

In [None]:
# Copy every source image listed in df into IMAGE_DIR, using a thread pool with
# one job per available CPU (n_jobs=-1); the returned list is discarded.
image_paths = df.old_image_path.tolist()
_ = Parallel(n_jobs=-1, backend='threading')(delayed(make_copy)(path) for path in tqdm(image_paths))

In [None]:
def voc2yolo(image_height, image_width, bboxes):
    """Convert Pascal-VOC boxes to normalized YOLO boxes.

    voc  => [x1, y1, x2, y2]
    yolo => [xmid, ymid, w, h] (normalized)

    Args:
        image_height: image height in pixels.
        image_width: image width in pixels.
        bboxes: array whose last axis holds the four VOC values.

    Returns:
        A new float array of the same shape; the input is not modified.
    """
    # Work on a float copy, otherwise integer input would be truncated to 0.
    bboxes = bboxes.copy().astype(float)

    # Normalize the x values by the width and the y values by the height.
    bboxes[..., [0, 2]] = bboxes[..., [0, 2]] / image_width
    bboxes[..., [1, 3]] = bboxes[..., [1, 3]] / image_height

    w = bboxes[..., 2] - bboxes[..., 0]
    h = bboxes[..., 3] - bboxes[..., 1]

    # (x1, y1) => (xmid, ymid); the last two slots become width and height.
    bboxes[..., 0] = bboxes[..., 0] + w/2
    bboxes[..., 1] = bboxes[..., 1] + h/2
    bboxes[..., 2] = w
    bboxes[..., 3] = h

    return bboxes

def yolo2voc(image_height, image_width, bboxes):
    """Convert normalized YOLO boxes to Pascal-VOC pixel boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    voc  => [x1, y1, x2, y2]

    Returns a new float array; the input is left untouched.
    """
    # astype returns a fresh float array, so integer input is not truncated.
    boxes = bboxes.astype(float)

    # Back to pixel units.
    boxes[..., [0, 2]] *= image_width
    boxes[..., [1, 3]] *= image_height

    # Centre minus half the size is the top-left corner; adding the full
    # size to that corner gives the bottom-right corner.
    size = boxes[..., [2, 3]]
    top_left = boxes[..., [0, 1]] - size / 2
    boxes[..., [0, 1]] = top_left
    boxes[..., [2, 3]] = top_left + size

    return boxes

def coco2yolo(image_height, image_width, bboxes):
    """Convert COCO pixel boxes to normalized YOLO boxes.

    coco => [xmin, ymin, w, h]
    yolo => [xmid, ymid, w, h] (normalized)

    Returns a new float array; the input is left untouched.
    """
    # astype returns a fresh float array, so integer input is not truncated.
    boxes = bboxes.astype(float)

    # Normalize: x and w by the image width, y and h by the image height.
    boxes[..., [0, 2]] /= image_width
    boxes[..., [1, 3]] /= image_height

    # Shift the top-left corner by half the box size to get the centre.
    boxes[..., [0, 1]] += boxes[..., [2, 3]] / 2

    return boxes

def yolo2coco(image_height, image_width, bboxes):
    """Convert normalized YOLO boxes to COCO pixel boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    coco => [xmin, ymin, w, h]

    Returns a new float array; the input is left untouched.
    """
    # astype returns a fresh float array, so integer input is not truncated.
    boxes = bboxes.astype(float)

    # De-normalize: x and w by the image width, y and h by the image height.
    boxes[..., [0, 2]] *= image_width
    boxes[..., [1, 3]] *= image_height

    # Shift the centre back by half the box size to get the top-left corner.
    boxes[..., [0, 1]] -= boxes[..., [2, 3]] / 2

    return boxes


def load_image(image_path):
    """Read an image from disk and return it as an RGB array.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file. ``cv2.imread``
            returns None in that case instead of raising.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f'missing or unreadable image: {image_path}')
    # OpenCV loads images as BGR; convert to RGB.
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one bounding box, and optionally a text label, on ``img`` in place.

    Args:
        x: box corners ``(x1, y1, x2, y2)``; each value is cast to int.
        img: image array that is drawn on directly.
        color: 3-value colour; a random colour is picked when omitted.
        label: optional text drawn on a filled background above the box.
        line_thickness: line width; derived from the image size when omitted.
    """
    # Local import: `random` is not imported at the top of this file.
    import random

    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # Corner of the filled label background, sized to fit the text.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def draw_bboxes(img, bboxes, classes, class_ids, colors = None, show_classes = None, bbox_format = 'yolo', class_name = False, line_thickness = 2):
    """Return a copy of ``img`` with the given bounding boxes drawn on it.

    Args:
        img: image array; it is copied, never modified.
        bboxes: one box per entry, interpreted according to ``bbox_format``.
        classes: class name of each box.
        class_ids: integer class id of each box.
        colors: a list of colours indexed by class id, or a single colour
            used for every box. Defaults to ``(0, 255, 0)``.
        show_classes: only boxes whose class name is in this collection are
            drawn. Defaults to all of ``classes``.
        bbox_format: ``'yolo'`` (normalized xmid, ymid, w, h), ``'coco'``
            (xmin, ymin, w, h in pixels) or ``'voc_pascal'``
            (x1, y1, x2, y2 in pixels).
        class_name: label boxes with the class name when True, otherwise
            with the class id.
        line_thickness: box line width passed to ``plot_one_box``.

    Raises:
        ValueError: if ``bbox_format`` is not one of the supported formats.
    """
    if bbox_format not in ('yolo', 'coco', 'voc_pascal'):
        raise ValueError('wrong bbox format')

    image = img.copy()
    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255, 0) if colors is None else colors
    height, width = image.shape[0], image.shape[1]

    for idx, bbox in enumerate(bboxes):
        cls = classes[idx]
        cls_id = class_ids[idx]
        if cls not in show_classes:
            continue
        color = colors[cls_id] if isinstance(colors, list) else colors

        if bbox_format == 'yolo':
            # Scale the normalized centre and half-size to pixels.
            xmid = round(float(bbox[0]) * width)
            ymid = round(float(bbox[1]) * height)
            half_w = round(float(bbox[2]) * width / 2)
            half_h = round(float(bbox[3]) * height / 2)
            voc_bbox = (xmid - half_w, ymid - half_h, xmid + half_w, ymid + half_h)
        elif bbox_format == 'coco':
            x1 = int(round(bbox[0]))
            y1 = int(round(bbox[1]))
            w = int(round(bbox[2]))
            h = int(round(bbox[3]))
            voc_bbox = (x1, y1, x1 + w, y1 + h)
        else:  # 'voc_pascal'
            voc_bbox = tuple(int(round(bbox[k])) for k in range(4))

        # Every format labels boxes the same way: class name or class id.
        plot_one_box(voc_bbox,
                     image,
                     color = color,
                     label = cls if class_name else str(cls_id),
                     line_thickness = line_thickness)

    return image

def get_bbox(annots):
    """Turn a list of annotation dicts into a list of value lists.

    Each dict contributes its values, in the dict's own order, as one list.
    """
    boxes = []
    for annotation in annots:
        boxes.append(list(annotation.values()))
    return boxes

def get_imgsize(row):
    """Add the ``width`` and ``height`` of the image at ``row['image_path']``.

    The image is read with OpenCV, which this file already imports.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    image = cv2.imread(row['image_path'])
    if image is None:
        raise FileNotFoundError(f"missing or unreadable image: {row['image_path']}")
    # The array shape is (height, width, ...).
    height, width = image.shape[:2]
    row['width'], row['height'] = width, height
    return row


# https://www.kaggle.com/diegoalejogm/great-barrier-reefs-eda-with-animations
def create_animation(ims):
    """Build a matplotlib animation that shows the frames in ``ims`` in order.

    A 16x12 figure with hidden axes is created and its image is swapped for
    every frame; the interval between frames is 1000 // 12 milliseconds.
    """
    figure = plt.figure(figsize=(16, 12))
    plt.axis('off')
    artist = plt.imshow(ims[0])

    def _show_frame(frame_idx):
        # Replace the displayed pixels with the requested frame.
        artist.set_array(ims[frame_idx])
        return [artist]

    return animation.FuncAnimation(
        figure,
        _show_frame,
        frames=len(ims),
        interval=1000 // 12,
    )

# Fixed seed so the same colour is produced on every run.
np.random.seed(32)
# One 3-value colour per class; there is a single class here.
colors = [tuple(np.random.randint(255) for _ in range(3)) for _ in range(1)]

In [None]:
df['bboxes'] = df.annotations.progress_apply(get_bbox)
df.head(2)

In [None]:
# Every row gets the same hard-coded image size (1280x720) instead of reading
# it from the image files.
df['width']  = 1280
df['height'] = 720
display(df.head(2))

In [None]:
# Write one YOLO label file per row of df.
cnt = 0  # number of rows that have no bounding box
all_bboxes = []  # every YOLO box written, collected across all rows
for row_idx in tqdm(range(df.shape[0])):
    row = df.iloc[row_idx]
    image_height = row.height
    image_width  = row.width
    bboxes_coco  = np.array(row.bboxes).astype(np.float32).copy()
    num_bbox     = len(bboxes_coco)
    names        = ['cots']*num_bbox
    labels       = [0]*num_bbox  # class id 0 for every box
    ## Create the annotation file (YOLO format)
    with open(row.label_path, 'w') as f:
        if num_bbox<1:
            # No boxes: leave an empty label file and move on to the next row.
            annot = ''
            f.write(annot)
            cnt+=1
            continue
        bboxes_yolo  = coco2yolo(image_height, image_width, bboxes_coco)
        # Clamp all four normalized values to [0, 1].
        bboxes_yolo  = np.clip(bboxes_yolo, 0, 1)
        all_bboxes.extend(bboxes_yolo)
        for bbox_idx in range(len(bboxes_yolo)):
            # One line per box: "<label> <xmid> <ymid> <w> <h>"; every line
            # except the last one is terminated with a newline.
            annot = [str(labels[bbox_idx])]+ list(bboxes_yolo[bbox_idx].astype(str))+(['\n'] if num_bbox!=(bbox_idx+1) else [''])
            annot = ' '.join(annot)
            annot = annot.strip(' ')
            f.write(annot)
print('Missing:',cnt)

In [None]:
# images, labels folder check
assert os.path.exists(IMAGE_DIR)
assert os.path.exists(LABEL_DIR)

## YOLOV5 install

In [None]:
%cd /kaggle/working
!rm -r /kaggle/working/yolov5
!git clone https://github.com/ultralytics/yolov5 # clone
%cd yolov5
%pip install -qr requirements.txt  # install

from yolov5 import utils
display = utils.notebook_init()  # check

## Run YOLOV5 model in validation images 

In [None]:
!cd yolov5

In [None]:
os.listdir('/kaggle/input/great-barrier-reef-yolov5-train/yolov5/runs/train/exp/weights/')

In [None]:
# move files to /kaggle/working
!cp /kaggle/input/great-barrier-reef-yolov5-train/yolov5/runs/train/exp/weights/best.pt /kaggle/working/
!cp /kaggle/input/great-barrier-reef-yolov5-train/train.txt /kaggle/working/
!cp /kaggle/input/great-barrier-reef-yolov5-train/val.txt /kaggle/working/
!cp /kaggle/input/great-barrier-reef-yolov5-train/bgr.yaml /kaggle/working/

In [None]:
os.listdir('/kaggle/working')

In [None]:
# NOTE(review): `val` is presumably yolov5's val.py, importable because the
# working directory is the yolov5 checkout — confirm; the imported module is
# not used below.
import val
# Run val.py on the dataset described by bgr.yaml with the trained weights,
# at image size 1280. --save-txt and --save-conf ask for per-image txt output
# with confidences (the next section reads it from runs/val/exp/labels/);
# --exist-ok reuses the existing output directory instead of creating a new one.
!python val.py --data ../bgr.yaml\
    --weights ../best.pt\
    --imgsz 1280\
    --conf-thres 0.01\
    --iou-thres 0.3\
    --save-txt\
    --save-conf\
    --exist-ok

## Check how predicted bounding box is created

In [None]:
# val bbox result directory
PRD_BBOX_DIR = '/kaggle/working/yolov5/runs/val/exp/labels/'
print(f'made bounding box of {len(os.listdir(PRD_BBOX_DIR))} images in validation set ')

### Why do some images have no predicted bounding-box txt file?

In [None]:
# Collect the file names of the validation images listed in val.txt.
val_images = []
with open('/kaggle/working/val.txt', 'r') as f:
    # iter(callable, sentinel) keeps reading stripped lines until the first
    # empty one, which marks the end of the file or a blank line.
    stripped_lines = iter(lambda: f.readline().rstrip(), '')
    val_images.extend(os.path.basename(entry) for entry in stripped_lines)
print(f'{len(val_images)} image in validation set')

In [None]:
# Validation images for which val.py wrote no prediction file.
# Build the set of processed image names once, so each membership test is O(1)
# instead of scanning and mutating a list.
processed_images = set()
for file in os.listdir(PRD_BBOX_DIR):
    img_name = os.path.splitext(file)[0]+'.jpg'
    processed_images.add(img_name)
not_processed_images = [name for name in val_images if name not in processed_images]
print(f"yolov5 model doesn't create bounding box for {len(not_processed_images)} images")

The model did not detect any starfish in the images listed in `not_processed_images`, so their ground-truth boxes are counted as False Negatives (FN).

Run the code below to confirm that ground-truth bounding boxes exist for the images in `not_processed_images`.

In [None]:
# The model did not detect any starfish in "not_processed_images", so their
# ground-truth boxes are counted as False Negatives (FN).
# Show the first 20 of these images with the number of ground-truth boxes of
# each. With REMOVE_NOBBOX enabled, IMAGE_DIR only holds images that have boxes.
for image_name in not_processed_images[:20]:
    img = cv2.imread(IMAGE_DIR+image_name)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.title(image_name)
    plt.show()
    txt_name = image_name[:-4]+'.txt'
    with open(LABEL_DIR+txt_name, 'r') as f:
        r = f.read()
    # One box per non-empty line, so an empty label file counts as 0 boxes.
    count = sum(1 for line in r.splitlines() if line.strip())
    print(f"{count} ground truth bounding box exits")

## Calculate F2 score on validation set
reference : [competition metric implementation](https://www.kaggle.com/bamps53/competition-metric-implementation)

In [None]:
def calc_iou(bboxes1, bboxes2, bbox_mode='xywh'):
    """Pairwise IoU between two sets of boxes.

    Args:
        bboxes1: (N, 4) array.
        bboxes2: (M, 4) array.
        bbox_mode: ``'xywh'`` means (xmin, ymin, w, h); any other value means
            the boxes are already (x1, y1, x2, y2).

    Returns:
        (N, M) array of IoU values. Widths and heights are measured with the
        inclusive "+ 1" convention.
    """
    assert bboxes1.ndim == 2 and bboxes1.shape[1] == 4
    assert bboxes2.ndim == 2 and bboxes2.shape[1] == 4

    first = bboxes1.copy()
    second = bboxes2.copy()

    if bbox_mode == 'xywh':
        # (x, y, w, h) -> (x1, y1, x2, y2)
        first[:, 2:] += first[:, :2]
        second[:, 2:] += second[:, :2]

    # Column vectors for the first set and row vectors for the second, so
    # that broadcasting yields every (first, second) pair.
    ax1, ay1, ax2, ay2 = (first[:, k:k + 1] for k in range(4))
    bx1, by1, bx2, by2 = (second[:, k:k + 1].T for k in range(4))

    overlap_w = np.maximum(np.minimum(ax2, bx2) - np.maximum(ax1, bx1) + 1, 0)
    overlap_h = np.maximum(np.minimum(ay2, by2) - np.maximum(ay1, by1) + 1, 0)
    intersection = overlap_w * overlap_h

    area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)
    return intersection / (area_a + area_b - intersection)

def f_beta(tp, fp, fn, beta=2):
    """Return the F-beta score for the given counts.

    Args:
        tp: number of true positives.
        fp: number of false positives.
        fn: number of false negatives.
        beta: weight of recall relative to precision.

    Returns:
        ``(1 + beta^2) * tp / ((1 + beta^2) * tp + beta^2 * fn + fp)``, or
        ``0.0`` when all counts are zero and the score is undefined.
    """
    beta_sq = beta**2
    denominator = (1+beta_sq)*tp + beta_sq*fn + fp
    if denominator == 0:
        # Nothing to detect and nothing predicted: avoid dividing by zero.
        return 0.0
    return (1+beta_sq)*tp / denominator

def calc_is_correct_at_iou_th(gt_bboxes, pred_bboxes, iou_th, verbose=False):
    """Greedily match predictions to ground truth at one IoU threshold.

    Predictions are processed in the given order. Each one is compared with
    the ground-truth boxes that are still unmatched: when its best IoU is
    above ``iou_th`` it is a true positive and that ground-truth box is
    removed, otherwise it is a false positive. Column 0 of ``pred_bboxes`` is
    skipped; the remaining four columns are the box. ``verbose`` is unused.

    Returns:
        ``(tp, fp, fn)`` where ``fn`` is the number of unmatched ground-truth
        boxes.
    """
    remaining_gt = gt_bboxes.copy()
    predictions = pred_bboxes.copy()
    num_pred = len(predictions)

    tp = fp = 0
    for rank, prediction in enumerate(predictions, start=1):
        ious = calc_iou(remaining_gt, prediction[None, 1:])
        if ious.max() > iou_th:
            tp += 1
            # Each ground-truth box can be matched only once.
            remaining_gt = np.delete(remaining_gt, ious.argmax(), axis=0)
        else:
            fp += 1
        if len(remaining_gt) == 0:
            # No ground truth left: every later prediction is a false positive.
            fp += num_pred - rank
            break

    return tp, fp, len(remaining_gt)

def calc_is_correct(gt_bboxes, pred_bboxes, iou_th=0.5):
    """Count true positives, false positives and false negatives for one image.

    gt_bboxes: (N, 4) np.array in xywh format
    pred_bboxes: (N, 5) np.array in conf+xywh format

    Returns the tuple ``(tps, fps, fns)``.
    """
    num_gt, num_pred = len(gt_bboxes), len(pred_bboxes)

    if num_gt == 0:
        # Without ground truth every prediction is a false positive.
        return 0, num_pred, 0
    if num_pred == 0:
        # Without predictions every ground-truth box is missed.
        return 0, 0, num_gt

    # Highest confidence first.
    order = np.argsort(pred_bboxes[:, 0])[::-1]
    tps, fps, fns = calc_is_correct_at_iou_th(gt_bboxes, pred_bboxes[order], iou_th)
    return tps, fps, fns

def calc_f2_score(gt_bboxes_list, pred_bboxes_list, verbose=False):
    """Mean F2 score over the IoU thresholds ``np.arange(0.3, 0.85, 0.05)``.

    gt_bboxes_list: list of (N, 4) np.array in xywh format
    pred_bboxes_list: list of (N, 5) np.array in conf+xywh format

    For each threshold the counts are summed over all images before F2 is
    computed and printed. With ``verbose`` the per-image counts are printed too.
    """
    thresholds = np.arange(0.3, 0.85, 0.05)
    f2s = []
    for iou_th in thresholds:
        tps = fps = fns = 0
        for gt_bboxes, pred_bboxes in zip(gt_bboxes_list, pred_bboxes_list):
            tp, fp, fn = calc_is_correct(gt_bboxes, pred_bboxes, iou_th)
            tps, fps, fns = tps + tp, fps + fp, fns + fn
            if not verbose:
                continue
            num_gt = len(gt_bboxes)
            num_pred = len(pred_bboxes)
            print(f'num_gt:{num_gt:<3} num_pred:{num_pred:<3} tp:{tp:<3} fp:{fp:<3} fn:{fn:<3}')
        f2 = f_beta(tps, fps, fns, beta=2)
        print(f'f2@{iou_th}:{f2}')
        f2s.append(f2)
    return np.mean(f2s)

In [None]:
# Both the label files and the val.py prediction files store boxes in YOLO
# format (normalized xmid, ymid, w, h), while calc_iou expects pixel boxes as
# (xmin, ymin, w, h). Without converting, its "+ 1" pixel convention swamps the
# normalized values and every IoU comes out close to 1.
IMG_WIDTH, IMG_HEIGHT = 1280, 720  # same size as df['width'] / df['height']


def read_yolo_rows(txt_path):
    """Return the whitespace-split fields of every non-empty line of a txt file."""
    with open(txt_path, 'r') as f:
        return [line.split() for line in f if line.strip()]


gt_bboxs_list, prd_bboxs_list = [], []
count = 0
for image_file in val_images:
    txt_name = image_file[:-4]+'.txt'

    # Ground truth line: "class xmid ymid w h" -> keep the four box values.
    gt_rows = [row[1:5] for row in read_yolo_rows(LABEL_DIR+txt_name)]
    # reshape keeps a well-formed (0, 4) array when there are no boxes.
    gt_bboxs = np.array(gt_rows, dtype=float).reshape(-1, 4)
    gt_bboxs = yolo2coco(IMG_HEIGHT, IMG_WIDTH, gt_bboxs)

    # Prediction line: "class xmid ymid w h conf" -> reorder to conf + box.
    prd_bboxs = np.zeros((0, 5))
    if os.path.exists(PRD_BBOX_DIR+txt_name):
        prd_rows = [[row[5], *row[1:5]] for row in read_yolo_rows(PRD_BBOX_DIR+txt_name)]
        prd_bboxs = np.array(prd_rows, dtype=float).reshape(-1, 5)
        prd_bboxs[:, 1:] = yolo2coco(IMG_HEIGHT, IMG_WIDTH, prd_bboxs[:, 1:])

    gt_bboxs_list.append(gt_bboxs)
    prd_bboxs_list.append(prd_bboxs)
    count += 1
print(f'{count} bound boxs appended to list')

In [None]:
score = calc_f2_score(gt_bboxs_list, prd_bboxs_list, verbose=False)

In [None]:
print(f'f2 score for validation set is {score}')