In [1]:
import numpy as np
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import glob
import shutil
import sys
import torch
from sklearn.model_selection import GroupKFold
from PIL import Image
import ast
import sahi
from sahi.predict import get_sliced_prediction

# 📌 Key-Points
* One have to submit prediction using the provided **python time-series API**, which makes this competition different from previous Object Detection Competitions.
* Each prediction row needs to include all bounding boxes for the image. Submission is format seems also **COCO** which means `[x_min, y_min, width, height]`
* Copmetition metric `F2` tolerates some false positives(FP) in order to ensure very few starfish are missed. Which means tackling **false negatives(FN)** is more important than false positives(FP). 
$$F2 = 5 \cdot \frac{precision \cdot recall}{4\cdot precision + recall}$$

## Please Upvote if you find this Helpful

# 📖 Meta Data
* `train_images/` - Folder containing training set photos of the form `video_{video_id}/{video_frame}.jpg`.

* `[train/test].csv` - Metadata for the images. As with other test files, most of the test metadata data is only available to your notebook upon submission. Just the first few rows available for download.

* `video_id` - ID number of the video the image was part of. The video ids are not meaningfully ordered.
* `video_frame` - The frame number of the image within the video. Expect to see occasional gaps in the frame number from when the diver surfaced.
* `sequence` - ID of a gap-free subset of a given video. The sequence ids are not meaningfully ordered.
* `sequence_frame` - The frame number within a given sequence.
* `image_id` - ID code for the image, in the format `{video_id}-{video_frame}`
* `annotations` - The bounding boxes of any starfish detections in a string format that can be evaluated directly with Python. Does not use the same format as the predictions you will submit. Not available in test.csv. A bounding box is described by the pixel coordinate `(x_min, y_min)` of its lower left corner within the image together with its `width` and `height` in pixels --> (COCO format).

In [2]:
ROOT_DIR  = './tensorflow-great-barrier-reef'
CKPT_PATH = './yolov5/runs/train/exp67/weights/best.pt'
IMG_SIZE  = 2560
CONF      = 0.01
IOU       = 0.6
AUGMENT   = False

In [3]:
def get_path(row):
    row['image_path'] = f'{ROOT_DIR}/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    return row

In [4]:
# Train Data
df = pd.read_csv(f'{ROOT_DIR}/train.csv')
df = df.progress_apply(get_path, axis=1)
df['annotations'] = df['annotations'].progress_apply(lambda x: ast.literal_eval(x))
display(df.head(2))

  0%|          | 0/23501 [00:00<?, ?it/s]

  0%|          | 0/23501 [00:00<?, ?it/s]

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,image_path
0,0,40258,0,0,0-0,[],./tensorflow-great-barrier-reef/train_images/v...
1,0,40258,1,1,0-1,[],./tensorflow-great-barrier-reef/train_images/v...


## Number of BBoxes

In [5]:
df['num_bbox'] = df['annotations'].progress_apply(lambda x: len(x))
data = (df.num_bbox>0).value_counts()/len(df)*100
print(f"No BBox: {data[0]:0.2f}% | With BBox: {data[1]:0.2f}%")

  0%|          | 0/23501 [00:00<?, ?it/s]

No BBox: 79.07% | With BBox: 20.93%


In [6]:
NUM_FOLDS = 3
kf = GroupKFold(n_splits = NUM_FOLDS)
df = df.reset_index(drop=True)
df['fold'] = -1
for fold, (train_idx, val_idx) in enumerate(kf.split(df, y = df.video_id.tolist(), groups=df.video_id.tolist())):
    df.loc[val_idx, 'fold'] = fold

df.head(5)

Unnamed: 0,video_id,sequence,video_frame,sequence_frame,image_id,annotations,image_path,num_bbox,fold
0,0,40258,0,0,0-0,[],./tensorflow-great-barrier-reef/train_images/v...,0,2
1,0,40258,1,1,0-1,[],./tensorflow-great-barrier-reef/train_images/v...,0,2
2,0,40258,2,2,0-2,[],./tensorflow-great-barrier-reef/train_images/v...,0,2
3,0,40258,3,3,0-3,[],./tensorflow-great-barrier-reef/train_images/v...,0,2
4,0,40258,4,4,0-4,[],./tensorflow-great-barrier-reef/train_images/v...,0,2


# 🔨 Helper

In [7]:
def voc2yolo(bboxes, image_height=720, image_width=1280):
    """
    voc  => [x1, y1, x2, y1]
    yolo => [xmid, ymid, w, h] (normalized)
    """
    
    bboxes = bboxes.copy().astype(float) # otherwise all value will be 0 as voc_pascal dtype is np.int
    
    bboxes[..., [0, 2]] = bboxes[..., [0, 2]]/ image_width
    bboxes[..., [1, 3]] = bboxes[..., [1, 3]]/ image_height
    
    w = bboxes[..., 2] - bboxes[..., 0]
    h = bboxes[..., 3] - bboxes[..., 1]
    
    bboxes[..., 0] = bboxes[..., 0] + w/2
    bboxes[..., 1] = bboxes[..., 1] + h/2
    bboxes[..., 2] = w
    bboxes[..., 3] = h
    
    return bboxes

def yolo2voc(bboxes, image_height=720, image_width=1280):
    """
    yolo => [xmid, ymid, w, h] (normalized)
    voc  => [x1, y1, x2, y1]
    
    """ 
    bboxes = bboxes.copy().astype(float) # otherwise all value will be 0 as voc_pascal dtype is np.int
    
    bboxes[..., [0, 2]] = bboxes[..., [0, 2]]* image_width
    bboxes[..., [1, 3]] = bboxes[..., [1, 3]]* image_height
    
    bboxes[..., [0, 1]] = bboxes[..., [0, 1]] - bboxes[..., [2, 3]]/2
    bboxes[..., [2, 3]] = bboxes[..., [0, 1]] + bboxes[..., [2, 3]]
    
    return bboxes

def coco2yolo(bboxes, image_height=720, image_width=1280):
    """
    coco => [xmin, ymin, w, h]
    yolo => [xmid, ymid, w, h] (normalized)
    """
    
    bboxes = bboxes.copy().astype(float) # otherwise all value will be 0 as voc_pascal dtype is np.int
    
    # normolizinig
    bboxes[..., [0, 2]]= bboxes[..., [0, 2]]/ image_width
    bboxes[..., [1, 3]]= bboxes[..., [1, 3]]/ image_height
    
    # converstion (xmin, ymin) => (xmid, ymid)
    bboxes[..., [0, 1]] = bboxes[..., [0, 1]] + bboxes[..., [2, 3]]/2
    
    return bboxes

def yolo2coco(bboxes, image_height=720, image_width=1280):
    """
    yolo => [xmid, ymid, w, h] (normalized)
    coco => [xmin, ymin, w, h]
    
    """ 
    bboxes = bboxes.copy().astype(float) # otherwise all value will be 0 as voc_pascal dtype is np.int
    
    # denormalizing
    bboxes[..., [0, 2]]= bboxes[..., [0, 2]]* image_width
    bboxes[..., [1, 3]]= bboxes[..., [1, 3]]* image_height
    
    # converstion (xmid, ymid) => (xmin, ymin) 
    bboxes[..., [0, 1]] = bboxes[..., [0, 1]] - bboxes[..., [2, 3]]/2
    
    return bboxes

def voc2coco(bboxes, image_height=720, image_width=1280):
    bboxes  = voc2yolo(bboxes, image_height, image_width)
    bboxes  = yolo2coco(bboxes, image_height, image_width)
    return bboxes


def load_image(image_path):
    return cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)


def plot_one_box(x, img, color=None, label=None, line_thickness=None, conf=None):
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        if conf:
            print(conf)
            cv2.putText(img, str(conf), (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
#         else:
#             cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def draw_bboxes(img, bboxes, classes, class_ids, confs = None, colors = None, show_classes = None, bbox_format = 'yolo', class_name = False, line_thickness = 2):  
     
    image = img.copy()
    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255 ,0) if colors is None else colors
    
    if bbox_format == 'yolo':
        
        for idx in range(len(bboxes)):  
            
            bbox  = bboxes[idx]
            cls   = classes[idx]
            cls_id = class_ids[idx]
            color = colors[cls_id] if type(colors) is list else colors
            
            if cls in show_classes:
            
                x1 = round(float(bbox[0])*image.shape[1])
                y1 = round(float(bbox[1])*image.shape[0])
                w  = round(float(bbox[2])*image.shape[1]/2) #w/2 
                h  = round(float(bbox[3])*image.shape[0]/2)

                voc_bbox = (x1-w, y1-h, x1+w, y1+h)
                plot_one_box(voc_bbox, 
                             image,
                             color = color,
                             label = cls if class_name else str(get_label(cls)),
                             line_thickness = line_thickness,
                             conf = confs[idx])
            
    elif bbox_format == 'coco':
        
        for idx in range(len(bboxes)):  
            
            bbox  = bboxes[idx]
            cls   = classes[idx]
            cls_id = class_ids[idx]
            color = colors[cls_id] if type(colors) is list else colors
            
            if cls in show_classes:            
                x1 = int(round(bbox[0]))
                y1 = int(round(bbox[1]))
                w  = int(round(bbox[2]))
                h  = int(round(bbox[3]))

                voc_bbox = (x1, y1, x1+w, y1+h)
                plot_one_box(voc_bbox, 
                             image,
                             color = color,
                             label = cls if class_name else str(cls_id),
                             line_thickness = line_thickness,
                             conf = confs[idx])

    elif bbox_format == 'voc_pascal':
        
        for idx in range(len(bboxes)):  
            
            bbox  = bboxes[idx]
            cls   = classes[idx]
            cls_id = class_ids[idx]
            color = colors[cls_id] if type(colors) is list else colors
            
            if cls in show_classes: 
                x1 = int(round(bbox[0]))
                y1 = int(round(bbox[1]))
                x2 = int(round(bbox[2]))
                y2 = int(round(bbox[3]))
                voc_bbox = (x1, y1, x2, y2)
                plot_one_box(voc_bbox, 
                             image,
                             color = color,
                             label = cls if class_name else str(cls_id),
                             line_thickness = line_thickness)
    else:
        raise ValueError('wrong bbox format')
    
    return image

def get_bbox(annots):
    bboxes = [list(annot.values()) for annot in annots]
    return bboxes

def get_imgsize(row):
    row['width'], row['height'] = imagesize.get(row['image_path'])
    return row

np.random.seed(32)
colors = [(np.random.randint(255), np.random.randint(255), np.random.randint(255))\
          for idx in range(1)]

In [8]:
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/

The syntax of the command is incorrect.
'cp' is not recognized as an internal or external command,
operable program or batch file.


In [9]:
def load_model(ckpt_path, conf=0.25, iou=0.50):
    model = torch.hub.load('./yolov5',
                           'custom',
                           path=ckpt_path,
                           source='local',
                           force_reload=True)  # local repo
    model.conf = conf  # NMS confidence threshold
    model.iou  = iou  # NMS IoU threshold
    model.classes = None   # (optional list) filter by class, i.e. = [0, 15, 16] for persons, cats and dogs
    model.multi_label = False  # NMS multiple labels per box
    model.max_det = 1000  # maximum number of detections per image
    return model

# 🔭 Inference

## Helper

In [10]:
def predict(model, img, size=1280, augment=False):
    height, width = img.shape[:2]
    results = model(img, size=size, augment=augment)  # custom inference size
    preds   = results.pandas().xyxy[0]
    bboxes  = preds[['xmin','ymin','xmax','ymax']].values
    if len(bboxes):
        bboxes  = voc2coco(bboxes,height,width).astype(int)
        confs   = preds.confidence.values
        return bboxes, confs
    else:
        return [],[]
    
def format_prediction(bboxes, confs):
    annot = ''
    if len(bboxes)>0:
        for idx in range(len(bboxes)):
            xmin, ymin, w, h = bboxes[idx]
            conf             = confs[idx]
            annot += f'{conf} {xmin} {ymin} {w} {h}'
            annot +=' '
        annot = annot.strip(' ')
    return annot

def show_img(img, bboxes, bboxes_true=False, scores=None, bbox_format='yolo'):
    names  = ['starfish']*len(bboxes)
    labels = [0]*len(bboxes)
    img    = draw_bboxes(img = img,
                           bboxes = bboxes, 
                           classes = names,
                           class_ids = labels,
                           confs = scores,
                           class_name = True, 
                           colors = (0, 255, 0), 
                           bbox_format = bbox_format,
                           line_thickness = 2)
    if bboxes_true:
        names_true  = ['starfish']*len(bboxes_true)
        labels_true = [0]*len(bboxes_true)
        img = draw_bboxes(img = img,
                               bboxes = bboxes_true, 
                               classes = names_true,
                               class_ids = labels_true,
                               class_name = True, 
                               colors = (255, 0, 0), 
                               bbox_format = bbox_format,
                               line_thickness = 2)
    return Image.fromarray(img).resize((800, 400))

In [11]:
sharp_filter = np.array([[0,-1,0], [-1,5,-1], [0,-1,0]])
def gamma_enhance(img, gamma=1.0):
    lookUpTable = np.empty((1,256), np.uint8)
    for i in range(256):
        lookUpTable[0,i] = np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255)
    return cv2.LUT(img, lookUpTable)

## Run Inference on **Train**

In [12]:
# model = load_model(CKPT_PATH, conf=CONF, iou=IOU)
# image_paths = df[df.num_bbox>1].iloc[0:500:100].image_path.tolist()
# for idx, path in enumerate(image_paths):
#     img = cv2.imread(path)[...,::-1]
#     bboxes, confis = predict(model, img, size=IMG_SIZE, augment=AUGMENT)
#     display(show_img(img, bboxes, bbox_format='coco'))
#     if idx>5:
#         break

In [13]:
from sahi.model import Yolov5DetectionModel
from sahi.predict import get_prediction, get_sliced_prediction, predict

In [14]:
annotations = df[df['annotations'].str.len()>1].annotations

In [15]:
area = 0
count = 0
width = 0
height = 0
max_w = 0
max_h = 0
for frame in annotations:
    for box in frame:
        area += box['width'] * box['height']
        width += box['width']
        height += box['height']
        max_w = max(box['width'], max_w)
        max_h = max(box['height'], max_h)
        count += 1
print("Average bbox area:", area/count, "Average Width:", width/count, "Average height:", height/count)
print("Max width and height:", max_w, max_h)

Average bbox area: 2198.0414422337035 Average Width: 47.68923821039903 Average height: 42.65362207321095
Max width and height: 141 126


In [19]:
# model = load_model(CKPT_PATH, conf=CONF, iou=IOU)
detection_model = Yolov5DetectionModel(
    model_path=CKPT_PATH,
    confidence_threshold=0.15,
    device="cuda:0"
)

image_paths = df[df.num_bbox>1].iloc[0:500:100].image_path.tolist()
image_paths.append(df.iloc[12716].image_path)

for idx, path in enumerate(image_paths):
    bboxes_true = [list(box.values()) for box in df[df.image_path == path].annotations.values[0]]
    result = get_sliced_prediction(
    path,
    detection_model,
    image_size = 2560,
    slice_height = 256,
    slice_width = 256,
    overlap_height_ratio = 0.2,
    overlap_width_ratio = 0.2
)
    bboxes = [pred.bbox.to_coco_bbox() for pred in result.object_prediction_list]
    scores = [pred.score.value for pred in result.object_prediction_list]
    img = np.array(result.image)
    display(show_img(img, bboxes, bboxes_true, scores, bbox_format='coco'))
    print(scores)

#     break
#     img = cv2.imread(path)[...,::-1]
#     result = get_sliced_prediction(
#     img,
#     model,
#     slice_height = 256,
#     slice_width = 256,
#     overlap_height_ratio = 0.2,
#     overlap_width_ratio = 0.2
# )
#     display(show_img(img, bboxes, bbox_format='coco'))
#     if idx>5:
#         break

AttributeError: 'NoneType' object has no attribute 'names'

In [None]:
# model = load_model(CKPT_PATH, conf=CONF, iou=IOU)

df_temp = df[df['fold'] == 0]
image_paths = df_temp[df_temp.num_bbox>1].iloc[0:500:100].image_path.tolist()
# image_paths.append(df.iloc[12716].image_path)

for idx, path in enumerate(image_paths):
    bboxes_true = [list(box.values()) for box in df_temp[df_temp.image_path == path].annotations.values[0]]
    result = get_sliced_prediction(
    path,
    detection_model,
    image_size = 2560,
    slice_height = 256,
    slice_width = 256,
    overlap_height_ratio = 0.2,
    overlap_width_ratio = 0.2
)
    bboxes = [pred.bbox.to_coco_bbox() for pred in result.object_prediction_list]
    scores = [pred.score.value for pred in result.object_prediction_list]
    img = np.array(result.image)
    display(show_img(img, bboxes, bboxes_true, bbox_format='coco'))
    print(scores)

#     break
#     img = cv2.imread(path)[...,::-1]
#     result = get_sliced_prediction(
#     img,
#     model,
#     slice_height = 256,
#     slice_width = 256,
#     overlap_height_ratio = 0.2,
#     overlap_width_ratio = 0.2
# )
#     display(show_img(img, bboxes, bbox_format='coco'))
#     if idx>5:
#         break

## Init `Env`

In [None]:
import greatbarrierreef
env = greatbarrierreef.make_env()# initialize the environment
iter_test = env.iter_test()      # an iterator which loops over the test set and sample submission

## Run Inference on **Test**

In [None]:
# model = load_model(CKPT_PATH, conf=CONF, iou=IOU)
# for idx, (img, pred_df) in enumerate(tqdm(iter_test)):
#     bboxes, confs  = predict(model, img, size=IMG_SIZE, augment=AUGMENT)
#     annot          = format_prediction(bboxes, confs)
#     pred_df['annotations'] = annot
#     env.predict(pred_df)
# #     if idx<3:
# #         display(show_img(img, bboxes, bbox_format='coco'))

In [None]:
detection_model = Yolov5DetectionModel(
    model_path=CKPT_PATH,
    confidence_threshold=0.1,
    device="cuda:0"
)

for idx, (img, pred_df) in enumerate(tqdm(iter_test)):
    result = get_sliced_prediction(
    img,
    detection_model,
#     image_size = 3600,
    slice_height = 256,
    slice_width = 256,
    overlap_height_ratio = 0.2,
    overlap_width_ratio = 0.2
)
    bboxes = [pred.bbox.to_coco_bbox() for pred in result.object_prediction_list]
    confs = [pred.score.value for pred in result.object_prediction_list]
    annot          = format_prediction(bboxes, confs)
    pred_df['annotations'] = annot
    env.predict(pred_df)
    if idx<3:
        display(show_img(img, bboxes, bbox_format='coco'))

# 👀 Check Submission

In [None]:
sub_df = pd.read_csv('submission.csv')
sub_df.head()

## Please Upvote if you find this Helpful

<img src="https://www.pngall.com/wp-content/uploads/2018/04/Under-Construction-PNG-File.png">