In [None]:
# norfair dependencies, installed from the local wheel files in /kaggle/input/norfair031py3/
%cd /kaggle/input/norfair031py3/
# `-f ./ --no-index`: pip looks for packages in the current directory instead of the package index
!pip install commonmark-0.9.1-py2.py3-none-any.whl -f ./ --no-index
!pip install rich-9.13.0-py3-none-any.whl

# filterpy is provided as a source tree: copy it to /kaggle/working/tmp, `pip install .` from the copy,
# then remove the copy (presumably because the build needs a writable directory -- TODO confirm)
!mkdir /kaggle/working/tmp
!cp -r /kaggle/input/norfair031py3/filterpy-1.4.5/filterpy-1.4.5/ /kaggle/working/tmp/
%cd /kaggle/working/tmp/filterpy-1.4.5/
!pip install .
!rm -rf /kaggle/working/tmp

# norfair
%cd /kaggle/input/norfair031py3/
!pip install norfair-0.3.1-py3-none-any.whl -f ./ --no-index
# leaves the working directory at /kaggle/input; the relative '../input/...' paths used in later cells resolve from there
%cd ..

In [None]:
import numpy as np
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import glob
import shutil
import sys
sys.path.append('../input/tensorflow-great-barrier-reef')
import torch
from PIL import Image
import ast
import albumentations as albu

In [None]:
#ROOT_DIR  = '/kaggle/input/tensorflow-great-barrier-reef/'
#CKPT_PATH = '../input/yolov5m62880-cut-2880-fineturn/exp_yolov5m6_mx05_fp_2880_f2_cut_2880_fineturn/weights/best.pt'
#IMG_SIZE  = 8800
#CONF      = 0.17
#IOU       = 0.4
#dis_thr   = 30
#hit_min   = 3
#hit_max   = 6
#ini_dey   = 1
#AUGMENT   = True
#do_tracking = True
#FDA_aug = False
## CKPT_PATH = '../input/yolov5l6-fineturn/exp_yolov5l6_mx05_fp_1920_f2_1440_fineturn/weights/best.pt'



In [None]:
# Inference configuration; each value is consumed by the cells further down.
ROOT_DIR  = '/kaggle/input/tensorflow-great-barrier-reef/'  # not referenced again in the visible cells
CKPT_PATH = '../input/yolov5l6-fineturn/exp_yolov5l6_mx05_fp_1920_f2_1440_fineturn/weights/best.pt'  # checkpoint passed to load_model()
IMG_SIZE  = 3600  # passed to predict() as the model's `size` argument
CONF      = 0.01  # confidence threshold passed to load_model() (0.17 in the commented-out config)
IOU       = 0.4  # IoU threshold passed to load_model()
dis_thr   = 30  # Tracker(distance_threshold=...)
hit_min   = 3  # Tracker(hit_inertia_min=...)
hit_max   = 6  # Tracker(hit_inertia_max=...)
ini_dey   = 1  # Tracker(initialization_delay=...)
AUGMENT   = False  # passed to predict() as the model's `augment` argument
do_tracking = True  # selects the tracking + stage-2 branch of the inference loop
FDA_aug = False  # when True the loop calls FDA_trans, which is not defined in the visible cells -- keep False unless it is defined

In [None]:
def voc2yolo(bboxes, image_height=720, image_width=1280):
    """
    Convert corner boxes to normalized centre/size boxes.

    voc  => [x1, y1, x2, y2]
    yolo => [xmid, ymid, w, h] (normalized)

    `bboxes` is a numpy array whose last axis holds the four coordinates.
    The input is not modified; a float array of the same shape is returned.
    """
    # float copy: integer pixel coordinates would otherwise be truncated by the division
    out = bboxes.copy().astype(float)

    # normalize x by the image width and y by the image height
    out[..., [0, 2]] /= image_width
    out[..., [1, 3]] /= image_height

    box_w = out[..., 2] - out[..., 0]
    box_h = out[..., 3] - out[..., 1]

    # shift the top-left corner to the box centre, then store the size
    out[..., 0] += box_w / 2
    out[..., 1] += box_h / 2
    out[..., 2] = box_w
    out[..., 3] = box_h

    return out

def yolo2voc(bboxes, image_height=720, image_width=1280):
    """
    Convert normalized centre/size boxes to pixel corner boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    voc  => [x1, y1, x2, y2]

    `bboxes` is a numpy array whose last axis holds the four coordinates.
    The input is not modified; a float array of the same shape is returned.
    """
    out = bboxes.copy().astype(float)  # float copy so the caller's array stays untouched

    # back to pixel units
    out[..., [0, 2]] *= image_width
    out[..., [1, 3]] *= image_height

    # centre -> top-left corner, then top-left + size -> bottom-right corner
    out[..., [0, 1]] -= out[..., [2, 3]] / 2
    out[..., [2, 3]] += out[..., [0, 1]]

    return out

def coco2yolo(bboxes, image_height=720, image_width=1280):
    """
    Convert pixel top-left/size boxes to normalized centre/size boxes.

    coco => [xmin, ymin, w, h]
    yolo => [xmid, ymid, w, h] (normalized)

    `bboxes` is a numpy array whose last axis holds the four coordinates.
    The input is not modified; a float array of the same shape is returned.
    """
    out = bboxes.copy().astype(float)  # float copy: avoids integer truncation on division

    # normalize x/w by the width and y/h by the height
    out[..., [0, 2]] /= image_width
    out[..., [1, 3]] /= image_height

    # (xmin, ymin) => (xmid, ymid)
    out[..., [0, 1]] += out[..., [2, 3]] / 2

    return out

def yolo2coco(bboxes, image_height=720, image_width=1280):
    """
    Convert normalized centre/size boxes to pixel top-left/size boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    coco => [xmin, ymin, w, h]

    `bboxes` is a numpy array whose last axis holds the four coordinates.
    The input is not modified; a float array of the same shape is returned.
    """
    out = bboxes.copy().astype(float)  # float copy so the caller's array stays untouched

    # denormalize to pixel units
    out[..., [0, 2]] *= image_width
    out[..., [1, 3]] *= image_height

    # (xmid, ymid) => (xmin, ymin)
    out[..., [0, 1]] -= out[..., [2, 3]] / 2

    return out

def voc2coco(bboxes, image_height=720, image_width=1280):
    """
    Convert pixel corner boxes to pixel top-left/size boxes.

    voc  => [x1, y1, x2, y2]
    coco => [xmin, ymin, w, h]

    The conversion is done directly (w = x2 - x1, h = y2 - y1) instead of
    going through normalized YOLO coordinates, so no floating-point rounding
    error is introduced by dividing and re-multiplying by the image size.

    Args:
        bboxes: numpy array whose last axis holds [x1, y1, x2, y2].
        image_height, image_width: unused by the direct conversion; kept so
            existing callers that pass them keep working.

    Returns:
        A float array of the same shape; the input is not modified.
    """
    bboxes = bboxes.copy().astype(float)

    # the top-left corner is shared by both formats; only the size has to be derived
    bboxes[..., [2, 3]] -= bboxes[..., [0, 1]]

    return bboxes


def load_image(image_path):
    """
    Read an image from disk and return it in RGB channel order.

    Args:
        image_path: path of the image file.

    Returns:
        The image as returned by cv2.imread, converted from BGR to RGB.

    Raises:
        FileNotFoundError: if cv2.imread could not read the file (it signals
            failure by returning None rather than raising).
    """
    image = cv2.imread(image_path)
    if image is None:
        # fail here with the offending path instead of an opaque cvtColor error
        raise FileNotFoundError(f'could not read image: {image_path}')
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """
    Draw one bounding box (and optionally a filled label tag) on `img` in place.

    Args:
        x: box corners as (x1, y1, x2, y2); values are truncated with int().
        img: image array to draw on; it is modified in place.
        color: colour passed to cv2; a random RGB triple is used when omitted.
        label: optional text drawn in a filled tag above the top-left corner.
        line_thickness: line thickness; derived from the image size when omitted.

    Returns:
        None.
    """
    # `random` is not imported by the notebook's import cell, so import it here;
    # without it the default-colour path raised NameError.
    import random

    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # the tag sits directly above the box's top-left corner
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def _bbox_to_corners(bbox, bbox_format, img_h, img_w):
    # Convert one box given in `bbox_format` to integer pixel corners (x1, y1, x2, y2).
    if bbox_format == 'yolo':
        # normalized centre/size -> pixel centre and half extents
        xc = round(float(bbox[0])*img_w)
        yc = round(float(bbox[1])*img_h)
        half_w = round(float(bbox[2])*img_w/2)
        half_h = round(float(bbox[3])*img_h/2)
        return (xc-half_w, yc-half_h, xc+half_w, yc+half_h)

    if bbox_format == 'coco':
        x1 = int(round(bbox[0]))
        y1 = int(round(bbox[1]))
        w  = int(round(bbox[2]))
        h  = int(round(bbox[3]))
        return (x1, y1, x1+w, y1+h)

    # 'voc_pascal': already corner coordinates
    return (int(round(bbox[0])), int(round(bbox[1])), int(round(bbox[2])), int(round(bbox[3])))


def draw_bboxes(img, bboxes, classes, class_ids, colors = None, show_classes = None, bbox_format = 'yolo', class_name = False, line_thickness = 2):
    """
    Return a copy of `img` with the given boxes drawn on it.

    Args:
        img: image array; it is copied, the original is left untouched.
        bboxes: sequence of boxes in the layout named by `bbox_format`.
        classes: class name of each box (same length as `bboxes`).
        class_ids: integer class id of each box (same length as `bboxes`).
        colors: a single colour, or a list indexed by class id; green when omitted.
        show_classes: class names to draw; all of `classes` when omitted.
        bbox_format: 'yolo' (normalized xmid, ymid, w, h), 'coco' (xmin, ymin, w, h)
            or 'voc_pascal' (x1, y1, x2, y2).
        class_name: label boxes with the class name when True, otherwise with
            the class id.
        line_thickness: forwarded to plot_one_box.

    Returns:
        The annotated copy of the image.

    Raises:
        ValueError: if `bbox_format` is not one of the supported formats.
    """
    if bbox_format not in ('yolo', 'coco', 'voc_pascal'):
        raise ValueError('wrong bbox format')

    image = img.copy()
    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255 ,0) if colors is None else colors
    img_h, img_w = image.shape[0], image.shape[1]

    for idx in range(len(bboxes)):
        bbox   = bboxes[idx]
        cls    = classes[idx]
        cls_id = class_ids[idx]
        color  = colors[cls_id] if isinstance(colors, list) else colors

        if cls not in show_classes:
            continue

        # The 'yolo' branch used to label with an undefined get_label(cls); every
        # format now uses the class id, matching the 'coco' and 'voc_pascal' branches.
        plot_one_box(_bbox_to_corners(bbox, bbox_format, img_h, img_w),
                     image,
                     color = color,
                     label = cls if class_name else str(cls_id),
                     line_thickness = line_thickness)

    return image

def get_bbox(annots):
    """Return, for every annotation dict in `annots`, the list of its values (in the dict's own order)."""
    bboxes = []
    for annot in annots:
        bboxes.append(list(annot.values()))
    return bboxes

def get_imgsize(row):
    """
    Add the image dimensions to a row.

    The original relied on the `imagesize` module, which this notebook never
    imports; the size is now read with PIL's `Image` (already imported), whose
    `.size` attribute is (width, height).

    Args:
        row: mapping with an 'image_path' entry (e.g. a DataFrame row).

    Returns:
        The same `row`, with 'width' and 'height' set.
    """
    # the context manager closes the file handle once the size has been read
    with Image.open(row['image_path']) as im:
        row['width'], row['height'] = im.size
    return row

# Seed numpy's global RNG so the palette below is the same on every run.
np.random.seed(32)
# One (r, g, b) tuple per class; a single class is used, so the list has one entry.
colors = [tuple(np.random.randint(255) for _channel in range(3)) for _class_idx in range(1)]

In [None]:
##############################################################
#                      Tracking helpers                      #
##############################################################

import numpy as np
from norfair import Detection, Tracker

# Convert boxes given as [x_min, y_min, x_max, y_max, score] into norfair.Detection objects.
def to_norfair(detects, frame_id):
    """
    Build one Detection per input box.

    Each Detection carries the box centre as `points`, the score as `scores`
    and `[width, height, frame_id]` as `data`.
    """
    detections = []
    for det in detects:
        x_min, y_min, x_max, y_max, score = det
        centre = np.array([(x_min + x_max) / 2, (y_min + y_max) / 2])
        size_and_frame = np.array([x_max - x_min, y_max - y_min, frame_id])
        detections.append(Detection(points=centre, scores=np.array([score]), data=size_and_frame))

    return detections

# Distance function handed to the Tracker: matches this frame's detections to tracked objects.
def euclidean_distance(detection, tracked_object):
    """Return the norm of the difference between `detection.points` and `tracked_object.estimate`."""
    offset = detection.points - tracked_object.estimate
    return np.linalg.norm(offset)



In [None]:
# Copy Arial.ttf from the attached dataset into /root/.config/Ultralytics/
# (presumably so YOLOv5 finds the font locally instead of trying to download it -- TODO confirm)
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/

In [None]:
def load_model(ckpt_path, conf=0.28, iou=0.40):
    """
    Load a custom YOLOv5 checkpoint through torch.hub from the local repo copy
    and configure its NMS settings.

    Args:
        ckpt_path: path of the checkpoint file.
        conf: NMS confidence threshold.
        iou: NMS IoU threshold.

    Returns:
        The loaded model with `conf`, `iou`, `classes`, `multi_label` and
        `max_det` set.
    """
    hub_kwargs = dict(path=ckpt_path, source='local', force_reload=True)  # local repo
    model = torch.hub.load('/kaggle/input/yolov5-lib-ds', 'custom', **hub_kwargs)

    nms_settings = {
        'conf': conf,          # NMS confidence threshold
        'iou': iou,            # NMS IoU threshold
        'classes': None,       # (optional list) filter by class, i.e. = [0, 15, 16] for persons, cats and dogs
        'multi_label': False,  # NMS multiple labels per box
        'max_det': 20,         # maximum number of detections per image
    }
    for attr_name, attr_value in nms_settings.items():
        setattr(model, attr_name, attr_value)
    return model

# 🔭 Inference

## Helper

In [None]:
def predict(model, img, size=9000, augment=False):
    """
    Run the detector on one image.

    Args:
        model: callable detector; its result must expose `.pandas().xyxy[0]`
            with 'xmin', 'ymin', 'xmax', 'ymax' and 'confidence' columns.
        img: image array; only its height and width are read here.
        size: inference size forwarded to the model.
        augment: forwarded to the model.

    Returns:
        (bboxes, confs): integer [xmin, ymin, w, h] boxes (truncated with
        astype(int)) and their confidences, or two empty lists when nothing
        was detected.
    """
    height, width = img.shape[:2]
    detections = model(img, size=size, augment=augment).pandas().xyxy[0]  # custom inference size
    corner_boxes = detections[['xmin','ymin','xmax','ymax']].values

    # nothing detected: keep the historical "two empty lists" return value
    if not len(corner_boxes):
        return [],[]

    coco_boxes = voc2coco(corner_boxes,height,width).astype(int)
    return coco_boxes, detections.confidence.values
    
def format_prediction(bboxes, confs):
    """
    Build the annotation string for one frame.

    Every box contributes 'conf xmin ymin w h'; the entries are separated by
    single spaces. An empty string is returned when there are no boxes.
    """
    if not len(bboxes)>0:
        return ''

    entries = []
    for idx, box in enumerate(bboxes):
        xmin, ymin, w, h = box
        entries.append(f'{confs[idx]} {xmin} {ymin} {w} {h}')
    return ' '.join(entries).strip(' ')

def show_img(img, bboxes, bbox_format='yolo'):
    """
    Draw `bboxes` on `img`, labelling every box 'starfish', and return the
    result as a PIL image resized to 800x400.
    """
    n_boxes = len(bboxes)
    annotated = draw_bboxes(img = img,
                            bboxes = bboxes,
                            classes = ['starfish']*n_boxes,  # one class name per box
                            class_ids = [0]*n_boxes,         # single class -> id 0 everywhere
                            class_name = True,
                            colors = colors,
                            bbox_format = bbox_format,
                            line_thickness = 2)
    return Image.fromarray(annotated).resize((800, 400))

In [None]:
def tracking_function(tracker, frame_id, bboxes, scores):
    """
    Filter this frame's detections, update the tracker with them and return
    prediction strings for the frame.

    Args:
        tracker: object with an `update(detections=...)` method returning the
            currently tracked objects.
        frame_id: index of the current frame; stored with every detection.
        bboxes: boxes as [xmin, ymin, w, h].
        scores: confidence of each box.

    Returns:
        A list of 'score xmin ymin w h' strings: first the detections that
        passed the filter, then one entry for every tracked object whose last
        detection was not on this frame (placed at the tracker's estimate,
        with the size and score of that last detection).
    """
    detects = []
    predictions = []

    if len(scores)>0:
        for i in range(len(bboxes)):
            box = bboxes[i]
            score = scores[i]
            x_min = int(box[0])
            y_min = int(box[1])
            bbox_width = int(box[2])
            bbox_height = int(box[3])

            # Drop very small boxes. This check runs BEFORE the ratios are computed so a
            # zero width/height box is skipped instead of raising ZeroDivisionError.
            if bbox_width <= 12 or bbox_height <= 12:
                continue

            # Drop boxes with an extreme aspect ratio.
            w_rate = bbox_width/bbox_height
            h_rate = bbox_height/bbox_width
            if (0.2<w_rate<5) and (0.25<h_rate<5):
                detects.append([x_min, y_min, x_min+bbox_width, y_min+bbox_height, score])
                predictions.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, bbox_width, bbox_height))

    # Update tracks using detects from current frame
    tracked_objects = tracker.update(detections=to_norfair(detects, frame_id))
    for tobj in tracked_objects:
        bbox_width, bbox_height, last_detected_frame_id = tobj.last_detection.data
        if last_detected_frame_id == frame_id:  # Skip objects that were detected on current frame
            continue
        # Add objects that have no detections on current frame to predictions
        xc, yc = tobj.estimate[0]
        x_min, y_min = int(round(xc - bbox_width / 2)), int(round(yc - bbox_height / 2))
        score = tobj.last_detection.scores[0]

        # int(): the sizes come back from a numpy array; emit them as plain integers so
        # downstream int() parsing of the string cannot fail on a float repr
        predictions.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, int(bbox_width), int(bbox_height)))

    return predictions

## Stage 2 model

In [None]:
import sys
sys.path.append("../input/tez-lib/")
sys.path.append("../input/timmmaster/")

import tez
import albumentations
import pandas as pd
import cv2
import numpy as np
import timm
import torch.nn as nn
from sklearn import metrics
import torch
from tez.callbacks import EarlyStopping
from tqdm import tqdm

In [None]:
class args:
    """Settings for the stage-2 classifier; used as a plain namespace, never instantiated."""
    fold = 0          # selects the weight file model_f{fold}.bin
    image_size = 96   # crops are resized to image_size x image_size before classification
    batch_size = 64   # predict_starfish_score uses twice this value as its batch size

class StarfishDataset:
    """
    Map-style dataset over in-memory image crops.

    Each item is a dict with the crop as a float tensor in channel-first
    layout ("image") and its target as a float tensor ("targets").
    """

    def __init__(self, images,  targets, augmentations):
        # images: indexable collection of arrays that are transposed from (H, W, C)
        # targets: indexable collection with one value per image
        # augmentations: callable invoked as augmentations(image=...) returning a dict
        #                with an "image" entry, or None to skip augmentation
        self.images, self.targets, self.augmentations = images, targets, augmentations

    def __len__(self):
        return len(self.images)

    def __getitem__(self, item):
        sample = self.images[item]

        if self.augmentations is not None:
            sample = self.augmentations(image=sample)["image"]

        # channel-last -> channel-first, as float32
        chw = np.transpose(sample, (2, 0, 1)).astype(np.float32)
        label = self.targets[item]

        return {
            "image": torch.tensor(chw, dtype=torch.float),
            "targets": torch.tensor(label, dtype=torch.float),
        }

class StarfishModel(tez.Model):
    """
    Stage-2 classifier: a timm backbone whose classifier layer is replaced by
    a 64-unit linear layer, followed by dropout and a 64 -> 32 -> 1 head.
    """

    def __init__(self, model_name):
        super().__init__()

        # backbone without pretrained weights; its classifier is swapped for a 64-d projection
        backbone = timm.create_model(model_name, pretrained=False, in_chans=3)
        backbone.classifier = nn.Linear(backbone.classifier.in_features, 64)
        self.model = backbone

        self.dropout = nn.Dropout(0.1)
        self.dense1 = nn.Linear(64, 32)
        self.dense2 = nn.Linear(32, 1)

    def forward(self, image, targets=None):
        # `targets` is accepted but not used
        features = self.dropout(self.model(image))
        hidden = torch.relu(self.dense1(features))
        output = self.dense2(hidden)
        # (output, 0, {}): the second and third entries are constant placeholders
        return output, 0, {}
    
    


In [None]:
# Build the stage-2 classifier and load the weight file for the fold selected by `args.fold`.
model_stage2 = StarfishModel(model_name="tf_efficientnet_b0_ns")
# NOTE(review): the device is hard-coded to "cuda", so this cell presumably needs a GPU session -- confirm.
model_stage2.load(f"../input/starfish-model/model_f{args.fold}.bin", device="cuda", weights_only=True)

## Init `Env`

In [None]:
# Competition environment: `iter_test` feeds the inference loop below, and every
# iteration of that loop hands its predictions back through `env.predict`.
import greatbarrierreef
env = greatbarrierreef.make_env()# initialize the environment
iter_test = env.iter_test()      # an iterator which loops over the test set and sample submission

In [None]:
# Move to the sibling `working` directory; the final cell reads 'submission.csv' relative to it.
cd ../working

## Run Inference on **Test**

In [None]:
# Crop every predicted box out of an already-loaded frame (nothing is displayed or written to disk).
def img_bb_cropper(img, annotation_fixed):
    """
    Cut the predicted boxes out of a frame.

    Args:
        img: image array of shape (height, width, ...); the frame size is read
            from it instead of being assumed to be 1280x720.
        annotation_fixed: iterable of 'conf x y w h' strings (space separated,
            integer box values), as produced by tracking_function.

    Returns:
        (starfish_imgs, confs, boxes): the cropped image regions, the
        confidence of each kept box (still a string, exactly as it appeared
        in the annotation) and the kept boxes as [x, y, w, h] after clipping.

    Filtering rules:
        * boxes with a non-positive width or height are dropped;
        * boxes that reach or cross the right/bottom edge are dropped
          (NOTE: they are discarded, not clipped);
        * boxes lying entirely left of / above the image are dropped;
        * boxes crossing the left/top edge are clipped to start at 0.
    """
    img_h, img_w = img.shape[0], img.shape[1]

    starfish_imgs = []
    confs = []
    boxes = []
    for ann in annotation_fixed:
        fields = ann.split(' ')
        conf = fields[0]
        x, y, w, h = list(map(int, fields[1:]))[:4]

        if w <= 0 or h <= 0:
            continue
        if x + w >= img_w or y + h >= img_h:
            continue
        if x + w <= 0 or y + h <= 0:
            continue

        # Clip to the left/top border. x + w > 0 and y + h > 0 are guaranteed by the
        # check above, so the clipped width/height stay positive.
        if x < 0:
            w += x
            x = 0
        if y < 0:
            h += y
            y = 0

        confs.append(conf)
        boxes.append([x,y,w,h])
        starfish_imgs.append(img[y:y+h,x:x+w])
    return starfish_imgs, confs, boxes

In [None]:
def predict_starfish_score(test_images, test_aug):
    """
    Score image crops with the stage-2 classifier.

    Args:
        test_images: list of image crops.
        test_aug: augmentation pipeline applied to every crop by the dataset.

    Returns:
        A flat list with one model output per crop; an empty list when there
        are no crops.
    """
    # Frames without any candidate box are common; skip building a dataset and
    # running the model for them.
    if len(test_images) == 0:
        return []

    test_dataset = StarfishDataset(
        images=test_images,
        targets=np.ones(len(test_images)),  # dummy targets: the dataset requires them
        augmentations=test_aug,
    )
    test_predictions = model_stage2.predict(test_dataset, batch_size=2*args.batch_size, n_jobs=-1)

    # flatten the per-batch outputs into one list
    final_test_predictions = []
    for preds in test_predictions:
        final_test_predictions.extend(preds.ravel().tolist())

    return final_test_predictions

In [None]:
# Preprocessing for the stage-2 classifier: resize each crop to a square of
# args.image_size, then normalize per channel
# (the mean/std values are presumably the ImageNet statistics -- TODO confirm).
test_aug = albumentations.Compose(
    [
        albumentations.Resize(args.image_size, args.image_size, p=1),
        albumentations.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0,
        ),
    ],
    p=1.0,
)


# Tracker configured from the config cell. It is created even when do_tracking
# is False, in which case it is simply unused.
tracker = Tracker(
    distance_function=euclidean_distance, 
    distance_threshold=dis_thr,#30
    hit_inertia_min=hit_min,#3
    hit_inertia_max=hit_max,#6
    initialization_delay=ini_dey,#1
)

# Detector with the confidence / IoU thresholds from the config cell.
model = load_model(CKPT_PATH, conf=CONF, iou=IOU)

# Main inference loop. Each iteration takes one (img, pred_df) pair from the
# competition iterator, writes the annotation string into pred_df and passes it
# to env.predict exactly once.
# `tqdm` here is the plain `from tqdm import tqdm` imported in the stage-2 cell,
# which rebinds the notebook variant imported at the top.
if do_tracking:

    frame_id =0
    for idx, (img, pred_df) in enumerate(tqdm(iter_test)):
        if FDA_aug:
            # NOTE(review): FDA_trans is not defined in the visible cells; this
            # branch raises NameError if FDA_aug is switched on.
            img = FDA_trans(image=img)['image']
        # stage 1: detector -> integer [xmin, ymin, w, h] boxes + confidences
        bboxes, confs  = predict(model, img, size=IMG_SIZE, augment=AUGMENT)
        # filter the detections, update the tracker and add tracked-only objects
        predictions = tracking_function(tracker, frame_id, bboxes, confs)
        # crop every surviving box; note that `confs` and `bboxes` are re-bound
        # here to the cropper's outputs (confidences as strings, clipped boxes)
        images_list, confs, bboxes = img_bb_cropper(img ,predictions)
        # stage 2: one classifier output per crop
        scores = predict_starfish_score(images_list, test_aug)
        # keep only the boxes the classifier accepts
        # NOTE(review): StarfishModel.forward applies no sigmoid, so 0.5 is compared
        # against the raw model output -- confirm this is intended.
        predictions = []
        for i in range(len(scores)):
            if scores[i] > 0.5:
                box = bboxes[i]
                predictions.append('{:.2f} {} {} {} {}'.format(float(confs[i]), int(box[0]), int(box[1]), int(box[2]), int(box[3])))

        prediction_str = ' '.join(predictions)
        pred_df['annotations'] = prediction_str
        env.predict(pred_df)
        # show the first three frames as a visual sanity check
        if frame_id < 3:
            if len(predictions)>0:
                # drop the leading confidence and parse 'x y w h' back to ints
                box = [list(map(int,box.split(' ')[1:])) for box in predictions]
            else:
                box = []
            display(show_img(img, box, bbox_format='coco'))
        # frame_id advances once per frame, in step with idx
        frame_id += 1
else:
    # Detector-only path: no tracking and no stage-2 filtering.
    for idx, (img, pred_df) in enumerate(tqdm(iter_test)):
        bboxes, confs  = predict(model, img, size=IMG_SIZE, augment=AUGMENT)
        annot          = format_prediction(bboxes, confs)
        pred_df['annotations'] = annot
        env.predict(pred_df)
        # show the first three frames as a visual sanity check
        if idx<3:
            display(show_img(img, bboxes, bbox_format='coco'))



In [None]:
# Preview the first rows of submission.csv from the current working directory
# (presumably written there by the competition environment during the loop above -- TODO confirm).
sub_df = pd.read_csv('submission.csv')
sub_df.head()