# Merging public bounding box datasets 👨‍🔬
Many public notebooks and datasets provide labeled images that can be used to crop the dataset. Let's merge them together!<br>
We may want either to use only the manual annotations to fine-tune a YOLOv5 model, or to use all of them to directly create a cropped dataset.

## Sources for this notebook
* https://www.kaggle.com/bsridatta/happierwhale
* https://www.kaggle.com/phalanx/whale2-cropped-dataset
* https://www.kaggle.com/awsaf49/happywhale-boundingbox-yolov5
* https://www.kaggle.com/yusukesueyoshi/happy-whale-and-dolphin-anotated-in-yolov5-format
* http://happywhale.theoboyer.fr/data/dataset.csv
* https://www.kaggle.com/jpbremer/fullbodywhaleannotations

<div style="background-color: #c75a5a; border-radius: 10px; text-align: center; padding: 25px">
        Previous versions were wrong! I fixed the interpretation of the YOLO coordinates in the latest version.
</div>

In [None]:
import ast
import json
import os
from random import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Column-oriented accumulator for every collected bounding box: one
# independent list per column, filled in lockstep by submit_annotation().
total_data = {
    column: []
    for column in ("image", "xmin", "ymin", "xmax", "ymax", "source", "type")
}

def submit_annotation(image, x1, y1, x2, y2, source, i_type="train", only_if_new=False):
    """Append one bounding box to the global ``total_data`` accumulator.

    The two corners may be given in any order; they are stored as
    (xmin, ymin, xmax, ymax). Coordinates are stored as received, no range
    check is performed here.

    Args:
        image: Image file name the box belongs to.
        x1, y1, x2, y2: Coordinates of two opposite corners of the box.
        source: Label identifying where the annotation comes from.
        i_type: Split tag stored with the box ("train" by default).
        only_if_new: When true, the box is dropped if ``image`` already has
            at least one box recorded.
    """
    # A box with zero width or zero height carries no information.
    if x1 == x2 or y1 == y2:
        return
    if only_if_new and image in total_data["image"]:
        return

    left, right = min(x1, x2), max(x1, x2)
    top, bottom = min(y1, y2), max(y1, y2)
    record = (
        ("image", image),
        ("xmin", left),
        ("ymin", top),
        ("xmax", right),
        ("ymax", bottom),
        ("source", source),
        ("type", i_type),
    )
    for column, value in record:
        total_data[column].append(value)
    
def plot_bbox(image, x1, y1, x2, y2, image_dir="../input/happy-whale-and-dolphin/train_images"):
    """Draw a bounding box on an image and display it with matplotlib.

    Args:
        image: File name of the picture inside ``image_dir``.
        x1, y1, x2, y2: Two opposite corners of the box, expressed as
            fractions of the image width (x) and height (y). The corners may
            be given in any order.
        image_dir: Folder the image is read from. Defaults to the train
            image folder.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_dir``.
    """
    path = os.path.join(image_dir, image)
    pixels = cv2.imread(path)
    if pixels is None:
        # cv2.imread does not raise on a missing/unreadable file, it returns
        # None; fail with an explicit message instead of an AttributeError.
        raise FileNotFoundError("Could not read image: " + path)

    height, width = pixels.shape[0], pixels.shape[1]
    # Scale the relative coordinates to pixel positions.
    xmin = int(min(x1, x2) * width)
    ymin = int(min(y1, y2) * height)
    xmax = int(max(x1, x2) * width)
    ymax = int(max(y1, y2) * height)
    pixels = cv2.rectangle(
        pixels,
        (xmin, ymin),
        (xmax, ymax),
        (0, 255, 255),
        3
    )
    # Reverse the channel axis (OpenCV loads BGR, matplotlib expects RGB).
    plt.imshow(pixels[:, :, ::-1])
    plt.show()
    
def plot_bbox_dic(x):
    """Plot the box described by a row-like mapping.

    ``x`` must support ``x[key]`` for the keys "image", "xmin", "ymin",
    "xmax" and "ymax"; the values are forwarded to ``plot_bbox`` in that
    order.
    """
    keys = ("image", "xmin", "ymin", "xmax", "ymax")
    plot_bbox(*(x[key] for key in keys))
    
def dump_dataset():
    """Return every annotation accumulated in ``total_data`` as a DataFrame."""
    frame = pd.DataFrame(data=total_data)
    return frame

In [None]:
# Image dimensions for both splits, as dicts keyed by the "image" column.
path = "../input/extra-happywhale-metadata"

data = pd.read_csv(os.path.join(path, "train.csv"), index_col="image")
TRAIN_IMG_HEIGHT, TRAIN_IMG_WIDTH = (
    data[column].to_dict() for column in ("img_height", "img_width")
)

data = pd.read_csv(os.path.join(path, "test.csv"), index_col="image")
TEST_IMG_HEIGHT, TEST_IMG_WIDTH = (
    data[column].to_dict() for column in ("img_height", "img_width")
)

In [None]:
# happierwhale: one YOLO label file per image, "<class> <x_center> <y_center>
# <width> <height>" with the box given by its centre and size.
path = "../input/happierwhale/yolo_annotations/yolo_annotations"
for f in os.listdir(path):
    with open(os.path.join(path, f), "r") as file:
        d = file.read()
    # Only the first non-blank line (i.e. the first box) of a file is used.
    first_line = next((line for line in d.splitlines() if line.strip()), "")
    # split() without argument tolerates repeated or trailing whitespace.
    fields = first_line.split()
    if not fields:
        # Empty or whitespace-only label file: no box for this image.
        continue
    _, x, y, w, h = map(float, fields)
    # Convert centre/size to the two opposite corners.
    x1 = x - w / 2
    y1 = y - h / 2
    x2 = x + w / 2
    y2 = y + h / 2
    # The label file shares its base name with the image.
    submit_annotation(os.path.splitext(f)[0] + ".jpg", x1, y1, x2, y2, "happierwhale")

In [None]:
# manual_yolo: YOLO label files ("<class> <x_center> <y_center> <width>
# <height>") stored in a train and a valid folder; both are read the same way.
path = "../input/happy-whale-and-dolphin-anotated-in-yolov5-format/whale/"
for split_dir in ("train", "valid"):
    lpath = os.path.join(path, split_dir, "labels")
    for f in os.listdir(lpath):
        with open(os.path.join(lpath, f), "r") as file:
            d = file.read()
        # Only the first non-blank line (i.e. the first box) of a file is used.
        first_line = next((line for line in d.splitlines() if line.strip()), "")
        # split() without argument tolerates repeated or trailing whitespace.
        fields = first_line.split()
        if not fields:
            # Empty or whitespace-only label file: no box for this image.
            continue
        _, x, y, w, h = map(float, fields)
        # Convert centre/size to the two opposite corners.
        x1 = x - w / 2
        y1 = y - h / 2
        x2 = x + w / 2
        y2 = y + h / 2
        # The image name is the part of the label file name before the first "_".
        submit_annotation(f.split("_")[0] + ".jpg", x1, y1, x2, y2, "manual_yolo")

In [None]:
# segmentation: boxes read from the "images"/"annotations" lists of two JSON
# files; each bbox is [x, y, width, height] in pixels.
path = "../input/happywhales-labelme-segmentation-dataset"

for json_name in ("train.json", "val.json"):
    with open(os.path.join(path, json_name)) as f:
        data = json.load(f)

    # NOTE(review): images and annotations are paired purely by position in
    # their lists - confirm the files hold exactly one annotation per image,
    # in matching order.
    for image, annotation in zip(data["images"], data["annotations"]):
        bbox = annotation["bbox"]
        img_w, img_h = image["width"], image["height"]

        # Normalise to fractions of the image size.
        x1 = bbox[0] / img_w
        y1 = bbox[1] / img_h
        x2 = x1 + bbox[2] / img_w
        y2 = y1 + bbox[3] / img_h

        # Clamp the box to the image borders before recording it.
        submit_annotation(
            image["file_name"],
            max(x1, 0), max(y1, 0),
            min(x2, 1), min(y2, 1),
            "segmentation"
        )

In [None]:
# fullbody: boxes given as pixel x / y / w / h per file name.
data = pd.read_csv("../input/fullbodywhaleannotations/fullbody_annotations.csv")
# Look the dimensions up in the train metadata first and fall back to the
# test metadata: rows that are not train images are tagged "test" below, and
# without their real size the normalised box degenerates to the full image.
data["width"] = data["filename"].map(TRAIN_IMG_WIDTH).fillna(
    data["filename"].map(TEST_IMG_WIDTH)
)
data["height"] = data["filename"].map(TRAIN_IMG_HEIGHT).fillna(
    data["filename"].map(TEST_IMG_HEIGHT)
)
for _, x in data.iterrows():
    if pd.isna(x["width"]) or pd.isna(x["height"]):
        # Image unknown to both metadata files: the box cannot be normalised.
        continue

    x1 = x["x"] / x["width"]
    y1 = x["y"] / x["height"]
    x2 = (x["x"] + x["w"]) / x["width"]
    y2 = (x["y"] + x["h"]) / x["height"]

    # Clamp the box to the image borders.
    x1 = max(0, x1)
    y1 = max(0, y1)
    x2 = min(1, x2)
    y2 = min(1, y2)

    submit_annotation(
        x["filename"],
        x1, y1,
        x2, y2,
        "fullbody",
        i_type="train" if x["filename"] in TRAIN_IMG_WIDTH else "test"
    )


In [None]:
!curl http://happywhale.theoboyer.fr/data/annotations.csv > annotations.csv
data = pd.read_csv("annotations.csv")
data = data[data["judge_decision"] == "accepted"]
!rm annotations.csv

def submit(x):
    submit_annotation(
        x["image"],
        x["x1"], x["y1"],
        x["x2"], x["y2"],
        "crowd_source",
        i_type="train",
        only_if_new=True
    )

_ = data.apply(submit, axis=1)

In [None]:
# YOLOv5: boxes stored as text in the "bbox" column of a train and a test
# CSV, next to the image "width" and "height". Both files share one layout.
path = "../input/happywhale-boundingbox-yolov5/"

for csv_name, split in (("train.csv", "train"), ("test.csv", "test")):
    data = pd.read_csv(os.path.join(path, csv_name))
    # .copy() so the column assignments below act on an independent frame
    # instead of a filtered slice (avoids SettingWithCopyWarning).
    data = data[~data["bbox"].isna()].copy()
    # SECURITY: this text was previously parsed with eval(), which executes
    # arbitrary code found in the CSV. ast.literal_eval only accepts Python
    # literals. Only the first box of each row is kept.
    data["bbox"] = data["bbox"].map(ast.literal_eval).map(lambda boxes: boxes[0])
    # Box values are used as x1, y1, x2, y2 and scaled by the image size.
    data["x1"] = data["bbox"].apply(lambda box: float(box[0])) / data["width"]
    data["y1"] = data["bbox"].apply(lambda box: float(box[1])) / data["height"]
    data["x2"] = data["bbox"].apply(lambda box: float(box[2])) / data["width"]
    data["y2"] = data["bbox"].apply(lambda box: float(box[3])) / data["height"]

    def submit(x, i_type=split):
        """Record one row as a "YOLOv5" box; ``i_type`` is bound to the current split."""
        submit_annotation(
            x["image"],
            x["x1"], x["y1"],
            x["x2"], x["y2"],
            "YOLOv5",
            i_type=i_type
        )

    _ = data.apply(submit, axis=1)

In [None]:
# detic: boxes stored in the "box" column as four whitespace-separated pixel
# values, for a train and a test CSV. Image sizes come from the matching
# metadata dicts.
path = "../input/whale2-cropped-dataset"

for csv_name, split, widths, heights in (
    ("train2.csv", "train", TRAIN_IMG_WIDTH, TRAIN_IMG_HEIGHT),
    ("test2.csv", "test", TEST_IMG_WIDTH, TEST_IMG_HEIGHT),
):
    data = pd.read_csv(os.path.join(path, csv_name))
    # .copy() so the column assignments below act on an independent frame
    # instead of a filtered slice (avoids SettingWithCopyWarning).
    data = data[~data["box"].isna()].copy()
    # split() without argument tolerates repeated or trailing whitespace.
    data["box"] = data["box"].apply(lambda text: text.split())
    data["width"] = data["image"].map(widths)
    data["height"] = data["image"].map(heights)
    # Box values are used as x1, y1, x2, y2 and scaled by the image size.
    data["x1"] = data["box"].apply(lambda box: float(box[0])) / data["width"]
    data["y1"] = data["box"].apply(lambda box: float(box[1])) / data["height"]
    data["x2"] = data["box"].apply(lambda box: float(box[2])) / data["width"]
    data["y2"] = data["box"].apply(lambda box: float(box[3])) / data["height"]

    def submit(x, i_type=split):
        """Record one row as a "detic" box; ``i_type`` is bound to the current split."""
        submit_annotation(
            x["image"],
            x["x1"], x["y1"],
            x["x2"], x["y2"],
            "detic",
            i_type=i_type
        )

    _ = data.apply(submit, axis=1)

# How to resolve conflicts
We will resolve conflicts by being as conservative as possible: if we have two or more bounding boxes for one image, we keep the smallest box that contains all of them.

In [None]:
# Source labels treated as manual annotations; only boxes from these sources
# are exported and visually checked below.
# NOTE(review): "manual_yolo" and "segmentation" are not listed - confirm
# that leaving them out is intentional.
manual_sources = [
    "happierwhale",
    "fullbody",
    "crowd_source",
]

In [None]:
# Turn everything collected so far into a DataFrame and show its summary statistics.
data = dump_dataset()
data.describe()

In [None]:
# Merge the manual annotations: one row per image holding the smallest box
# that contains every manual box recorded for it, then export to CSV.
is_manual = data["source"].isin(manual_sources)
manual_dataset = (
    data[is_manual]
    .groupby("image")
    .agg({"xmin": "min", "ymin": "min", "xmax": "max", "ymax": "max"})
    .rename(columns={"xmin": "x1", "ymin": "y1", "xmax": "x2", "ymax": "y2"})
)
# The image name lives in the group index; expose it as the first column.
manual_dataset.insert(0, "image", manual_dataset.index)
manual_dataset.to_csv("dataset.csv", index=False)
manual_dataset.describe()

# Visual random check 👀

In [None]:
# Show up to 15 randomly chosen manual boxes. Only "train" rows are sampled
# because plot_bbox reads the pictures from the train image folder.
manual_train = data[data["source"].isin(manual_sources) & (data["type"] == "train")]
# Cap the sample size so fewer than 15 available rows does not raise.
for _, x in manual_train.sample(min(15, len(manual_train))).iterrows():
    plt.title(x["image"] + " from " + x["source"])
    plot_bbox_dic(x)

![](https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Thats_all_folks.svg/2560px-Thats_all_folks.svg.png)