In [1]:
import numpy as np
from sklearn.cluster import KMeans
from bs4 import BeautifulSoup
from pathlib import Path
import PIL.Image

In [2]:
# Recursively collect every image and every annotation file.
# Each list is sorted on its own so the ordering is deterministic; the
# later zip(images, masks) pairs entries purely by position in these lists.
images = sorted(Path("data/masks/images").glob("**/*.png"))
masks = sorted(Path("data/masks/annotations").glob("**/*.xml"))

In [3]:
# Collect the (width, height) of every annotated box as it appears after the
# image is letterboxed (aspect-preserving resize plus centered padding) onto a
# target_w x target_h canvas.
boxes = []
target_w, target_h = (416, 416)

# images and masks are sorted independently and paired by position, so a
# length mismatch means the pairing is wrong. Fail loudly instead of letting
# zip() silently truncate to the shorter list.
if len(images) != len(masks):
    raise ValueError(
        f"Found {len(images)} images but {len(masks)} annotation files; "
        "cannot pair them by position."
    )

for image_path, mask_path in zip(images, masks):
    # Only the dimensions are needed. The context manager closes the file
    # handle, and skipping convert() avoids decoding the pixel data.
    with PIL.Image.open(image_path) as img:
        orig_w, orig_h = img.size

    # Letterbox geometry: uniform scale that fits the image inside the target,
    # then the padding that centers the resized image.
    scale = min(target_w / orig_w, target_h / orig_h)
    new_w = int(orig_w * scale)
    new_h = int(orig_h * scale)
    pad_x = (target_w - new_w) // 2
    pad_y = (target_h - new_h) // 2

    with open(mask_path, "r") as f:
        soup = BeautifulSoup(f.read(), "lxml")

    for obj in soup.find_all("object"):
        xmin = int(obj.find("xmin").text)
        ymin = int(obj.find("ymin").text)
        xmax = int(obj.find("xmax").text)
        ymax = int(obj.find("ymax").text)

        # Map each corner into letterboxed coordinates, truncating to whole
        # pixels before taking the difference.
        xmin = int(xmin * scale + pad_x)
        ymin = int(ymin * scale + pad_y)
        xmax = int(xmax * scale + pad_x)
        ymax = int(ymax * scale + pad_y)

        box_w = xmax - xmin
        box_h = ymax - ymin
        # Drop boxes that collapse to zero (or negative) size after scaling.
        if box_w > 0 and box_h > 0:
            boxes.append((box_w, box_h))
len(boxes)

4072

In [4]:
# Cluster the collected (width, height) pairs into 9 anchor shapes.
# Note: this rebinds `boxes` from a list of tuples to a 2-column ndarray.
boxes = np.array(boxes)
kmeans = KMeans(
    n_clusters=9,
    init="k-means++",
    # Pinned explicitly: scikit-learn changed the default from 10 to "auto"
    # (a single run for k-means++) in 1.4, so omitting it makes the resulting
    # anchors depend on the installed version.
    n_init=10,
    max_iter=300,
    random_state=42,
)
kmeans.fit(boxes)
# Cluster centers are floats; round to whole pixels.
anchors = np.round(kmeans.cluster_centers_).astype(int)
anchors

array([[ 17,  19],
       [128, 152],
       [ 55,  59],
       [ 78,  88],
       [ 26,  28],
       [ 37,  42],
       [182, 205],
       [ 10,  11],
       [103, 124]])

In [5]:
# Sanity check: display the first (box_w, box_h) entry of `boxes`.
boxes[0]

(24, 30)

In [8]:
# Re-cluster only the small boxes into 3 anchors.
# A box counts as "small" when its area is below 32 x 32 = 1024 px^2.
SMALL_BOX_MAX_AREA = 32 * 32
small_boxes = [box for box in boxes if box[0] * box[1] < SMALL_BOX_MAX_AREA]
# random_state and n_init are fixed so the anchors are reproducible across
# runs and across scikit-learn versions (the n_init default changed in 1.4).
# Note: this rebinds `kmeans`, replacing any previously fitted model.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42).fit(small_boxes)
# Cluster centers are floats; round to whole pixels.
anchors_small = np.round(kmeans.cluster_centers_).astype(int)
anchors_small

array([[ 9, 10],
       [26, 28],
       [17, 19]])