In [3]:
import os
import xml.etree.ElementTree as ET
import numpy as np
from sklearn.cluster import KMeans

def parse_xml(xml_file):
    """Return the pixel size of every annotated box in one XML file.

    Each ``<object>`` element's ``bndbox`` corner coordinates are read and
    turned into a ``[width, height]`` pair (``xmax - xmin``, ``ymax - ymin``).

    Args:
        xml_file: Annotation file to parse, passed straight to ``ET.parse``.

    Returns:
        A list of ``[box_width, box_height]`` integer pairs, one per
        ``<object>`` element, in document order. The list is empty when the
        file contains no ``<object>`` elements.

    Raises:
        AttributeError: If an object lacks one of the ``bndbox`` corner tags.
        ValueError: If a corner coordinate is not an integer literal.
    """
    root = ET.parse(xml_file).getroot()

    def corner(obj, tag):
        # Read one bounding-box corner coordinate as an int.
        return int(obj.find('bndbox/' + tag).text)

    # The image <size> element is intentionally not read: the returned box
    # sizes do not depend on it, and requiring it made otherwise usable
    # annotation files fail to parse.
    boxes = []
    for obj in root.findall('object'):
        xmin = corner(obj, 'xmin')
        ymin = corner(obj, 'ymin')
        xmax = corner(obj, 'xmax')
        ymax = corner(obj, 'ymax')
        boxes.append([xmax - xmin, ymax - ymin])
    return boxes

def load_dataset(xml_dir):
    """Collect the box sizes from every ``.xml`` file directly in a directory.

    Args:
        xml_dir: Directory to scan (not recursive). Entries whose name does
            not end in ``.xml`` are ignored.

    Returns:
        A 2-D NumPy array with one ``[width, height]`` row per box, as
        produced by ``parse_xml``. The array always has two columns; when no
        boxes are found its shape is ``(0, 2)``.
    """
    boxes = []
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order reproducible from run to run.
    for xml_file in sorted(os.listdir(xml_dir)):
        if xml_file.endswith('.xml'):
            boxes.extend(parse_xml(os.path.join(xml_dir, xml_file)))
    # reshape keeps the (n_boxes, 2) layout even when the list is empty,
    # where a bare np.array([]) would be 1-D.
    return np.array(boxes).reshape(-1, 2)

def calculate_anchors(boxes, n_clusters=9, random_state=None):
    """Cluster box sizes with k-means and return the cluster centres.

    Args:
        boxes: Array-like of ``[width, height]`` rows to cluster.
        n_clusters: Number of clusters (anchors) to produce.
        random_state: Optional seed forwarded to ``KMeans``. Pass an int to
            get the same anchors on every run; the default ``None`` keeps the
            previous unseeded behaviour.

    Returns:
        The fitted model's ``cluster_centers_`` array, one row per cluster,
        in the order ``KMeans`` reports them (not sorted by size).
    """
    # n_init is pinned to 10, the value the implicit default used so far.
    # Leaving it unset makes scikit-learn emit a FutureWarning and lets the
    # result change once the library default switches to 'auto'.
    kmeans = KMeans(
        n_clusters=n_clusters,
        n_init=10,
        random_state=random_state,
    )
    kmeans.fit(boxes)
    return kmeans.cluster_centers_

if __name__ == "__main__":
    # Point this at the directory that holds your XML annotation files.
    xml_dir = "/home/gxyu/yolov5-7.0/datasets/ankors/anchors_data"

    # Gather every annotated box size, then cluster the sizes into anchors.
    boxes = load_dataset(xml_dir)
    anchors = calculate_anchors(boxes)

    print("New anchors:", anchors)


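FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)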


New anchors: [[ 517.55357143  186.69642857]
 [  73.06532663  186.66331658]
 [ 270.02083333   94.21527778]
 [ 243.46153846  651.87179487]
 [1063.61111111  451.16666667]
 [ 164.95098039  371.76470588]
 [ 492.09090909 1285.09090909]
 [  84.41830065   40.02614379]
 [1922.33333333  555.66666667]]
