In [64]:
'''
Created on Feb 20, 2017

@author: jumabek
'''
from os import listdir
from os.path import isfile, join
import argparse
#import cv2
import numpy as np
import sys
import os
import shutil
import random 
import math

width_in_cfg_file = 416.
height_in_cfg_file = 416.

def IOU(x,centroids):
    similarities = []
    k = len(centroids)
    for centroid in centroids:
        c_w,c_h = centroid
        w,h = x
        if c_w>=w and c_h>=h:
            similarity = w*h/(c_w*c_h)
        elif c_w>=w and c_h<=h:
            similarity = w*c_h/(w*h + (c_w-w)*c_h)
        elif c_w<=w and c_h>=h:
            similarity = c_w*h/(w*h + c_w*(c_h-h))
        else: #means both w,h are bigger than c_w and c_h respectively
            similarity = (c_w*c_h)/(w*h)
        similarities.append(similarity) # will become (k,) shape
    return np.array(similarities) 

def avg_IOU(X,centroids):
    n,d = X.shape
    sum = 0.
    for i in range(X.shape[0]):
        #note IOU() will return array which contains IoU for each centroid and X[i] // slightly ineffective, but I am too lazy
        sum+= max(IOU(X[i],centroids))
    return sum/n

def write_anchors_to_file(centroids,X,anchor_file):
    f = open(anchor_file,'w')
    
    anchors = centroids.copy()
    print(anchors.shape)

    print('acc:{:.2f}%'.format(avg_IOU(X, anchors) * 100))
    for i in range(anchors.shape[0]):
        anchors[i][0] = round( anchors[i][0] * width_in_cfg_file)  // 760.       # 要除自己的影像大小!!!!!!!!!!!!!
        anchors[i][1] = round( anchors[i][1] * height_in_cfg_file) // 760.

    widths = anchors[:, 0]
    sorted_indices = np.argsort(widths)
    
    for i in sorted_indices[:-1]:
        f.write('%d, %d, '%(anchors[i,0],anchors[i,1]))
    #there should not be comma after last anchor, that's why
    f.write('%d, %d\n'%(anchors[sorted_indices[-1:],0],anchors[sorted_indices[-1:],1]))

    out = anchors[sorted_indices]
    print('Anchors = ', out)
    # f.write('%f\n'%(avg_IOU(X,centroids)))
   

def kmeans(X,centroids,eps,anchor_file):
    
    N = X.shape[0]  #锚框个数
    iterations = 0
    k,dim = centroids.shape
    prev_assignments = np.ones(N)*(-1)    
    iter = 0
    old_D = np.zeros((N,k))

    while True:
        D = [] 
        iter+=1           
        for i in range(N):
            d = 1 - IOU(X[i],centroids)
            D.append(d)
        D = np.array(D) # D.shape = (N,k)
        
        print("iter {}: dists = {}".format(iter,np.sum(np.abs(old_D-D))))
            
        #assign samples to centroids 
        assignments = np.argmin(D,axis=1)   # 取出最小点
        
        if (assignments == prev_assignments).all() :
            print("Centroids = ",centroids)
            write_anchors_to_file(centroids,X,anchor_file)
            return

        #calculate new centroids
        centroid_sums=np.zeros((k,dim),np.float)
        for i in range(N):
            centroid_sums[assignments[i]]+=X[i]        
        for j in range(k):            
            centroids[j] = centroid_sums[j]/(np.sum(assignments==j))
        
        prev_assignments = assignments.copy()     
        old_D = D.copy()  

def main():
 
    filelist     = "dataset/txt/20221102_anno_train_lines_0.txt"
    output_dir   = "generated_anchors"
    num_clusters = 9
    
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    
    annotation_dims = []

    size = np.zeros((1,1,3))

    f = open(filelist)
    lines = [line.rstrip('\n') for line in f.readlines()]

    for line in lines:
        x1,y1,x2,y2 = line.split(' ')[1].split(',')[:4]
        # print(line)
        # print(x1,y1,x2,y2)
        w = abs(int(x2)-int(x1))
        h = abs(int(y2)-int(y1))
        # print(w,h)
        annotation_dims.append(tuple(map(float,(w,h))))
    annotation_dims = np.array(annotation_dims)
    eps = 0.005
    
    if num_clusters == 0:
        for num_clusters in range(1,11): #we make 1 through 10 clusters 
            anchor_file = join(output_dir,'anchors%d.txt'%(num_clusters))

            indices = [ random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)]
            centroids = annotation_dims[indices]
            kmeans(annotation_dims,centroids,eps,anchor_file)
            print('centroids.shape', centroids.shape)
    else:
        anchor_file = join(output_dir,'anchors%d.txt'%(num_clusters))
        indices = [ random.randrange(annotation_dims.shape[0]) for i in range(num_clusters)]
        centroids = annotation_dims[indices]
        kmeans(annotation_dims,centroids,eps,anchor_file)

        print('centroids.shape', centroids.shape)

if __name__=="__main__":
    main()



iter 1: dists = 1470.1660626021312
iter 2: dists = 153.1332610013493
iter 3: dists = 68.11940976210175
iter 4: dists = 43.88824397317332
iter 5: dists = 35.87669820196735
iter 6: dists = 44.88348500128592
iter 7: dists = 47.0422702531333
iter 8: dists = 22.82123134202647
iter 9: dists = 12.42467127354386
iter 10: dists = 4.851453864004565
iter 11: dists = 7.119923925031968
iter 12: dists = 4.763233847155
iter 13: dists = 5.365689602545187
iter 14: dists = 3.1298923280635393
iter 15: dists = 3.330517292323863
iter 16: dists = 3.887725488308573
iter 17: dists = 3.571411610376484
iter 18: dists = 4.0282725225669695
iter 19: dists = 2.972444130913159
Centroids =  [[665.14814815 476.22222222]
 [521.86206897 582.81034483]
 [363.12903226 426.56451613]
 [416.69491525 510.84745763]
 [287.63235294 334.88235294]
 [414.23809524 683.19047619]
 [275.21875    528.03125   ]
 [237.82352941 244.70588235]
 [429.35       282.65      ]]
(9, 2)
acc:86.29%
Anchors =  [[130. 133.]
 [150. 289.]
 [157. 183.]
 [

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  centroid_sums=np.zeros((k,dim),np.float)
