-
Notifications
You must be signed in to change notification settings - Fork 0
/
cluster_anchor_boxes.py
90 lines (78 loc) · 4 KB
/
cluster_anchor_boxes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from dataset_prepare import get_notation_file_list, load_notation_file
from sklearn.cluster import MiniBatchKMeans
import numpy as np
import matplotlib.pyplot as plt
def clustering_anchor_boxes(n, train_data):
    """
    Fit a MiniBatchKMeans model with ``n`` clusters on the given samples.

    MiniBatchKMeans is used instead of plain KMeans because, per the original
    author's note, the dataset contains over 2M samples.
    :param n: number of clusters
    :param train_data: training data in numpy array
    :return: the fitted MiniBatchKMeans instance
    """
    # Every option is spelled out explicitly so the behaviour does not depend
    # on the defaults of the installed scikit-learn version.
    kmeans_options = dict(
        init='k-means++',
        max_iter=100,
        batch_size=500,
        verbose=0,
        compute_labels=True,
        random_state=None,
        tol=0.0,
        max_no_improvement=10,
        init_size=None,
        n_init=3,
        reassignment_ratio=0.01,
    )
    model = MiniBatchKMeans(n_clusters=n, **kmeans_options)
    model.fit(train_data)
    return model
def load_all_anchor_box_size(folder_path):
    """
    Load the size of every anchor box found in the notation files.

    For each notation file the width (right - left) and height (bottom - top)
    of every box are computed, and all (width, height) pairs are gathered into
    a single numpy array of shape (n_samples, 2) for training.
    :param folder_path: string: Path of the notation files
    :return: numpy array of shape (n_samples, 2); an empty (0, 2) array when
             no notation file is found.
    """
    per_file_sizes = []
    for file_name in get_notation_file_list(folder_path):
        # NOTE(review): load_notation_file is presumably returning a pandas
        # DataFrame with 'left', 'top', 'right', 'bottom' columns - confirm.
        content_data_frame = load_notation_file(file_name)
        width_array = (content_data_frame['right'] - content_data_frame['left']).values
        height_array = (content_data_frame['bottom'] - content_data_frame['top']).values
        # Two 1-D arrays -> one (n, 2) array with width in column 0.
        per_file_sizes.append(np.column_stack((width_array, height_array)))

    if not per_file_sizes:
        # Keep the documented "numpy array" contract even with no input files.
        return np.empty((0, 2))

    # Stack once at the end: stacking inside the loop would re-copy the whole
    # accumulated array for every file.
    return np.vstack(per_file_sizes)
def avg_iou_distance(box, kmeans_cluster):
    """
    Compute the average box-to-centroid score of a clustering model.

    Each box is compared with the centroid of the cluster it is assigned to
    (``kmeans_cluster.labels_``), so the model must have been fitted on the
    same ``box`` array, in the same row order. The per-box value is

        1 - |area(box) - area(centroid)| / (area(box) + area(centroid))

    and the result is the mean of that value over all boxes.
    NOTE(review): this is an area-based measure, not a geometric
    intersection-over-union; confirm this is the intended metric.
    :param box: numpy array of all boxes' width and height, shape (n_samples, 2)
    :param kmeans_cluster: trained KMeans model exposing cluster_centers_ and labels_
    :return: float: average value over all boxes
    :raises ValueError: if ``box`` contains no samples
    """
    if len(box) == 0:
        raise ValueError("box must contain at least one sample")

    # Work in floats so the division is a true division for integer input too.
    boxes = np.asarray(box, dtype=float)
    centroids = np.asarray(kmeans_cluster.cluster_centers_, dtype=float)

    # Row i holds the centroid of the cluster that box i was assigned to.
    assigned_centroids = centroids[kmeans_cluster.labels_]

    box_area = boxes[:, 0] * boxes[:, 1]
    centroid_area = assigned_centroids[:, 0] * assigned_centroids[:, 1]

    # Vectorised over all boxes instead of a Python-level loop.
    per_box = 1 - np.abs(box_area - centroid_area) / (box_area + centroid_area)
    return per_box.sum() / len(boxes)
def draw_avg_iou_plot(avg_iou_list):
    """
    Plot one value per number of clusters and show the figure.

    The i-th entry of ``avg_iou_list`` is drawn at x = i + 1, once as blue
    dots ('bo') and once as a black connecting line ('k').
    :param avg_iou_list: list of values, ordered by number of clusters starting at 1
    """
    # range() works on both Python 2 and 3 (xrange is Python 2 only); the
    # x positions are built once and shared by both plotted series.
    cluster_counts = list(range(1, len(avg_iou_list) + 1))

    plt.figure(1)
    plt.xlabel('# of Clusters')
    plt.ylabel('Avg IOU')
    plt.title('Avg IOU based on different # of Clusters')
    plt.grid(True)
    plt.plot(cluster_counts, avg_iou_list, 'bo',
             cluster_counts, avg_iou_list, 'k')
    plt.show()
if __name__ == "__main__":
    # Load the (width, height) of every annotated box in the dataset.
    bbox_WH = load_all_anchor_box_size("./datasets/vgg_face_dataset/files/")
    print("There are {} anchor boxes found!".format(len(bbox_WH)))

    # Fit one model per cluster count (1..15) and record its average score so
    # the different cluster counts can be compared on a single plot.
    avg_iou_dist_list = []
    # range() instead of the Python 2-only xrange() so the script also runs
    # on Python 3.
    for n_of_cluster in range(1, 16):
        cluster = clustering_anchor_boxes(n_of_cluster, bbox_WH)
        print("Centroid of {} clusters: ".format(n_of_cluster))
        print(cluster.cluster_centers_)
        avg_iou_dist = avg_iou_distance(bbox_WH, cluster)
        avg_iou_dist_list.append(avg_iou_dist)
        print("Average IOU Distance for K-Means #{} clusters is {}".format(n_of_cluster, avg_iou_dist))
    draw_avg_iou_plot(avg_iou_dist_list)
# Based on the plot shown in "AvgIOU_analyze.png", the plan is to use 7 as the number of clusters for KMeans.