In [1]:
import glob
import os
import re
import numpy as np
#import warnings
#warnings.filterwarnings("error")
import h5py
from scipy import ndimage

  from ._conv import register_converters as _register_converters


In [2]:
def get_dict_class(path):
    """Load the class-name -> class-index mapping from a labels file.

    Every non-blank line must contain ``<name>:<index>`` (for example
    ``cat:0``).  The index is the run of digits after the colon and is
    converted to ``int``; the name is kept exactly as written (it is not
    stripped and may itself contain ``:``).

    :param path: path of the labels text file
    :return: dict mapping each class name (str) to its index (int)
    :raises ValueError: if a non-blank line does not contain ``<name>:<index>``
    """
    pattern = re.compile("(.+):(\\d+)")
    dict_classes = dict()
    with open(path, "r") as f:
        for line_number, line_str in enumerate(f, start=1):
            if not line_str.strip():
                # Blank lines (typically an empty last line) carry no mapping.
                # Indexing the empty match list used to raise IndexError here.
                continue
            match = pattern.search(line_str)
            if match is None:
                raise ValueError(
                    "line %d of %r is not of the form '<name>:<index>': %r"
                    % (line_number, path, line_str)
                )
            key, value = match.groups()
            dict_classes[key] = int(value)
    return dict_classes


def get_coordinates(original_string):
    """Extract all bounding-box corner coordinates from annotation XML text.

    The text is searched for ``<xmin>``, ``<ymin>``, ``<xmax>`` and ``<ymax>``
    tags holding non-negative integers.  The i-th value of each tag is taken
    to belong to the i-th box, so all four tags must occur equally often.

    :param original_string: content of one annotation XML file
    :return: tuple ``(xmin, ymin, xmax, ymax)`` of 1-D ``int32`` arrays, one
             entry per box (all empty when the text contains no box)
    :raises ValueError: if the four tags do not occur the same number of times
    """
    tags = ("xmin", "ymin", "xmax", "ymax")
    columns = []
    for tag in tags:
        found = re.findall('<%s>(\\d+)</%s>' % (tag, tag), original_string)
        columns.append(np.array([int(value) for value in found], dtype=np.int32))

    counts = [column.shape[0] for column in columns]
    if len(set(counts)) > 1:
        # Previously this was an `assert` swallowed by a bare `except` that
        # returned 0, which callers then failed to unpack into four arrays.
        raise ValueError(
            "inconsistent bounding-box tags: "
            + ", ".join("%s=%d" % pair for pair in zip(tags, counts))
        )

    xmin, ymin, xmax, ymax = columns
    return xmin, ymin, xmax, ymax

def read_content(xmlfile):
    """Return the whole text of ``xmlfile``, decoded as UTF-8.

    :param xmlfile: path of the file to read
    :return: the file content as one string
    """
    with open(xmlfile, mode="r", encoding="utf-8") as handle:
        return handle.read()
def start(xmllist):
    """Collect the box corners of every annotation file into four flat arrays.

    Boxes are appended in the order of ``xmllist`` and, within one file, in
    the order ``get_coordinates`` returns them.

    NOTE: the return order is ``(xmin, xmax, ymin, ymax)``, which differs from
    the ``(xmin, ymin, xmax, ymax)`` order of ``get_coordinates``.

    :param xmllist: iterable of annotation XML file paths
    :return: tuple ``(np_xmin, np_xmax, np_ymin, np_ymax)`` of 1-D arrays;
             four empty ``int32`` arrays when ``xmllist`` is empty
    """
    xmin_parts, xmax_parts, ymin_parts, ymax_parts = [], [], [], []
    for xmlfile in xmllist:
        xmin, ymin, xmax, ymax = get_coordinates(read_content(xmlfile))
        xmin_parts.append(xmin)
        xmax_parts.append(xmax)
        ymin_parts.append(ymin)
        ymax_parts.append(ymax)

    def join(parts):
        # Concatenate once at the end instead of growing the arrays file by
        # file; an empty input yields an empty array instead of an
        # UnboundLocalError.
        if not parts:
            return np.array([], dtype=np.int32)
        return np.concatenate(parts)

    return join(xmin_parts), join(xmax_parts), join(ymin_parts), join(ymax_parts)

In [3]:
class cluster_anchor():
    """Cluster bounding-box sizes into ``k`` anchor boxes, k-means style.

    Every sample is a (width, height) pair.  The similarity between a sample
    and a cluster is the IoU of the two boxes when they share the same centre,
    and a cluster's centre is the mean (width, height) of its members.

    Attributes set by the constructor:
        k:               number of clusters
        complete_size:   the (n_samples, 2) array of box sizes
        sample_location: for each sample, the index of the cluster holding it
        list_cluster:    for each cluster, an array with its members' indices
        center_list:     for each cluster, its mean (width, height)
        counter:         number of samples that moved during the last pass
    """

    def __init__(self, complete_size, k, move_threshold=0.0):
        """
        Run the clustering immediately and compute the final cluster centres.

        :param complete_size: array of shape (n_samples, 2); each row is the
                              (width, height) of one box
        :param k: number of clusters
        :param move_threshold: the passes stop once the number of samples moved
                               in a pass is not greater than
                               ``move_threshold * n_samples``.  The default 0.0
                               iterates until a full pass moves no sample.
        :raises ValueError: if ``k`` is not between 1 and ``n_samples``
        """
        amount_sample = complete_size.shape[0]
        if not 1 <= k <= amount_sample:
            # The first k samples seed the clusters, so k samples must exist.
            raise ValueError(
                "k must be between 1 and the number of samples (%d), got %r"
                % (amount_sample, k)
            )
        self.k = k
        self.move_threshold = move_threshold
        # Records which cluster every sample currently belongs to.
        self.sample_location = [None for _ in range(amount_sample)]
        self.complete_size = complete_size

        # Records, for every cluster, the indices of its samples.
        self.list_cluster = [None for _ in range(k)]
        self.clustering()
        self.calculate_all_center()

    def clustering(self):
        """Seed each cluster with one sample, then reassign until stable."""
        amount_sample = self.complete_size.shape[0]
        # Sample i is the initial (and only) member of cluster i.
        for i in range(self.k):
            self.list_cluster[i] = np.array([i])
            self.sample_location[i] = i
        # counter is the number of samples moved during one pass; start above
        # the threshold so that at least one pass is made.
        self.counter = amount_sample
        while self.counter > self.move_threshold * amount_sample:
            self.counter = 0
            for i in range(amount_sample):
                self.partition(i)  # assign this sample to a cluster

    def calculate_all_center(self):
        """Store the centre (mean width, height) of every cluster in ``center_list``."""
        self.center_list = [self.cal_mean(self.list_cluster[i]) for i in range(self.k)]

    def partition(self, i):
        """Move sample ``i`` to the cluster whose centre has the highest IoU with it.

        Membership is updated immediately, so samples handled later in the same
        pass are compared against the updated centres.  ``counter`` is
        incremented whenever the sample is (re)assigned.
        """
        sample = self.complete_size[i]
        iou = [self.calculate_iou(sample, self.cal_mean(self.list_cluster[j]))
               for j in range(self.k)]
        index = int(np.argmax(np.array(iou)))  # cluster with the largest IoU
        current = self.sample_location[i]

        if current is not None:
            if index == current:
                return
            if iou[current] == iou[index]:
                # Avoid a pointless transfer: the best cluster is exactly as
                # close to the sample as the cluster it is already in.
                return
            members = self.list_cluster[current]
            self.list_cluster[current] = members[members != i]

        self.sample_location[i] = index
        self.list_cluster[index] = np.concatenate((self.list_cluster[index], np.array([i])), axis=0)
        self.counter += 1

    def calculate_iou(self, true_box, mean_):
        """Return the IoU of two boxes given by (width, height), centred on the same point.

        :param true_box: (width, height) of the sample box
        :param mean_: (width, height) of the cluster centre
        :return: intersection area divided by union area
        """
        true_box_max = true_box / 2.0
        true_box_min = -true_box_max
        anchor_box_max = mean_ / 2.0
        anchor_box_min = -anchor_box_max
        insect_min = np.maximum(true_box_min, anchor_box_min)
        insect_max = np.minimum(true_box_max, anchor_box_max)
        insect_wh = np.maximum(insect_max - insect_min, 0.)
        insect_area = insect_wh[0] * insect_wh[1]

        true_box_area = true_box[0] * true_box[1]
        anchor_box_area = mean_[0] * mean_[1]

        # Sanity checks only: they fail on negative or NaN areas.
        assert true_box_area >= 0
        assert anchor_box_area >= 0
        assert insect_area >= 0

        return insect_area / (true_box_area + anchor_box_area - insect_area)

    def cal_mean(self, cluster):
        """Return the mean (width, height) of the samples listed in ``cluster``.

        :param cluster: 1-D integer array of sample indices
        :return: array of two floats (NaN for an empty cluster)
        """
        return self.complete_size[cluster].sum(axis=0) / cluster.shape[0]

    def calculate_mean_iou(self):
        """Return the average IoU between every sample and its cluster's centre."""
        mean_iou = 0.0
        for cluster, center in zip(self.list_cluster, self.center_list):
            for sample_index in cluster:
                mean_iou += self.calculate_iou(self.complete_size[sample_index], center)
        mean_iou /= self.complete_size.shape[0]
        return mean_iou

In [4]:
class Writing_labels():
    """
    Generates the h5 training file (images, anchor labels, true boxes and
    prior boxes) from the annotation XML files.
    """

    def __init__(self, width=256, height=256):
        """
        :param width: width of one image, in pixels
        :param height: height of one image, in pixels
        """
        self.width = width
        self.height = height

    def _read_image(self, img_path):
        """Read one image file into an array.

        NOTE(review): ``scipy.ndimage.imread`` was deprecated in SciPy 1.0 and
        removed in SciPy 1.2.  On a newer SciPy this single call has to be
        replaced by another reader (SciPy suggests ``imageio.imread``); it is
        kept here because this file imports no other image library.
        """
        return np.array(ndimage.imread(img_path, flatten=False))

    @staticmethod
    def _box_corners(content):
        """Return the lists (xmin, ymin, xmax, ymax) of ints found in the XML text.

        :raises ValueError: if the four tags do not occur equally often
        """
        corners = [
            [int(value) for value in re.findall('<%s>(\\d+)</%s>' % (tag, tag), content)]
            for tag in ("xmin", "ymin", "xmax", "ymax")
        ]
        if len({len(values) for values in corners}) > 1:
            raise ValueError("inconsistent number of xmin/ymin/xmax/ymax tags")
        return corners

    def write(self, cluster_result, center_list, k, grid, xmllist, dict_classes,num_true_box, classes=3,
              parent_path=r"D:\studyINF\AI\YOLOv3\yolo_img3"):
        """
        Write ``train_data2.h5`` into ``parent_path`` with four datasets:
        ``images``, ``anchor_labels``, ``true_box_labels`` and ``prior_boxes``.

        Boxes are numbered globally in the order they are met while walking
        ``xmllist``; ``cluster_result`` must use that same numbering, so
        ``xmllist`` has to be in the order used to build the clustering input.

        :param cluster_result:  for every cluster, the indices of its samples
        :param center_list:     centre of every cluster, i.e. its mean (width, height)
        :param classes:         number of object classes to recognise
        :param k:               number of anchor boxes per grid cell
        :param grid:            number of grid cells per row (and per column)
        :param xmllist:         list of xml file names
        :param dict_classes:    dict mapping every object class name to its index
        :param num_true_box:    maximum number of true boxes per image
        :param parent_path:     directory the h5 file is written to
        :return:                nothing
        """
        n_images = len(xmllist)
        # Size of one grid cell in pixels, so that cell indices always fit the
        # (grid, grid) label layout (32 for a 416 px image with grid=13).
        width_per_grid = self.width / grid
        height_per_grid = self.height / grid
        image_size = np.array([self.width, self.height])
        sample_counter = 0

        # The context manager closes the file even if an image fails to load.
        with h5py.File(os.path.join(parent_path, 'train_data2.h5'), 'w') as f:
            # Datasets are zero-filled by default, so no explicit zeroing.
            images = f.create_dataset('images', shape=[n_images, self.height, self.width, 3], dtype=np.uint8)
            # The 5 stands for [confidence, bx, by, bw, bh].
            anchor_labels = f.create_dataset('anchor_labels', shape=[n_images, grid, grid, k, 5 + classes],
                                             dtype=np.float32)
            true_box_labels = f.create_dataset('true_box_labels', shape=[n_images, num_true_box, 5],
                                               dtype=np.float32)
            # The 4 stands for [prior_x, prior_y, prior_w, prior_h].
            prior_boxes = f.create_dataset("prior_boxes", shape=[n_images, grid, grid, k, 4], dtype=np.float32)

            for img_counter, xmlfile in enumerate(xmllist):
                content = read_content(xmlfile)
                img_path = re.findall('<path>(.*)</path>', content)[0]
                images[img_counter] = self._read_image(img_path)

                # Grid cell holding each box centre, plus centre and size
                # relative to the whole image.
                grid_list, cen_coordinate, width_height = self.cal_center_wid_hei(
                    content, width_per_grid, height_per_grid)
                sample_counter += grid_list.shape[0]

                # Anchor box (cluster) responsible for each box of this image.
                which_anchor = self.confirm_anchor(grid_list.shape[0], sample_counter, cluster_result)
                # Object class of each box of this image.
                which_class = self.confirm_classes(dict_classes, content)

                image_anchor_labels = np.zeros(shape=[grid, grid, k, 5 + classes], dtype=np.float32)
                image_prior_boxes = np.zeros(shape=[grid, grid, k, 4], dtype=np.float32)
                for i in range(grid_list.shape[0]):
                    row, col = grid_list[i, 0], grid_list[i, 1]
                    anchor = which_anchor[i]

                    label = np.zeros(shape=[5 + classes], dtype=np.float32)
                    label[0] = 1.0
                    label[1:3] = cen_coordinate[i]
                    label[3:5] = width_height[i]
                    label[5 + which_class[i]] = 1.0
                    image_anchor_labels[row, col, anchor] = label

                    prior = np.zeros(shape=[4], dtype=np.float32)
                    prior[0:2] = cen_coordinate[i]
                    prior[2:4] = center_list[anchor] / image_size
                    image_prior_boxes[row, col, anchor] = prior

                anchor_labels[img_counter] = image_anchor_labels
                prior_boxes[img_counter] = image_prior_boxes
                # Uses the `dict_classes` argument; this used to read a
                # misspelled global name instead.
                true_box_labels[img_counter] = self.write_true_box(content, dict_classes, num_true_box)

    def write_true_box(self,original_string,dict_classes,num_true_box):
        """
        Build the true-box table of one image.

        :param original_string: content of the annotation XML file
        :param dict_classes:    dict mapping every object class name to its index
        :param num_true_box:    number of rows of the table
        :return: float32 array of shape (num_true_box, 5); row i is
                 [centre_x, centre_y, width, height, class index] of box i with
                 the geometry relative to the image size, unused rows are zero
        :raises ValueError: if the image has more than ``num_true_box`` boxes
        """
        xmin, ymin, xmax, ymax = self._box_corners(original_string)
        if len(xmin) > num_true_box:
            raise ValueError("image has %d boxes but num_true_box is %d" % (len(xmin), num_true_box))
        which_class = self.confirm_classes(dict_classes, original_string)
        # float32 so the normalised values survive: an integer table truncated
        # every fraction to 0.
        true_boxes = np.zeros(shape=[num_true_box, 5], dtype=np.float32)
        for i in range(len(xmin)):
            true_boxes[i, 0] = (xmin[i] + xmax[i]) / 2.0 / self.width
            true_boxes[i, 1] = (ymin[i] + ymax[i]) / 2.0 / self.height
            true_boxes[i, 2] = (xmax[i] - xmin[i]) / self.width
            true_boxes[i, 3] = (ymax[i] - ymin[i]) / self.height
            true_boxes[i, 4] = which_class[i]
        return true_boxes

    def cal_center_wid_hei(self, content, width_per_grid=32, height_per_grid=32):
        """
        Locate every box of one image on the grid and normalise its geometry.

        :param content:  content of the xml file
        :param width_per_grid:  width of one grid cell, in pixels
        :param height_per_grid: height of one grid cell, in pixels
        :return: grid_list, int32 array of shape (n_boxes, 2) holding the
                 (row, column) of the grid cell that contains each box centre
        :return: cen_coordinate, array of shape (n_boxes, 2) holding each box
                 centre as (x / image width, y / image height)
        :return: width_height, array of shape (n_boxes, 2) holding each box
                 size as (width / image width, height / image height)
        """
        xmin, ymin, xmax, ymax = (
            np.array(values, dtype=np.int64).reshape(-1, 1)
            for values in self._box_corners(content)
        )
        center_x = (xmax + xmin) / 2
        center_y = (ymax + ymin) / 2

        grid_list = np.concatenate((center_y // height_per_grid, center_x // width_per_grid),
                                   axis=1).astype(np.int32)
        cen_coordinate = np.concatenate((center_x / self.width, center_y / self.height), axis=1)
        # Relative width and height of every box.
        width_height = np.concatenate(((xmax - xmin) / self.width, (ymax - ymin) / self.height), axis=1)
        return grid_list, cen_coordinate, width_height

    def confirm_anchor(self, sample_this_img, sample_counter, cluster_result):
        """
        Look up the cluster (anchor box) of every box of the current image.

        :param sample_this_img:   number of boxes in this image
        :param sample_counter:    number of boxes seen so far, this image included
        :param cluster_result:    a list of k arrays, each holding the global
                                  indices of the samples in that cluster
        :return:                  a list with, for every box of this image, the
                                  ID of the anchor box responsible for it
        :raises ValueError:       if a box is not a member of any cluster
        """
        anchors = []
        for sample_num in range(sample_counter - sample_this_img, sample_counter):
            for anchor_id, members in enumerate(cluster_result):
                if sample_num in members:
                    anchors.append(anchor_id)
                    break
            else:
                # Skipping the box silently would shift every later entry and
                # pair boxes with the wrong anchors.
                raise ValueError("sample %d is not in any cluster" % sample_num)
        return anchors

    def confirm_classes(self, dict_classes, content):
        """Return the class index of every ``<name>`` tag in the XML text, in order."""
        return [dict_classes[key] for key in re.findall("<name>(.+)</name>", content)]

    def supplement_h5file(self,h5file_path,image_dir):
        """
        Append background images to an existing h5 file.

        Every ``*.jpg`` in ``image_dir`` is added to ``images`` with all-zero
        rows in the three label datasets.  The file is read fully into memory
        and rewritten in place; calling this twice appends the images twice.

        :param h5file_path: path of the h5 file to extend
        :param image_dir: directory holding the background ``*.jpg`` images
        :return: nothing
        """
        dataset_names = ('images', 'anchor_labels', 'true_box_labels', "prior_boxes")
        img_path_list = glob.glob(os.path.join(image_dir, "*.jpg"))

        with h5py.File(h5file_path, 'r') as f:
            existing = {name: np.array(f[name]) for name in dataset_names}

        combined = {}
        for name in dataset_names:
            data = existing[name]
            # Background rows: real pixels for the images, zeros for labels.
            background = np.zeros(shape=(len(img_path_list),) + data.shape[1:], dtype=data.dtype)
            if name == 'images':
                for i, img_path in enumerate(img_path_list):
                    background[i] = self._read_image(img_path)
            combined[name] = np.concatenate((data, background), axis=0)

        with h5py.File(h5file_path, 'w') as f:
            for name in dataset_names:
                f.create_dataset(name, data=combined[name])

In [5]:
# Locate every annotation file one folder level below dirname.
dirname = r"D:\studyINF\AI\YOLOv3\yolo_img3"
xmllist = glob.glob(dirname + '\\*\\*.xml')
print("len is:", len(xmllist))

# Find the largest number of boxes annotated in a single file.  The text
# "bndbox" is counted and the maximum is halved at the end, presumably because
# every box has an opening and a closing <bndbox> tag.
max_value = 0
for xmlfile in xmllist:
    with open(xmlfile, mode="r", encoding="utf-8") as f:
        content = f.read()
    counter = content.count("bndbox")
    max_value = max(max_value, counter)
max_value /= 2
print(max_value)

len is: 698
10.0


In [8]:
# Display the centre (mean width, height) of every cluster held by `one`.
# `one` is the cluster_anchor instance created in the `__main__` cell further
# down, so that cell has to be executed before this one.
one.center_list

[array([19.10628019, 17.5826087 ]),
 array([46.0840708 , 15.86283186]),
 array([27.35308953, 25.91424968]),
 array([17.62978723, 46.55744681]),
 array([55.46875, 28.96875]),
 array([50.859375, 85.40625 ]),
 array([92.13333333, 71.43333333]),
 array([33.04054054, 50.52702703])]

In [11]:
# Display the cluster centres of `one` again.  The values recorded below differ
# slightly from the previous display, presumably because the clustering was
# re-run on changed annotations in between -- TODO confirm.
one.center_list

[array([19.07632094, 17.52739726]),
 array([45.97297297, 15.77927928]),
 array([27.28571429, 25.73086735]),
 array([16.81603774, 45.10849057]),
 array([53.87195122, 28.81097561]),
 array([50.42105263, 83.23684211]),
 array([92.86666667, 69.46666667]),
 array([31.02752294, 50.16055046])]

In [6]:
if __name__ == "__main__":
    # Collect every annotation file one folder level below dirname.
    dirname = r"D:\studyINF\AI\YOLOv3\yolo_img3"
    xmllist = glob.glob(dirname + '\\*\\*.xml')

    # start() returns the corners in the order (xmin, xmax, ymin, ymax).
    xmin, xmax, ymin, ymax = start(xmllist)
    np_width = xmax - xmin
    np_height = ymax - ymin
    print(np_width.shape)

    # One (width, height) row per box: the input of the anchor clustering.
    complete_size = np.concatenate((np_width[:, None], np_height[:, None]), axis=1)
    amount_cluster = 8
    one = cluster_anchor(complete_size, amount_cluster)
    print(one.calculate_mean_iou())

    # Write the training file for 416x416 images on a 13x13 grid.
    # The spelling `dict_calsses` (sic) is kept on purpose: it is a
    # module-level name that other code in this notebook may read.
    instance = Writing_labels(width=416, height=416)
    dict_calsses = get_dict_class(path=r"D:\studyINF\AI\2.7code\opencv\yolonet\yolo_img2\labels.txt")
    instance.write(
        one.list_cluster,
        one.center_list,
        k=amount_cluster,
        grid=13,
        xmllist=xmllist,
        dict_classes=dict_calsses,
        classes=3,
        num_true_box=10,
        parent_path=r"D:\studyINF\AI\YOLOv3\yolo_img3",
    )
    

(2765,)
0.7460978022454268


In [13]:
# Create a label writer for 416x416 images (the same settings the `__main__`
# cell uses), presumably so the next cell can run without redoing that cell.
instance = Writing_labels(width = 416, height=416)

In [7]:
# Append every *.jpg of the `background` folder to train_data2.h5 with all-zero
# labels.  The file is rewritten in place, and running this cell again appends
# the same images a second time.
instance.supplement_h5file(r"D:\studyINF\AI\YOLOv3\yolo_img3\train_data2.h5",r"D:\studyINF\AI\YOLOv3\yolo_img3\background")

In [6]:
def readh5(h5_path):
    """Load the anchor labels and prior boxes from an h5 training file.

    :param h5_path: path of the h5 file
    :return: tuple ``(train_labels, prior_boxes)`` of in-memory arrays read
             from the ``anchor_labels`` and ``prior_boxes`` datasets
    """
    # The context manager closes the file even when a dataset is missing or
    # cannot be read; both arrays are copied into memory before it closes.
    with h5py.File(h5_path, 'r') as f:
        train_labels = np.array(f['anchor_labels'])
        prior_boxes = np.array(f["prior_boxes"])
    return train_labels, prior_boxes

In [7]:
def calculate_iou( true_box, mean_):
    """Return the IoU of two boxes given as (width, height) and centred on the same point.

    :param true_box: (width, height) of the first box
    :param mean_: (width, height) of the second box
    :return: intersection area divided by union area
    """
    # With a shared centre each box spans [-size / 2, +size / 2] on both axes.
    half_true = true_box / 2.0
    half_anchor = mean_ / 2.0
    lower = np.maximum(-half_true, -half_anchor)
    upper = np.minimum(half_true, half_anchor)
    overlap = np.maximum(upper - lower, 0.)
    intersection = overlap[0] * overlap[1]

    area_true = true_box[0] * true_box[1]
    area_anchor = mean_[0] * mean_[1]
    return intersection / (area_true + area_anchor - intersection)

In [8]:
# Six hard-coded two-element arrays that the lookup cell below compares a
# sample box against with calculate_iou.
# NOTE(review): presumably (width, height) cluster centres copied from an
# earlier clustering run with k=6 -- TODO confirm.
all_mean = [np.array([31.68817204, 51.02150538]),
 np.array([25.90733591, 31.56756757]),
 np.array([21.43287037, 22.81712963]),
 np.array([47.12258065, 14.56129032]),
 np.array([51.58490566, 24.90566038]),
 np.array([18.62641509, 15.61132075])]

In [12]:
# Pick the entry of all_mean that has the highest IoU with one sample box.
sample = np.array([63,20])
temp = []
for i, candidate in enumerate(all_mean):
    temp.append(calculate_iou(sample, candidate))
all_mean[np.argmax(np.array(temp))]

array([51.58490566, 24.90566038])

In [16]:
# Load the `anchor_labels` and `prior_boxes` datasets of the training file into
# memory (readh5 returns them in that order).
anchors,prior_boxes = readh5(r"D:\studyINF\AI\YOLOv3\yolo_img3\train_data2.h5")

In [19]:
# Open the training file read-only for inspection.  The handle `f` is used by
# the following cell and is never closed in the cells shown here.
f = h5py.File(r"D:\studyINF\AI\YOLOv3\yolo_img3\train_data2.h5", 'r')

In [20]:
# Print the name of every top-level entry stored in the open h5 file `f`.
for key in f.keys():
    print(key)

anchor_labels
images
prior_boxes
true_box_labels


In [None]:
# Scratch copy of the eight pairs shown by the second `one.center_list` display
# above.  The cell was never executed; run as-is it would only evaluate to a
# tuple of lists without assigning it to anything.
[19.07632094, 17.52739726],
[45.97297297, 15.77927928],
[27.28571429, 25.73086735],
[16.81603774, 45.10849057],
[53.87195122, 28.81097561],
[50.42105263, 83.23684211],
[92.86666667, 69.46666667],
[31.02752294, 50.16055046]

In [11]:
# Scratch: a 3x3 array of uint8 ones, converted to raw bytes in the next cell.
temp = np.ones(shape = [3,3],dtype =np.uint8)

In [13]:
# Raw bytes of the array.  tobytes() returns the same bytes as the old
# tostring() alias, which has been deprecated since NumPy 1.19 and is dropped
# from newer releases.
a = temp.tobytes()

In [14]:
# Display the raw bytes: one 0x01 byte per uint8 element, nine in total.
a

b'\x01\x01\x01\x01\x01\x01\x01\x01\x01'