In [1]:
import torch
import cv2
import os
import xml.etree.ElementTree as ET
import numpy as np
import yaml
from xml.dom import minidom

In [19]:
# Calibration inputs, resolved relative to the notebook's working directory.
homography_matrix_file = "homography_matrix.txt"
narrow_cam_details_file = "narrow_cam_details.txt"

# Root folder of the annotation set. Raw strings keep the Windows backslashes literal.
# Alternative data set: r"D:\ACA\fyp\codes\bbox_concatenate\278\Annotations"
xml_root_path = r"D:\ACA\fyp\codes\bbox_concatenate\xml_files\test_1"

# Destination folders for the merged annotation files.
torch_output_xml_folder = xml_root_path + r"\concat_torch"
numpy_output_xml_folder = xml_root_path + r"\concat_2"  # alternative: r"\concat_numpy"

In [20]:
def get_all_class_names(class_names_file):
    """Load the class-name list stored under the ``names`` key of a YAML file.

    Args:
        class_names_file: path of the YAML file to read.

    Returns:
        The value of the top-level ``names`` entry. It is also printed
        before being returned.
    """
    with open(class_names_file) as yaml_stream:
        names = yaml.safe_load(yaml_stream)["names"]
    print(names)
    return names

# Module-level class list: get_bbox_data maps a class name to its index in this
# list, and create_object_annotation maps the index back to the name.
class_names = get_all_class_names('class_names.yaml')

['Green', 'Green-up', 'Green-left', 'Green-right', 'Red', 'Yellow', 'Red-Yellow', 'Count-down', 'Empty', 'Empty-count-down']


In [21]:
# List the annotation files of each camera in sorted order, so that the two
# lists line up index-by-index when they are zipped together later on.
wide_cam_file_names = sorted(os.listdir(os.path.join(xml_root_path, "wide")))
narrow_cam_file_names = sorted(os.listdir(os.path.join(xml_root_path, "narrow")))

# Full paths, in the same order as the name lists above.
wide_cam_files = [os.path.join(xml_root_path, "wide", name) for name in wide_cam_file_names]
narrow_cam_files = [os.path.join(xml_root_path, "narrow", name) for name in narrow_cam_file_names]


In [22]:
def get_xml_file_details(xml_file):
    """Read the ``folder`` and ``filename`` entries of an annotation XML file.

    Args:
        xml_file: path (or file object) accepted by ``ET.parse``.

    Returns:
        dict with the keys ``"folder"`` and ``"filename"`` holding the text of
        the corresponding child elements of the document root.

    Raises:
        AttributeError: if either element is missing from the root.
    """
    annotation = ET.parse(xml_file).getroot()
    return {tag: annotation.find(tag).text for tag in ("folder", "filename")}

In [23]:
def get_bbox_data(xml_file):
    """Extract every annotated box of an annotation XML file as an array.

    Each ``<object>`` element found anywhere in the document contributes one
    row ``[xmin, ymin, xmax, ymax, class]``. The coordinates come from the
    object's ``<bndbox>`` child and ``class`` is the index of the object's
    ``<name>`` text in the module-level ``class_names`` list. Values whose tag
    is absent stay at 0.

    Args:
        xml_file: path (or file object) accepted by ``ET.parse``.

    Returns:
        float64 array of shape ``(N, 5)``. An annotation without objects
        yields shape ``(0, 5)``, so column slicing works on the empty result.

    Raises:
        ValueError: if an object's name is not listed in ``class_names``.
    """
    coord_tags = ("xmin", "ymin", "xmax", "ymax")
    root = ET.parse(xml_file).getroot()

    rows = []
    for obj in root.findall(".//object"):
        row = [0.0, 0.0, 0.0, 0.0, 0]  ## [xmin,ymin,xmax,ymax,class]

        bndbox = obj.find("bndbox")
        if bndbox is not None:
            for column, tag in enumerate(coord_tags):
                text = bndbox.findtext(tag)
                if text is not None:
                    row[column] = float(text)

        name = obj.findtext("name")
        if name is not None:
            try:
                row[4] = class_names.index(name)
            except ValueError:
                # Same exception type as a bare list.index failure, but it
                # names the offending class and file.
                raise ValueError(
                    "unknown class name {!r} in {}".format(name, xml_file)
                ) from None

        rows.append(row)

    # reshape keeps the column count even when there are no rows at all.
    return np.array(rows, dtype=np.float64).reshape(-1, 5)

### numpy concatenation

In [24]:
def check_position(boxes, narrow_region):
    """Flag the boxes that lie strictly inside ``narrow_region``.

    Args:
        boxes: 2-D array whose first four columns are ``x1, y1, x2, y2``.
        narrow_region: exactly four values ``(topx, topy, botx, boty)``.

    Returns:
        Boolean array with one entry per box. An entry is True only when all
        four box edges are strictly within the region; touching the border
        counts as outside.
    """
    left, top, right, bottom = narrow_region
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    inside = (x1 > left) & (y1 > top) & (x2 < right) & (y2 < bottom)
    return inside

def upcast(t):
    """Promote a low-precision array so box arithmetic does not overflow.

    Floating arrays that are not already float32/float64 are converted to
    float32; every other array that is not already int32/int64 is converted
    to int32. Arrays that already have one of those dtypes are returned
    unchanged (same object).

    The previous version called ``t.float()`` / ``t.int()``, which are torch
    Tensor methods and do not exist on NumPy arrays, so any input that
    actually needed upcasting raised ``AttributeError``.

    Args:
        t: NumPy array (or array-like).

    Returns:
        The input itself, or an upcast copy of it.
    """
    t = np.asarray(t)
    if np.issubdtype(t.dtype, np.floating):
        return t if t.dtype in (np.float32, np.float64) else t.astype(np.float32)
    return t if t.dtype in (np.int32, np.int64) else t.astype(np.int32)

def box_area(boxes):
    """Compute the area of each box.

    Args:
        boxes: 2-D array whose first four columns are ``x1, y1, x2, y2``.

    Returns:
        1-D array holding ``(x2 - x1) * (y2 - y1)`` for every row, computed
        on the ``upcast`` version of the input.
    """
    boxes = upcast(boxes)
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights

def box_inter_union(boxes1, boxes2):
    """Pairwise intersection and union areas between two sets of boxes.

    Args:
        boxes1: array of shape [N, 4] in ``x1, y1, x2, y2`` order.
        boxes2: array of shape [M, 4] in ``x1, y1, x2, y2`` order.

    Returns:
        Tuple ``(inter, union)`` of [N, M] arrays. Entry ``[i, j]`` refers to
        ``boxes1[i]`` against ``boxes2[j]``.
    """
    areas_first = box_area(boxes1)
    areas_second = box_area(boxes2)

    # Broadcasting boxes1 over a new middle axis yields every (i, j) pair.
    top_left = np.maximum(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    bottom_right = np.minimum(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Negative extents mean the pair does not overlap; clamp them to zero.
    overlap = upcast(bottom_right - top_left).clip(min=0)  # [N,M,2]
    inter = overlap[..., 0] * overlap[..., 1]  # [N,M]

    union = areas_first[:, None] + areas_second - inter
    return inter, union

def box_iou(boxes1, boxes2, iou_threshold=0.5):
    """Flag box pairs whose intersection-over-union exceeds a threshold.

    Despite its name, this returns a boolean mask rather than IoU values.

    Args:
        boxes1: array of shape [N, 4] in ``x1, y1, x2, y2`` order.
        boxes2: array of shape [M, 4] in ``x1, y1, x2, y2`` order.
        iou_threshold: a pair is flagged when its IoU is strictly greater
            than this value. Defaults to 0.5, the previously hard-coded value.

    Returns:
        Boolean array of shape [N, M].
    """
    inter, union = box_inter_union(boxes1, boxes2)
    # Two degenerate (zero-area) boxes give 0/0 -> NaN. NaN compares as False,
    # which is the wanted "no match", so only the RuntimeWarning is suppressed.
    with np.errstate(divide="ignore", invalid="ignore"):
        iou = inter / union
    return iou > iou_threshold

def clip_boxes_to_box(boxes,narrow_region):
    """Clamp box coordinates so that every box lies within ``narrow_region``.

    Args:
        boxes: array of shape [..., 4] in ``(x1, y1, x2, y2)`` order. The
            even positions of the last axis are treated as x values and the
            odd positions as y values.
        narrow_region: exactly four values ``(topx, topy, botx, boty)``.
            x values are limited to ``[topx, botx]`` and y values to
            ``[topy, boty]``.

    Returns:
        A new array with the same shape as ``boxes``; the input is not
        modified.
    """
    x_low, y_low, x_high, y_high = narrow_region

    xs = np.clip(boxes[..., 0::2], x_low, x_high)
    ys = np.clip(boxes[..., 1::2], y_low, y_high)

    # Stacking on a new trailing axis and flattening it again restores the
    # interleaved x1, y1, x2, y2 layout.
    interleaved = np.stack((xs, ys), axis=boxes.ndim)
    return interleaved.reshape(boxes.shape)

In [25]:
# det = reversed(wide_bbox_data)
# narrow_det = reversed(narrow_bbox_data)
def concatenate_bboxes(narrow_bbox_data,wide_bbox_data,narrow_region):
    """Merge the wide-camera and narrow-camera boxes of one frame.

    When both inputs contain boxes:
      1. wide boxes that ``check_position`` reports as inside
         ``narrow_region`` are dropped;
      2. the remaining wide boxes are clipped to the region, for the overlap
         test only;
      3. narrow boxes flagged by ``box_iou`` against any clipped wide box are
         dropped;
      4. the unclipped remaining wide boxes followed by the remaining narrow
         boxes are returned.

    Args:
        narrow_bbox_data: array of rows ``[xmin, ymin, xmax, ymax, class]``.
        wide_bbox_data: array of rows ``[xmin, ymin, xmax, ymax, class]``.
        narrow_region: four values ``(topx, topy, botx, boty)``.

    Returns:
        The merged array. If only one input has boxes, that input itself is
        returned. If neither has boxes, a message is printed and None is
        returned.
    """
    has_narrow = len(narrow_bbox_data) > 0
    has_wide = len(wide_bbox_data) > 0

    if not has_wide:
        if has_narrow:
            return narrow_bbox_data
        print("no boxes in both narrow and wide file")
        return None
    if not has_narrow:
        return wide_bbox_data

    # Boolean indexing copies, so the caller's arrays are never modified.
    outside_region = np.logical_not(check_position(wide_bbox_data[:, :4], narrow_region))
    wide_kept = wide_bbox_data[outside_region]  ## remove all internal bboxes

    # Clip a scratch copy; the unclipped boxes are the ones that get returned.
    wide_clipped = np.copy(wide_kept)
    wide_clipped[:, :4] = clip_boxes_to_box(wide_clipped[:, :4], narrow_region)

    overlaps = box_iou(wide_clipped[:, :4], narrow_bbox_data[:, :4])
    narrow_unmatched = narrow_bbox_data[np.all(np.logical_not(overlaps), 0)]

    return np.concatenate((wide_kept, narrow_unmatched), 0)

### torch concatenation

In [26]:
# def torch_check_position(boxes, narrow_region):
        
#         [topx,topy,botx,boty] = narrow_region
#         keep = (boxes[:, 0]>topx) & (boxes[:, 1]>topy) & (boxes[:, 2]<botx) & (boxes[:, 3]<boty)

#         return keep

# def torch_upcast(t):
#         if t.is_floating_point():
#             return t if t.dtype in (torch.float32, torch.float64) else t.float()
#         else:
#             return t if t.dtype in (torch.int32, torch.int64) else t.int()

# def torch_box_area(boxes):
#         boxes = torch_upcast(boxes)
#         return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

# def torch_box_inter_union(boxes1, boxes2):
#         area1 = torch_box_area(boxes1)
#         area2 = torch_box_area(boxes2)

#         lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
#         rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

#         wh = torch_upcast(rb - lt).clamp(min=0)  # [N,M,2]
#         inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

#         union = area1[:, None] + area2 - inter

#         return inter, union

# def torch_box_iou(boxes1, boxes2):

#         inter, union = torch_box_inter_union(boxes1, boxes2)
#         iou = (inter / union)>0.5
#         return iou

# def torch_clip_boxes_to_box(boxes):
#         """
#         Clip boxes so that they lie inside an image of size `size`.

#         Args:
#             boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format
#                 with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
#             size (Tuple[height, width]): size of the image

#         Returns:
#             Tensor[N, 4]: clipped boxes
#         """
#         dim = boxes.dim()
#         boxes_x = boxes[..., 0::2]
#         boxes_y = boxes[..., 1::2]

#         boxes_x = boxes_x.clamp(min=707, max=1160)
#         boxes_y = boxes_y.clamp(min=387, max=654)

#         clipped_boxes = torch.stack((boxes_x, boxes_y), dim=dim)
#         return clipped_boxes.reshape(boxes.shape)


In [27]:
# def torch_concatenate_bboxes(narrow_bbox_data,wide_bbox_data,narrow_region):

#         if (len(narrow_bbox_data)>0 and (len(wide_bbox_data)>0)):
#                 # rev_wide_det = reversed(det)
#                 keep = torch.logical_not(torch_check_position(wide_bbox_data[:,:4],narrow_region))
#                 wide_bbox_data = wide_bbox_data[keep]
#                 wide_bbox_data_old = torch.clone(wide_bbox_data)
#                 wide_bbox_data[:,:4] = torch_clip_boxes_to_box(wide_bbox_data[:,:4])
#                 # rev_narrow_det = reversed(self.narrow_det)
#                 q = torch_box_iou(wide_bbox_data[:,:4], narrow_bbox_data[:,:4])
#                 narrow_bbox_data = narrow_bbox_data[torch.all(torch.logical_not(q),0)]
#                 concat_bbox_data = torch.cat((wide_bbox_data_old, narrow_bbox_data), 0)

#         elif (len(wide_bbox_data)>0):
#                 concat_bbox_data = wide_bbox_data
#         elif (len(narrow_bbox_data)>0):
#                 concat_bbox_data = narrow_bbox_data
#         else:
#                 concat_bbox_data = None
#                 print("no boxes in both narrow and wide file")
#         return concat_bbox_data

### narrow bbox transformation

In [28]:
def transform_narrow_bbox(bbox_coord_vect,camera_mtx_narrow,dist_coff_narrow,homography_matrix,image_size=(1920,1080)):
    """Map box corners through undistortion and a homography.

    Every row of ``bbox_coord_vect`` is split into (x, y) points. The points
    are undistorted with ``cv2.undistortPoints``, re-projected with the
    optimal new camera matrix, and then sent through
    ``cv2.perspectiveTransform`` with ``homography_matrix``.

    Args:
        bbox_coord_vect: array of rows ``[xmin, ymin, xmax, ymax]``.
        camera_mtx_narrow: camera matrix handed to OpenCV.
        dist_coff_narrow: distortion coefficients handed to OpenCV.
        homography_matrix: 3x3 matrix handed to ``cv2.perspectiveTransform``.
        image_size: ``(width, height)`` used for the optimal new camera
            matrix. Defaults to the previously hard-coded ``(1920, 1080)``.

    Returns:
        float32 array with one transformed row per input row. Empty input
        yields an empty ``(0, 4)`` array without calling OpenCV.
    """
    if len(bbox_coord_vect) == 0:
        return np.empty((0, 4), dtype=np.float32)

    image_size = tuple(image_size)
    # Depends only on the calibration, so compute it once instead of per box.
    newcameramtx_narrow, _ = cv2.getOptimalNewCameraMatrix(
        camera_mtx_narrow, dist_coff_narrow, image_size, 1, image_size
    )

    narrow_transformed_coord_vect = []
    for bbox_coords in bbox_coord_vect:
        corner_points = bbox_coords.reshape(-1, 2)

        ## undistort
        undistorted = cv2.undistortPoints(
            corner_points, camera_mtx_narrow, dist_coff_narrow, P=newcameramtx_narrow
        )

        ## homography transform (perspectiveTransform is given float32 points shaped (-1, 1, 2))
        undistorted = np.float32(undistorted).reshape(-1, 1, 2)
        transformed = cv2.perspectiveTransform(undistorted, homography_matrix)
        narrow_transformed_coord_vect.append(transformed.reshape(-1))

    return np.array(narrow_transformed_coord_vect)



In [29]:
def find_narrow_region(camera_mtx_narrow,dist_coff_narrow,homography_matrix,image_size=(1920,1080)):
    """Project the corners of the narrow-camera frame through the homography.

    The points ``(0, 0)`` and ``(width, height)`` are undistorted,
    re-projected with the optimal new camera matrix, and mapped with
    ``homography_matrix``.

    Args:
        camera_mtx_narrow: camera matrix handed to OpenCV.
        dist_coff_narrow: distortion coefficients handed to OpenCV.
        homography_matrix: 3x3 matrix handed to ``cv2.perspectiveTransform``.
        image_size: ``(width, height)`` of the narrow frame. Defaults to the
            previously hard-coded ``(1920, 1080)``; it now also defines the
            second corner, so the two values cannot drift apart.

    Returns:
        Integer array ``[topx, topy, botx, boty]``, truncated toward zero by
        ``astype(int)``. The result is also printed.

    NOTE(review): only two opposite corners are transformed, so the returned
    rectangle is exact only if the mapped frame stays axis-aligned. Confirm
    this holds for the homography in use.
    """
    w, h = image_size
    region = np.array([[0, 0], [w, h]], dtype=np.float64)

    newcameramtx_narrow, _ = cv2.getOptimalNewCameraMatrix(
        camera_mtx_narrow, dist_coff_narrow, (w, h), 1, (w, h)
    )
    undistorted_region = cv2.undistortPoints(
        region, camera_mtx_narrow, dist_coff_narrow, P=newcameramtx_narrow
    )
    undistorted_region = np.float32(undistorted_region).reshape(-1, 1, 2)

    narrow_transformed_region = cv2.perspectiveTransform(undistorted_region, homography_matrix)
    narrow_transformed_region = narrow_transformed_region.reshape(-1).astype(int)
    print("narrow_region on wide frame:",narrow_transformed_region)
    return narrow_transformed_region


In [30]:
def formatter(elem):
    """Serialize an Element as an indented XML string.

    The element is dumped to UTF-8 bytes and re-parsed with ``minidom`` so
    its pretty-printer can be used; nesting levels are indented by four
    spaces.
    """
    return minidom.parseString(ET.tostring(elem, 'utf-8')).toprettyxml(indent=" " * 4)

def create_root(file_data, width, height):
    """Build the ``<annotation>`` root element of an output file.

    Args:
        file_data: mapping with the keys ``"filename"`` and ``"folder"``.
        width: image width written to ``size/width``.
        height: image height written to ``size/height``.

    Returns:
        The new root element holding ``filename``, ``folder`` and a ``size``
        element with ``width``, ``height`` and a fixed ``depth`` of 3.
    """
    header = [(tag, "{}".format(file_data[tag])) for tag in ("filename", "folder")]

    root = ET.Element("annotation")
    for tag, text in header:
        ET.SubElement(root, tag).text = text

    size = ET.SubElement(root, "size")
    for tag, text in (("width", str(width)), ("height", str(height)), ("depth", "3")):
        ET.SubElement(size, tag).text = text
    return root


def create_object_annotation(root, voc_labels):
    """Append one ``<object>`` element per label to ``root``.

    Args:
        root: element that receives the new ``<object>`` children.
        voc_labels: iterable of rows ``[xmin, ymin, xmax, ymax, class]``,
            where ``class`` is an index into the module-level ``class_names``.

    Returns:
        The same ``root`` element, modified in place.
    """
    coord_tags = ("xmin", "ymin", "xmax", "ymax")
    for label in voc_labels:  ## [xmin,ymin,xmax,ymax,class]
        obj = ET.SubElement(root, "object")
        ET.SubElement(obj, "name").text = class_names[int(label[4])]
        bndbox = ET.SubElement(obj, "bndbox")
        # zip stops after the four coordinate tags, so the class entry is skipped.
        for tag, value in zip(coord_tags, label):
            ET.SubElement(bndbox, tag).text = str(value)
    return root

def create_xml_file(DESTINATION_DIR, file_prefix, width, height, voc_labels,file_data):
    """Write one annotation file named ``<file_prefix>.xml``.

    Args:
        DESTINATION_DIR: output folder; it is created if it does not exist.
        file_prefix: file name without the ``.xml`` extension.
        width: image width stored in the annotation.
        height: image height stored in the annotation.
        voc_labels: iterable of rows ``[xmin, ymin, xmax, ymax, class]``.
        file_data: mapping with the keys ``"filename"`` and ``"folder"``.
    """
    root = create_root(file_data, width, height)
    root = create_object_annotation(root, voc_labels)

    if DESTINATION_DIR:
        # A missing output folder used to surface as FileNotFoundError.
        os.makedirs(DESTINATION_DIR, exist_ok=True)

    # The generated XML declaration names no encoding, which means UTF-8 to
    # XML parsers, so write UTF-8 explicitly instead of the platform default.
    # The "with" block closes the file; no explicit close() is needed.
    with open("{}/{}.xml".format(DESTINATION_DIR, file_prefix), "w", encoding="utf-8") as f:
        f.write(formatter(root))

### numpy main

In [31]:
# Calibration data. The camera matrix is read as 3 rows after skipping the
# first 14 lines of the details file, and the distortion coefficients as
# 1 row after skipping the first 19 lines.
homography_matrix = np.loadtxt(homography_matrix_file, usecols=range(3))
dist_coff_narrow = np.loadtxt(narrow_cam_details_file, dtype=np.float32, skiprows=19,max_rows=1, delimiter=' ')
camera_mtx_narrow = np.loadtxt(narrow_cam_details_file, dtype=np.float32, skiprows=14, max_rows=3, delimiter=' ')

# narrow_region = [707, 387,1160, 654]
narrow_region = find_narrow_region(camera_mtx_narrow,dist_coff_narrow,homography_matrix)

# zip() stops at the shorter list, so make a count mismatch visible.
if len(wide_cam_files) != len(narrow_cam_files):
    print("warning: wide/narrow file count mismatch:",len(wide_cam_files),"vs",len(narrow_cam_files))

count = 0
for (wide_cam_xml,narrow_cam_xml) in zip(wide_cam_files,narrow_cam_files):
    file_data = get_xml_file_details(wide_cam_xml)
    wide_bbox_data = get_bbox_data(wide_cam_xml)
    narrow_bbox_data = get_bbox_data(narrow_cam_xml)

    # Replace the narrow-camera coordinates with their transformed values
    # before merging them with the wide-camera boxes.
    if (len(narrow_bbox_data)>0):
        narrow_bbox_data[:,:4] = transform_narrow_bbox(narrow_bbox_data[:,:4],camera_mtx_narrow,dist_coff_narrow,homography_matrix)

    concat_bbox_data = concatenate_bboxes(narrow_bbox_data,wide_bbox_data,narrow_region)
    if concat_bbox_data is None:
        # No boxes in either file: len(None) used to raise TypeError below.
        # Write an annotation without objects instead.
        concat_bbox_data = np.empty((0, 5))

    # File name without folder and extension. Same result as the former
    # fixed-width slice [-15:-4] for 11-character names, and correct for
    # any other length.
    output_file_prefix = os.path.splitext(os.path.basename(wide_cam_xml))[0]
    print("name",output_file_prefix," wide_count:",len(wide_bbox_data), " narrow_count: ",len(narrow_bbox_data), "concat_count: ",len(concat_bbox_data))
    create_xml_file(DESTINATION_DIR=numpy_output_xml_folder, file_prefix=output_file_prefix, width=1920, height=1080, voc_labels=concat_bbox_data,file_data=file_data)
    count += 1
    # Debug aid: uncomment to stop after a few files.
    # if (count>5):
    #     break

narrow_region on wide frame: [ 704  402 1162  658]
name wide_t1_001  wide_count: 0  narrow_count:  3 concat_count:  3
name wide_t1_002  wide_count: 0  narrow_count:  3 concat_count:  3
name wide_t1_003  wide_count: 0  narrow_count:  3 concat_count:  3
name wide_t1_004  wide_count: 0  narrow_count:  4 concat_count:  4
name wide_t1_005  wide_count: 0  narrow_count:  4 concat_count:  4
name wide_t1_006  wide_count: 0  narrow_count:  4 concat_count:  4
name wide_t1_007  wide_count: 4  narrow_count:  0 concat_count:  4
name wide_t1_008  wide_count: 4  narrow_count:  0 concat_count:  4
name wide_t1_009  wide_count: 0  narrow_count:  3 concat_count:  3
name wide_t1_010  wide_count: 0  narrow_count:  4 concat_count:  4
name wide_t1_011  wide_count: 0  narrow_count:  3 concat_count:  3
name wide_t1_012  wide_count: 0  narrow_count:  4 concat_count:  4
name wide_t1_013  wide_count: 0  narrow_count:  4 concat_count:  4
name wide_t1_014  wide_count: 0  narrow_count:  4 concat_count:  4
name wide_t

### torch main

In [59]:
# homography_matrix = np.loadtxt(homography_matrix_file, usecols=range(3))
# dist_coff_narrow = np.loadtxt(narrow_cam_details_file, dtype=np.float32, skiprows=19,max_rows=1, delimiter=' ')
# camera_mtx_narrow = np.loadtxt(narrow_cam_details_file, dtype=np.float32, skiprows=14, max_rows=3, delimiter=' ')

# for (wide_cam_xml,narrow_cam_xml) in zip(wide_cam_files,narrow_cam_files):
#     file_data = get_xml_file_details(wide_cam_xml)
#     wide_bbox_data = get_bbox_data(wide_cam_xml)
#     narrow_bbox_data = get_bbox_data(narrow_cam_xml)
#     if (len(narrow_bbox_data)>0):
#         narrow_bbox_data[:,:4] = transform_narrow_bbox(narrow_bbox_data[:,:4],camera_mtx_narrow,dist_coff_narrow,homography_matrix)
#     # print("wide",wide_bbox_data.shape)
#     # print("narrow",narrow_bbox_data.shape)
#     wide_bbox_data = torch.from_numpy(wide_bbox_data)
#     narrow_bbox_data = torch.from_numpy(narrow_bbox_data)
#     concat_bbox_data = torch_concatenate_bboxes(narrow_bbox_data,wide_bbox_data,narrow_region)
#     concat_bbox_data = concat_bbox_data.cpu().detach().numpy()
#     output_file_prefix = narrow_cam_xml[-10:-4]
#     print("name",output_file_prefix," wide_count:",len(wide_bbox_data), " narrow_count: ",len(narrow_bbox_data), "concat_count: ",len(concat_bbox_data))
#     create_xml_file(DESTINATION_DIR=torch_output_xml_folder, file_prefix=output_file_prefix, width=1920, height=1080, voc_labels=concat_bbox_data,file_data=file_data)

#     # break

name t1_001  wide_count: 0  narrow_count:  3 concat_count:  3
name t1_002  wide_count: 0  narrow_count:  3 concat_count:  3
name t1_003  wide_count: 0  narrow_count:  3 concat_count:  3
name t1_004  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_005  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_006  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_007  wide_count: 4  narrow_count:  0 concat_count:  4
name t1_008  wide_count: 4  narrow_count:  0 concat_count:  4
name t1_009  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_010  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_011  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_012  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_013  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_014  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_015  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_016  wide_count: 0  narrow_count:  4 concat_count:  4
name t1_