In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import xml.etree.ElementTree as ET
import copy
import json

In [2]:
### parse xml files of labels
def xml_parse(directName, room_dict):
    """Read every ``.xml`` label file in ``directName`` into a dictionary.

    Each file is expected to contain ``<object>`` elements holding a
    ``<name>`` (the room type) and a ``<bndbox>`` with ``xmin``, ``ymin``,
    ``xmax`` and ``ymax`` children.

    Parameters
    ----------
    directName : str
        Folder that is scanned (non-recursively) for files ending in ``.xml``.
    room_dict : dict
        Mapping from room-type name to integer class id.
        * Non-empty: treated as fixed and never modified; a room type that is
          not in it is given the id ``len(room_dict)``.
        * Empty: filled in place, ids being handed out in order of first
          appearance (files are visited in sorted name order so the result is
          reproducible).

    Returns
    -------
    (script_dict, room_dict)
        ``script_dict`` maps the file name without extension to
        ``{'img_name': '<name>.png', 'points': [[xmin, ymin, xmax, ymax, class_id], ...]}``.
        A field that is absent from the XML stays ``None`` in its row.
        ``room_dict`` is the very object that was passed in.
    """
    # An empty mapping means "learn the classes from the data".
    room_dict_fixed = len(room_dict) != 0
    # Position of every bounding-box tag inside a label row.
    coord_slot = {'xmin': 0, 'ymin': 1, 'xmax': 2, 'ymax': 3}
    script_dict = {}   ### record of each figure and each room of it

    # os.listdir gives no ordering guarantee; sorting keeps learned class ids
    # identical from one run (or machine) to the next.
    for file in sorted(os.listdir(directName)):
        if not file.endswith(".xml"):
            continue
        fileNamePre = os.path.splitext(file)[0]
        root = ET.parse(os.path.join(directName, file)).getroot()

        points = []
        for item in root.findall('object'):
            rooms = [None] * 5
            for child in item:
                if child.tag == 'name':
                    room_type = child.text
                    if room_type in room_dict:
                        rooms[4] = room_dict[room_type]
                    else:
                        # Next free id; only recorded when classes are learned.
                        rooms[4] = len(room_dict)
                        if not room_dict_fixed:
                            room_dict[room_type] = rooms[4]
                elif child.tag == 'bndbox':
                    for grandchild in child:
                        slot = coord_slot.get(grandchild.tag)
                        if slot is None:
                            continue
                        try:
                            rooms[slot] = int(grandchild.text)
                        except ValueError:
                            # Coordinates written as floats, e.g. "12.0";
                            # truncate to whole pixels.
                            rooms[slot] = int(float(grandchild.text))
            points.append(rooms)

        script_dict[fileNamePre] = {
            'img_name': fileNamePre + '.png',
            'points': points,
        }
    return script_dict, room_dict

In [4]:
### Crop each figure to the bounding box of all its labels plus a 10% margin,
### rescale the crop to a square, save it together with its 90/180/270-degree rotations,
### and write the corresponding labels to a JSON file.

def _crop_bounds(labels, width, height, margin):
    """Return (left, top, right, bottom) of the label extent grown by ``margin``."""
    if not labels:
        # Nothing to frame: keep the whole picture.
        return 0, 0, width, height
    # Extremes start at the opposite image border, so x is compared against
    # the width and y against the height.
    x_min = min([width] + [min(room[0], room[2]) for room in labels])
    x_max = max([0] + [max(room[0], room[2]) for room in labels])
    y_min = min([height] + [min(room[1], room[3]) for room in labels])
    y_max = max([0] + [max(room[1], room[3]) for room in labels])
    x_offset = int((x_max - x_min) * margin)
    y_offset = int((y_max - y_min) * margin)
    return (max(x_min - x_offset, 0), max(y_min - y_offset, 0),
            min(x_max + x_offset, width), min(y_max + y_offset, height))


def _rotated_boxes(x0, y0, x1, y1, size, label):
    """Return one label row per output variant, keyed by the variant's name suffix."""
    return {
        '': [x0, y0, x1, y1, label],
        '_90': [y0, size - x0, y1, size - x1, label],
        '_180': [size - x0, size - y0, size - x1, size - y1, label],
        '_270': [size - y0, x0, size - y1, x1, label],
    }


def cropNjson(rescale, directName, newDirectName, script_dict, margin=0.1):
    """Crop, rescale and rotate every labelled image and dump the new labels.

    For each entry of ``script_dict`` the image is read as grayscale, cropped
    to the extent of all its labels enlarged by ``margin`` on every side
    (clipped to the image), and resized to ``rescale`` x ``rescale`` pixels.
    That picture and its ``np.rot90`` rotations by 1, 2 and 3 quarter turns are
    written to ``newDirectName``; the matching labels are stored under the keys
    ``name``, ``name_90``, ``name_180`` and ``name_270``.

    Parameters
    ----------
    rescale : int
        Side length of the square output images.
    directName : str
        Folder containing the source images named by ``script_dict[...]['img_name']``.
    newDirectName : str
        Output folder; created if it does not exist.
    script_dict : dict
        ``{name: {'img_name': str, 'points': [[x0, y0, x1, y1, class_id], ...]}}``.
    margin : float, optional
        Fraction of the label extent added on each side of the crop (default 0.1).

    Returns
    -------
    dict
        ``{key: {'img_name': f'{key}_{rescale}.png', 'points': [...]}}``; the
        same content is written to ``data.json`` inside ``newDirectName``.
        Corner order is not re-sorted after a rotation, so in rotated entries
        the first corner is not necessarily the minimum one.

    Raises
    ------
    FileNotFoundError
        If a source image cannot be read.
    ValueError
        If the crop window of an image has no area.
    OSError
        If an output image cannot be written.
    """
    # Create the target folder up front so a missing directory cannot make
    # the image writes fail.
    os.makedirs(newDirectName, exist_ok=True)

    # (key suffix, number of quarter turns handed to np.rot90)
    rotations = (('', 0), ('_90', 1), ('_180', 2), ('_270', 3))
    script_dict_rescale = {}

    for fileName, record in script_dict.items():
        filePath = os.path.join(directName, record['img_name'])
        img = cv2.imread(filePath, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError(f'could not read image: {filePath}')

        labels = record['points']
        height, width = img.shape[:2]   # numpy order: rows (y) first, columns (x) second
        left, top, right, bottom = _crop_bounds(labels, width, height, margin)
        if right <= left or bottom <= top:
            raise ValueError(f'empty crop window for {filePath}: '
                             f'x {left}..{right}, y {top}..{bottom}')

        resized = cv2.resize(img[top:bottom, left:right], (rescale, rescale),
                             interpolation=cv2.INTER_CUBIC)

        # Source pixels per output pixel along each axis.
        scale_x = (right - left) / rescale
        scale_y = (bottom - top) / rescale

        points = {suffix: [] for suffix, _ in rotations}
        for room in labels:
            x0 = int((room[0] - left) / scale_x)
            y0 = int((room[1] - top) / scale_y)
            x1 = int((room[2] - left) / scale_x)
            y1 = int((room[3] - top) / scale_y)
            for suffix, box in _rotated_boxes(x0, y0, x1, y1, rescale, room[4]).items():
                points[suffix].append(box)

        for suffix, turns in rotations:
            key = fileName + suffix
            img_name = key + f'_{rescale}.png'
            script_dict_rescale[key] = {'img_name': img_name, 'points': points[suffix]}
            # np.rot90 returns a strided view; hand OpenCV a contiguous copy.
            rotated = np.ascontiguousarray(np.rot90(resized, turns))
            target = os.path.join(newDirectName, img_name)
            if not cv2.imwrite(target, rotated):
                raise OSError(f'could not write image: {target}')

    with open(os.path.join(newDirectName, 'data.json'), 'w') as outfile:
        json.dump(script_dict_rescale, outfile)
    return script_dict_rescale

In [16]:
# Fixed class ids: every room type is numbered by its position in this list.
room_dict = {
    room_name: class_id
    for class_id, room_name in enumerate(['bathroom', 'bedroom', 'living room', 'stairs'])
}
# Folder holding the source label files and images.
directName = './data_1'

In [17]:
# Parse every XML label file under directName with the fixed class ids from room_dict.
# The second return value is that same mapping (left unchanged because it is non-empty),
# so it is discarded.
script_dict, _ = xml_parse(directName, room_dict)

In [18]:
# Crop and rescale every labelled image to 512x512; the images, their three rotations
# and data.json are written to ./data_512_train.
script_dict_rescale = cropNjson(512, directName, './data_512_train', script_dict)