In [1]:
%run Topo_treatment.ipynb
%run Utility_general.ipynb
import glob
import struct
import os
from pathlib import Path
from PIL import Image as PImg

# Byte width of one stored value for each supported struct format code:
# 'f' -> 4 bytes, 'd' -> 8 bytes.
dlength_dict = {'f': 4, 'd': 8}

class FileIO(object):
    
    @staticmethod
    def read_binary(path, shape, dtype='d'):
        file_in  = open(path, "rb")
        data_arr = struct.unpack(str(np.prod(shape))+dtype, file_in.read(dlength_dict[dtype]*np.prod(shape)))
        data_arr = np.reshape(data_arr, shape)
        file_in.close()
        if dtype=='f':
            data_arr = np.float32(data_arr)
        return data_arr
    
    @staticmethod
    def write_binary(path, data, shape, dtype='d'):
        '''
        data has to be flattened.
        '''
        file_out = open(path, "wb")
        file_out.write(struct.pack(str(np.prod(shape))+dtype, *(data)))
        file_out.close()
        
    @staticmethod
    def read_matrix_binary(path, dtype='d'):
        file_in = open(path, "rb")
        dims    = struct.unpack('I', file_in.read(4))[0]
        if dims == 0:
            return None
        shape   = struct.unpack(str(dims)+'I', file_in.read(4 * dims))
        mat     = struct.unpack(str(np.prod(shape))+dtype, file_in.read(dlength_dict[dtype]*np.prod(shape)))
        mat     = np.reshape(mat, shape)
        return mat
        
    @staticmethod
    def write_matrix_binary(path, mat, dtype='d'):
        dims  = len(mat.shape)
        shape = mat.shape
        file_out = open(path, "wb")
        file_out.write(struct.pack('I', dims))
        file_out.write(struct.pack(str(dims)+'I', *(shape)))
        file_out.write(struct.pack(str(np.prod(shape))+dtype, *(mat.flatten())))
        file_out.close()
    
    @staticmethod
    def compute_pim_save(dir_in, dir_out, ext_in, params, reg=0, writeout=True, give_status=False):
        '''
        Run Edges_.persimg_batch on every "*.<ext_in>" file in dir_in and
        optionally store each result as "<dir_out>/<file name>.dat".

        dir_in:      folder holding the input images.
        dir_out:     folder for the outputs; created when writeout is True.
        ext_in:      input file extension, without the leading dot.
        params:      forwarded to the Edges_ constructor.
        reg:         when non-zero, each result is rescaled as
                     (pim - reg) / reg; 0 (the default) leaves it unscaled.
        writeout:    when True, write each result with FileIO.write_binary
                     (headerless, default 'd' format).
        give_status: when True, print the overall max and min of all results.

        NOTE: the working directory is changed to dir_in and left there, so a
        relative dir_out is resolved against dir_in.
        '''
        # Resolve before chdir: a relative dir_in would otherwise be joined
        # onto itself once the working directory has moved into it.
        dir_in = os.path.abspath(dir_in)
        os.chdir(dir_in)
        file_names = glob.glob("*." + ext_in)
        address_book_in  = [os.path.join(dir_in, name) for name in file_names]
        address_book_out = [os.path.join(dir_out, name + ".dat") for name in file_names]
        if writeout:
            Path(dir_out).mkdir(parents=True, exist_ok=True)

        max_rec = []
        min_rec = []
        et      = Edges_(params, False)
        num_    = len(address_book_in)
        for i, (path_in, path_out) in enumerate(zip(address_book_in, address_book_out)):
            img = np.expand_dims(skio.imread(path_in), 0)
            pim = np.squeeze(et.persimg_batch(img, binarize=True))
            # Dividing by the default reg=0 would fill the result with inf/nan.
            if np.any(reg):
                pim = (pim - reg) / reg
            if writeout:
                FileIO.write_binary(path_out, pim.flatten(), pim.shape)
            if i % 49 == 0:
                print("%d/%d" %(i+1, num_))
            if give_status:
                max_rec.append(np.amax(pim))
                min_rec.append(np.amin(pim))

        # np.amax([]) raises, so stay silent when no file matched.
        if give_status and max_rec:
            print(np.amax(max_rec), np.amin(min_rec))
            
    @staticmethod
    def compute_pd_save(dir_in, dir_out, ext_in, params, dim, reg):
        '''
        Run Edges_.pd_batch on every "*.<ext_in>" file in dir_in and store each
        result with FileIO.write_matrix_binary as "<dir_out>/<file name>.dat".

        dir_in:  folder holding the input images.
        dir_out: folder for the outputs; created if missing.
        ext_in:  input file extension, without the leading dot.
        params:  forwarded to the Edges_ constructor.
        dim:     forwarded to pd_batch.
        reg:     if True, images will be regularized
                 (Utility_general.normalize_data_ is applied with 127.5).

        NOTE: the working directory is changed to dir_in and left there, so a
        relative dir_out is resolved against dir_in.
        '''
        # Resolve before chdir: a relative dir_in would otherwise be joined
        # onto itself once the working directory has moved into it.
        dir_in = os.path.abspath(dir_in)
        os.chdir(dir_in)
        file_names = glob.glob("*." + ext_in)
        address_book_in  = [os.path.join(dir_in, name) for name in file_names]
        address_book_out = [os.path.join(dir_out, name + ".dat") for name in file_names]
        Path(dir_out).mkdir(parents=True, exist_ok=True)

        et   = Edges_(params, False)
        num_ = len(address_book_in)
        for i, (path_in, path_out) in enumerate(zip(address_book_in, address_book_out)):
            img = skio.imread(path_in)
            if reg:
                img = Utility_general.normalize_data_([img], 127.5)[0]
            img = np.expand_dims(img, 0)
            # Only the fifth return value of pd_batch is kept.
            _, _, _, _, pd = et.pd_batch(img, dim, debug=False, old_form=True, binarize=True, disttrfm=True)
            pd = np.squeeze(np.asarray(pd))
            FileIO.write_matrix_binary(path_out, pd)
            if i % 49 == 0:
                print("%d/%d" %(i+1, num_))
                
    @staticmethod
    def read_pd_subset(dir_in, ext_in, percentage, shuffle=True, dummyifempty=False):
        '''
        Note, there are pds with one point or zero point from time to time.
        if dummyifempty is True, EMPTY PDs will be inserted [0., .5].
        '''
        address_book_in = []
        filename_book   = []
        os.chdir(dir_in)
        for file in glob.glob("*."+ext_in):
            address_book_in.append(os.path.join(dir_in, file))
            filename_book.append(file[:-(len(ext_in)+1)])
        file_num = len(address_book_in)
        read_num = int(np.floor(file_num * percentage))
        if shuffle:
            ind_list = random.sample(range(file_num), read_num)
        else:
            ind_list = np.arange(read_num)
        
        pd_set = []
        f_set  = []
        for i in range(read_num):
            pd_ = FileIO.read_matrix_binary(address_book_in[ind_list[i]])
            if pd_.any() != None:
                if len(pd_.shape) == 1:
                    pd_ = np.expand_dims(pd_, axis=0)
                if pd_.shape[1] == 0 and dummyifempty:
                    pd_ = np.expand_dims(np.array([0., .5]), axis=0)
                pd_set.append(pd_)
                f_set.append(filename_book[ind_list[i]])
        return pd_set, f_set
    
    @staticmethod
    def read_pd_pathlist(pd_pathlist):
        '''
        Read persistence diagrams from the given path list.
        Each path should be a full path.
        '''
        file_num = len(pd_pathlist)
        pd_set   = [None] * file_num
        for i in range(file_num):
            pd_set[i] = FileIO.read_matrix_binary(pd_pathlist[i])
        return pd_set
    
    @staticmethod
    def save_image_batch(data, folder, prefix, scalor, number_offset, num=-1):
        '''
        data: the image data to be saved
        folder: the path to the folder where the images are saved
        prefix: prefix_00000.png
        scalor: data * scalor + scalor
        number_offset: the index of the image to start saving from (included)
        num: number of images to save out, -1 means all
        '''
        data = np.squeeze(data)
        assert(len(data.shape) == 3 or len(data.shape) == 4)
        batch_size  = data.shape[0]
        image_shape = data.shape[-2:]
        
        if num == -1 or num >= batch_size:
            out_num = batch_size
        else:
            out_num = num
            
        for idx in range(out_num):
            out_name = ''
            for _ in range(5 - len(str(idx + number_offset))):
                out_name += '0'
            out_name = folder + "/" + prefix + "_" + out_name + str(idx + number_offset) + ".png"           
            dat_ = np.reshape(data[idx], image_shape)
            dat_ = dat_ * scalor + scalor
            dat_ = dat_.astype(np.uint8)
            cv2.imwrite(out_name, dat_)
        print("Data write out complete.")
        
    @staticmethod
    def make_mask_unet(source_folder, target_folder, source_ext, target_ext):
        '''
        The mask required by unet is 0 or 1, convert 0 - 255 mask to 0 or 1.
        '''
        Path(target_folder).mkdir(parents=True, exist_ok=True)
        os.chdir(source_folder)
        for file in glob.glob("*." + source_ext):
            name = os.path.join(source_folder, file)
            img  = PImg.open(name)
            img  = np.array(img)
            img[img <= 127.5] = 0
            img[img  > 127.5] = 1
            
            out_name = file[:-len(source_ext)-1] + "_mask." + target_ext
            out_name = os.path.join(target_folder, out_name)
            res = PImg.fromarray(img.astype(np.uint8))
            res.save(out_name)
    
    @staticmethod
    def convert_google_maps_2_binary(source_folder, source_ext, target_folder, target_ext, threshold, radius, iter_num):
        '''
        This function converts RGB google maps to grayscale maps.
        threshold, integer, blow which pixels considered background (252 usually).
        radius, integer, kernel radius (2 usually).
        iter_num, number of iterations for the dilate and erode (5 usually).
        '''
        Path(target_folder).mkdir(parents=True, exist_ok=True)
        os.chdir(source_folder)
        kernel = np.ones((radius, radius), np.uint8)
        for file in glob.glob("*." + source_ext):
            name = os.path.join(source_folder, file)
            img  = cv2.imread(name, cv2.IMREAD_GRAYSCALE)
            img[img < threshold] = 0
            img = cv2.dilate(img, kernel, iterations=iter_num)
            img = cv2.erode(img, kernel, iterations=iter_num)
            img = 255 - img

            out_name = file[:-len(source_ext)-1] + "." + target_ext
            out_name = os.path.join(target_folder, out_name)
            cv2.imwrite(out_name, img)
            
    @staticmethod
    def convert_facades_2_binary(source_folder, source_ext, target_folder, target_ext,
        red_thresh, gray_thresh, radius, iter_num, resize_height, resize_width, border_thickness=0):
        '''
        This function converts facades to grayscale maps.
        red_thresh, integer, controls red channel below which set to 0 (150)
        gray_thresh, integer, watershed for 0 or 255 (80)
        radius, integer, kernel radius (2 usually).
        iter_num, number of iterations for the dilate and erode (5 usually).
        '''
        Path(target_folder).mkdir(parents=True, exist_ok=True)
        os.chdir(source_folder)
        kernel = np.ones((radius, radius), np.uint8)
        for file in glob.glob("*." + source_ext):
            name = os.path.join(source_folder, file)
            img  = cv2.imread(name, cv2.IMREAD_GRAYSCALE)
            imc  = cv2.imread(name, cv2.IMREAD_COLOR)
            
            c_r  = imc[:,:,2]
            c_r[c_r < red_thresh] = 0
            img  = img + c_r
            img[img < gray_thresh] = 0
            img[img >= gray_thresh] = 255
            bnd, hcy, red = Utility_topo.compute_bnd_red_cv(img, 0, 255, 8)
            
            h = img.shape[0]
            w = img.shape[1]
            cur = np.ones((h, w)) * 255
            for i in range(len(bnd)):
                for j in range(len(bnd[i])):
                    cur[bnd[i][j][0][1], bnd[i][j][0][0]] = 0
            
            cur = cv2.erode(cur, kernel, iterations=iter_num)
            cur = cv2.resize(cur,(resize_height,resize_width))
            cur[cur < 255] = 0
            if border_thickness > 0:
                cv2.rectangle(cur, (0, 0), (cur.shape[0]-1, cur.shape[1]-1), (0), border_thickness)

            out_name = file[:-len(source_ext)-1] + "." + target_ext
            out_name = os.path.join(target_folder, out_name)
            cv2.imwrite(out_name, cur)