In [1]:
import os
import json
import numpy as np
import glob
import cv2
import re
import matplotlib.pyplot as plt

from skimage.io import imread, imshow
from matplotlib.image import imsave
from tensorflow import keras

In [2]:
# Raw ADE20K training folders (RGB images and their segmentation maps).
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
PATH_TO_TRAININGSET = "/Users/denyskononenko/Documents/build_proc/images/training/building_facade/"
PATH_TO_TRAININGSET_2 = "/Users/denyskononenko/Documents/build_proc/images/training/building_complex/"
# directory with the processed building masks and initial images
PATH_TO_BUILDINGS_TRAININSET = "/Users/denyskononenko/Documents/build_proc/images/training/building_facade_buildings/"

# file-name templates of the initial rgb image and of its segmented png mask;
# "xxxxxxxx" is the placeholder for the image id
INIT_PATTERN = "ADE_train_xxxxxxxx.jpg"
MASK_PATTERN = "ADE_train_xxxxxxxx_seg.png"

# values of the first two channels that identify each building-like class
BUILDING_PIXEL = [10, 56]
HOUSE_PIXEL = [40, 252]
SKYSCKRAPPER_PIXEL = [90, 119]

image_masks = glob.glob(PATH_TO_TRAININGSET + "*.png")
images = glob.glob(PATH_TO_TRAININGSET + "*.jpg")

# get id of RGB images from training set
# (the dot of ".jpg" is escaped, and files that do not follow the
# "train_<id>.jpg" naming are skipped instead of raising on a failed match)
image_id = [
    match.group(1)
    for match in (re.search(r"train_(.+?)\.jpg", os.path.basename(file)) for file in images)
    if match
]

# get actual ids from the processed training set: one sub-directory per image id.
# The working directory is still switched, as before, in case later cells rely on it.
os.chdir(PATH_TO_BUILDINGS_TRAININSET)
# Only directories are kept, so stray files such as ".DS_Store" are ignored;
# sorting makes the batch composition reproducible between runs.
image_ids = sorted(
    name
    for name in os.listdir(PATH_TO_BUILDINGS_TRAININSET)
    if os.path.isdir(os.path.join(PATH_TO_BUILDINGS_TRAININSET, name))
)




In [4]:
class DataGenerator(keras.utils.Sequence):
    """
    Batch generator of image--mask pairs for a u-net neural network.
    Images are obtained from the ADE20K dataset http://sceneparsing.csail.mit.edu/

    Structure of the raw training dataset:
    image id = xxxxxxxx
    rgb initial image "ADE_train_<id>.jpg"
    segmented image "ADE_train_<id>_seg.png"
    The object class is coded by the first two (r, g) channels of each pixel of
    the segmented image: building [10, 56, *], house [40, 252, *],
    skyscraper [90, 119, *].

    Structure of the processed dataset read by the generator:
    "<path>/<id>/img_<id>.png" and "<path>/<id>/mask_<id>.png"
    """

    def __init__(self, ids, path, batch_size=8, image_size=128):
        """
        @param ids sequence of image ids (names of the per-image sub-directories of path)
        @param path root directory of the processed dataset
        @param batch_size number of image--mask pairs in one batch
        @param image_size side of the square every image and mask is resized to
        """
        # Recent Keras releases ask subclasses to run the base initializer;
        # the call takes no arguments and does not touch the attributes below.
        super().__init__()
        self.ids = ids
        self.path = path
        self.batch_size = batch_size
        self.image_size = image_size
        self.on_epoch_end()

    def _load_image_mask_pair(self, image_id):
        """
        Load image and its mask, resized to (image_size, image_size).
        @param image_id image id from self.ids
        @return (image, mask): the first three channels of the image and the
                first channel of the mask, both divided by 255
        """
        sample_dir = os.path.join(self.path, str(image_id))
        image = DataGenerator.read_img(os.path.join(sample_dir, "img_{}.png".format(image_id)))
        mask = DataGenerator.read_img(os.path.join(sample_dir, "mask_{}.png".format(image_id)))

        image = cv2.resize(image, (self.image_size, self.image_size))
        mask = cv2.resize(mask, (self.image_size, self.image_size))

        # A single-channel mask is read as a 2-D array; give it an explicit
        # channel axis so the slicing below works for it as well.
        if mask.ndim == 2:
            mask = mask[:, :, np.newaxis]

        return image[:, :, :3] / 255.0, mask[:, :, :1] / 255.0

    def __getitem__(self, index):
        """
        Build one batch.
        @param index number of the batch
        @return (images, masks) numpy arrays; the last batch may be shorter than batch_size
        """
        images = []
        masks = []

        batch_indices = self.ids[index * self.batch_size:(index + 1) * self.batch_size]

        for img_id in batch_indices:
            img, mask = self._load_image_mask_pair(img_id)
            images.append(img)
            masks.append(mask)

        return np.array(images), np.array(masks)

    def on_epoch_end(self):
        """Hook called at the end of every epoch; nothing to do here."""
        pass

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.ids) / float(self.batch_size)))

    @staticmethod
    def read_img(img_path):
        """
        Read image from the img_path (URL or file).
        Returns numpy array.
        """
        return imread(img_path)

    @staticmethod
    def make_path_by_id(img_id):
        """
        Generate names of the image and of its segmented variant by id
        @param img_id id of the image
        @return {"init_image": name, "segments": segmented image name}
        """
        # Plain substring replacement: the id is inserted literally, without
        # being interpreted as a regular-expression replacement template.
        image_id = str(img_id)
        return {
            "init_image": INIT_PATTERN.replace("xxxxxxxx", image_id),
            "segments": MASK_PATTERN.replace("xxxxxxxx", image_id),
        }

    @staticmethod
    def show_img_mask(img, mask, image_size=128):
        """
        Display image, mask and their overlay.
        @param img in the form of numpy array
        @param mask in the form of numpy array with image_size * image_size elements
        @param image_size size of image
        """
        mask_2d = np.reshape(mask, (image_size, image_size))

        plt.figure(figsize=(10, 10))
        plt.subplot(131)
        plt.title("Image", fontsize=18)
        plt.imshow(img)

        plt.subplot(132)
        plt.title("Mask", fontsize=18)
        plt.imshow(mask_2d, cmap="gray")

        # show mask and image overlapping
        plt.subplot(133)
        plt.title("Overlapping", fontsize=18)
        plt.imshow(mask_2d, cmap="gray")
        plt.imshow(img, cmap="jet", alpha=0.5)

        # Applied after the subplots exist so that it acts on this figure.
        plt.tight_layout(pad=0.5, w_pad=10.0, h_pad=0.5)  # minimize overlapping of subplots

    @staticmethod
    def display_valid_imgs(img, background, result, image_size=128):
        """
        Display init image, its reference mask and the prediction of NN
        @param img init image in the form of numpy array
        @param background reference mask with image_size * image_size elements
        @param result predicted mask with image_size * image_size elements
        @param image_size size of image
        """
        plt.figure(figsize=(10, 10))

        plt.subplot(131)
        plt.title("init image", fontsize=18)
        plt.imshow(img)

        plt.subplot(132)
        plt.title("Background truth", fontsize=18)
        plt.imshow(np.reshape(background, (image_size, image_size)), cmap="gray")

        # The third panel shows the network output, so it gets its own title.
        plt.subplot(133)
        plt.title("Prediction", fontsize=18)
        plt.imshow(np.reshape(result, (image_size, image_size)), cmap="gray")

    @staticmethod
    def show_imgs(img1, img2, img3, img_id):
        """
        Display three images side by side.
        @param img1 url or numpy array for image #1
        @param img2 url or numpy array for image #2
        @param img3 url or numpy array for image #3
        @param img_id id shown in the first title when numpy arrays are given
        """
        if isinstance(img1, str) and isinstance(img2, str) and isinstance(img3, str):
            # case of link on the image
            imarr_1 = DataGenerator.read_img(img1)
            imarr_2 = DataGenerator.read_img(img2)
            imarr_3 = DataGenerator.read_img(img3)
            name1 = img1.split("/")[-1]
            name2 = img2.split("/")[-1]
            name3 = img3.split("/")[-1]
        else:
            # case of numpy array
            imarr_1 = img1
            imarr_2 = img2
            imarr_3 = img3
            name1 = str(img_id)
            name2 = "Segmented"
            name3 = "Mask"

        plt.figure(figsize=(10, 10))
        plt.subplot(131)
        plt.title("Image 1 \nid: {}".format(name1), fontsize=18)
        plt.imshow(imarr_1)

        plt.subplot(132)
        plt.title("Image 2 \nname: {}".format(name2), fontsize=18)
        plt.imshow(imarr_2)

        plt.subplot(133)
        plt.title("Image 3 \nname: {}".format(name3), fontsize=18)
        plt.imshow(imarr_3, cmap="gray")

        # Applied after the subplots exist so that it acts on this figure.
        plt.tight_layout(pad=0.5, w_pad=10.0, h_pad=0.5)  # minimize overlapping of subplots

    @staticmethod
    def get_mask(img, image_size=128):
        """
        Select the pixels of the building-like classes of a segmented image.
        @param img url of the segmented image or its numpy array (at least two channels)
        @param image_size size of the returned mask
        @return float mask of shape (image_size, image_size): 1.0 where the first
                two channels match the building, house or skyscraper code and 0.0
                elsewhere, before resizing (the resize may blend values at borders)
        """
        segmented = DataGenerator.read_img(img) if isinstance(img, str) else img
        segmented = np.asarray(segmented)

        # The class is compared on the two coding channels directly. Going
        # through a grayscale conversion can give two different colours the same
        # gray level, which would mark foreign pixels or count a pixel twice.
        first_channel = segmented[:, :, 0]
        second_channel = segmented[:, :, 1]
        is_building = np.zeros(segmented.shape[:2], dtype=bool)
        for class_first, class_second in (BUILDING_PIXEL, HOUSE_PIXEL, SKYSCKRAPPER_PIXEL):
            is_building |= (first_channel == class_first) & (second_channel == class_second)

        # An image without any building pixel simply yields an all-zero mask.
        mask = is_building.astype(np.float64)
        return cv2.resize(mask, (image_size, image_size))

    @staticmethod
    def make_img_mask_pair(id_arr, image_size=128):
        """
        Build the processed dataset: for every id write the resized image and its
        building mask to PATH_TO_BUILDINGS_TRAININSET/<id>/.
        @param id_arr array of images id
        @param image_size size of image
        """
        for imd in id_arr:
            names = DataGenerator.make_path_by_id(imd)

            image = DataGenerator.read_img(PATH_TO_TRAININGSET + names["init_image"])
            if image.shape != (image_size, image_size, 3):
                image = cv2.resize(image, (image_size, image_size))

            segmented_image = DataGenerator.read_img(PATH_TO_TRAININGSET + names["segments"])
            # The mask is produced with the same size as the image.
            mask = DataGenerator.get_mask(segmented_image, image_size)

            # save images and masks in separate directory; an already existing
            # directory is reused so the step can be run again
            out_dir = os.path.join(PATH_TO_BUILDINGS_TRAININSET, str(imd))
            os.makedirs(out_dir, exist_ok=True)
            imsave(os.path.join(out_dir, "img_{}.png".format(imd)), image)
            imsave(os.path.join(out_dir, "mask_{}.png".format(imd)), mask, cmap="gray")

    @staticmethod
    def parse_labelbox_json(path):
        """
        Select image mask urls dic from labelbox array.
        @param path to the labelbox data
        @return list of dictionaries {"image": url, "mask": url}; items without
                a "building" mask are skipped
        """
        # The context manager closes the file even if parsing fails.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)

        res = []
        for item in data:
            masks = item.get("Masks") if isinstance(item, dict) else None
            if isinstance(masks, dict) and "building" in masks:
                res.append({"image": item["Labeled Data"], "mask": masks["building"]})
        return res

    @staticmethod
    def download_labelbox_data(label_box_data, image_size=128):
        """
        Download labelbox image--mask pairs and append them to the existing
        dataset in PATH_TO_BUILDINGS_TRAININSET, following its directory notation.
        @param label_box_data list of {"image": url, "mask": url} dictionaries
        @param image_size size the images and masks are resized to
        """
        root = PATH_TO_BUILDINGS_TRAININSET

        # Only purely numeric entries are ids; other entries (for example
        # ".DS_Store") are ignored. Numbering starts after 0 for an empty dataset.
        last_id = max((int(name) for name in os.listdir(root) if name.isdecimal()), default=0)

        for item in label_box_data:
            # download and resize data
            img = DataGenerator.read_img(item["image"])
            mask = DataGenerator.read_img(item["mask"])
            mask = cv2.cvtColor(mask[:, :, :3], cv2.COLOR_BGR2GRAY)

            img = cv2.resize(img, (image_size, image_size))
            mask = cv2.resize(mask, (image_size, image_size))

            # make new id
            # NOTE(review): the fixed "0005" prefix is kept as is; the resulting
            # name grows longer once last_id has more than four digits -- confirm intent.
            last_id += 1
            last_dir_name = "0005" + str(last_id)

            image_name = "img_{}.png".format(last_dir_name)
            mask_name = "mask_{}.png".format(last_dir_name)

            print(last_dir_name)
            print(image_name)
            print(mask_name)
            # save images and masks in separate directory; a name collision
            # still raises so that existing samples are never overwritten
            out_dir = os.path.join(root, last_dir_name)
            os.mkdir(out_dir)
            imsave(os.path.join(out_dir, image_name), img)
            imsave(os.path.join(out_dir, mask_name), mask, cmap="gray")

            #DataGenerator.show_img_mask(img, mask)



In [None]:
#labelbox_data = "/Users/denyskononenko/Documents/build_proc/label_box_data/export-2019-07-18.json"
#data = DataGenerator.parse_labelbox_json(labelbox_data)

In [None]:
#test_data = data[1]
#print(test_data["image"])
#print(test_data["mask"])
#DataGenerator.download_labelbox_data(data) 


In [None]:
# One-off preprocessing step, kept disabled:
#DataGenerator.make_img_mask_pair(image_id)

# Build the generator over the processed dataset and fetch batch number 3.
data_gen = DataGenerator(ids=image_ids, path=PATH_TO_BUILDINGS_TRAININSET)
x, y = data_gen[3]
#print(x.shape, y.shape)

# Visual check of the first image--mask pair of that batch.
DataGenerator.show_img_mask(img=x[0], mask=y[0])


In [None]:
#print(len(image_id))
#for imd in image_id[20:40]:
#    image = DataGenerator.read_img(PATH_TO_TRAININGSET + DataGenerator.make_path_by_id(imd)["init_image"]) 
#    segmented_image = DataGenerator.read_img(PATH_TO_TRAININGSET + DataGenerator.make_path_by_id(imd)["segments"])
#    mask = DataGenerator.get_mask(segmented_image)
#    DataGenerator.show_imgs(image, segmented_image, mask, imd)
    

In [None]:
 def test_dataset(testid):
    """
    Debug helper: print the unique colours of one segmented image from
    PATH_TO_TRAININGSET_2 and display it next to its building masks.
    @param testid id of the image
    """
    nm = DataGenerator.read_img(PATH_TO_TRAININGSET_2 + DataGenerator.make_path_by_id(testid)["segments"])

    pixels = np.reshape(nm, (nm.shape[0] * nm.shape[1], 3))
    unique_pixels = np.unique(pixels, axis=0)
    gray_unique_pixels = cv2.cvtColor(np.array([unique_pixels]), cv2.COLOR_BGR2GRAY)
    # check presence of buildings related to the building class
    # (the class codes come from the module constants instead of repeated literals)
    building_classes = (BUILDING_PIXEL, HOUSE_PIXEL, SKYSCKRAPPER_PIXEL)
    building_pixels = [
        pixel
        for pixel in unique_pixels
        if any(pixel[0] == first and pixel[1] == second for first, second in building_classes)
    ]

    print(unique_pixels)
    print(gray_unique_pixels)

    # Without any building colour the conversions below have nothing to work on.
    if not building_pixels:
        print("No building pixels found in image {}".format(testid))
        return

    gray_building_pixels = cv2.cvtColor(np.array([building_pixels]), cv2.COLOR_BGR2GRAY)[0]
    print(gray_building_pixels)
    # per-channel comparison, so the colour mask keeps the three channels of nm
    # NOTE(review): [10, 56, 51] is a fixed starting colour; it is counted twice
    # when it is also among building_pixels -- confirm this is intended.
    mask = (nm == [10, 56, 51])*1.0
    for pixel in building_pixels:
        mask += (nm == pixel)*1.0
    print(mask.shape)

    # grayscale mask: one pass per gray level of the building colours
    init_gray = cv2.cvtColor(nm, cv2.COLOR_BGR2GRAY)
    gray = (init_gray == gray_building_pixels[0])*1.0
    for gray_pix in gray_building_pixels[1:]:
        print(gray_pix)
        gray += (init_gray == gray_pix)*1.0
    print(len(gray_building_pixels))

    gray = cv2.resize(gray, (256, 256))
    print(np.amax(gray))
    print(np.amax(gray.shape))
    DataGenerator.show_imgs(nm, mask, gray, testid)



In [None]:
#test_id = ["00004591", "00004688", "00004598", "00004750", "00004605", "00004731"]
#test_dataset(test_id[0])