In [1]:
%matplotlib notebook

import cv2, matplotlib
import numpy as np
import matplotlib.pyplot as plt

from os.path import expanduser, splitext
from os import scandir, makedirs

import random

In [2]:
def get_color_mask(bgr=np.zeros((1,1,3),dtype='uint8'), \
                   colors={'colorname': {'Lab': ([0,0,0], [255,255,255]),
                                         'HSV': ([0,0,0], [255,255,255])}}):
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2Lab)

    blur = {}
    blur['Lab'] = cv2.bilateralFilter(lab,15,25,150)
    blur['BGR'] = cv2.cvtColor(blur['Lab'], cv2.COLOR_Lab2BGR)
    blur['HSV'] = cv2.cvtColor(blur['BGR'], cv2.COLOR_BGR2HSV)
    
    # get masks matching any of the colors matching all descriptions

    mask = np.zeros_like(bgr[:,:,0])
    for color_dict in colors.values():
        mask_color = np.ones_like(mask)*255
        for colorspace, limits in color_dict.items():
            mask_colorspace = cv2.inRange(blur[colorspace], \
                                          np.array(limits[0]), np.array(limits[1]))
            mask_color = cv2.bitwise_and(mask_color, mask_colorspace)
            
        mask = cv2.bitwise_or(mask, mask_color)
    
    return mask

In [3]:
def get_LDK_mask(bgr=np.zeros((1,1,3),dtype='uint8')):
    floor_colors = {'floor_light': {'Lab': ([180,130,160], [220,150,190]),
                                    'HSV': ([0,65,180], [20,255,255])}, 
                    'floor_dark': {'Lab': ([120,130,150], [180,155,190]),
                                   'HSV': ([0,90,100], [20,255,230])},
                    'floor_watermark': {'Lab': ([220,125,145], [240,145,165]),
                                        'HSV': ([0,65,220], [20,255,255])}}

    mask = get_color_mask(bgr, floor_colors)
    
    # remove noise and fill holes
    
    kernel5r = np.ones((5,5), np.uint8)
    for x in [0,-1]:
        for y in [0,-1]:
            kernel5r[x,y] = 0
        
    mask = cv2.dilate(mask, kernel5r, iterations=1)
    mask = cv2.erode(mask, kernel5r, iterations=2)
    mask = cv2.dilate(mask, kernel5r, iterations=1)
        
    _, contours, hierarchy = cv2.findContours(mask, cv2.RETR_CCOMP, \
                                              cv2.CHAIN_APPROX_NONE)
       
    holes = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3]>=0]
    cv2.drawContours(mask, holes, -1, 255, -1)
    
    smallbits = [contours[i] for i in range(len(contours)) \
                 if hierarchy[0][i][3]==-1 and cv2.contourArea(contours[i]) <= 300]
    cv2.drawContours(mask, smallbits, -1, 0, -1)
    
    mask = cv2.dilate(mask, kernel5r, iterations=2)
    mask = cv2.erode(mask, kernel5r, iterations=2)
                
    return mask

In [20]:
def get_bedroom_mask(bgr=np.zeros((1,1,3),dtype='uint8')):
    colors = {#'room_light': {'Lab': ([180,130,160], [220,150,190]),
                   #               'HSV': ([0,65,180], [20,255,255])}, 
                     'bedroom': {'Lab': ([170,124,140], [250,130,165]),
                              'HSV': ([19,23,180], [25,100,254])}}

    mask = get_color_mask(bgr, colors)
    
    # remove noise and fill holes
    
    kernel5r = np.ones((5,5), np.uint8)
    for x in [0,-1]:
        for y in [0,-1]:
            kernel5r[x,y] = 0
        
    mask = cv2.dilate(mask, kernel5r, iterations=1)
    mask = cv2.erode(mask, kernel5r, iterations=2)
    mask = cv2.dilate(mask, kernel5r, iterations=1)
        
    _, contours, hierarchy = cv2.findContours(mask, cv2.RETR_CCOMP, \
                                              cv2.CHAIN_APPROX_NONE)
       
    holes = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3]>=0]
    cv2.drawContours(mask, holes, -1, 255, -1)
    
    smallbits = [contours[i] for i in range(len(contours)) \
                 if hierarchy[0][i][3]==-1 and cv2.contourArea(contours[i]) <= 300]
    cv2.drawContours(mask, smallbits, -1, 0, -1)
    
    mask = cv2.dilate(mask, kernel5r, iterations=2)
    mask = cv2.erode(mask, kernel5r, iterations=2)
                
    return mask

# main

In [23]:
parent_dir = '~/PycharmProjects/NaverApartmentScraper/'

dir_ID_from = 'fp_img'
exp_ID_from = expanduser(parent_dir+dir_ID_from+'/')

dir_hof = 'hall_of_fame'
exp_dir_hof = expanduser(parent_dir+dir_hof+'/')

dirs_ID_exclude = ['fp_img_exclude', 'fp_img_multi-level', 'fp_img_multi-unit']

dir_from = 'fp_img'
exp_dir_from = expanduser(parent_dir+dir_from+'/')

dir_to = 'fp_img_1102_5'
exp_dir_to = expanduser(parent_dir+dir_to+'/')
ext_to = '.png'


IDs = []

# adding all of the plans
#IDs += [splitext(f.name)[0] for f in scandir(exp_ID_from) if f.is_file()]

# adding random sample
random.seed('euisoon1')
from_IDs = [splitext(f.name)[0] for f in scandir(exp_dir_from) if f.is_file()]
IDs += random.sample(from_IDs, 50)

# adding hall of famers
hof_IDs = [splitext(f.name)[0] for f in scandir(exp_dir_hof) if f.is_file()]
#IDs += hof_IDs

IDs = list(set(IDs))
print(len(IDs))


IDs_excl = []
for dir_excl in dirs_ID_exclude:
    exp_dir_excl = expanduser(parent_dir+dir_excl+'/')
    IDs_excl += [splitext(f.name)[0] for f in scandir(exp_dir_excl) if f.is_file()]

IDs = list(set(IDs) - set(IDs_excl))
print(len(IDs))


ext_from_dict = {splitext(f.name)[0]: splitext(f.name)[1] \
                 for f in scandir(exp_ID_from) if f.is_file()}

makedirs(exp_dir_to, exist_ok=True)

for ID in IDs:
    # workaround for non-ascii filenames
    
    stream = open(exp_dir_from+ID+ext_from_dict[ID], "rb")
    bytes = bytearray(stream.read())
    numpyarray = np.asarray(bytes, dtype=np.uint8)
    bgr = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED)
        
    # LDK
    
    mask = get_bedroom_mask(bgr)
    
    # dilate and merge slightly partitioned LDK
    
    kernel_c = np.zeros((37,37), np.uint8)
    cv2.circle(kernel_c, (18,18), 18, 1, -1)

    mask_d = cv2.dilate(mask, kernel_c, iterations=1)
    
    _, contours_d, _ = cv2.findContours(mask_d, cv2.RETR_EXTERNAL, \
                                        cv2.CHAIN_APPROX_NONE)
           
        
    print(ID, ': ', len(contours_d), 'room(s)')  

    # fill the mask and save file

    bgr[:,:,0] = cv2.bitwise_or(bgr[:,:,0], mask_d)
    bgr[:,:,1] = cv2.bitwise_and(bgr[:,:,1], cv2.bitwise_not(mask))
    bgr[:,:,2] = cv2.bitwise_or(bgr[:,:,2], mask)

    # workaround for non-ascii filenames
    
    _, numpyarray2 = cv2.imencode(ext_to, bgr)
    with open(exp_dir_to+ID+ext_to, "wb") as file:
        file.write(numpyarray2)

print('Done')

50
48
108771_84C :  2 room(s)
101107_150 :  2 room(s)
100673_110B :  2 room(s)
104311_99B :  2 room(s)
111324_51 :  0 room(s)
22952_109A :  2 room(s)
101375_150B :  2 room(s)
1601_114 :  3 room(s)
14221_92B :  3 room(s)
16099_72 :  3 room(s)
25230_99 :  3 room(s)
108729_112 :  2 room(s)
4877_141 :  3 room(s)
107694_112B :  2 room(s)
101294_109D :  2 room(s)
1097_109 :  3 room(s)
107728_79B :  3 room(s)
27380_160 :  2 room(s)
102312_110H :  2 room(s)
10327_111 :  3 room(s)
1301_145 :  4 room(s)
913_81A :  3 room(s)
1366_77 :  3 room(s)
7485_105 :  3 room(s)
105153_176 :  2 room(s)
5621_144 :  2 room(s)
1873_90B :  3 room(s)
101182_113 :  3 room(s)
18959_128 :  3 room(s)
108763_102B :  2 room(s)
10657_99B :  3 room(s)
102502_85B :  3 room(s)
23107_56 :  2 room(s)
107807_130C :  3 room(s)
100689_163 :  2 room(s)
22286_160 :  3 room(s)
14301_110A :  1 room(s)
102634_123 :  3 room(s)
3340_104C :  2 room(s)
9769_72B :  2 room(s)
103572_101 :  3 room(s)
17098_109 :  2 room(s)
23589_95 :  3 ro