In [1]:
%matplotlib notebook

import cv2, matplotlib
import numpy as np
import matplotlib.pyplot as plt

from os.path import expanduser, splitext
from os import scandir, makedirs

import random

import csv

In [2]:
def read_from_csv(filepath):
    """Read a CSV file into a header row and a list of data rows.

    The file is opened as 'utf-8-sig', so a leading byte-order mark is
    stripped from the first header cell.

    Parameters
    ----------
    filepath : path of the CSV file to read.

    Returns
    -------
    (columns, readlist)
        columns  : list of str, the first row of the file.
        readlist : list of tuples of str, one tuple per remaining row.
        An empty file yields ([], []).
    """
    with open(filepath, 'r', newline='', encoding='utf-8-sig') as csvfile:
        listreader = csv.reader(csvfile)

        # next() with a default avoids an IndexError on an empty file
        columns = next(listreader, None)
        if columns is None:
            return [], []

        readlist = [tuple(item) for item in listreader]

    return columns, readlist

In [5]:
def get_color_mask(bgr=np.zeros((1,1,3),dtype='uint8'),
                   colors={'colorname': {'Lab': ([0,0,0], [255,255,255]),
                                         'HSV': ([0,0,0], [255,255,255])}}):
    """Return a mask of the regions of `bgr` that match any of `colors`.

    The image is converted to Lab and smoothed with a bilateral filter; the
    smoothed image is then converted to BGR and HSV.  A pixel matches one
    color when it lies inside the given range in *every* color space listed
    for that color, and the per-color masks are OR-ed together.  The combined
    mask is then cleaned up: holes are filled, top-level blobs whose contour
    area is at most 300 are erased, and every remaining blob is closed
    morphologically on its own.

    Parameters
    ----------
    bgr : image array in BGR channel order (it is passed to
        cv2.cvtColor(..., cv2.COLOR_BGR2Lab)).  Not modified.
    colors : dict of the form {name: {colorspace: (lower, upper)}} where
        colorspace is 'Lab', 'BGR' or 'HSV' and lower/upper are per-channel
        limits handed to cv2.inRange.  The names are not used; only the
        values are read.

    Returns
    -------
    Single-channel array shaped like bgr[:,:,0]; matched pixels are 255 and
    all other pixels are 0.
    """
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2Lab)

    # smooth once in Lab and derive the other color spaces from the result
    blur = {}
    blur['Lab'] = cv2.bilateralFilter(lab,15,25,150)
    blur['BGR'] = cv2.cvtColor(blur['Lab'], cv2.COLOR_Lab2BGR)
    blur['HSV'] = cv2.cvtColor(blur['BGR'], cv2.COLOR_BGR2HSV)

    # get masks matching any of the colors matching all descriptions

    mask = np.zeros_like(bgr[:,:,0])
    for color_dict in colors.values():
        mask_color = np.ones_like(mask)*255
        for colorspace, limits in color_dict.items():
            mask_colorspace = cv2.inRange(blur[colorspace],
                                          np.array(limits[0]), np.array(limits[1]))
            mask_color = cv2.bitwise_and(mask_color, mask_colorspace)

        mask = cv2.bitwise_or(mask, mask_color)

    # fill holes and remove noise

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); taking the last two works with all.
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE,
                                           cv2.CHAIN_APPROX_NONE)[-2:]

    # hierarchy[0][i][3] is the parent index of contour i (-1: top level).
    # hierarchy is None when there are no contours, but then the
    # comprehensions below never index it.
    holes = [c for i, c in enumerate(contours) if hierarchy[0][i][3] >= 0]
    if holes:
        cv2.drawContours(mask, holes, -1, 255, -1)

    smallbits = [c for i, c in enumerate(contours)
                 if hierarchy[0][i][3] == -1 and cv2.contourArea(c) <= 300]
    if smallbits:
        cv2.drawContours(mask, smallbits, -1, 0, -1)

    # removing imperfections

    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]

    # 9x9 kernel holding a filled circle of radius 4
    kernel_c = np.zeros((9,9), np.uint8)
    krn_c_x, krn_c_y = kernel_c.shape[0], kernel_c.shape[1]
    cv2.circle(kernel_c, (krn_c_x//2,krn_c_y//2), krn_c_x//2, 1, -1)

    # close (dilate, then erode) each blob separately so that neighbouring
    # blobs are not merged with each other by the dilation
    for c in contours:
        mask_single_c = np.zeros_like(mask)
        # drawContours expects a *list* of contours; passing the bare contour
        # would treat every point as its own contour and draw only the outline
        cv2.drawContours(mask_single_c, [c], -1, 255, -1)
        mask_single_c = cv2.dilate(mask_single_c, kernel_c, iterations=3)
        mask_single_c = cv2.erode(mask_single_c, kernel_c, iterations=3)
        mask = mask | mask_single_c

    return mask



def get_LDK_mask(bgr=np.zeros((1,1,3),dtype='uint8')):
    """Return the get_color_mask() result for the three floor color ranges.

    The ranges are named 'floor_light', 'floor_dark' and 'floor_watermark';
    each one constrains both the Lab and the HSV representation of a pixel.
    (LDK presumably stands for living/dining/kitchen -- confirm with author.)

    Parameters
    ----------
    bgr : BGR image array, forwarded unchanged to get_color_mask().

    Returns
    -------
    The mask produced by get_color_mask() for these ranges.
    """
    light_range = {'Lab': ([180,130,160], [220,150,190]),
                   'HSV': ([0,65,180], [20,255,255])}
    dark_range = {'Lab': ([120,130,150], [180,155,190]),
                  'HSV': ([0,90,100], [20,255,230])}
    watermark_range = {'Lab': ([220,125,145], [240,145,165]),
                       'HSV': ([0,65,220], [20,255,255])}

    return get_color_mask(bgr, {'floor_light': light_range,
                                'floor_dark': dark_range,
                                'floor_watermark': watermark_range})


def get_bedroom_mask(bgr=np.zeros((1,1,3),dtype='uint8'), min_dark_area=1000):
    """Return a mask of the bedroom-colored regions of a floor plan image.

    Candidate regions are the external contours of the 'bedroom_boundary'
    color mask.  A candidate is kept only when it contains at least
    `min_dark_area` pixels of the 'bedroom_dark' color mask; according to the
    original comment this is what removes similarly colored balconies.

    Parameters
    ----------
    bgr : BGR image array, forwarded unchanged to get_color_mask().
    min_dark_area : minimum number of 'bedroom_dark' pixels a candidate
        region must contain to be kept (default 1000).

    Returns
    -------
    Array shaped like the get_color_mask() result, 255 inside kept regions
    and 0 elsewhere.
    """
    bedroom_boundary = {'bedroom_boundary': {'Lab': ([180,120,132], [254,135,165]),
                                             'HSV': ([10,1,200], [30,110,255])}}
    bedroom_dark = {'bedroom_dark': {'Lab': ([160,124,139], [250,130,165]),
                                     'HSV': ([10,30,200], [30,90,250])}}
    # NOTE: a 'balcony' range (Lab [240,125,130]..[254,135,140]) was also
    # masked here, but its result never took part in the decision below, so
    # that extra get_color_mask() pass has been dropped.

    bedroom_boundary_mask = get_color_mask(bgr, bedroom_boundary)
    bedroom_dark_mask = get_color_mask(bgr, bedroom_dark)

    # remove balconies, which are similarly colored

    mask_bedroom_only = np.zeros_like(bedroom_boundary_mask)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); index -2 is the contour list in all.
    contours = cv2.findContours(bedroom_boundary_mask,
                                cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

    for c in contours:
        # rasterize this candidate alone so it can be intersected
        mask_single_c = np.zeros_like(mask_bedroom_only)
        cv2.drawContours(mask_single_c, [c], -1, 255, -1)

        dark_area = cv2.countNonZero(mask_single_c & bedroom_dark_mask)

        if dark_area >= min_dark_area:
            mask_bedroom_only |= mask_single_c

    return mask_bedroom_only

# main

In [6]:
# Configuration: every path is built relative to parent_dir.
parent_dir = '~/PycharmProjects/NaverApartmentScraper/'

dir_ID_from = 'fp_img'
exp_ID_from = expanduser(parent_dir+dir_ID_from+'/')

dir_hof = 'hall_of_fame'
exp_dir_hof = expanduser(parent_dir+dir_hof+'/')

dirs_ID_exclude = ['fp_img_exclude', 'fp_img_multi-level', 'fp_img_multi-unit']

dir_from = 'fp_img'
exp_dir_from = expanduser(parent_dir+dir_from+'/')

dir_to = 'fp_img_1107_4'
exp_dir_to = expanduser(parent_dir+dir_to+'/')
ext_to = '.png'

path_fp = 'floorplans.csv'
exp_path_fp = expanduser(parent_dir+path_fp)

# number of randomly chosen plans to process
n_random_sample = 500


IDs = []

# adding all of the plans
#IDs += [splitext(f.name)[0] for f in scandir(exp_ID_from) if f.is_file()]

# adding random sample
# scandir() yields entries in arbitrary order, so the names are sorted first;
# otherwise the seeded sample would not be reproducible.  The sample size is
# capped because random.sample() raises ValueError when asked for more items
# than the population holds.
random.seed('euisoon3')
from_IDs = sorted(splitext(f.name)[0] for f in scandir(exp_dir_from) if f.is_file())
IDs += random.sample(from_IDs, min(n_random_sample, len(from_IDs)))

# adding hall of famers
hof_IDs = [splitext(f.name)[0] for f in scandir(exp_dir_hof) if f.is_file()]
IDs += hof_IDs

IDs = list(set(IDs))
print(len(IDs))


# drop every ID that also appears in one of the exclusion directories
IDs_excl = []
for dir_excl in dirs_ID_exclude:
    exp_dir_excl = expanduser(parent_dir+dir_excl+'/')
    IDs_excl += [splitext(f.name)[0] for f in scandir(exp_dir_excl) if f.is_file()]

IDs = list(set(IDs) - set(IDs_excl))
print(len(IDs))


# {ID: file extension}, taken from the directory the images are read from
ext_from_dict = {splitext(f.name)[0]: splitext(f.name)[1]
                 for f in scandir(exp_dir_from) if f.is_file()}


_, fp_list = read_from_csv(exp_path_fp)
# not used further down in this cell
bedroom_dict = {i[0]+'_'+i[1]: int(i[4]) for i in fp_list} # {ID: N of bedrooms}


makedirs(exp_dir_to, exist_ok=True)

for ID in IDs:
    # hall-of-fame IDs are not guaranteed to exist in the source directory
    if ID not in ext_from_dict:
        print(ID, ': no source image in', exp_dir_from, '- skipped')
        continue

    # workaround for non-ascii filenames: read the raw bytes ourselves and
    # let OpenCV decode them from memory

    with open(exp_dir_from+ID+ext_from_dict[ID], "rb") as stream:
        numpyarray = np.asarray(bytearray(stream.read()), dtype=np.uint8)

    bgr = cv2.imdecode(numpyarray, cv2.IMREAD_UNCHANGED) if numpyarray.size else None

    # IMREAD_UNCHANGED can yield a 2-D (grayscale) array, and decoding can
    # fail altogether; the masks below need at least three channels
    if bgr is None or bgr.ndim != 3 or bgr.shape[2] < 3:
        print(ID, ': could not be decoded as a color image - skipped')
        continue

    # get mask

    mask_LDK = get_LDK_mask(bgr)
    mask_bedroom = get_bedroom_mask(bgr)

    # fill the mask and save file
    # channel 1 (green) is forced to 255 in bedrooms and to 0 in LDK areas,
    # channel 2 (red) is forced to 255 in LDK areas and to 0 in bedrooms;
    # channel 0 (blue) is left untouched

    bgr[:,:,1] |= mask_bedroom
    bgr[:,:,1] &= ~mask_LDK

    bgr[:,:,2] |= mask_LDK
    bgr[:,:,2] &= ~mask_bedroom

    # workaround for non-ascii filenames: encode in memory, write ourselves

    encoded_ok, numpyarray2 = cv2.imencode(ext_to, bgr)
    if not encoded_ok:
        print(ID, ': could not be encoded as', ext_to, '- skipped')
        continue

    with open(exp_dir_to+ID+ext_to, "wb") as file:
        file.write(numpyarray2.tobytes())

print('Done')

526
510
Done
