See https://github.com/NVIDIA/DIGITS/blob/master/digits/extensions/data/objectDetection/README.md

In [None]:
%pylab inline 

In [None]:
import os
from glob import glob

In [None]:
%%file labelnames.txt
dontcare
window


In [None]:
# Root directory of the project; every dataset path below is derived from it.
# expanduser() only rewrites a leading '~', so this absolute path is unchanged.
PROJECT_ROOT = os.path.expanduser('/home/shared/Projects/Facades')
# Raw dataset locations underneath PROJECT_ROOT/data/raw.
CMP_BASE_HOME = os.path.join(PROJECT_ROOT, 'data/raw/CMP_facade_DB_base')
CMP_EXTENDED_HOME = os.path.join(PROJECT_ROOT, 'data/raw/CMP_facade_DB_extended')
LAMA_LONDON_HOME = os.path.join(PROJECT_ROOT, 'data/raw/london_dataset')

In [None]:
def replace_ext(a, ext):
    """Return path `a` with its last extension swapped for `ext`.

    `ext` is appended verbatim, so it should carry its own leading dot.
    """
    stem, _old_ext = os.path.splitext(a)
    return stem + ext

In [None]:
# glob() returns matches in arbitrary, filesystem-dependent order. Sort them so
# that list indices and the numbering of exported files are reproducible.
CMP_BASE_JPGS = sorted(glob(os.path.join(CMP_BASE_HOME, 'base', '*.jpg')))
# Annotation path for each image: same stem, '.xml' extension.
CMP_BASE_XMLS = [replace_ext(f, '.xml') for f in CMP_BASE_JPGS]

In [None]:
# glob() returns matches in arbitrary, filesystem-dependent order. Sort them so
# that list indices and the numbering of exported files are reproducible.
CMP_EXTENDED_JPGS = sorted(glob(os.path.join(CMP_EXTENDED_HOME, 'extended', '*.jpg')))
# Annotation path for each image: same stem, '.xml' extension.
CMP_EXTENDED_XMLS = [replace_ext(f, '.xml') for f in CMP_EXTENDED_JPGS]

In [None]:
# Look up the list position of one specific extended-set image
# (list.index raises ValueError if the path is not in the list).
CMP_EXTENDED_JPGS.index('/home/shared/Projects/Facades/data/raw/CMP_facade_DB_extended/extended/cmp_x0050.jpg')

In [None]:
import xmltodict
from bunch import Bunch

In [None]:
# Show what the CMP base dataset directory contains.
os.listdir(CMP_BASE_HOME)

In [None]:
# Parse label_names.txt: each line is stripped, lower-cased and split on
# whitespace into three columns. The first column is ignored, the second is the
# label name and the third is converted to an integer below.
with open(os.path.join(CMP_BASE_HOME, "label_names.txt")) as f:
    _rows = [line.strip().lower().split() for line in f]
_ignored, CMP_LABELS, _z_columns = zip(*_rows)
# Map every label name to its integer third-column value.
CMP_ZORDER = dict((name, int(depth)) for name, depth in zip(CMP_LABELS, _z_columns))
CMP_ZORDER

In [None]:
def convert_xml_object(o, shape):
    """Convert one CMP XML annotation record to a KITTI-style record.

    The result is a Bunch carrying the KITTI fields used by DIGITS for
    detection. Only `type` and `bbox` hold real data; every other field is
    filled with the placeholder 0.

    Parameters:
        o: parsed annotation with a 'labelname' entry and a 'points' entry
            holding 'x' and 'y' sequences (first two values of each are used).
        shape: image shape; only the first two entries (rows, cols) are read.

    Returns:
        Bunch with type, truncated, occluded, alpha, bbox (top/left/bottom/
        right as ints), dimensions, location, rotation_y and score.
    """
    height, width = shape[:2]
    unused = 0

    label_name = o['labelname']

    # Row 0 holds the 'x' entries and row 1 the 'y' entries, as floats.
    coords = array([o['points']['x'], o['points']['y']]).astype(float)
    # NOTE(review): the 'x' values are scaled by the image height (top/bottom)
    # and the 'y' values by the width (left/right). Presumably the CMP files
    # store the vertical coordinate under 'x' -- confirm against the dataset.
    bbox = Bunch(
        top=int(coords[0][0] * height),
        left=int(coords[1][0] * width),
        bottom=int(coords[0][1] * height),
        right=int(coords[1][1] * width),
    )

    return Bunch(
        type=label_name,
        truncated=unused,
        occluded=unused,
        alpha=unused,
        bbox=bbox,
        dimensions=Bunch(height=unused, width=unused, length=unused),
        location=Bunch(x=unused, y=unused, z=unused),
        rotation_y=unused,
        score=unused,
    )
    
def boxes_from_xml(filename, shape):
    """Read a CMP annotation file and return its objects as KITTI-style records.

    Parameters:
        filename: path of the annotation file.
        shape: shape of the matching image, forwarded to convert_xml_object.

    Returns:
        List of records produced by convert_xml_object; empty when the file
        contains no <object> element.
    """
    with open(filename) as f:
        contents = f.read()
    # The file content is wrapped in a synthetic root element before parsing
    # (presumably because the files hold several top-level <object> elements).
    xml = '<missing_root_element>{}</missing_root_element>'.format(contents)
    # An empty root parses to None rather than a dict.
    root = xmltodict.parse(xml)['missing_root_element'] or {}
    objects = root.get('object', [])
    # xmltodict yields a single dict (not a list) when there is only one child.
    if not isinstance(objects, list):
        objects = [objects]
    return [convert_xml_object(o, shape) for o in objects]

In [None]:
def object_to_string(o):
    """Format one converted annotation record as a KITTI label line.

    The line holds, in order: type, truncated, occluded, alpha, the bounding
    box (left, top, right, bottom), the three dimensions, the three location
    coordinates, rotation_y and score -- 16 values in total -- followed by a
    newline.

    Parameters:
        o: mapping with the fields created by convert_xml_object; nested
            fields (bbox, dimensions, location) are read by attribute.

    Returns:
        The label line as a string.
    """
    # Values are separated by exactly one space so that a reader splitting on
    # a single-space delimiter never sees empty fields.
    return ('{type} {truncated} {occluded} {alpha} '
            '{bbox.left} {bbox.top} {bbox.right} {bbox.bottom} '
            '{dimensions.height} {dimensions.width} {dimensions.length} '
            '{location.x} {location.y} {location.z} {rotation_y} {score}\n').format(**o)

In [None]:
def debug_cmp_image(index, extended=False):
    """Show one CMP image with its 'window' boxes drawn as red rectangles.

    Parameters:
        index: position in the CMP base list, or in the extended list when
            `extended` is true.
        extended: select the extended dataset instead of the base one.
    """
    if extended:
        jpgs, xmls = CMP_EXTENDED_JPGS, CMP_EXTENDED_XMLS
    else:
        jpgs, xmls = CMP_BASE_JPGS, CMP_BASE_XMLS

    print(jpgs[index])
    image = imread(jpgs[index])
    meta = boxes_from_xml(xmls[index], image.shape)

    figure()
    imshow(image)
    ax = gca()
    for b in meta:
        if b.type != 'window':
            continue
        # Rectangle takes the (x, y) corner followed by width and height.
        r = Rectangle((b.bbox.left, b.bbox.top), b.bbox.right-b.bbox.left, b.bbox.bottom - b.bbox.top, alpha=0.5, color='red')
        ax.add_patch(r)

# Visual spot checks: three images from the base set, one from the extended set.
debug_cmp_image(10)
debug_cmp_image(20)
debug_cmp_image(30)
debug_cmp_image(30, extended=True)



In [None]:
# Derive the LAMA folders from LAMA_LONDON_HOME instead of repeating the
# absolute path, so the dataset location is defined in one place only.
LAMA_RECTIFIED = os.path.join(LAMA_LONDON_HOME, 'rectified')
LAMA_GROUNDTRUTH = os.path.join(LAMA_LONDON_HOME, 'groundtruth')
# glob() order is arbitrary; sort so that indices into LAMA_JPGS are stable.
LAMA_JPGS = sorted(glob(os.path.join(LAMA_RECTIFIED, '*.jpg')))
# Ground-truth image for each photo: same base name, '.png', in LAMA_GROUNDTRUTH.
LAMA_GROUNDTRUTH_PNGS = [os.path.join(LAMA_GROUNDTRUTH, replace_ext(os.path.basename(jpg), '.png')) for jpg in LAMA_JPGS]
# Pixel value that marks windows in the ground-truth images.
LAMA_WINDOW_COLOR = (1,0,0)

In [None]:
import errno

def mkdir(p):
    """Create directory `p` (and missing parents); succeed if it already exists.

    Raises:
        OSError: if creation fails for any reason other than the directory
            already existing, including when `p` exists but is not a
            directory.
    """
    try:
        os.makedirs(p)
    except OSError as e:
        # EEXIST is also reported when `p` is a regular file; only an existing
        # directory counts as success.
        if e.errno != errno.EEXIST or not os.path.isdir(p):
            raise

In [None]:
from skimage.measure import regionprops, label

In [None]:
def lama_extract_boxes(image):
    """Return KITTI label lines for the window regions of a LAMA ground truth.

    Every connected region of pixels equal to LAMA_WINDOW_COLOR becomes one
    'window' line with 16 space-separated values (type, truncated, occluded,
    alpha, bbox left/top/right/bottom, three dimensions, three location
    values, rotation_y, score). All values except the type and the bounding
    box are 0.

    Parameters:
        image: ground-truth image indexed as [row, col, channel].

    Returns:
        List of newline-terminated strings, ready for file.writelines().
    """
    window_mask = np.all(image[:,:]==LAMA_WINDOW_COLOR, axis=2)
    boxes = [r.bbox for r in regionprops(label(window_mask))]
    # regionprops reports bbox as (min_row, min_col, max_row, max_col), i.e.
    # (top, left, bottom, right). Each line ends with a newline because
    # writelines() does not add line separators itself.
    strings = ['window 0 0 0 {} {} {} {} 0 0 0 0 0 0 0 0\n'.format(l, t, r, b) for (t, l, b, r) in boxes]
    return strings

In [None]:
def debug_lama_image(index):
    """Show one LAMA photo with its ground-truth window regions in red.

    Parameters:
        index: position in LAMA_JPGS / LAMA_GROUNDTRUTH_PNGS.
    """
    photo = imread(LAMA_JPGS[index])
    truth = imread(LAMA_GROUNDTRUTH_PNGS[index])
    # Connected regions of pixels that match the window colour in all channels.
    window_mask = np.all(truth[:,:]==LAMA_WINDOW_COLOR, axis=2)
    window_boxes = [region.bbox for region in regionprops(label(window_mask))]

    figure()
    imshow(photo)
    axes = gca()
    for top, left, bottom, right in window_boxes:
        patch = Rectangle((left, top), right-left, bottom-top,  color='red', alpha=0.5)
        axes.add_patch(patch)

# Visual spot checks on three LAMA images.
debug_lama_image(10)
debug_lama_image(20)
debug_lama_image(50)

In [None]:
# Output layout for the detection dataset: <root>/images and <root>/labels.
TRAIN_ROOT = os.path.join(PROJECT_ROOT, 'data', 'detect-windows', 'train')
TRAIN_IMAGES = os.path.join(TRAIN_ROOT, 'images')
TRAIN_LABELS = os.path.join(TRAIN_ROOT, 'labels')
# Create the folders, root first; existing ones are left alone by mkdir().
for _train_dir in (TRAIN_ROOT, TRAIN_IMAGES, TRAIN_LABELS):
    mkdir(_train_dir)

In [None]:
# Export the CMP base set for detection training: for every image write a
# re-saved copy named cmpb-<index>.jpg and a label file with one line per
# 'window' annotation.
for i, (jpg, xml) in enumerate(zip(CMP_BASE_JPGS, CMP_BASE_XMLS)):
    image = imread(jpg)
    meta = boxes_from_xml(xml, image.shape)
    # Only annotations of type 'window' are kept.
    windows = [object_to_string(o) for o in meta if o.type=='window']
    
    image_name = os.path.join(TRAIN_IMAGES, 'cmpb-{:06}.jpg'.format(i))
    label_name = os.path.join(TRAIN_LABELS, 'cmpb-{:06}.txt'.format(i))
    
    with open(label_name, 'w') as f:
        f.writelines(windows)
    imsave(image_name, image)
    # Progress counter on one line: '\r' returns to the line start and the
    # trailing comma suppresses the newline.
    print '\r', i+1, 'of', len(CMP_BASE_JPGS),
# Finish the progress line.
print

In [None]:
# Export the CMP extended set for detection training: for every image write a
# re-saved copy named cmpx-<index>.jpg and a label file with one line per
# 'window' annotation.
for i, (jpg, xml) in enumerate(zip(CMP_EXTENDED_JPGS, CMP_EXTENDED_XMLS)):
    image = imread(jpg)
    meta = boxes_from_xml(xml, image.shape)
    # Only annotations of type 'window' are kept.
    windows = [object_to_string(o) for o in meta if o.type=='window']
    
    image_name = os.path.join(TRAIN_IMAGES, 'cmpx-{:06}.jpg'.format(i))
    label_name = os.path.join(TRAIN_LABELS, 'cmpx-{:06}.txt'.format(i))
    
    with open(label_name, 'w') as f:
        f.writelines(windows)
    imsave(image_name, image)
    # Progress counter on one line: '\r' returns to the line start and the
    # trailing comma suppresses the newline.
    print '\r', i+1, 'of', len(CMP_EXTENDED_JPGS),
# Finish the progress line.
print

In [None]:
# Export the LAMA set for detection training: for every photo write a re-saved
# copy named lama-<index>.jpg and a label file holding the strings that
# lama_extract_boxes derives from the matching ground-truth image.
for i, (jpg, gt) in enumerate(zip(LAMA_JPGS, LAMA_GROUNDTRUTH_PNGS)):
    image = imread(jpg)
    windows = lama_extract_boxes(imread(gt))
    
    image_name = os.path.join(TRAIN_IMAGES, 'lama-{:06}.jpg'.format(i))
    label_name = os.path.join(TRAIN_LABELS, 'lama-{:06}.txt'.format(i))
    
    with open(label_name, 'w') as f:
        f.writelines(windows)
    imsave(image_name, image)
    # Progress counter on one line: '\r' returns to the line start and the
    # trailing comma suppresses the newline.
    print '\r', i+1, 'of', len(LAMA_JPGS),
# Finish the progress line.
print

In [None]:
# Target tile shape for the segmentation data. Entry 0 is matched against image
# rows and entry 1 is used as the tile width by split_images; the third entry
# is not read by the code in this notebook.
SEGNET_SHAPE = (512, 512, 3)
# Output folder for the generated segmentation training data.
SEGNET_DATA = os.path.join(PROJECT_ROOT, 'data', 'segnet-facade-512x512', 'train')

In [None]:
from skimage.transform import rescale

def split_images(image, shape):
    """Rescale `image` to the target height and yield tiles of the target width.

    The image is rescaled uniformly so its height becomes shape[0]. If the
    result is narrower than shape[1] it is zero-padded on the left and right
    to exactly that width. Tiles are then cut at evenly spaced horizontal
    offsets from the left edge to the right edge, so neighbouring tiles may
    overlap.

    Parameters:
        image: array indexed as [row, col] or [row, col, channel].
        shape: target shape; only shape[0] (height) and shape[1] (width) are
            used.

    Yields:
        Slices of the rescaled image, each shape[1] columns wide.
    """
    tile_width = shape[1]
    scale = shape[0]/float(image.shape[0])
    simg = rescale(image, (scale, scale))
    if simg.shape[1] < tile_width:
        padding = -simg.shape[1] % tile_width
        # Floor division keeps the pad widths integral on Python 3 and under
        # `from __future__ import division`, as np.pad requires.
        before = padding // 2
        # Pad only the column axis; any trailing axes (channels) are untouched.
        pad_width = [(0, 0), (before, padding - before)] + [(0, 0)] * (simg.ndim - 2)
        simg = np.pad(simg, pad_width, mode='constant')

    tile_count = int(ceil(simg.shape[1]/float(tile_width)))
    for x in linspace(0, simg.shape[1]-tile_width, tile_count):
        yield simg[:, int(x):int(x)+tile_width]

# Visual check: show every tile that split_images cuts from one base image.
for img in split_images(imread(CMP_BASE_JPGS[4]), SEGNET_SHAPE):
    figure()
    imshow(img)
    grid()

In [None]:
# Class indices written into the segmentation label images.
IGNORE_INDEX = 0
WINDOW_INDEX = 1
DOOR_INDEX = 2
FACADE_EDGE_INDEX = 3
OTHER_INDEX = 4

# Controls how wide facade edges are marked: halved to get the column margin
# around CMP facade borders, and used as the dilation disk radius for LAMA.
EDGE_THICKNESS=3

def make_cmp_segnet_inputs(jpg, xml, scale=1.):
    """Yield (image tile, label tile) pairs for one CMP image.

    The image is rescaled to the SEGNET_SHAPE height, a label image is painted
    from the annotation boxes, and both are cut into tiles with split_images.
    Tiles whose labels are entirely IGNORE_INDEX are skipped.

    Painting order (later entries overwrite earlier ones): facade interior as
    OTHER_INDEX, shops as IGNORE_INDEX, vertical facade borders as
    FACADE_EDGE_INDEX, doors as DOOR_INDEX, windows as WINDOW_INDEX.

    Parameters:
        jpg: path of the image file.
        xml: path of the matching annotation file.
        scale: unused; kept so existing callers keep working.

    Yields:
        Tuples (out_image, out_label) as produced by split_images.
    """
    image = imread(jpg)
    image = rescale(image, SEGNET_SHAPE[0]/float(image.shape[0]))
    # Boxes are computed against the rescaled image size.
    meta = boxes_from_xml(xml, image.shape)

    windows = [o for o in meta if o.type=='window']
    doors = [o for o in meta if o.type=='door']
    shops  = [o for o in meta if o.type=='shop']
    facade = [o for o in meta if o.type=='facade']

    labels = np.zeros(image.shape[:2], dtype=np.uint8)

    for o in facade:
        labels[o.bbox.top:o.bbox.bottom, o.bbox.left:o.bbox.right] = OTHER_INDEX
    for o in shops:
        labels[o.bbox.top:o.bbox.bottom, o.bbox.left:o.bbox.right] = IGNORE_INDEX

    # Floor division keeps the margin an integer regardless of Python version.
    half = EDGE_THICKNESS // 2
    for o in facade:
        for x in (o.bbox.right, o.bbox.left):
            # Clamp the start at 0: a negative start would be interpreted as an
            # offset from the right edge and select nothing, dropping borders
            # that lie on the first image column.
            labels[o.bbox.top:o.bbox.bottom, max(0, x - half):x + half + 1] = FACADE_EDGE_INDEX
    for o in doors:
        labels[o.bbox.top:o.bbox.bottom, o.bbox.left:o.bbox.right] = DOOR_INDEX
    for o in windows:
        labels[o.bbox.top:o.bbox.bottom, o.bbox.left:o.bbox.right] = WINDOW_INDEX

    out_images = list(split_images(image, SEGNET_SHAPE))
    out_labels = list(split_images(labels, SEGNET_SHAPE))

    for out_image, out_label in zip(out_images, out_labels):
        if not np.all(out_label == IGNORE_INDEX):
            yield out_image, out_label

In [None]:
# Preview the tiles generated for one CMP base image: data on the left,
# labels on the right, one figure per tile.
figsize(10,20)
for image, labels in make_cmp_segnet_inputs(CMP_BASE_JPGS[8],CMP_BASE_XMLS[8]):
    figure()
    for _slot, _panel in enumerate((image, labels), 1):
        subplot(1,2,_slot)
        imshow(_panel)
        grid()

In [None]:
# Preview the tiles generated for one CMP extended image: data on the left,
# labels on the right, one figure per tile.
figsize(10,20)
for image, labels in make_cmp_segnet_inputs(CMP_EXTENDED_JPGS[8],CMP_EXTENDED_XMLS[8]):
    figure()
    for _slot, _panel in enumerate((image, labels), 1):
        subplot(1,2,_slot)
        imshow(_panel)
        grid()

In [None]:
from skimage.morphology import binary_dilation, disk

In [None]:
# Pixel value that marks facade area in the LAMA ground-truth images.
LAMA_FACADE_COLOR = (1,1,0)
def make_lama_segnet_inputs(jpg, gt_path):
    """Yield (image tile, label tile) pairs for one LAMA photo.

    A label image is built from the ground-truth colours and then both the
    photo and the labels are cut into tiles with split_images. Tiles whose
    labels are entirely IGNORE_INDEX are skipped.

    Parameters:
        jpg: path of the photo.
        gt_path: path of the matching ground-truth image.

    Yields:
        Tuples (out_image, out_label) as produced by split_images.
    """
    image = imread(jpg)
    gt = imread(gt_path)
    # Pixels matching the window colour in every channel.
    windows = np.all(gt[:,:]==LAMA_WINDOW_COLOR, axis=2) 
    # Facade area is the facade colour plus the windows.
    facade = np.all(gt[:,:]==LAMA_FACADE_COLOR, axis=2) | windows
    # For each pair of neighbouring columns, flag it if facade membership
    # changes between them in any row; the per-column flags are then repeated
    # over all rows, giving full-height vertical stripes.
    edges = np.repeat([(facade[:, 1:] != facade[:, :-1]).max(axis=0)] , facade.shape[0], axis=0)
    # Keep the stripes only where the (slightly dilated) facade is present.
    edges *= binary_dilation(facade, disk(1))[:,:-1]
    # The column differences are one column narrower; pad back to full width.
    edges = np.pad(edges, ((0,0), (0,1)), mode='constant')
    # Thicken the edges.
    edges = binary_dilation(edges, disk(EDGE_THICKNESS))
    
    labels = np.zeros(image.shape[:2], dtype=np.uint8)
    
    # Later assignments overwrite earlier ones.
    labels[facade] = OTHER_INDEX
    # NOTE(review): edges are written as IGNORE_INDEX here, whereas
    # make_cmp_segnet_inputs uses FACADE_EDGE_INDEX -- confirm this is intended.
    labels[edges] = IGNORE_INDEX  
    labels[windows] = WINDOW_INDEX
    
    out_images = list(split_images(image, SEGNET_SHAPE))
    out_labels = list(split_images(labels, SEGNET_SHAPE))
    
    for out_image, out_label in zip(out_images, out_labels):
        if not np.all(out_label == IGNORE_INDEX):
            yield out_image, out_label    
    
    
# Preview the tiles generated for one LAMA photo: data on the left, labels on
# the right, one figure per tile.
figsize(10,20)
for image, labels in make_lama_segnet_inputs(LAMA_JPGS[350],LAMA_GROUNDTRUTH_PNGS[350]):
    figure()
    for _slot, _panel in enumerate((image, labels), 1):
        subplot(1,2,_slot)
        imshow(_panel)
        grid()

In [None]:
# Base and extended CMP sets joined into one image list and one annotation
# list, base entries first, kept in matching order.
COMBINED_CMP_JPG = list(CMP_BASE_JPGS)
COMBINED_CMP_JPG.extend(CMP_EXTENDED_JPGS)
COMBINED_CMP_XML = list(CMP_BASE_XMLS)
COMBINED_CMP_XML.extend(CMP_EXTENDED_XMLS)
print(len(COMBINED_CMP_JPG))

In [None]:
# Create the segmentation output folder and report where it is.
mkdir(SEGNET_DATA)
print(SEGNET_DATA)
# One sub-folder each for preview figures, image tiles and label tiles.
for _subdir in ('figs', 'data', 'labels'):
    mkdir(os.path.join(SEGNET_DATA, _subdir))

In [None]:
def generate_images(name, img, label):   
    img = img_as_ubyte(img.clip(0,1))
    label = img_as_ubyte(label)
    
    figsize(12, 5)
    suptitle(name)
    subplot(121)
    imshow(img)
    title('data')
    axis('off')
    subplot(122)
    title('labels')
    imshow(label, vmin=0, vmax=4)
    axis('off')
    
    print '\r processing image', name,

    
    savefig(os.path.join(SEGNET_DATA, 'figs', name + '.png'))
    
    img_name = os.path.join(SEGNET_DATA, 'data', name + '.png')
    label_name = os.path.join(SEGNET_DATA, 'labels', name + '.png')

    imsave(img_name, img)
    imsave(label_name, label)
    
    return img_name, label_name  
        

In [None]:
from skimage.util import img_as_ubyte
from skimage.filters import gaussian
from skimage.transform import warp, AffineTransform, ProjectiveTransform

In [None]:
def scale_in_place(img, scale):
    """Scale the content of `img` by `scale` while keeping the canvas size.

    The content is centred horizontally. The image is flipped vertically
    before warping and flipped back afterwards, with no vertical translation,
    so the scaling is anchored at the bottom row.

    Parameters:
        img: array indexed as [row, col, ...].
        scale: uniform scale factor applied in both directions.

    Returns:
        The warped image as returned by skimage's warp, same canvas size.
    """
    # Centre horizontally based on the actual image width rather than a
    # hard-coded 800, so the function is correct for any tile size.
    x_offset = ((1-scale)/2)*img.shape[1]
    transform = AffineTransform(scale=(scale, scale), translation=(x_offset, 0))
    return warp(img[::-1], transform.inverse)[::-1]

import gc

def augmented_segnet_images(name, img, labels):
    """Save four variants of one sample via generate_images.

    The variants are the original ('-orig'), a half-brightness copy
    ('-dark'), a Gaussian-blurred copy ('-blurry') and a copy shrunk to 90%
    ('-shrunk'). Only the shrunk variant also transforms the labels.
    """
    generate_images(name+'-orig', img, labels)

    darkened = img*0.5
    generate_images(name+'-dark', darkened, labels)

    blurred = gaussian(img, 3)
    generate_images(name+'-blurry', blurred, labels)

    shrunk_img = scale_in_place(img, 0.9)
    shrunk_labels = scale_in_place(labels, 0.9)
    generate_images(name+'-shrunk', shrunk_img, shrunk_labels)
    
def process_lama_image(bundle):
    """Generate and save all augmented tiles for one LAMA photo.

    Parameters:
        bundle: pair (jpg path, ground-truth path). A single argument is used
            so the function can be mapped over zipped path lists.
    """
    jpg, gt_path = bundle
    base_name = os.path.basename(jpg)
    stem, _ext = os.path.splitext(base_name)
    tile_index = 0
    for img, labels in make_lama_segnet_inputs(jpg, gt_path):
        augmented_segnet_images(stem + '-{}'.format(tile_index), img, labels)
        tile_index += 1
    # Run a garbage collection pass once the image is done.
    gc.collect()

def process_cmp_image(bundle):
    """Generate and save all augmented tiles for one CMP image.

    Parameters:
        bundle: pair (jpg path, xml path). A single argument is used so the
            function can be mapped over zipped path lists.
    """
    jpg, xml = bundle
    base_name = os.path.basename(jpg)
    stem, _ext = os.path.splitext(base_name)
    tile_index = 0
    for img, labels in make_cmp_segnet_inputs(jpg, xml):
        augmented_segnet_images(stem + '-{}'.format(tile_index), img, labels)
        tile_index += 1
    # Run a garbage collection pass once the image is done.
    gc.collect()

In [None]:
# Trial run on a single image: the second-to-last entry of the combined CMP lists.
process_cmp_image((COMBINED_CMP_JPG[-2], COMBINED_CMP_XML[-2]))

In [None]:
# Trial run on LAMA images 3 to 39, printing each index before processing.
for i in range(3,40):
    print i
    process_lama_image((LAMA_JPGS[i], LAMA_GROUNDTRUTH_PNGS[i]))
    # End the progress line left open by generate_images.
    print

In [None]:
from multiprocessing import Pool

try:
    workers = Pool()
    for i, x in enumerate(workers.imap_unordered(process_cmp_image, zip(COMBINED_CMP_JPG, COMBINED_CMP_XML))):
        print '\r CMP', i, 'of', len(COMBINED_CMP_JPG),
    for  i, x in enumerate(workers.imap_unordered(process_lama_image, zip(LAMA_JPGS, LAMA_GROUNDTRUTH_PNGS)) ):
        print '\r Lama', i, 'of', len(COMBINED_CMP_JPG),
finally:
    workers.terminate()    

In [None]:
# Listing files for the train and test splits, placed in the parent folder of
# SEGNET_DATA (the '..' component is left unresolved in the path).
SEGNET_TRAIN_DB = os.path.join(SEGNET_DATA, '../train.txt')
SEGNET_TEST_DB = os.path.join(SEGNET_DATA, '../test.txt')

In [None]:
# Collect every generated data image together with its label image.
# glob() order is arbitrary, so sort: the four augmented variants of a tile
# share a name prefix and therefore end up next to each other, which the
# later reshape into groups of 4 relies on.
data_paths = sorted(glob(os.path.join(SEGNET_DATA, 'data', '*.png')))
# The label image has the same file name in the sibling 'labels' folder.
label_paths =  [os.path.abspath(os.path.join(SEGNET_DATA, 'data', '..', 'labels', os.path.split(fn)[1])) for fn in data_paths]
# Materialise the pairs so they can be iterated more than once.
training_data = list(zip(data_paths, label_paths))
for name, labels in training_data:
    # Sanity check: every data image must have a label image.
    assert os.path.isfile(labels), labels

In [None]:
# Try to fix the fact that I did not split the data for testing

In [None]:
# Split the (data, label) pairs into train and test sets in groups of four
# consecutive rows, so that a whole group lands in the same split.
# NOTE(review): this assumes training_data is ordered so that each run of four
# rows holds the augmented variants of one tile, and that the number of rows
# is a multiple of 4 -- confirm.
raw_files = array(training_data)
grouped_files = raw_files.reshape(-1, 4, 2)
# Shuffle whole groups (only the first axis is shuffled).
np.random.shuffle(grouped_files)

# The last 100 groups become the test set; everything else is training data.
training_set = grouped_files[:-100].reshape(-1, 2)
test_set = grouped_files[-100:].reshape(-1, 2)

# Shuffle the individual records within each split.
np.random.shuffle(training_set)
np.random.shuffle(test_set)

print 'train:', len(training_set), '   test', len(test_set)

In [None]:
# Write the training listing: one "<data path> <label path>" line per record.
with open(SEGNET_TRAIN_DB, 'w') as f:
    for record in training_set:
        f.write(' '.join(record) + '\n')

In [None]:
# Write the test listing: one "<data path> <label path>" line per record.
with open(SEGNET_TEST_DB, 'w') as f:
    for record in test_set:
        f.write(' '.join(record) + '\n')

In [None]:
from PIL import Image
for data, label in training_set:
    im = Image.open(data)
    lb = Image.open(label)
    assert im.size == (800, 600)
    assert lb.size == (800, 600)
    print '\r', os.path.split(data)[1], 'ok',
        