In [1]:
from PIL import Image # (pip install Pillow)


In [4]:
def create_sub_masks(mask_image):
    """Split a colour-coded mask image into one binary sub-mask per colour.

    Every pixel whose first three channels are not ``(0, 0, 0)`` is assigned
    to the sub-mask of its colour; black pixels are skipped.

    Args:
        mask_image: PIL image whose pixels are tuples. Only the first three
            channels of each pixel are used, so a fourth (alpha) channel is
            ignored.

    Returns:
        dict mapping ``str(colour_tuple)`` (e.g. ``'(255, 0, 0)'``) to a mode
        ``'1'`` PIL image of size ``(width + 2, height + 2)``. Keys appear in
        the order the colours are first met while scanning column by column.
    """
    width, height = mask_image.size

    # Fetch the pixel-access object once instead of calling getpixel() for
    # every pixel; indexing it returns the same values as getpixel().
    source_pixels = mask_image.load()

    # Sub-masks indexed by the string form of their RGB colour
    sub_masks = {}
    # Pixel-access objects of the sub-masks, so writes avoid putpixel()
    sub_mask_pixels = {}

    for x in range(width):
        for y in range(height):
            # Keep only the RGB part of the pixel
            pixel = source_pixels[x, y][:3]

            # Black pixels do not belong to any sub-mask
            if pixel == (0, 0, 0):
                continue

            pixel_str = str(pixel)
            target = sub_mask_pixels.get(pixel_str)
            if target is None:
                # Create a sub-mask (one bit per pixel) and add it to the dictionary.
                # Note: we add 1 pixel of padding in each direction
                # because the contours module doesn't handle cases
                # where pixels bleed to the edge of the image
                sub_mask = Image.new('1', (width + 2, height + 2))
                sub_masks[pixel_str] = sub_mask
                target = sub_mask.load()
                sub_mask_pixels[pixel_str] = target

            # Set the pixel value to 1 (default is 0), accounting for padding
            target[x + 1, y + 1] = 1

    return sub_masks

In [40]:
def create_sub_masks_grayscale(mask_image):
    """Split a single-channel mask image into one binary sub-mask per grey level.

    Every pixel whose value is not ``0`` is assigned to the sub-mask of its
    value; zero-valued pixels are skipped.

    Args:
        mask_image: PIL image whose pixels are scalar values (the code
            compares each pixel directly with ``0``).

    Returns:
        dict mapping ``str(pixel_value)`` (e.g. ``'255'``) to a mode ``'1'``
        PIL image of size ``(width + 2, height + 2)``. Keys appear in the
        order the values are first met while scanning column by column.
    """
    width, height = mask_image.size

    # Fetch the pixel-access object once instead of calling getpixel() for
    # every pixel; indexing it returns the same values as getpixel().
    source_pixels = mask_image.load()

    # Sub-masks indexed by the string form of their grey value
    sub_masks = {}
    # Pixel-access objects of the sub-masks, so writes avoid putpixel()
    sub_mask_pixels = {}

    for x in range(width):
        for y in range(height):
            # Single-channel value of the pixel
            pixel = source_pixels[x, y]

            # Zero-valued (black) pixels do not belong to any sub-mask
            if pixel == 0:
                continue

            pixel_str = str(pixel)
            target = sub_mask_pixels.get(pixel_str)
            if target is None:
                # Create a sub-mask (one bit per pixel) and add it to the dictionary.
                # Note: we add 1 pixel of padding in each direction
                # because the contours module doesn't handle cases
                # where pixels bleed to the edge of the image
                sub_mask = Image.new('1', (width + 2, height + 2))
                sub_masks[pixel_str] = sub_mask
                target = sub_mask.load()
                sub_mask_pixels[pixel_str] = target

            # Set the pixel value to 1 (default is 0), accounting for padding
            target[x + 1, y + 1] = 1

    return sub_masks

In [5]:
import numpy as np                                 # (pip install numpy)
from skimage import measure                        # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon # (pip install Shapely)

In [57]:
def _polygon_parts(geometry):
    """Return the non-empty Polygon members of a shapely geometry as a list."""
    if geometry.is_empty:
        return []
    if geometry.geom_type == 'Polygon':
        return [geometry]
    # Multi-part results (e.g. a MultiPolygon) expose their members via .geoms
    return [part for part in getattr(geometry, 'geoms', [])
            if part.geom_type == 'Polygon' and not part.is_empty]


def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    """Build an annotation dict (segmentation, bbox, area) for one sub-mask.

    Args:
        sub_mask: 2-D binary mask (PIL image or array-like) that carries one
            pixel of padding on every side; the padding is subtracted from
            the resulting coordinates.
        image_id: value stored under ``'image_id'``.
        category_id: value stored under ``'category_id'``.
        annotation_id: value stored under ``'id'``.
        is_crowd: value stored under ``'iscrowd'``.

    Returns:
        dict with the keys ``'segmentation'`` (list of flat
        ``[x0, y0, x1, y1, ...]`` lists, one per polygon), ``'iscrowd'``,
        ``'image_id'``, ``'category_id'``, ``'id'``, ``'bbox'``
        (``(x, y, width, height)``) and ``'area'``. When no usable polygon
        is found, ``'segmentation'`` is empty, ``'bbox'`` is all zeros and
        ``'area'`` is ``0.0``.
    """
    # find_contours works on a 2-D array; convert explicitly instead of
    # relying on an implicit conversion of a PIL image.
    mask_array = np.asarray(sub_mask, dtype=float)

    # Find contours (boundary lines) around each sub-mask
    # Note: there could be multiple contours if the object
    # is partially occluded. (E.g. an elephant behind a tree)
    contours = measure.find_contours(mask_array, 0.5, positive_orientation='low')

    segmentations = []
    polygons = []
    for contour in contours:
        # A polygon needs at least three points
        if len(contour) < 3:
            continue

        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        xy = contour[:, ::-1] - 1

        # Make a polygon and simplify it. With preserve_topology=False the
        # result may be empty or split into several parts, so collect every
        # non-empty polygon instead of assuming a single exterior ring.
        simplified = Polygon(xy).simplify(1.0, preserve_topology=False)
        for part in _polygon_parts(simplified):
            polygons.append(part)
            segmentations.append(np.array(part.exterior.coords).ravel().tolist())

    if polygons:
        # Combine the polygons to calculate the bounding box and area
        multi_poly = MultiPolygon(polygons)
        x, y, max_x, max_y = multi_poly.bounds
        bbox = (x, y, max_x - x, max_y - y)
        area = multi_poly.area
    else:
        # Nothing survived simplification: return a degenerate annotation
        # rather than failing on (or propagating NaN from) empty bounds.
        bbox = (0.0, 0.0, 0.0, 0.0)
        area = 0.0

    annotation = {
        'segmentation': segmentations,
        'iscrowd': is_crowd,
        'image_id': image_id,
        'category_id': category_id,
        'id': annotation_id,
        'bbox': bbox,
        'area': area
    }

    return annotation

In [46]:
import json

# File names of the two "mass" and the two "inria" building mask images
mass_building_1, mass_building_2 = '22678915_15.tif', '22678930_15.tif'
inria_building_1, inria_building_2 = 'austin1.tif', 'austin2.tif'

# Open the masks group by group (same order as listed above) and keep both
# the per-group lists and the individual image handles.
mask_images_mass = [
    Image.open(mask_path) for mask_path in (mass_building_1, mass_building_2)
]
mass_building_1_image, mass_building_2_image = mask_images_mass

mask_images_inria = [
    Image.open(mask_path) for mask_path in (inria_building_1, inria_building_2)
]
inria_building_1_image, inria_building_2_image = mask_images_inria

In [67]:
# Category ids, then the per-image lookup from mask colour to category id.
# Both images (ids 1 and 2) map the red colour key to the mass-building category.
building_mass_id = 1
building_inria_id = 2
category_ids = {
    image_key: {'(255, 0, 0)': building_mass_id}
    for image_key in (1, 2)
}

# Value stored in every annotation's 'iscrowd' field
is_crowd = 0

In [68]:
# Sanity check: display the colour -> sub-mask dictionary for the first mass mask
create_sub_masks(mass_building_1_image)

{'(255, 0, 0)': <PIL.Image.Image image mode=1 size=1502x1502 at 0x10C49F510>}

In [49]:
# Sanity check: display the value -> sub-mask dictionary for the first inria mask
create_sub_masks_grayscale(inria_building_1_image)

{'255': <PIL.Image.Image image mode=1 size=5002x5002 at 0x10C49FED0>}

In [72]:
# Running counters, bumped inside the loops below
annotation_id, image_id = 1, 1

# Build one annotation per colour found in each mass mask image
annotations = []
for mask_image in mask_images_mass:
    sub_masks = create_sub_masks(mask_image)
    # Number of distinct colours found in this mask
    print(len(sub_masks))
    for color in sub_masks:
        sub_mask = sub_masks[color]
        category_id = category_ids[image_id][color]
        annotation = create_sub_mask_annotation(
            sub_mask, image_id, category_id, annotation_id, is_crowd
        )
        annotations += [annotation]
        annotation_id += 1
    image_id += 1

print(json.dumps(annotations))

1
1
[{"segmentation": [[1187.0, 12.5, 1190.5, 0.0, 1183.0, -0.5, 1179.5, 11.0, 1187.0, 12.5], [1210.0, 8.5, 1217.5, 1.0, 1217.0, -0.5, 1203.5, 0.0, 1202.5, 2.0, 1210.0, 8.5], [1379.0, 14.5, 1380.0, 5.5, 1358.0, 2.5, 1354.0, -0.5, 1333.5, 0.0, 1333.0, 9.5, 1353.0, 12.5, 1355.0, 10.5, 1379.0, 14.5], [1324.0, 35.5, 1324.0, 31.5, 1319.5, 32.0, 1320.0, 35.5, 1324.0, 35.5], [1491.0, 53.5, 1499.5, 48.0, 1499.5, 43.0, 1497.0, 39.5, 1485.5, 45.0, 1491.0, 53.5], [897.0, 178.5, 902.5, 144.0, 904.0, 142.5, 908.0, 143.5, 909.5, 142.0, 911.5, 125.0, 908.5, 123.0, 908.5, 118.0, 917.5, 65.0, 916.0, 63.5, 807.0, 45.5, 798.5, 95.0, 781.5, 109.0, 802.0, 132.5, 835.5, 138.0, 832.0, 159.5, 838.5, 160.0, 841.0, 145.5, 853.0, 147.5, 855.5, 149.0, 855.5, 153.0, 857.0, 154.5, 890.0, 159.5, 890.5, 166.0, 888.5, 177.0, 897.0, 178.5], [665.0, 170.5, 714.0, 137.5, 715.5, 136.0, 708.5, 126.0, 717.5, 120.0, 706.0, 103.5, 697.5, 108.0, 686.5, 91.0, 693.5, 85.0, 684.5, 72.0, 682.0, 70.5, 675.0, 75.5, 659.5, 53.0, 656.

In [70]:
# Inspect the current value of the annotation counter
annotation_id

3

In [71]:
# Inspect the current value of the image counter
image_id

3

In [62]:
# Category id, then the per-image lookup from mask value to category id.
# Both images (ids 1 and 2) map the '255' key to the inria-building category.
building_inria_id = 1
category_ids = {
    image_key: {'255': building_inria_id}
    for image_key in (1, 2)
}

# Value stored in every annotation's 'iscrowd' field
is_crowd = 0

In [64]:
# Running counters, bumped inside the loops below
annotation_id, image_id = 1, 1

# Build one annotation per grey value found in each inria mask image
annotations = []
for mask_image in mask_images_inria:
    sub_masks = create_sub_masks_grayscale(mask_image)
    for color in sub_masks:
        sub_mask = sub_masks[color]
        category_id = category_ids[image_id][color]
        annotation = create_sub_mask_annotation(
            sub_mask, image_id, category_id, annotation_id, is_crowd
        )
        annotations += [annotation]
        annotation_id += 1
    image_id += 1

print(json.dumps(annotations))

[{"segmentation": [[130.0, 12.5, 136.0, -0.5, 104.5, 0.0, 130.0, 12.5], [221.0, 14.5, 229.0, -0.5, 194.5, 0.0, 221.0, 14.5], [426.0, 49.5, 434.5, 36.0, 431.5, 34.0, 432.5, 32.0, 434.0, 30.5, 436.0, 31.5, 438.5, 28.0, 454.0, -0.5, 334.5, 0.0, 426.0, 49.5], [732.0, 29.5, 749.5, 1.0, 749.0, -0.5, 698.5, 0.0, 694.5, 7.0, 697.0, 9.5, 732.0, 29.5], [835.0, 26.5, 852.5, 0.0, 834.0, -0.5, 832.0, 1.5, 830.0, -0.5, 816.0, -0.5, 808.5, 10.0, 833.0, 26.5, 835.0, 26.5], [997.0, 4.5, 1000.0, -0.5, 990.5, 0.0, 997.0, 4.5], [1652.0, 24.5, 1659.5, 10.0, 1663.0, 10.5, 1667.0, 5.5, 1684.0, 15.5, 1692.5, 0.0, 1611.5, 0.0, 1619.5, 5.0, 1619.0, 7.5, 1652.0, 24.5], [2191.0, 28.5, 2199.5, 12.0, 2201.0, 10.5, 2205.0, 12.5, 2209.5, 5.0, 2199.0, -0.5, 2176.0, -0.5, 2167.5, 17.0, 2191.0, 28.5], [2240.0, 4.5, 2242.0, -0.5, 2230.5, 0.0, 2240.0, 4.5], [2490.0, 11.5, 2495.5, 3.0, 2494.0, 0.5, 2477.0, -0.5, 2475.5, 1.0, 2474.5, 3.0, 2476.0, 4.5, 2490.0, 11.5], [2625.0, 14.5, 2633.0, -0.5, 2600.5, 0.0, 2607.0, 5.5, 262