### from https://github.com/waspinator/pycococreator/blob/d29534e36aad6c30d7e4dadd9f4f7b0e344a774c/pycococreatortools/pycococreatortools.py
### and https://patrickwasp.com/create-your-own-coco-style-dataset/

In [1]:
import os
import re
import datetime
import numpy as np
from itertools import groupby
from skimage import measure
from PIL import Image
from pycocotools import mask

def convert(text):
    """Return ``text`` as an int when it is a decimal number, else lower-cased.

    ``str.isdecimal`` is used instead of ``str.isdigit`` because ``isdigit``
    also accepts characters such as superscript digits ("²") that ``int()``
    rejects with a ValueError.
    """
    return int(text) if text.isdecimal() else text.lower()


def natrual_key(key):
    """Split ``key`` into text and number chunks for natural sorting.

    Example: ``"img12.tif"`` becomes ``["img", 12, ".tif"]``, so ``img2``
    sorts before ``img12`` when this is used as a sort key.
    """
    return [convert(chunk) for chunk in re.split(r'([0-9]+)', key)]

def resize_binary_mask(array, new_size):
    """Resize a mask with PIL and return the result as a boolean array.

    Args:
        array: numpy mask; it is cast to ``uint8`` and multiplied by 255 so
            PIL can treat it as an image.
        new_size: target size, passed unchanged to ``Image.resize``
            (PIL documents this as ``(width, height)``).

    Returns:
        A ``np.bool_`` array in which every non-zero pixel of the resized
        image is ``True``.
    """
    # NOTE(review): Image.resize is called with its default resampling filter,
    # so interpolated edge pixels that come out non-zero count as foreground.
    as_uint8 = array.astype(np.uint8) * 255
    resized = Image.fromarray(as_uint8).resize(new_size)
    return np.asarray(resized).astype(np.bool_)

def close_contour(contour):
    """Return ``contour`` with its first point repeated at the end if needed.

    A contour whose first and last points already match is returned as is;
    otherwise a new array with the first point appended is returned.
    """
    first_point, last_point = contour[0], contour[-1]
    if np.array_equal(first_point, last_point):
        return contour
    return np.vstack((contour, first_point))

def binary_mask_to_rle(binary_mask):
    """Encode a mask as an uncompressed COCO-style run-length dict.

    Args:
        binary_mask: numpy array; every non-zero element is treated as
            foreground, so 0/1, boolean and 0/255 masks all encode the same.

    Returns:
        ``{'counts': [...], 'size': [...]}`` where ``counts`` alternates
        background/foreground run lengths over the mask flattened in
        column-major (Fortran) order, and ``size`` is the mask shape.
    """
    # Normalise to booleans first: comparing raw values against 1 would
    # mis-handle masks whose foreground value is e.g. 255.
    foreground = (binary_mask != 0).ravel(order='F').tolist()

    counts = []
    for index, (is_set, run) in enumerate(groupby(foreground)):
        # Runs always start with background, so a mask that begins with
        # foreground needs an explicit zero-length background run.
        if index == 0 and is_set:
            counts.append(0)
        counts.append(sum(1 for _ in run))

    return {'counts': counts, 'size': list(binary_mask.shape)}

def binary_mask_to_polygon(binary_mask, tolerance=0):
    """Converts a binary mask to COCO polygon representation
    Args:
        binary_mask: a 2D binary numpy array where '1's represent the object
        tolerance: Maximum distance from original points of polygon to approximated
            polygonal chain. If tolerance is 0, the original coordinate array is returned.
    Returns:
        A list of polygons, each a flat list of alternating x, y coordinates.
        Contours that end up with fewer than 3 points are dropped.
    """
    polygons = []
    # pad mask to close contours of shapes which start and end at an edge
    padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)
    for contour in measure.find_contours(padded_binary_mask, 0.5):
        # Undo the padding offset one contour at a time: contours have
        # different lengths, and subtracting from the whole list at once
        # makes NumPy try (and on recent versions fail) to build a ragged array.
        contour = contour - 1
        contour = close_contour(contour)
        contour = measure.approximate_polygon(contour, tolerance)
        if len(contour) < 3:
            continue
        # find_contours yields (row, col); COCO wants (x, y)
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # after padding and subtracting 1 we may get -0.5 points in our segmentation
        segmentation = [0 if i < 0 else i for i in segmentation]
        polygons.append(segmentation)

    return polygons

def create_image_info(image_id, file_name, image_size, 
                      date_captured=None,
                      license_id=1, coco_url="", flickr_url=""):
    """Build the COCO "images" entry for one image.

    Args:
        image_id: value stored under "id".
        file_name: value stored under "file_name".
        image_size: sequence whose first item is stored as "width" and
            second item as "height".
        date_captured: timestamp string; when omitted, the current UTC time
            is used, formatted as ``YYYY-MM-DD HH:MM:SS[.ffffff]``.
        license_id: value stored under "license".
        coco_url: value stored under "coco_url".
        flickr_url: value stored under "flickr_url".

    Returns:
        A dict with the keys listed above.
    """
    if date_captured is None:
        # Resolved per call: a timestamp in the signature would be evaluated
        # once at definition time and shared by every image. The tzinfo is
        # stripped to keep the naive-UTC string format without utcnow().
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        date_captured = now_utc.replace(tzinfo=None).isoformat(' ')

    image_info = {
            "id": image_id,
            "file_name": file_name,
            "width": image_size[0],
            "height": image_size[1],
            "date_captured": date_captured,
            "license": license_id,
            "coco_url": coco_url,
            "flickr_url": flickr_url
    }

    return image_info

def create_annotation_info(annotation_id, image_id, category_info, binary_mask, 
                           image_size=None, tolerance=2, bounding_box=None):
    """Build a COCO-style annotation dict for one object mask.

    Args:
        annotation_id: value stored under "id".
        image_id: value stored under "image_id".
        category_info: mapping with the keys "id" and "is_crowd".
        binary_mask: 2D numpy mask of the object.
        image_size: when given, the mask is first resized to this size with
            ``resize_binary_mask``.
        tolerance: passed to ``binary_mask_to_polygon`` for non-crowd objects.
        bounding_box: optional precomputed box, as a numpy array or a plain
            sequence; when omitted it is derived from the mask.

    Returns:
        The annotation dict, or ``None`` when the mask area is below 1 or a
        non-crowd mask yields no polygon.
    """
    if image_size is not None:
        binary_mask = resize_binary_mask(binary_mask, image_size)

    binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))

    area = mask.area(binary_mask_encoded)
    if area < 1:
        return None

    if bounding_box is None:
        bounding_box = mask.toBbox(binary_mask_encoded)

    if category_info["is_crowd"]:
        # crowd regions are stored as run-length encoding
        is_crowd = 1
        segmentation = binary_mask_to_rle(binary_mask)
    else:
        is_crowd = 0
        segmentation = binary_mask_to_polygon(binary_mask, tolerance)
        if not segmentation:
            return None

    annotation_info = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": is_crowd,
        "area": area.tolist(),
        # asarray lets callers pass a list/tuple box as well as an ndarray
        "bbox": np.asarray(bounding_box).tolist(),
        "segmentation": segmentation,
        "width": binary_mask.shape[1],
        "height": binary_mask.shape[0],
    }

    return annotation_info

In [2]:
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
# Root folder that is searched recursively for .tif chips; the COCO JSON is
# written here as well.
ROOT_DIR = '/mnt/cropmaskperm/test-landsat/chips/'
IMAGE_DIR = "image"
ANNOTATION_DIR = "mask"

# Dataset-level metadata stored under the "info" key of the COCO file.
INFO = {
    "description": "nebraska center pivots",
    "url": "https://github.com/waspinator/pycococreator",
    "version": "0.1.0",
    "year": 2005,
    "contributor": "rbavery",
    # Naive UTC timestamp, same string format as the deprecated utcnow().
    "date_created": datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(' ')
}

LICENSES = [
    {
        "id": 1,
        "name": "Attribution-NonCommercial-ShareAlike License",
        "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
    }
]

# Single object class used for every annotation.
CATEGORIES = [
    {
        'id': 1,
        'name': 'agriculture',
        'supercategory': 'shape',
    },
]



In [3]:
from pathlib import Path
import skimage.io as skio
import numpy as np

# Size recorded for every image entry and used to resize every mask.
CHIP_SIZE = (512, 512)

# Split every .tif under ROOT_DIR into images and label masks by path name.
img_files = []
label_files = []
for filename in Path(ROOT_DIR).glob('**/*.tif'):
    if 'label' not in filename.as_posix():
        img_files.append(filename)
    else:
        label_files.append(filename)

# glob yields files in arbitrary order, so sort both lists the same way
# before pairing them positionally. Natural ordering keeps "x_2" before
# "x_10" in both lists regardless of the suffix that follows the number.
# NOTE(review): pairing still assumes images and labels share a naming
# scheme and that the two lists have equal length (zip stops at the shorter).
img_files.sort(key=lambda path: natrual_key(path.as_posix()))
label_files.sort(key=lambda path: natrual_key(path.as_posix()))

coco_output = {
    "info": INFO,
    "licenses": LICENSES,
    "categories": CATEGORIES,
    "images": [],
    "annotations": []
}

category_info = {'id': 1, 'is_crowd': False}

# Annotation ids run across the whole dataset so each one is unique.
image_id = 1
segmentation_id = 1

# go through each image
for image_filename, label_filename in zip(img_files, label_files):

    # The entry records a .jpeg name derived from the .tif chip name.
    jpeg_img_filename = os.path.splitext(image_filename.as_posix())[0]+".jpeg"
    binary_mask = skio.imread(label_filename.as_posix())

    # One image entry per image, however many instance masks it has.
    image_info = create_image_info(
        image_id, os.path.basename(jpeg_img_filename), CHIP_SIZE)
    coco_output["images"].append(image_info)

    # A label with more than two dimensions holds one instance mask per
    # slice along its last axis; a 2D label is a single instance.
    if binary_mask.ndim > 2:
        instance_masks = [binary_mask[:, :, maskid]
                          for maskid in range(binary_mask.shape[-1])]
    else:
        instance_masks = [binary_mask]

    for instance_mask in instance_masks:
        annotation_info = create_annotation_info(
            segmentation_id, image_id, category_info, instance_mask,
            CHIP_SIZE, tolerance=2)

        if annotation_info is not None:
            coco_output["annotations"].append(annotation_info)

        segmentation_id = segmentation_id + 1

    image_id = image_id + 1


# os.path.join avoids the doubled slash that plain string formatting
# produces when ROOT_DIR already ends with "/".
output_path = os.path.join(ROOT_DIR, 'instances_shape_nebraska.json')
with open(output_path, 'w') as output_json_file:
    json.dump(coco_output, output_json_file)

In [4]:
'{}/instances_shape_nebraska.json'.format(ROOT_DIR)

'/mnt/cropmaskperm/test-landsat/chips//instances_shape_nebraska.json'