### from https://github.com/waspinator/pycococreator/blob/d29534e36aad6c30d7e4dadd9f4f7b0e344a774c/pycococreatortools/pycococreatortools.py
### and https://patrickwasp.com/create-your-own-coco-style-dataset/

In [7]:
import cropmask.misc as misc
# Folder that holds the preprocessed chips.
CHIP_PATH = '/mnt/cropmaskperm/test-landsat/chips'
# NOTE(review): img_files is only assigned in a later cell of this export
# (the one that calls get_paths_from_preprocessed_chips), so that cell has to
# be run before this one.
# Presumably 43 is the random seed and .1 the held-out test fraction --
# confirm against cropmask.misc.train_test_split.
train_val_ids, test_ids = misc.train_test_split(img_files, 43, .1)

In [5]:
from pathlib import Path
import skimage.io as skio
import numpy as np
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
from pycococreatortools import pycococreatortools
import cropmask.misc as misc
from shutil import copyfile

def create_coco_meta():
    """
    Builds the empty COCO-style dataset dictionary that image and annotation
    records are later appended to.

    Returns:
        dict with the keys "info", "licenses", "categories", "images" and
        "annotations". "images" and "annotations" are fresh empty lists on
        every call, so the result can be filled in place.
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop the tzinfo so the stored string keeps the same naive
    # "YYYY-MM-DD HH:MM:SS[.ffffff]" form as before.
    created = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    INFO = {
        "description": "nebraska center pivots",
        "url": "https://github.com/waspinator/pycococreator",
        "version": "0.1.0",
        "year": 2005,
        "contributor": "rbavery",
        "date_created": created.isoformat(' ')
    }

    LICENSES = [
        {
            "id": 1,
            "name": "Attribution-NonCommercial-ShareAlike License",
            "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
        }
    ]

    # Single-class dataset: every annotation uses category id 1.
    CATEGORIES = [
        {
            'id': 1,
            'name': 'agriculture',
            'supercategory': 'shape',
        },
    ]

    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }
    return coco_output

def get_paths_from_preprocessed_chips(chips_dir, glob_pattern='**/*.tif'):
    """
    Recursively finds the mask and image tifs in folders created
    from the PreprocessWorkflow class. Source folders are of shape
    ../chips/landsat_id/image/tile.tif and are not compatible with COCO
    format. COCO format needed for tensorpack MaskRCNN.

    Args:
        chips_dir: path to chips folder
        glob_pattern: pattern to search for tif files, shouldn't need to change
            if chips folder is used.

    Returns:
        (img_files, label_files): two lists of pathlib.Path objects, each in
        sorted path order. A file counts as a label when the string 'label'
        appears anywhere in its full path (so chips_dir itself must not
        contain 'label'); every other match is treated as an image.
    """
    img_files = []
    label_files = []
    # Path.glob yields entries in filesystem order, which is not guaranteed to
    # be stable. Sorting makes the output reproducible (callers zip the two
    # lists together and feed them to a seeded split).
    for filename in sorted(Path(chips_dir).glob(glob_pattern)):
        if 'label' in filename.as_posix():
            label_files.append(filename)
        else:
            img_files.append(filename)
    return img_files, label_files



def create_coco_json(img_files, label_files, coco_output, fileext=".tif",
                     image_size=(512, 512), output_path=None):
    """
    Creates coco json from chips folder after Preprocess workflow is done.

    Each (image, label) pair gets one image record and one annotation per
    mask band of the label file. Image ids count up from 1 in input order and
    annotation ids count up from 1 across the whole dataset, so both are
    unique within the written json.

    Args:
        img_files: image chip paths (pathlib.Path), paired positionally with
            label_files. zip() stops at the shorter list.
        label_files: label chip paths (pathlib.Path) read with skimage.io.
        coco_output: dict with "images" and "annotations" lists; it is
            extended in place and then dumped to json.
        fileext: extension written into each image record's file name, for
            creating json from tif (for training) or png (for coco annotator).
        image_size: size passed to pycococreatortools for every image record
            and annotation; defaults to the (512, 512) chip size.
        output_path: where to write the json. Defaults to
            instances_shape_nebraska.json inside the module-level ROOT_DIR.
    """
    if output_path is None:
        # os.path.join avoids the doubled slash that string formatting
        # produced when ROOT_DIR already ends with a separator.
        output_path = os.path.join(ROOT_DIR, 'instances_shape_nebraska.json')

    # Annotation ids must stay unique over the whole dataset, so this counter
    # is never reset between images.
    segmentation_id = 1
    pairs = zip(img_files, label_files)
    for image_id, (image_filename, label_filename) in enumerate(pairs, start=1):
        img_filename = os.path.splitext(image_filename.as_posix())[0] + fileext

        # One image record per chip, regardless of how many mask bands the
        # label file carries.
        image_info = pycococreatortools.create_image_info(
            image_id, os.path.basename(img_filename), image_size)
        coco_output["images"].append(image_info)

        binary_mask = skio.imread(label_filename.as_posix())
        if len(binary_mask.shape) > 2:
            # Multi-band label: each band along the last axis is one mask.
            masks = [binary_mask[:, :, maskid]
                     for maskid in range(binary_mask.shape[-1])]
        else:
            masks = [binary_mask]

        for mask in masks:
            category_info = {'id': 1, 'is_crowd': False}
            annotation_info = pycococreatortools.create_annotation_info(
                segmentation_id, image_id, category_info, mask,
                image_size, tolerance=2)

            # create_annotation_info may return None; such masks are skipped.
            if annotation_info is not None:
                coco_output["annotations"].append(annotation_info)

            segmentation_id += 1

    with open(output_path, 'w') as output_json_file:
        json.dump(coco_output, output_json_file)
        
def copy_chips(matterport_chip_paths, coco_chip_paths):
    """
    Copies train or test chips into their new COCO folders.

    The two sequences are paired positionally: the i-th source path is copied
    to the i-th destination path. Pairing stops at the shorter sequence.
    """
    source_dest_pairs = zip(matterport_chip_paths, coco_chip_paths)
    for source_and_dest in source_dest_pairs:
        copyfile(*source_and_dest)
        
# Folder searched (recursively) for the image and label chip tifs.
CHIP_PATH = '/mnt/cropmaskperm/test-landsat/chips'
# Base output folder; note the trailing slash, which the string
# concatenations below rely on.
ROOT_DIR = '/mnt/cropmaskperm/test-landsat/'
# Empty COCO skeleton: info, licenses, categories plus empty
# "images"/"annotations" lists.
coco_meta = create_coco_meta()
# Split every tif under CHIP_PATH into image paths and label paths.
img_files, label_files = get_paths_from_preprocessed_chips(CHIP_PATH)
# Presumably creates the train/ and test/ folders under ROOT_DIR -- confirm
# against cropmask.misc.make_dirs.
misc.make_dirs([ROOT_DIR+"train", ROOT_DIR+"test"])
# Presumably 43 is the random seed and .1 the held-out test fraction --
# confirm against cropmask.misc.train_test_split.
train_val_paths, test_paths = misc.train_test_split(img_files, 43, .1)

In [12]:
# Fill the COCO skeleton built by create_coco_meta() and write the json.
# coco_output is a required argument; calling without it raises TypeError.
create_coco_json(img_files, label_files, coco_meta)

3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
2
3
3
3
3
3
3
3
2
3
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
2
3
3
3
3
3
3
3
2
3
3
2
3
3
3
3
3
3
3
3
2
3
3
3
3
3
2
3
3
3
3
3
2
3
3
3
3
3
3
2
2
2
2
2
3
3
3
3
3
3
2
2
2
3
3
3
3
3
3
3
3
3
3
2
2
2
3
3
3
3
3
3
3


In [10]:
import inspect
# NOTE(review): unfinished expression -- the attribute name after
# "pycococreatortools." is missing, so this cell does not parse as written.
inspect.getsource(pycococreatortools.)

In [4]:
# Preview of the formatted json path. A ROOT_DIR value that already ends in
# '/' produces a doubled slash in the result.
'{}/instances_shape_nebraska.json'.format(ROOT_DIR)

'/mnt/cropmaskperm/test-landsat/chips//instances_shape_nebraska.json'