# Creating COCO dataset from labelme annotations

In [2]:
import os
import json
import labelme2coco
import shutil
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
from sklearn.model_selection import train_test_split
from pycocotools.coco import COCO

In [3]:
# Input data and generated COCO output both live under the working directory.
dataset_path = os.path.join(os.getcwd(), 'dataset')
coco_path = os.path.join(os.getcwd(), 'COCO_dataset')

# COCO output layout: <coco_path>/annotations and <coco_path>/images.
ann_path, img_path = (
    os.path.join(coco_path, subfolder) for subfolder in ("annotations", "images")
)

In [18]:
# Create the COCO output tree. exist_ok=True makes re-running this cell a
# no-op and avoids the check-then-create race of os.path.exists + os.mkdir;
# makedirs also creates any missing parent directory.
os.makedirs(coco_path, exist_ok=True)
os.makedirs(ann_path, exist_ok=True)
os.makedirs(img_path, exist_ok=True)

In [10]:
# Convert every labelme annotation into one COCO json file.
# Expect this to take ~5 mins on a modern cpu with the 50 images as provided.
labelme_folder = os.path.join(dataset_path, "Labelme_annotations")
save_json_path = os.path.join(coco_path, "annotations", "COCO_complete.json")
labelme2coco.convert(labelme_folder, save_json_path)
print("completed Labelme to COCO format conversion")

completed Labelme to COCO format conversion


# split images/annotations based on scan quality

In [20]:
def save_coco_json(dest, images, annotations, licenses, categoies, info):
    """Write the given COCO sections to ``dest`` as a JSON file.

    The output is a single JSON object with the keys 'images', 'annotations',
    'licenses', 'categories' and 'info', indented by two spaces and with
    sorted keys.

    Args:
        dest: Path of the JSON file to write (opened in text-write mode).
        images: Value stored under 'images'.
        annotations: Value stored under 'annotations'.
        licenses: Value stored under 'licenses'.
        categoies: Value stored under 'categories'. The misspelt parameter
            name is kept as-is so existing callers are unaffected.
        info: Value stored under 'info'.
    """
    coco = {
        'images': images,
        'annotations': annotations,
        'licenses': licenses,
        # Use the argument; the body previously read a module-level
        # `categories` variable and ignored what the caller passed in.
        'categories': categoies,
        'info': info,
    }
    with open(dest, 'wt', encoding='UTF-8') as coco_output:
        json.dump(coco, coco_output, indent=2, sort_keys=True)
    
def save_subset_images(dest_dir, images):
    """Copy the file behind every entry of ``images`` into ``dest_dir``.

    Each entry's 'file_name' value is passed to shutil.copy as the source
    path, one entry at a time.
    """
    for source in (entry['file_name'] for entry in images):
        shutil.copy(source, dest_dir)
        
def extract_annotations_from_images(images, annotations):
    """Return the annotations that belong to the given images.

    An annotation belongs to an image when int(annotation['image_id']) equals
    int(image['id']).

    Args:
        images: Iterable of dicts with an 'id' key.
        annotations: Iterable of dicts with an 'image_id' key.

    Returns:
        A list of the matching annotation dicts, ordered by image (in the
        order of ``images``) and, within one image, in the order they appear
        in ``annotations``.
    """
    # Group the annotations by image id in one pass so that each image is a
    # dictionary lookup instead of a full scan over every annotation.
    annotations_by_image = {}
    for ann in annotations:
        annotations_by_image.setdefault(int(ann['image_id']), []).append(ann)

    annotations_to_return = []
    for img in images:
        annotations_to_return.extend(annotations_by_image.get(int(img['id']), ()))

    return annotations_to_return
        
# Takes the converted annotations and splits the images and the COCO json into
# "ideal", "nonideal" and "combined" subsets: one json per subset in ann_path
# and one image folder per subset in img_path.

# Only the load needs the file; close it before the copying work starts.
with open(save_json_path, 'rt', encoding='UTF-8') as ann:
    coco = json.load(ann)

images = coco['images']
annotations = coco['annotations']
categories = coco['categories']

# Extract the ideal filenames (one per line) so we can segregate
# images/annotations into separate folders/jsons.
with open('ideal_imaging_filenames.txt', 'r') as ideal_files:
    ideal_filenames = [filename.strip() for filename in ideal_files]

# Set copy of the names for constant-time membership tests below.
ideal_lookup = set(ideal_filenames)

ideals = []
nonideals = []

for img in images:
    # os.path.basename copes with forward-slash paths on Windows as well,
    # which splitting on os.sep alone would not.
    if os.path.basename(img['file_name']) in ideal_lookup:
        ideals.append(img)
    else:
        nonideals.append(img)

for name, split in [["ideal", ideals], ["nonideal", nonideals], ["combined", images]]:

    save_img_path = os.path.join(img_path, name)

    # Start from an empty folder so images from an earlier run do not linger.
    if os.path.exists(save_img_path):
        shutil.rmtree(save_img_path)
    os.makedirs(save_img_path)

    save_coco_json(
        os.path.join(ann_path, name + ".json"),
        split,
        extract_annotations_from_images(split, annotations),
        [],
        categories,
        [],
    )
    save_subset_images(save_img_path, split)
        