In [1]:
"""
从Mask文件中输出Box信息，生成COCO json文件
输出：COCO json
"""

import os
import json
from PIL import Image
import numpy as np
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon
import cv2
import tqdm

dataset = {'categories':[],'images':[],'annotations':[]}
dataset['categories'].append({'id': 0, 'name': 'nodule', 'supercategory': 'mark'})
# Label ids of TN-SCUI2020 Dataset
nodule_id = 1
outlier_id = 0
category_ids = {
    '(255, 255, 255)': nodule_id,
    '(1, 1, 1)': nodule_id,
    '(0, 0, 0)': outlier_id
}

In [2]:
def create_sub_masks(mask_image, width, height):
    # Initialize a dictionary of sub-masks indexed by RGB colors
    sub_masks = {}
    for x in range(width):
        for y in range(height):
            # Get the RGB values of the pixel
            pixel = mask_image.getpixel((x, y))[:3]
            newpixel = ((pixel[0] > 128) * 255, (pixel[1] > 128) * 255, (pixel[2] > 128) * 255)

            # If the pixel is not black...
            if newpixel != (0, 0, 0):
                # Check to see if we've created a sub-mask...
                pixel_str = str(newpixel)
                sub_mask = sub_masks.get(pixel_str)
                if sub_mask is None:
                   # Create a sub-mask (one bit per pixel) and add to the dictionary
                    # Note: we add 1 pixel of padding in each direction
                    # because the contours module doesn't handle cases
                    # where pixels bleed to the edge of the image
                    sub_masks[pixel_str] = Image.new('1', (width, height))

                # Set the pixel value to 1 (default is 0), accounting for padding
#                 sub_masks[pixel_str].putpixel((x+1, y+1), 1)
                sub_masks[pixel_str].putpixel((x, y),1)

    return sub_masks

In [3]:
def create_sub_mask_annotation(sub_mask):
    # Find contours (boundary lines) around each sub-mask
    # Note: there could be multiple contours if the object
    # is partially occluded. (E.g. an elephant behind a tree)
    contours = measure.find_contours(sub_mask, 0.5, positive_orientation='low')

    polygons = []
    j = 0
    for contour in contours:
        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        for i in range(len(contour)):
            row, col = contour[i]
            contour[i] = (col - 1, row - 1)

        # Make a polygon and simplify it
        poly = Polygon(contour)
        poly = poly.simplify(1.0, preserve_topology=False)

        if(poly.is_empty):
            # Go to next iteration, dont save empty values in list
            continue

        polygons.append(poly)

    return polygons

In [4]:
def absolute_file_paths(image_path, mask_path):
    mask_images = []

    for root, dirs, files in os.walk(os.path.abspath(mask_path)):
        for file in files:
            if 'ground' in file:
                dstFile = file.replace('_groundtruth_(1)_','').replace('.jpg_','_')
                os.rename(mask_path + '/'+ file, mask_path + '/'+ dstFile)
    
    for root, dirs, files in os.walk(os.path.abspath(image_path)):
        for file in files:
            if 'original' in file:
                dstFile = file.replace('_original_','_').replace('.jpg_','_')
                os.rename(image_path + '/'+ file, image_path + '/'+ dstFile)
                
    for root, dirs, files in os.walk(os.path.abspath(image_path)):
        for file in files:
            if '.jpg' in file:
                mask_images.append(os.path.join(mask_path, file))
    return mask_images

In [5]:
def create_image_annotation(file_name, width, height, image_id):
    images = {
        'file_name': file_name,
        'height': height,
        'width': width,
        'id': image_id
    }
    return images

In [None]:
# Get 'images' and 'annotations' info
def images_annotations_info(image_path, mask_path):
    # This id will be automatically increased as we go
    annotation_id = 1

    annotations = []
    images = []

    # Get absolute paths of all files in a directory
    mask_images = absolute_file_paths(image_path, mask_path)

    length = len(mask_images)
    
    for image_id, mask_image in enumerate(mask_images, 1):
        file_name = image_path + '/' + os.path.basename(mask_image).split('.')[0] + ".jpg"
#         file_name = '/root/workspace/Thyroid_Solid_Nodule/data/preprocess/chenzhou/mask' + os.path.basename(mask_image).split('.')[0] + ".jpg"
#         mask_image_open = cv2.imread(mask_image)
#         mask_image_open = (mask_image_open > 128) * 255
        print(str(image_id) + '/' + str(length))
        # image shape
        mask_image_open = Image.open(mask_image)
        w, h = mask_image_open.size

        # 'images' info
        image = create_image_annotation(os.path.basename(mask_image).split('.')[0] + ".jpg", w, h, image_id)
        images.append(image)

        sub_masks = create_sub_masks(mask_image_open.convert('RGBA'), w, h)
        for color, sub_mask in sub_masks.items():
            category_id = category_ids[color]

            # 'annotations' info
            sub_mask =  np.array(sub_mask)
            polygons = create_sub_mask_annotation(sub_mask)

            for i in range(len(polygons)):
                min_x, min_y, max_x, max_y = polygons[i].bounds
                width = max_x - min_x
                height = max_y - min_y
                bbox = (min_x, min_y, width, height)
                area = polygons[i].area

                segmentation = np.array(
                    polygons[i].exterior.coords).ravel().tolist()

                annotation = {
                    'segmentation': [segmentation],
                    'area': area,
                    'iscrowd': 0,
                    'image_id': image_id,
                    'bbox': bbox,
                    'category_id': category_id,
                    'id': annotation_id
                }

                annotations.append(annotation)
                annotation_id += 1

    return images, annotations

In [None]:
#     for keyword in ['train', 'val']:
TRAIN_PATH = '/root/workspace/TN-SCUI2020-Challenge/data/train'
ORIGIN_PATH = '/root/workspace/Thyroid_Solid_Nodule/data/preprocess/chenzhou_aug'
MASK_PATH = ORIGIN_PATH + '/mask'
# for keyword in ['val', 'test', 'train']:
for keyword in ['images']:
    IMAGE_PATH = ORIGIN_PATH + '/{}'.format(keyword)
    dataset['images'], dataset['annotations'] = images_annotations_info(IMAGE_PATH, MASK_PATH)
    with open(ORIGIN_PATH + '/annotations/image.json', 'w') as outfile:
        json.dump(dataset, outfile)

1/19218
2/19218
3/19218
4/19218
5/19218
6/19218
7/19218
8/19218
9/19218
10/19218
11/19218
12/19218
13/19218
14/19218
15/19218
16/19218
17/19218
18/19218
19/19218
20/19218
21/19218
22/19218
23/19218
24/19218
25/19218
26/19218
27/19218
28/19218
29/19218
30/19218
31/19218
32/19218
33/19218
34/19218
35/19218
36/19218
37/19218
38/19218
39/19218
40/19218
41/19218
42/19218
43/19218
44/19218
45/19218
46/19218
47/19218
48/19218
49/19218
50/19218
51/19218
52/19218
53/19218
54/19218
55/19218
56/19218
57/19218
58/19218
59/19218
60/19218
61/19218
62/19218
63/19218
64/19218
65/19218
66/19218
67/19218
68/19218
69/19218
70/19218
71/19218
72/19218
73/19218
74/19218
75/19218
76/19218
77/19218
78/19218
79/19218
80/19218
81/19218
82/19218
83/19218
84/19218
85/19218
86/19218
87/19218
88/19218
89/19218
90/19218
91/19218
92/19218
93/19218
94/19218
95/19218
96/19218
97/19218
98/19218
99/19218
100/19218
101/19218
102/19218
103/19218
104/19218
105/19218
106/19218
107/19218
108/19218
109/19218
110/19218
111/1921