In [1]:
import glob
from tqdm import tqdm

from src.create_annotations import *

# Label ids of the dataset (category name -> COCO category id).
# This is the single source of truth; the two tables below are derived from it
# so that adding or renumbering a category only has to be done here.
category_ids = {
    "General trash": 1,
    "Paper": 2,
    "Paper pack": 3,
    "Metal": 4,
    "Glass": 5,
    "Plastic": 6,
    "Styrofoam": 7,
    "Plastic bag": 8,
    "Battery": 9,
    "Clothing": 10
}

# Map the sub-mask keys found in the mask images to category ids.
# images_annotations_info looks every sub-mask key up in this table, and skips
# the key '0' before the lookup, so the keys here are the category id written
# as a string: "1" -> 1, ..., "10" -> 10.
category_colors = {
    str(category_id): category_id for category_id in category_ids.values()
}

# Category ids whose polygons are combined into ONE MultiPolygon annotation
# per image instead of one annotation per polygon. In this dataset that is
# every category.
multipolygon_ids = list(category_ids.values())

# Get "images" and "annotations" info 
def images_annotations_info(maskpath):
    """Build the COCO "images" and "annotations" lists for one mask folder.

    Every ``*.png`` file directly inside ``maskpath`` is treated as a mask.
    For each mask, one image entry is created that points at a ``.jpg`` with
    the same stem, prefixed with the batch folder name. Each sub-mask returned
    by ``create_sub_masks`` (except the key ``'0'``) is then converted to
    annotations.

    Args:
        maskpath: Directory containing the mask PNGs, written with ``/`` as the
            separator. The last path component containing the text ``batch``
            is used as the folder prefix of the referenced ``.jpg``; if no
            component contains it, the bare file name is used.

    Returns:
        A tuple ``(images, annotations, annotation_id)``: the list of image
        entries, the list of annotation entries, and the number of annotations
        created (ids are assigned consecutively from 0).

    Raises:
        KeyError: If a sub-mask key other than ``'0'`` is missing from
            ``category_colors``.
    """
    # This id will be automatically increased as we go
    annotation_id = 0
    annotations = []
    images = []

    # The batch prefix depends only on maskpath, so compute it once.
    # The last matching component wins.
    batch_parts = [part for part in maskpath.split('/') if 'batch' in part]
    batch = batch_parts[-1] if batch_parts else ""

    # glob returns files in arbitrary order; sort so that image and annotation
    # ids are reproducible from run to run.
    mask_files = sorted(glob.glob(os.path.join(maskpath, "*.png")))

    for image_id, mask_image in enumerate(mask_files):
        # The mask image is *.png but the original image is *.jpg.
        # We make a reference to the original file in the COCO JSON file.
        # splitext only strips the final extension, so stems that contain
        # dots (e.g. "img.v2.png") are preserved.
        stem = os.path.splitext(os.path.basename(mask_image))[0]
        original_file_name = os.path.join(batch, stem + ".jpg")

        # Open the image and (to be sure) we convert it to RGB. The context
        # manager releases the file handle as soon as the pixels are copied.
        with Image.open(mask_image) as mask_file:
            mask_image_open = mask_file.convert("RGB")
        w, h = mask_image_open.size

        # "images" info
        images.append(create_image_annotation(original_file_name, w, h, image_id))

        sub_masks = create_sub_masks(mask_image_open, w, h)
        for color, sub_mask in sub_masks.items():
            # Key '0' is not a category (presumably background - confirm
            # against create_sub_masks); it produces no annotation.
            if color == '0':
                continue

            category_id = category_colors[color]

            # "annotations" info
            polygons, segmentations = create_sub_mask_annotation(sub_mask)

            # Nothing to annotate: avoid building an empty MultiPolygon, whose
            # bounds/area would not describe a real object.
            if len(polygons) == 0:
                continue

            # Check if we have classes that are a multipolygon
            if category_id in multipolygon_ids:
                # Combine the polygons to calculate the bounding box and area
                multi_poly = MultiPolygon(polygons)
                annotations.append(
                    create_annotation_format(multi_poly, segmentations, image_id, category_id, annotation_id)
                )
                annotation_id += 1
            else:
                for polygon in polygons:
                    # Cleaner to recalculate this variable
                    segmentation = [np.array(polygon.exterior.coords).ravel().tolist()]
                    annotations.append(
                        create_annotation_format(polygon, segmentation, image_id, category_id, annotation_id)
                    )
                    annotation_id += 1

    return images, annotations, annotation_id

if __name__ == "__main__":
    # Start from the empty COCO skeleton and fill in the category table once;
    # the "images" and "annotations" sections are replaced for every batch.
    coco_format = get_coco_json_format()
    coco_format["categories"] = create_category_annotation(category_ids)

    # One COCO file is written per batch folder found under the mask root.
    batch_path = ['batch_01_vt', 'batch_02_vt']

    for b in batch_path:
        mask_path = os.path.join('../../../../data/pseudo_mask/', b)

        batch_images, batch_annotations, annotation_cnt = images_annotations_info(mask_path)
        coco_format["images"] = batch_images
        coco_format["annotations"] = batch_annotations

        # Written next to the notebook as "<batch>_pseudo.json".
        with open(f"./{b}_pseudo.json","w") as outfile:
            json.dump(coco_format, outfile, indent=1)

        print("Created %d annotations for images in folder: %s" % (annotation_cnt, mask_path))


Created 430 annotations for images in folder: ../../../../data/pseudo_mask/batch_01_vt
Created 962 annotations for images in folder: ../../../../data/pseudo_mask/batch_02_vt


In [2]:
import copy

# Merge the two per-batch COCO files into one dataset held in `coco_format`.

# Get the standard COCO JSON format
coco_format = get_coco_json_format()

# Create category section
coco_format["categories"] = create_category_annotation(category_ids)

# Batch 1 is the base of the merged dataset: its entries keep their ids.
with open('./batch_01_vt_pseudo.json', 'r', encoding='UTF-8') as pseudo1_file:
    pseudo1 = json.load(pseudo1_file)

coco_format['images'] = pseudo1['images']
coco_format['annotations'] = pseudo1['annotations']

# Batch 2 is appended below with fresh ids.
with open('./batch_02_vt_pseudo.json', 'r', encoding='UTF-8') as pseudo2_file:
    pseudo2 = json.load(pseudo2_file)

images = pseudo2['images']
annotations = pseudo2['annotations']

# Continue numbering after the highest id already present. Using max() with a
# default (instead of reading the last element) also works when batch 1 has no
# images/annotations or when its entries are not sorted by id.
annotation_id = max((ann['id'] for ann in coco_format['annotations']), default=-1) + 1
image_id = max((img['id'] for img in coco_format['images']), default=-1) + 1

# Index the batch-2 annotations by their original image id in a single pass,
# so each image finds its annotations without rescanning the whole list.
# Within one image the original annotation order is preserved.
annotations_by_image = {}
for ann in annotations:
    annotations_by_image.setdefault(ann['image_id'], []).append(ann)

for img in images:
    # Copy before renumbering so the loaded `pseudo2` data stays untouched.
    for ann in annotations_by_image.get(img['id'], []):
        merged_ann = copy.deepcopy(ann)
        merged_ann['id'] = annotation_id
        merged_ann['image_id'] = image_id
        coco_format['annotations'].append(merged_ann)
        annotation_id += 1

    merged_img = copy.deepcopy(img)
    merged_img['id'] = image_id
    coco_format['images'].append(merged_img)
    image_id += 1

In [3]:
# Persist the merged COCO dictionary built in the previous cell.
merged_json_path = "../../../../data/pseudo.json"
with open(merged_json_path, "w") as outfile:
    json.dump(coco_format, outfile, indent=1)