# Semantic Segmentation Job output conversion to COCO format
This notebook walks through converting the output of a SageMaker Ground Truth semantic segmentation labeling job into Common Objects in Context (COCO) format. The output manifest of the semantic segmentation job contains a reference to a PNG mask file, saved in an Amazon S3 bucket, that marks the objects that have been annotated. In this notebook, I will download a sample manifest file and convert each mask into COCO-style annotations (polygon segmentation, bounding box, and area).

## Get the output.manifest file

In [None]:
import boto3


JOBNAME = '' #Replace it with the labeling job name
REGION = ''
client = boto3.client('sagemaker',region_name=REGION)

response = client.describe_labeling_job(LabelingJobName= JOBNAME)
file = response['LabelingJobOutput']['OutputDatasetS3Uri']
output_manifest = ntpath.basename(file)
!aws s3 cp $file ./

## Read the manifest file line by line and create "Images" key

In [None]:
import json
import ntpath

# Local copy of the manifest; replace the file name if yours differs.
file_name = "./" + output_manifest

#Assuming all images are reshaped to have the same dimensions
height = 3956
width = 5280

# The manifest is JSON Lines: parse every line into its own object.
with open(file_name) as out_manifest:
    data_objs = [json.loads(raw_line) for raw_line in out_manifest]

# Only the base name of each source image is kept.
input_files = [ntpath.basename(record['source-ref']) for record in data_objs]

# Template for one entry of the COCO "images" list.
images_key = {
    "coco_url": "",
    "date_captured": "",
    "flickr_url": "",
    "license": 0,
    "id": 0,
    "file_name": "",
    "height": height,
    "width": width,
}

# One entry per input file; the image id is the file's position in the manifest.
images = [
    {**images_key, "file_name": input_file, "id": img_id}
    for img_id, input_file in enumerate(input_files)
]
images

## Identify and Get Categories

In [None]:
categories = []
classnamesids= []
ids = []
names = []
mapping = {}
cat_hex_map = []

# Template for one entry of the COCO "categories" list.
category = {
      "id": '',
      "name": '',
      "supercategory": ""
            }

# Only the first record's colour map is consulted (assumes every record in
# the manifest carries the same map -- TODO confirm for mixed jobs).
color_map = data_objs[0][JOBNAME + '-ref-metadata']['internal-color-map']

for key, entry in color_map.items():
    classname = entry['class-name']
    hexcolor = entry['hex-color']

    if classname == 'BACKGROUND':
        continue

    # The map is keyed by JSON strings ("1", "2", ...); COCO ids are integers.
    class_id = int(key)
    classnamesids.append((class_id, classname))
    # Lower-case so lookups with matplotlib.colors.to_hex() output match.
    mapping[hexcolor.lower()] = class_id

for idd, classname in classnamesids:
    category['id'] = idd
    category['name'] = classname
    categories.append(category.copy())

## Other static variables:

In [None]:
# Placeholder licence list; images reference it through "license": 0.
licenses =  [
    {
      "name": "",
      "id": 0,
      "url": ""
    }
  ]
# Dataset-level metadata. The COCO data format defines "year" as an integer
# and "version" as a string, so they are stored with those types.
info =  {
    "contributor": "",
    "date_created": "2020-01-23",
    "description": "test",
    "url": "",
    "version": "3",
    "year": 2020
  }

## Function to generate segmentation

In [None]:
import json

import cv2
import numpy as np
from pycocotools import mask
from skimage import measure

from skimage import io

def generate_segmentation(img, category_id, idd, image_id):
    """Build one COCO annotation dict from a mask image.

    The image is converted to a single-channel uint8 array. Area and bounding
    box come from its pycocotools RLE encoding; the polygons come from
    ``skimage.measure.find_contours`` at level 0.5.

    Args:
        img: Colour image array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.
        category_id: Value stored under ``"category_id"``.
        idd: Value stored under ``"id"`` (the annotation id).
        image_id: Value stored under ``"image_id"``.

    Returns:
        dict with keys ``category_id``, ``id``, ``image_id``, ``iscrowd``
        (always 0), ``segmentation`` (list of flat ``[x0, y0, x1, y1, ...]``
        polygons), ``area`` and ``bbox``.
    """
    seg_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # NOTE(review): np.uint8 truncates fractional grey values, and the result
    # is handed to mask.encode as-is -- confirm callers pass an image whose
    # grey levels form the intended binary mask.
    seg_img = np.uint8(seg_img)

    # pycocotools encodes column-major (Fortran-ordered) uint8 arrays.
    f_bmask = np.asfortranarray(seg_img)
    encoded_GT = mask.encode(f_bmask)
    area_GT = mask.area(encoded_GT)
    bb_GT = mask.toBbox(encoded_GT)
    contours = measure.find_contours(seg_img, 0.5)
    annotation = {
            "category_id": category_id,
            "id": idd,
            "image_id": image_id,
            "iscrowd": 0,
            "segmentation": [],
            "area": area_GT.tolist(),
            "bbox": bb_GT.tolist(),
        }

    for contour in contours:
        # A polygon needs at least three vertices; shorter contours (e.g. a
        # lone corner pixel) are not valid COCO polygons, so skip them.
        if len(contour) < 3:
            continue
        # find_contours yields (row, col); COCO polygons are (x, y).
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        annotation["segmentation"].append(segmentation)
    return annotation

## Download the mask output from S3

In [None]:
%matplotlib inline
import time
annotations = []


path = "/".join(data_objs[0][JOBNAME+'-ref'].split("/")[:-1]) + "/"
!aws s3 cp --recursive $path ./output_mask/

## Generate segmentation data

In [None]:
import os


import matplotlib
from skimage import io
from skimage.viewer import ImageViewer
import matplotlib.pyplot as plt
from skimage.color import rgb2gray


idd = 0  # running annotation id, incremented once per generated annotation
all_annotations = []
annotations = []
for image_id, record in enumerate(data_objs):

    img_file = 'output_mask/' + ntpath.basename(record[JOBNAME + '-ref']) #subdir + os.sep + file
    print(img_file)
    mask_img = io.imread(img_file, plugin='matplotlib')

    # Distinct pixel colours in the mask; keep those whose channels other
    # than the last do not average to 1.
    colors = np.unique(mask_img.reshape(-1, mask_img.shape[2]), axis=0)
    segments = [color for color in colors if color[:-1].mean() < 1]

    for msk in segments:
        color_hex = matplotlib.colors.to_hex(msk)
        category_id = mapping.get(color_hex)
        if category_id is None:
            # A colour with no category would otherwise abort the whole run
            # with a bare KeyError; report it and move on.
            print('Skipping colour {} in {}: no matching category'.format(color_hex, img_file))
            continue

        # Work on a fresh copy: the pixels are modified below, and copying is
        # cheaper than re-reading the PNG from disk for every segment.
        img = mask_img.copy()
        masked_img = cv2.inRange(img, msk, msk)

        if len(segments) > 1:
            # Paint this segment's pixels; indexing the first three channels
            # works for both RGB and RGBA masks.
            img[masked_img > 0, :3] = 255
        annot = generate_segmentation(img, category_id, idd, image_id)
        idd += 1
        annotations.append(annot)

# Polygon lists are very large; report a count rather than dumping them.
print('Generated {} annotations for {} images'.format(len(annotations), len(data_objs)))


## Construct the input COCO file

In [None]:
# Assemble the top-level COCO document from the pieces built in earlier cells.
COCO_json = dict(
    licenses=licenses,
    info=info,
    categories=categories,
    images=images,
    annotations=annotations,
)

# Serialise to disk; later cells load this file with pycocotools.
with open('COCO_file.json', 'w') as json_file:
    json_file.write(json.dumps(COCO_json))

## Test and Visualize Each Unique Category Segmentation

This section is based on this notebook https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoDemo.ipynb

In [None]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

annFile = 'COCO_file.json'
coco = COCO(annFile)

# display COCO categories and supercategories
cats = coco.loadCats(coco.getCatIds())
nms = [cat['name'] for cat in cats]
category_line = ' '.join(nms)
print(f'COCO categories: \n{category_line}\n')

nms = {cat['supercategory'] for cat in cats}
supercategory_line = ' '.join(nms)
print(f'COCO supercategories: \n{supercategory_line}')


# Category filter used when fetching annotations in the next cell.
catIds = coco.getCatIds(catNms=['bike', 'motorbike'])
imgIds = coco.getImgIds(catIds=catIds)
imgIds = coco.getImgIds(imgIds=[1])  #<<<<<<<<<<<< Change the number for different test images
picked = np.random.randint(0, len(imgIds))
img = coco.loadImgs(imgIds[picked])[0]
print(img)

# Read the image named by the selected record and report its height and width.
I = io.imread(img['file_name'])
h, w, c = I.shape
print(h, w, sep='\n')
plt.axis('off')
plt.imshow(I)
plt.show()

## Print out the annotations

In [None]:
# Redraw the sample image with the axes hidden.
plt.imshow(I)
plt.axis('off')

# Fetch the annotations of that image restricted to the chosen categories,
# then overlay them on the current axes.
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)

## Print and show the segmentation using COCO format

In [None]:
import pycocotools.coco as coco
from pycocotools.coco import COCO
import cv2

# Load the COCO file written earlier once. It must NOT be reopened in 'w'
# mode here: dumping a single annotation into it would destroy the dataset.
coco = COCO('COCO_file.json')

# Replace '<sample_image>' with the image the annotations belong to.
I = io.imread('<sample_image>')
plt.imshow(I)

# Overlay every annotation on the same axes, one call per annotation.
for annotation in annotations:
    coco.showAnns([annotation])

In [None]:
from skimage import io
from skimage.viewer import ImageViewer
import matplotlib.pyplot as plt
from skimage.color import rgb2gray


import base64  # decodebytes() is used below but base64 was never imported

# NOTE(review): `img_data` is not defined anywhere in the visible notebook;
# presumably it holds a base64-encoded image -- define it before running.
a = io.imread(base64.decodebytes(img_data), plugin='imageio')
grayscale = rgb2gray(a)
grayscale[grayscale > 0] = 1  # every non-zero grey value becomes 1
# The decoded image is displayed; `grayscale` itself is not plotted.
plt.imshow(a)
plt.show()