In [1]:
import json
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import datetime
import os
from pycocotools import coco
import xml.etree.ElementTree as ET
import xml
from pycocotools import mask as comask
import cv2
from itertools import groupby

In [7]:
# Category registry: maps every custom class name to the COCO category id
# that is written into its annotations.
dict_of_categories = dict(face=91)

In [11]:
# Annotation file that the custom images/annotations are merged into.
json_dir = "Datasets/coco/annotations_old/instances_train2014.json"

# For viewing the content of the new json file.
# json_dir = "Datasets/coco/annotations/instances_train2014.json"

# Read through a context manager so the file handle is closed as soon as
# parsing finishes (a bare open() leaves it open until garbage collection).
with open(json_dir) as json_file:
    minival2014 = json.load(json_file)

{'coco_url': 'http://images.cocodataset.org/train2014/COCO_train2014_000000475546.jpg',
 'date_captured': '2013-11-25 21:20:23',
 'file_name': 'COCO_train2014_000000475546.jpg',
 'flickr_url': 'http://farm1.staticflickr.com/167/423175046_6cd9d0205a_z.jpg',
 'height': 375,
 'id': 475546,
 'license': 4,
 'width': 500}

In [19]:
# minival2014['images'] = []
# minival2014['annotations'] = []

In [13]:
# Registers the custom class; inspect minival2014['categories'] first to make
# sure the chosen id is not already taken.
# The membership check keeps this cell idempotent: running it a second time
# no longer appends a duplicate 'face' entry.
face_category = {'id': 91, 'name': 'face', 'supercategory': 'person'}
if face_category not in minival2014['categories']:
    minival2014['categories'].append(face_category)
# minival2014['categories'].append({'id': 92, 'name': 'fish', 'supercategory': 'animal'})
# minival2014['categories']


In [14]:
# Some functions to be used:
def isjpg(string):
    """Return True when ``string`` ends with ".jpg", otherwise False.

    The comparison is case-sensitive. A non-match returns an explicit
    ``False`` rather than falling through to ``None``.
    """
    return string[-4:] == ".jpg"
    
def ispng(string):
    """Return True when ``string`` ends with ".png", otherwise False.

    The comparison is case-sensitive. A non-match returns an explicit
    ``False`` rather than falling through to ``None``.
    """
    return string[-4:] == ".png"
    
def DateCaptured():
    """Return the current local date and time as 'YYYY-MM-DD HH:MM:SS'.

    Fractions of a second are not included in the returned string.
    """
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

def coco_bbox_creator(x, y):
    """Build an axis-aligned [x_min, y_min, width, height] box around points.

    Parameters: x, y: iterables with the x and y coordinates of the points;
    every item must be convertible with ``float`` (numbers or numeric strings).

    Returns: a list of four floats: the smallest x, the smallest y, and the
    extent of the points along each axis.
    """
    xs = [float(value) for value in x]
    ys = [float(value) for value in y]
    left, top = min(xs), min(ys)
    return [left, top, max(xs) - left, max(ys) - top]

def PolyArea(x,y):
    """
    Area of a polygon from its vertex coordinates (shoelace formula).

    Parameters:   x, y: nd-arrays with the x and y coordinates of the
    polygon's vertices, listed in order around the polygon.

    Returns: The (non-negative) area of the polygon
    """
    # Each dot product pairs every vertex with its predecessor (np.roll by 1).
    x_times_prev_y = np.dot(x, np.roll(y, 1))
    y_times_prev_x = np.dot(y, np.roll(x, 1))
    return 0.5 * np.abs(x_times_prev_y - y_times_prev_x)

def xml_retreiver(tree_object, key_name):
    """Return the text of the first element yielded by ``tree_object.iter(key_name)``.

    Returns None when no element with that tag is found.
    """
    first_match = next(iter(tree_object.iter(key_name)), None)
    if first_match is None:
        return None
    return first_match.text
    
def iou_calculator(bbox1, bbox2):
    """
    Parameters:   bbox1, bbox2: list or numpy array of bounding box coordinates.
    The input should contain the top-left corner's x and y coordinates and
    width and height of the bounding boxes.

    Assertions: the width and height of both bbox1 and bbox2 must be larger
    than 0 (an AssertionError is raised otherwise).

    Returns:      iou: A floating point decimal representing the IoU ratio, which
    is the division of bounding box areas of intersection to their union.
    0 is returned when the boxes do not overlap or only touch along an edge.
    """
    x1, y1, w1, h1 = (float(value) for value in bbox1)
    x2, y2, w2, h2 = (float(value) for value in bbox2)
    # Check every dimension explicitly; `assert w1 and w2 > 0` only tested the
    # truthiness of w1 and never looked at h1.
    assert w1 > 0 and h1 > 0, "bbox1 must have positive width and height"
    assert w2 > 0 and h2 > 0, "bbox2 must have positive width and height"

    # Extent of the overlap along each axis; a non-positive value means the
    # boxes are disjoint (or merely touching) on that axis. Deriving the
    # overlap this way also covers identical boxes and boxes that share the
    # same x- or y-range, which a chain of strict corner comparisons misses.
    overlap_w = min(x1 + w1, x2 + w2) - max(x1, x2)
    overlap_h = min(y1 + h1, y2 + h2) - max(y1, y2)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0

    intersection_area = overlap_w * overlap_h
    total_area = w1 * h1 + w2 * h2 - intersection_area
    return intersection_area / total_area

def txt_bbox_parser(input_location):
    """
    Parameters: input_location: The input will be a text file denoting the bounding
    boxes for every frame in such format:
    frame_id, x1, y1, x2, y2, x3, y3, x4, y4

    Returns: image_nr: A list containing the frame numbers (as strings, exactly
             as they appear before the first comma).
             xywh: A list containing the bounding boxes in (x1, y1, w, h) format,
                   where w and h are the width and height of a bounding box respectively.

    Raises: ValueError: when a non-blank line does not hold a non-empty, even
            number of coordinates after the frame id.
    """
    image_nr = []
    xywh = []
    with open(input_location) as f:
        for line_number, line in enumerate(f, start=1):
            # Blank lines (e.g. a trailing empty line) carry no box; skip them
            # instead of failing inside min() on an empty coordinate list.
            if not line.strip():
                continue
            fields = line.split(",")
            coords = fields[1:]
            if not coords or len(coords) % 2:
                raise ValueError(
                    "{}: line {} must contain x,y coordinate pairs after the frame id".format(
                        input_location, line_number))
            image_nr.append(fields[0])
            # Coordinates alternate x, y; float() inside coco_bbox_creator
            # tolerates the surrounding whitespace/newline.
            xywh.append(coco_bbox_creator(coords[0::2], coords[1::2]))
    return image_nr, xywh

def mask2poly(mask):
    """Binarise a mask image and extract its contours as flat polygons.

    Parameters: mask: single-channel image array accepted by ``cv2.threshold``.

    Returns: polygons: list of contours, each flattened to
             [x1, y1, x2, y2, ...]; contours with fewer than three points
             are dropped.
             mask: the thresholded mask after a 5x5 morphological closing.
    """
    # THRESH_BINARY with thresh=1, maxval=1: pixels strictly greater than 1
    # become 1, everything else 0.
    # NOTE(review): a mask already stored as 0/1 would come out all zero here;
    # presumably the inputs are 0/255 images - confirm.
    _, mask = cv2.threshold(mask,1,1,cv2.THRESH_BINARY)  #threshold binary image
    kernel = np.ones((5,5),np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 2.x/4.x; the contour list is always the
    # second-to-last item. A copy is passed in because older OpenCV versions
    # modify the source image, and `mask` is returned to the caller.
    countours = cv2.findContours(mask.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
    # size >= 6 means at least three (x, y) points, i.e. a real polygon.
    polygons = [countour.flatten().tolist() for countour in countours if countour.size >= 6]

    return polygons, mask

def binary_mask_to_rle(binary_mask):
    """Encode a mask as an uncompressed run-length dictionary.

    Parameters: binary_mask: numpy array; every non-zero element counts as
    foreground, so both 0/1 and 0/255 masks are encoded identically.

    Returns: {'counts': [...], 'size': [...]} where ``size`` is the mask shape
    and ``counts`` holds the run lengths of the column-major (Fortran order)
    flattened mask, alternating background/foreground and always starting
    with a background run (0 when the first pixel is foreground).
    """
    binary_mask = np.asarray(binary_mask)
    # Reduce to True/False first so a foreground value other than 1 (e.g. 255)
    # still gets the leading zero-length background run and is not split into
    # several runs by differing non-zero values.
    foreground = binary_mask.ravel(order='F') != 0
    counts = []
    for i, (is_foreground, run) in enumerate(groupby(foreground)):
        if i == 0 and is_foreground:
            counts.append(0)
        counts.append(sum(1 for _ in run))
    return {'counts': counts, 'size': list(binary_mask.shape)}

In [20]:
# LabelMe-type dataset layout.
# Root folder; it includes directories for video names and the corresponding
# frames inside. Will be changed with argparse.
dataset_dir = "/home/mspr/Desktop/Mask_RCNN/Datasets/"
dataset_name = "MSPR_Dataset/"
annot_dir, mask_dir, image_dir = "Annotations/", "Masks/", "Images/"

# Folder that receives the copied images.
target_dataset_name = os.path.join(dataset_dir, "coco/train2014/")

# ------------------------------------------------------------------------
# TODO: match Ozan's and Filiz's annotations and calculate the IoUs between
# them; avg_iou is meant to collect those values.
# ------------------------------------------------------------------------
avg_iou = []

# Annotations of Filiz: one text line per box, parsed into frame ids and
# (x, y, w, h) boxes.
filiz_annotations = os.path.join(
    "/home/mspr/Desktop/Mask_RCNN/Datasets/MSPR_Dataset/face/filiz_Annotations",
    "pascal_voc_face annotation.txt"
)
image_nrs, bbox_filiz = txt_bbox_parser(filiz_annotations)

# Ids of the added images and annotations both start from 600k.
image_id = annotation_id = 600000

# Walk every custom class, copy its images into the COCO train folder and
# append one 'images' entry per XML file plus one 'annotations' entry per
# non-deleted object.
for class_name in dict_of_categories.keys():
    # Per-class folders live in loop-local names. Rebinding the global
    # `annot_dir` to an absolute path made os.path.join() drop the class
    # prefix on the next iteration, so a second class would have listed the
    # first class's annotation folder again.
    class_root = os.path.join(dataset_dir, dataset_name, class_name)
    class_annot_dir = os.path.join(class_root, "Annotations/")
    class_mask_dir = os.path.join(class_root, "Masks/")
    class_image_dir = os.path.join(class_root, "Images/")
    category_id = dict_of_categories[class_name]

    annot_all_files = sorted(os.listdir(class_annot_dir))
    for xml_file in annot_all_files:
        tree = ET.parse(os.path.join(class_annot_dir, xml_file))
        root = tree.getroot()

        # NOTE(review): the d-th <deleted> flag is paired below with the d-th
        # segm box and mask; this assumes every <object> carries a <segm>
        # entry - confirm against the annotation files.
        delete_flags = root.findall("./object/deleted")
        sum_deleted = sum(int(delete_flag.text) for delete_flag in delete_flags)

        xmins = root.findall("./object/segm/box/xmin")
        ymins = root.findall("./object/segm/box/ymin")
        xmaxs = root.findall("./object/segm/box/xmax")
        ymaxs = root.findall("./object/segm/box/ymax")

        mask_filenames = root.findall("./object/segm/mask")

        # IMAGES
        # The image is copied to the MS COCO train directory under a new name.
        image_filename = xml_retreiver(root, 'filename')
        image = io.imread(os.path.join(class_image_dir, image_filename))
        # Zero-pad the id to 12 digits. This matches the previous hand-written
        # padding for 6- and 7-digit ids and removes the always-true `elif`
        # that produced 13 digits for larger ids.
        image_target_filename = "COCO_train2014_{:012d}.jpg".format(image_id)
        image_target_path = os.path.join(target_dataset_name, image_target_filename)

        # Never overwrite an image that is already present in the target folder.
        if not os.path.isfile(image_target_path):
            io.imsave(image_target_path, image)

        # Image height and width as stored in the XML must agree with the
        # pixel data that was just read.
        height = int(xml_retreiver(root, 'nrows'))
        width = int(xml_retreiver(root, 'ncols'))

        assert height == image.shape[0]
        assert width == image.shape[1]

        # Miscellaneous metadata
        date_captured = DateCaptured()
        coco_url = 'n/a'
        flickr_url = 'n/a'
        # NOTE(review): the license id is drawn at random from 0..7 on every
        # run - confirm this is intended.
        license = np.random.randint(8)

        # Appending to 'images'
        minival2014['images'].append({'coco_url': coco_url, 'file_name': image_target_filename,
                                      'date_captured': date_captured, 'flickr_url': flickr_url,
                                      'height': height, 'id': image_id, 'license': license,
                                      'width': width})

        # An image with more than one non-deleted object has all of its
        # objects stored as crowd (RLE) annotations; a single object is
        # stored as polygons.
        is_multi_object = (len(mask_filenames) - sum_deleted) > 1

        # ANNOTATIONS
        for d, delete_flag in enumerate(delete_flags):
            if int(delete_flag.text):
                continue

            # Bbox in COCO order: [x, y, width, height]
            bbox_x = int(xmins[d].text)
            bbox_y = int(ymins[d].text)
            bbox_w = int(xmaxs[d].text) - bbox_x
            bbox_h = int(ymaxs[d].text) - bbox_y
            bbox_list = [bbox_x, bbox_y, bbox_w, bbox_h]

            # TODO: match bbox_list against Filiz's boxes for this frame
            # (entries of image_nrs equal to image_filename[:-4], boxes in
            # bbox_filiz) with iou_calculator and collect the values in avg_iou.

            # Masks Segmentation: polygons and the binarised mask come from
            # the grayscale mask image.
            mask = cv2.imread(os.path.join(class_mask_dir, mask_filenames[d].text), 0)
            polygons, binary_image = mask2poly(mask)
            RLE_mask_coco = comask.encode(np.asfortranarray(binary_image))

            # Area
            area = comask.area(RLE_mask_coco)

            # Iscrowd / segmentation
            if is_multi_object:
                iscrowd = 1
                # Encode the binarised mask - the same one the area is
                # computed from - rather than the raw image, whose foreground
                # value is not guaranteed to be 1.
                segmentation = binary_mask_to_rle(binary_image)
            else:
                iscrowd = 0
                segmentation = polygons

            # Appending into annotations
            minival2014['annotations'].append({'iscrowd': iscrowd, 'bbox': bbox_list, 'id': annotation_id,
                                               'image_id': image_id, 'segmentation': segmentation,
                                               'area': area, 'category_id': category_id})
            annotation_id += 1
        image_id += 1

                
#                 print(bbox_x, bbox_y, bbox_w, bbox_h)
# print("Average IoU = {}".format(sum(avg_iou)/len(avg_iou)))

2
1
2
1
1


In [21]:
# Serialize the in-memory dataset to a JSON string, then write it to a file.

# from decimal import Decimal
# Replace the json.encoder module's FLOAT_REPR attribute with a formatter
# that renders a float with two decimal places.
# NOTE(review): whether the encoder actually consults this module attribute
# depends on the json implementation in use - verify that floats in the
# written file really come out rounded.
from json import encoder
encoder.FLOAT_REPR = lambda o: format(o, '.2f')

class MyEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy integers, floats and arrays."""

    def default(self, obj):
        """Convert a NumPy value to its built-in equivalent.

        NumPy integers become ``int``, NumPy floats become ``float`` and
        arrays become (nested) lists; any other object is handed to the
        base class implementation.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(MyEncoder, self).default(obj)

# Serialize before opening the target: opening with "w" truncates the file
# immediately, so an encoding error raised while the file was already open
# would have left an empty annotation file behind.
data = json.dumps(minival2014, cls=MyEncoder, indent=4)
with open("Datasets/coco/annotations/instances_train2014.json","w") as f:
# with open("Datasets/coco/annotations/instances_valminusminival2014.json","w") as f:
    f.write(data)
