In [1]:
import json
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import datetime
import os
from pycocotools import coco
import xml.etree.ElementTree as ET
import xml

In [2]:
### CATEGORY INFORMATION
### !!! NEED HELP FOR MATCHING THE CLASSES IN VOT2016 TO THEIR CORRESPONDING ONES IN MS-COCO !!! ###
### Legend ###
### * --> Will be defined into Mask R-CNN as a new category
### ! --> Might be some issues with the assignment, they have to be rechecked
# Maps each custom class name to the category id written into the annotations.
# The keys are also used as the per-class sub-directory names when the dataset
# folders are walked further below.
dict_of_categories = {'face': 91}

In [3]:
json_dir = "Datasets/coco/annotations_old/instances_train2014.json"

# For viewing the content of the new json file.
# json_dir = "Datasets/coco/annotations/instances_train2014.json"

# For debugging, then please restore it.
# json_dir = "Datasets/coco/annotations_old/instances_valminusminival2014.json"


# Use a context manager so the annotation file handle is closed as soon as
# parsing finishes instead of being left open until garbage collection.
with open(json_dir) as annotation_file:
    minival2014 = json.load(annotation_file)

# Displayed as the cell output: the top-level sections of the annotation file.
minival2014.keys()


dict_keys(['images', 'licenses', 'info', 'annotations', 'categories'])

In [17]:
# minival2014['images'] = []
# minival2014['annotations'] = []
# minival2014['images'].append({'date_captured': '2013-11-14 11:18:45', 'url': 'http://farm9.staticflickr.com/8186/8119368305_4e622c8349_z.jpg', 'id': 391895, 'height': 360, 'width': 640, 'file_name': 'COCO_val2014_000000391895.jpg', 'license': 3})
# minival2014['annotations'].append({'category_id': 18, 'image_id': 42, 'id': 1817255, 'bbox': [214.15, 41.29, 348.26, 243.78], 'area': 53481.5118, 'segmentation': [[382.48, 268.63, 330.24, 229.93, 278.97, 205.75, 228.66, 143.83, 214.15, 140.93, 225.76, 134.16, 257.69, 123.52, 277.03, 82.89, 328.3, 48.06, 433.75, 41.29, 502.43, 79.99, 561.44, 168.02, 547.9, 216.39, 562.41, 246.38, 542.09, 285.07, 510.17, 285.07, 467.61, 223.16, 419.24, 253.15, 394.09, 264.76]], 'iscrowd': 0})

In [5]:
# list = minival2014['annotations'][604864]
# print(list['segmentation'])
# for d, i in enumerate(list):
#     if i['iscrowd']:
#         print(d)
#         print(i['segmentation'])

In [6]:
# Adds the classes, view the category list at first!!
# The guard keeps the cell idempotent: re-running it (or loading a file that
# already contains the category) must not append a duplicate entry.
face_category = {'id': 91, 'name': 'face', 'supercategory': 'person'}
if all(category['id'] != face_category['id'] for category in minival2014['categories']):
    minival2014['categories'].append(face_category)
# minival2014['categories'].append({'id': 92, 'name': 'fish', 'supercategory': 'animal'})
# minival2014['categories']


In [7]:
# Shows the added class at the bottom.
# minival2014['images'].pop()
# (minival2014['images'][-20:])


In [8]:
# Some functions to be used:
def isjpg(string):
    """
    Parameters:   string: A file name or path.

    Returns: True if the last four characters are exactly ".jpg"
    (case-sensitive), otherwise False. A boolean is always returned,
    never None.
    """
    return string[-4:] == ".jpg"
    
def ispng(string):
    """
    Parameters:   string: A file name or path.

    Returns: True if the last four characters are exactly ".png"
    (case-sensitive), otherwise False. A boolean is always returned,
    never None.
    """
    return string[-4:] == ".png"
    
def DateCaptured():
    """
    Returns: The current local date and time as a string in the
    "YYYY-MM-DD HH:MM:SS" layout (fractional seconds are dropped).
    """
    now = datetime.datetime.now()
    return now.strftime("%Y-%m-%d %H:%M:%S")

def coco_bbox_creator(x, y):
    """
    Parameters:   x, y: Iterables holding the x and y coordinates of the
    corner points; every entry must be convertible with float().

    Returns: [x_min, y_min, w, h], the axis-aligned box that encloses all
    given points, expressed as floats.
    """
    xs = [float(value) for value in x]
    ys = [float(value) for value in y]
    left = min(xs)
    top = min(ys)
    return [left, top, max(xs) - left, max(ys) - top]

def PolyArea(x,y):
    """
    Shoelace-formula area of a polygon.

    Parameters:   x, y: Array-likes with the x and y coordinates of the
    polygon vertices, listed in order along the outline.

    Returns: The (non-negative) area of the polygon.
    """
    # Pair every vertex with its predecessor by rotating the arrays one step.
    forward_sum = np.dot(x, np.roll(y, 1))
    backward_sum = np.dot(y, np.roll(x, 1))
    return 0.5 * np.abs(forward_sum - backward_sum)

def xml_retreiver(tree_object, key_name):
    """
    Parameters:   tree_object: An object exposing iter(tag), e.g. an
                  ElementTree element.
                  key_name: The tag to look for.

    Returns: The text of the first element yielded by
    tree_object.iter(key_name), or None when nothing matches.
    """
    first_match = next(iter(tree_object.iter(key_name)), None)
    if first_match is None:
        return None
    return first_match.text
    
def iou_calculator(bbox1, bbox2):
    """
    Parameters:   bbox1, bbox2: list or numpy array of bounding box coordinates.
    Each input should contain the top-left corner's x and y coordinates
    followed by the width and height of the bounding box.

    Assertions: the width and height of both bbox1 and bbox2 must be
    larger than 0 (AssertionError otherwise).

    Returns:      iou: The IoU ratio, i.e. the area of the intersection of
    the two boxes divided by the area of their union. Boxes that do not
    overlap, or that merely touch along an edge or corner, yield 0.
    """
    x1, y1, w1, h1 = (float(value) for value in bbox1)
    x2, y2, w2, h2 = (float(value) for value in bbox2)
    assert w1 > 0 and w2 > 0
    assert h1 > 0 and h2 > 0

    # Extent of the overlap along each axis; a non-positive value means the
    # boxes are disjoint (or only touching) on that axis. Computing it this
    # way also handles boxes that share identical edges, e.g. identical boxes.
    overlap_w = min(x1 + w1, x2 + w2) - max(x1, x2)
    overlap_h = min(y1 + h1, y2 + h2) - max(y1, y2)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0

    intersection_area = overlap_w * overlap_h
    total_area = w1 * h1 + w2 * h2 - intersection_area
    return intersection_area / total_area



def txt_bbox_parser(input_location):
    """
    Parameters: input_location: Path of a text file denoting the bounding
    boxes, one per line, in such format:
    frame_id, x1, y1, x2, y2, x3, y3, x4, y4

    Returns: image_nr: A list containing the frame numbers (as strings).
             xywh: A list containing the bounding boxes in (x1, y1, w, h) format,
                   where w and h are the width and height of a bounding box respectively.

    Blank lines (for example a trailing empty line) are skipped. A line with
    an odd number of coordinate values raises ValueError from the reshape.
    """
    image_nr = []
    xywh = []
    with open(input_location) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Nothing to parse; avoids min() on an empty coordinate list.
                continue
            fields = line.split(",")
            image_nr.append(fields[0])
            # Pair the remaining values up as (x, y) corner points.
            coords = np.array(fields[1:]).reshape((-1, 2))
            xywh.append(coco_bbox_creator(coords[:, 0], coords[:, 1]))
    return image_nr, xywh
    
# Short alias for the `encode` function of pycocotools' private `_mask` module.
# NOTE(review): `encode_mask` is not referenced by any other code visible in
# this notebook — confirm it is still needed before keeping the import here.
import pycocotools._mask as _mask
encode_mask = _mask.encode

In [9]:
# LabelMe type
dataset_dir = "/home/mspr/Desktop/Mask_RCNN/Datasets/"  # Will be changed with argparse, includes directories for video names and corresponding frames inside
dataset_name = "MSPR_Dataset/"
annot_dir = "Annotations/"
mask_dir = "Masks/"
image_dir = "Images/"

target_dataset_name = os.path.join(dataset_dir, "coco/train2014/")

##########################
# ANNOTATIONS OF FILIZ   #
##########################
filiz_annotations = os.path.join("/home/mspr/Desktop/Mask_RCNN/Datasets/MSPR_Dataset/face/filiz_Annotations",
                                 "pascal_voc_face annotation.txt")

image_nrs, bbox_filiz = txt_bbox_parser(filiz_annotations)

avg_iou = []

# Added image and annotation ids will start from 600k. 
image_id = 600000
annotation_id = 600000

for class_name in dict_of_categories.keys():
    annot_all_files = sorted(os.listdir(os.path.join(dataset_dir, dataset_name, class_name, annot_dir)))
    for xml_file in annot_all_files:
        annot_dir = os.path.join(dataset_dir, dataset_name, class_name, "Annotations/")
        mask_dir = os.path.join(dataset_dir, dataset_name, class_name, "Masks/")
        image_dir = os.path.join(dataset_dir, dataset_name, class_name, "Images/")

        tree = ET.parse(os.path.join(annot_dir, xml_file))
        root = tree.getroot()

        delete_flags = root.findall("./object/deleted")

        xmins = root.findall("./object/segm/box/xmin")
        ymins = root.findall("./object/segm/box/ymin")
        xmaxs = root.findall("./object/segm/box/xmax")
        ymaxs = root.findall("./object/segm/box/ymax")

        mask_filenames = root.findall("./object/segm/mask")

        # IMAGES
        # Image will be copied to MS COCO train directory and file name will be hold
        image_filename = xml_retreiver(root, 'filename')
        image = io.imread(os.path.join(image_dir, image_filename))
        if image_id<1000000:
            image_target_filename = "COCO_train2014_000000"+str(image_id)+".jpg"
        elif (image_id>=1000000 or image_id<=10000000):
            image_target_filename = "COCO_train2014_00000"+str(image_id)+".jpg"

        if os.path.isfile(os.path.join(target_dataset_name, image_target_filename))==False:
            io.imsave(os.path.join(target_dataset_name, image_target_filename), image)


        # Image height and width
        height = int(xml_retreiver(root, 'nrows'))
        width = int(xml_retreiver(root, 'ncols'))

        assert height == image.shape[0]
        assert width == image.shape[1]


        # Miscellaneous metadata
        date_captured = DateCaptured()
        coco_url = 'n/a'
        flickr_url = 'n/a'
        license = np.random.randint(8)

        # Appending to 'images'
        minival2014['images'].append({'coco_url': coco_url, 'file_name': image_target_filename, 
                                     'date_captured': date_captured, 'flickr_url': flickr_url,
                                     'height': height, 'id': image_id, 'license': license, 
                                     'width': width})
        # Polygon Handling

        for d, delete_flag in enumerate(delete_flags):
            delete_flag = int(delete_flag.text)
            if not delete_flag:


                # ANNOTATIONS
                # Bbox
                bbox_x = int(xmins[d].text)
                bbox_y = int(ymins[d].text)
                bbox_w = int(xmaxs[d].text) - bbox_x
                bbox_h = int(ymaxs[d].text) - bbox_y
                bbox_list = [bbox_x, bbox_y, bbox_w, bbox_h]

#                 queries_findid = [d for d, image_nr in enumerate(image_nrs) if image_nr==image_filename[:-4]]
#                 for query in queries_findid:
#                     print(bbox_filiz[query])
#                     print(bbox_list)
#                 iou_for_each_xml_object = [iou_calculator(bbox_filiz[query], bbox_list) for query in queries_findid]
#                 avg_iou.append(iou_calculator(bbox_filiz[query], bbox_list) for query in queries_findid)    
#                 print(iou_for_each_xml_object)
#                 print("")


#                 print(bbox_x, bbox_y, bbox_w, bbox_h)

#                 with open("bbox_info.txt", "a+") as f:
#                     f.write(image_filename[:-4]+","+str(bbox_x)+","+str(bbox_y)+","+
#                             str(bbox_x+bbox_w)+","+str(bbox_y)+","+
#                             str(bbox_x)+","+str(bbox_y+bbox_h)+","+
#                            str(bbox_x+bbox_w)+","+str(bbox_y+bbox_h)+"\n")

                # Class
                category_id = dict_of_categories[class_name]

                # Masks Segmentation
                instance_seg_info = []
                mask = io.imread(os.path.join(mask_dir, mask_filenames[d].text), as_grey=True)
                seg_coords_x, seg_coords_y = map(lambda x: list(x), np.where(mask>0))
                seg_coords = []
                for d, x in enumerate(seg_coords_x):
                    seg_coords.append(x)
                    seg_coords.append(seg_coords_y[d])



                # Area
                area = PolyArea(seg_coords_x, seg_coords_y)

                # Iscrowd
                if len(mask_filenames)>1:
                    iscrowd = 1
                else:
                    iscrowd = 0

                # Appending into annotations
                minival2014['annotations'].append({'iscrowd': iscrowd, 'bbox': bbox_list, 'id': annotation_id,
                                                  'image_id': image_id, 'segmentation': [seg_coords],
                                                  'area': area, 'category_id': dict_of_categories[class_name]})
                annotation_id+=1
        image_id+=1

                
#                 print(bbox_x, bbox_y, bbox_w, bbox_h)
# print("Average IoU = {}".format(sum(avg_iou)/len(avg_iou)))

['000001', '000021', '000021', '000021', '000025'] [[125.0, 37.0, 150.0, 166.0], [92.0, 89.0, 39.0, 50.0], [233.0, 48.0, 49.0, 46.0], [61.0, 189.0, 48.0, 49.0], [232.0, 8.0, 56.0, 56.0]]
00000001.jpg
00000002.jpg
00000003.jpg
00000004.jpg
00000005.jpg
00000006.jpg
00000007.jpg
00000008.jpg
00000009.jpg
00000010.jpg
00000011.jpg
00000012.jpg
00000013.jpg
00000014.jpg
00000015.jpg
00000016.jpg
00000017.jpg
00000018.jpg
00000019.jpg
00000020.jpg
00000021.jpg
00000022.jpg
00000023.jpg
00000024.jpg
00000025.jpg
00000026.jpg
00000027.jpg
00000028.jpg
00000029.jpg
00000030.jpg
00000031.jpg
00000032.jpg
00000033.jpg
00000034.jpg
00000035.jpg
00000036.jpg
00000037.jpg
00000038.jpg
00000039.jpg
00000040.jpg
00000041.jpg
00000042.jpg
00000043.jpg
00000044.jpg
00000045.jpg
00000046.jpg
00000047.jpg
00000048.jpg
00000049.jpg
00000050.jpg
00000051.jpg
00000052.jpg
00000053.jpg
00000054.jpg
00000055.jpg
00000056.jpg
00000057.jpg
00000058.jpg
00000059.jpg
00000060.jpg
00000061.jpg
00000062.jpg
0000006

In [10]:
# dataset_dir = "/home/mspr/Desktop/Mask_RCNN/Datasets/"  # Will be changed with argparse, includes directories for video names and corresponding frames inside
# train = "train/"                                                # train
# dataset_seg_dir = "GT_segmentation/" # one indexed version for providing segmentation data.

# source_dataset_name = os.path.join(dataset_dir, "VOT2016/" , train)
# source_dataset_seg_loc = os.path.join(dataset_dir, "VOT2016/" , dataset_seg_dir)
# target_dataset_name = os.path.join(dataset_dir, "coco/train2014/")

# ground_truth_bbox = "groundtruth.txt"

# id_counter = 600000

# all_video_names = [folder for folder in os.listdir(source_dataset_name) if os.path.isdir(os.path.join(source_dataset_name, folder))] 

# # Change: all_video_names --> dict_of_categories.keys()
# for video_name in dict_of_categories.keys():
#     all_frame_names = [frame for frame in os.listdir(os.path.join(source_dataset_name,
#                                                                   video_name)) if isjpg(os.path.join(source_dataset_name, video_name, frame))]
#     all_frame_names = sorted(all_frame_names)
#     all_seg_frame_names = [frame for frame in os.listdir(os.path.join(source_dataset_seg_loc,
#                                                                       video_name)) if ispng(os.path.join(source_dataset_seg_loc, video_name, frame))]
#     all_seg_frame_names = sorted(all_seg_frame_names)
    
#     # bbox
#     bbox_info_file = os.path.join(source_dataset_name, video_name, ground_truth_bbox)
#     with open(bbox_info_file) as f:
#         all_bboxes = f.read().split()       
        
#     for d, frame_name in enumerate(all_frame_names):
            
#         # Date captured
#         date_captured = DateCaptured()
        
#         # Copying to the COCO Dataset Directory in an appropiate format.
#         # Filename and ID will be collected from here.
#         if id_counter<1000000:
#             frame_file_name = "COCO_train2014_000000"+str(id_counter)+".jpg"
#         elif (id_counter>=1000000 or id_counter<=10000000):
#             frame_file_name = "COCO_train2014_00000"+str(id_counter)+".jpg"
       
#         image = io.imread(os.path.join(source_dataset_name, video_name, frame_name))

#         if os.path.isfile(os.path.join(target_dataset_name, frame_file_name))==False:
#             io.imsave(os.path.join(target_dataset_name, frame_file_name), image)
        
#         # License tag at random
#         license = np.random.randint(8)
        
#         # Getting image width and height
#         height = image.shape[1]
#         width = image.shape[0]

#         # URL Information is unknown
#         url = 'n/a'
        
#         # Appending relevant information to images section
# #         minival2014['images'].append({'date_captured': date_captured, 'file_name': frame_file_name, 
# #                               'height': height, 'id': id_counter, 'license': license, 'flickr_url': url, 
# #                               'width': width, 'coco_url': url})

#         # bbox operations
#         coords = np.array(all_bboxes[d].split(",")).reshape((-1, 2))
#         x, y = coords[:,0], coords[:,1]
#         xywh = coco_bbox_creator(x, y)
        
#         # Category ID 
#         # Create a dictionary where the keys will be video names and 
#         # values will be category id's. Then calling this dictionary
#         # appropiately will output the category id. 
#         # E.g --> dict['fish1'] = 91
#         category_id = 91
        
#         # id
#         annot_id = 300001

#         # Is crowded?
#         iscrowd = 0

#         # Image ID
#         image_id = id_counter

#         # Segmentation
#         seg_image = io.imread(os.path.join(source_dataset_seg_loc, video_name, all_seg_frame_names[d]))/255
#         seg_coords_x, seg_coords_y = map(lambda x: list(x), np.where(seg_image==1))
#         seg_coords = []
#         for d, x in enumerate(seg_coords_x):
#             seg_coords.append(x)
#             seg_coords.append(seg_coords_y[d])

#         # Area
#         area = PolyArea(seg_coords_x, seg_coords_y)
        
#         # Appending relevant info to the annotation section
# #         minival2014['annotations'].append({'area': area, 'bbox': xywh, 
# #                                    'category_id': category_id,
# #                                    'id': annot_id, 'image_id': image_id, 'iscrowd': iscrowd, 'segmentation': [seg_coords]})
#         id_counter += 1
# #         print(os.path.isfile(os.path.join(dataset_dir, source_dataset_name, train, video_name+"/", frame_name)))
# #         print(dataset_dir, target_dataset_name, frame_file_name)

In [11]:
# Images in segmentation directory are zero indexed and original image directory is one indexed.

# dataset_dir = "/home/mspr/Desktop/Mask_RCNN/Datasets/VOT2016/"  # Will be changed with argparse, includes directories for video names and corresponding frames inside
# train = "train/"                                                # train
# dataset_seg_dir = "GT_segmentation/" # one indexed version for providing segmentation data. 

# # Date captured
# dt = str(datetime.datetime.now())
# date, time = dt.split()
# time = time.split(".")[0]
# date_captured = date+" "+time

# # Copying to the COCO Dataset Directory in an appropiate format.
# # Filename and ID will be collected from here.
# target_dataset_dir = "Datasets/coco/train2014/"
# idx = 600001
# file_name = "COCO_train2014_000000"+str(idx)+".jpg"
# image = io.imread(image_dir)
# io.imsave(target_dataset_dir+file_name, image)

# # License tag at random
# license = np.random.randint(8)

# # Getting image width and height
# height = image.shape[1]
# width = image.shape[0]

# # URL Information is unknown
# url = 'n/a'
# # minival2014['images'].append({'date_captured': date_captured, 'file_name': file_name, 
# #                               'height': height, 'id': idx, 'license': license, 'flickr_url': url, 
# #                               'width': width, 'coco_url': url})

In [12]:
# (minival2014['images'][-3:])
# minival2014['images'].pop()

In [13]:
# Annotations

# def coco_bbox_creator(x, y):
#     x = list(map(lambda x: float(x), x))
#     y = list(map(lambda x: float(x), y))
#     x_min = min(x)
#     y_min = min(y)
#     w = max(x) - x_min
#     h = max(y) - y_min
#     return [x_min, y_min, w, h]

# # bbox
# bbox_info_file = "/home/mspr/Desktop/Mask_RCNN/Datasets/VOT2016/train/fish1/groundtruth.txt"
# with open(bbox_info_file) as f:
#     all_bboxes = f.read().split()
#     for bbox in all_bboxes[1:2]:
#         coords = np.array(bbox.split(",")).reshape((-1, 2)) 
#         x, y = coords[:,0], coords[:,1]
#         xywh = coco_bbox_creator(x, y)
#         print(xywh)

# # category id
# # Create a dictionary where the keys will be video names and 
# # values will be category id's. Then calling this dictionary
# # appropiately will output the category id. 
# # E.g --> dict['fish1'] = 91
# category_id = 91

# # id
# annot_id = 300001
    
# # Is crowded?
# iscrowd = 0

# # Image ID
# image_id = idx

# # Segmentation
# seg_image = io.imread(seg_image_dir)/255
# seg_coords_x, seg_coords_y = map(lambda x: list(x), np.where(seg_image==1))
# seg_coords = []
# for d, x in enumerate(seg_coords_x):
#     seg_coords.append(x)
#     seg_coords.append(seg_coords_y[d])

# # Area
# def PolyArea(x,y):
#     return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))

# area = PolyArea(seg_coords_x, seg_coords_y)

# minival2014['annotations'].append({'area': area, 'bbox': xywh, 
#                                    'category_id': category_id,
#                                    'id': annot_id, 'image_id': idx, 'iscrowd': iscrowd, 'segmentation': [seg_coords]})

In [14]:
# minival2014['annotations'].pop()
# print (minival2014['annotations'][-1])


In [15]:
# Storing the data in memory and then writing it into a file.

# from decimal import Decimal
# Intended to make the JSON encoder emit floats with two decimals.
# NOTE(review): on Python 3 the json module does not appear to consult
# json.encoder.FLOAT_REPR, so this override likely has no effect — confirm
# whether two-decimal rounding is still required and apply it explicitly if so.
from json import encoder
encoder.FLOAT_REPR = lambda o: format(o, '.2f')

class MyEncoder(json.JSONEncoder):
    """JSON encoder that additionally understands NumPy scalars and arrays."""

    def default(self, obj):
        """
        Convert values the stock encoder rejects: NumPy integers become int,
        NumPy floats become float and arrays become (nested) lists. Anything
        else is delegated to the base class, which raises TypeError.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# Serialize before opening the target: opening with "w" truncates the file, so
# a failure inside json.dumps would otherwise leave an empty annotation file.
data = json.dumps(minival2014, cls=MyEncoder)
with open("Datasets/coco/annotations/instances_train2014.json","w") as f:
# with open("Datasets/coco/annotations/instances_valminusminival2014.json","w") as f:
    f.write(data)
