# COCO-ify Dissection Dataset

https://github.com/waspinator/pycococreator/blob/master/examples/shapes/shapes_to_coco.py for reference 

Docker image: abalajiaus/oct_ca:latest-fire

In [None]:
%%time
!pip install git+git://github.com/waspinator/pycococreator.git@0.2.0

In [None]:
%%time
!pip install git+git://github.com/waspinator/coco.git@2.1.0

In [None]:
from pycocotools.coco import COCO
import pycococreatortools.pycococreatortools as creator

In [None]:
import pycocotools.mask as m

In [None]:
from pathlib import Path
from fastai.vision import get_files
import PIL.Image as Image
import numpy as np
import matplotlib.pyplot as plt
import datetime
import json
from scipy.ndimage.measurements import label
import os
import shutil

In [None]:
def OCTDataToCOCO(im_path):
    """Build a COCO-style dataset dictionary for one split of the OCT data.

    Every ``.jpg`` found in ``im_path`` is paired with the label image of the
    same file name in the sibling ``labels`` directory. The label is
    thresholded (``> 200``) into a binary mask, split into connected
    components, and each component is turned into one ``lumen`` annotation.

    Args:
        im_path: Directory holding the images (``str`` or ``Path``). Labels
            are read from ``im_path.parent / 'labels'``.

    Returns:
        A dict with the keys ``info``, ``licenses``, ``categories``,
        ``images`` and ``annotations``. Each image entry additionally carries
        its own ``annotations`` list, an ``image_id`` and the path of its
        label file under ``sem_seg_file_name``.

    Raises:
        AssertionError: If a label image does not have the same size as its
            image.
        FileNotFoundError: If an image has no label file of the same name.
    """
    im_path = Path(im_path)

    INFO = {
        "description": "OCT dataset",
        "url": "tba",
        "version": "0.1.0",
        "year": 2020,
        "contributor": "abalajiaus",
        "date_created": datetime.datetime.utcnow().isoformat(' ')
    }

    LICENSES = [
        {
            "id": 1,
            "name": "VASCLAB",
            "url": "VASCLAB"
        }
    ]

    CATEGORIES = [
        {
            'id': 1,
            'name': 'lumen',
            'supercategory': 'feature',
        }
    ]

    coco = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []}

    label_path = im_path.parent/'labels'

    # Identical for every instance: the single 'lumen' category, encoded as
    # polygons (is_crowd=0) rather than RLE.
    category_info = {'id': 1, 'is_crowd': 0}

    # Annotation ids must be unique across the whole dataset. They start at 1
    # because pycocotools' COCOeval uses 0 to mean "no match", so an
    # annotation with id 0 can never be counted as matched.
    anno_id = 1

    # Sort the files so that image ids are reproducible between runs instead
    # of depending on directory listing order.
    image_paths = sorted(get_files(im_path, extensions='.jpg'))

    for i, image_path in enumerate(image_paths, start=1):
        anno_path = label_path/image_path.name

        # Context managers release both file handles promptly; a split can
        # contain thousands of images.
        with Image.open(image_path) as image, Image.open(anno_path) as gt_image:
            image_size = image.size
            # make sure label and image are corresponding
            assert anno_path.stem == image_path.stem
            assert gt_image.size == image_size
            gt = np.array(gt_image) > 200  # thresholding makes the label binary

        # Multi-channel labels: only the first channel is used.
        # Single-channel (2-D) labels are used as they are.
        if gt.ndim == 3:
            gt = gt[:, :, 0]
        gt = gt.astype(int)

        # Each connected component of the mask is one instance.
        lab, num_instances = label(gt)

        image_info = creator.create_image_info(i, str(image_path), image_size)
        image_info['annotations'] = []

        for instance in range(1, num_instances + 1):
            instance_mask = (lab == instance).astype(int)
            annotation_info = creator.create_annotation_info(anno_id,
                                                             i,
                                                             category_info,
                                                             instance_mask,
                                                             image_size=image_size,
                                                             tolerance=1)

            # The creator returns None for instances it cannot encode;
            # those are skipped and do not consume an annotation id.
            if annotation_info is None:
                continue

            # NOTE(review): 0 presumably targets detectron2's BoxMode enum
            # (0 = XYXY_ABS), while COCO boxes are XYWH - confirm against the
            # consumer of this file before changing.
            annotation_info['bbox_mode'] = 0
            coco['annotations'].append(annotation_info)
            image_info['annotations'].append(annotation_info)
            anno_id += 1

        image_info['image_id'] = i
        image_info['sem_seg_file_name'] = str(anno_path)
        coco['images'].append(image_info)

    return coco

## Reformat data structure to include train, validation, test


In [None]:
# Root of the raw dataset; it is searched recursively for .jpg files when the
# data is copied into the train/valid/test layout.
original_data_path = Path('/workspace/oct_ca_seg/data_oct/')

In [None]:
# Root of the re-organised dataset, with one sub-directory per split.
COCO_path = Path('/workspace/oct_ca_seg/COCOdata/')
train, valid, test = (COCO_path/split for split in ('train', 'valid', 'test'))

In [None]:
%%time
for d in get_files(original_data_path, recurse=True, extensions='.jpg'):
    #print(d.stem, d.parent.name)
    stem = int(d.stem)
    parent = d.parent.name
    

    if stem <8408: t = train
    elif stem > 8408 and stem <11011: t = valid
    elif stem >11011: t = test
    
    dest = t/(parent + '/' + d.name)
    
    shutil.copy(d, dest)

In [None]:
%%time
# Build the COCO dictionary for the training split.
trains = OCTDataToCOCO(train/'images')

In [None]:
# Number of image entries and annotation entries in the training split.
len(trains['images']), len(trains['annotations'])

In [None]:
%%time
# Build the COCO dictionary for the validation split.
valids = OCTDataToCOCO(valid/'images')

In [None]:
# Number of image entries and annotation entries in the validation split.
len(valids['images']), len(valids['annotations'])

In [None]:
%%time
# Build the COCO dictionary for the test split.
tests = OCTDataToCOCO(test/'images')

In [None]:
# Number of image entries and annotation entries in the test split.
len(tests['images']), len(tests['annotations'])

In [None]:
# Inspect which keys a single image entry carries.
valids['images'][10].keys()

In [None]:
# Serialise the training-split COCO dictionary to JSON inside the split's
# image directory.
with (train/'images/annotations_medium.json').open('w') as output_json_file:
    json.dump(trains, output_json_file)

In [None]:
# Serialise the validation-split COCO dictionary to JSON inside the split's
# image directory.
with (valid/'images/annotations_medium.json').open('w') as output_json_file:
    json.dump(valids, output_json_file)

In [None]:
# Serialise the test-split COCO dictionary to JSON inside the split's
# image directory.
with (test/'images/annotations_medium.json').open('w') as output_json_file:
    json.dump(tests, output_json_file)

The number of annotations can differ from the number of images because some label masks are blank; no annotation is created for those, so nothing is added.

# Visualise COCO dataset

https://github.com/waspinator/pycococreator/blob/master/examples/shapes/visualize_coco.ipynb for reference

In [None]:
train

In [None]:
anno_file = train/'images/annotations.json'

In [None]:
# Open the annotation file with the pycocotools COCO API.
coco_dataset = COCO(anno_file)

In [None]:
# Full category records for every category id in the dataset.
categories = coco_dataset.loadCats(coco_dataset.getCatIds())

In [None]:
categories

In [None]:
# Ids of the categories named 'lumen'.
category_ids = coco_dataset.getCatIds(catNms=['lumen'])

In [None]:
category_ids

In [None]:
# Ids of the images selected by those category ids.
image_ids = coco_dataset.getImgIds(catIds=category_ids)

In [None]:
# Peek at the first few image ids.
image_ids[0:5]

In [None]:
import pylab
import skimage.io as io

In [None]:
# Set the figure size before anything is drawn: rcParams only apply to
# figures created afterwards, so setting it after plt.imshow had no effect on
# the figure being shown.
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

# Pick one image at random from the images that contain the selected categories.
image_data = coco_dataset.loadImgs(image_ids[np.random.randint(0, len(image_ids))])[0]

# Load and display the image itself.
image = io.imread(image_data['file_name'])
plt.imshow(image); plt.axis('off')

# Load and display the instance annotations on top of the image.
annotation_ids = coco_dataset.getAnnIds(imgIds=image_data['id'], catIds=category_ids, iscrowd=None)
annotations = coco_dataset.loadAnns(annotation_ids)
coco_dataset.showAnns(annotations)