# COCO-ify Dissection Dataset

https://github.com/waspinator/pycococreator/blob/master/examples/shapes/shapes_to_coco.py for reference 

DOCKER FILE: abalajiaus/oct_ca:latest-fire or abalajiaus/oct_ca:latest-fire

In [1]:
%%time
!pip install git+git://github.com/waspinator/pycococreator.git@0.2.0

Collecting git+git://github.com/waspinator/pycococreator.git@0.2.0
  Cloning git://github.com/waspinator/pycococreator.git (to revision 0.2.0) to /tmp/pip-req-build-4cjjltbc
  Running command git clone -q git://github.com/waspinator/pycococreator.git /tmp/pip-req-build-4cjjltbc
  Running command git checkout -q fba8f4098f3c7aaa05fe119dc93bbe4063afdab8
Collecting scikit-image
  Downloading scikit_image-0.17.2-cp36-cp36m-manylinux1_x86_64.whl (12.4 MB)
[K     |################################| 12.4 MB 12.3 MB/s eta 0:00:01
[?25hCollecting tifffile>=2019.7.26
  Downloading tifffile-2020.5.30-py3-none-any.whl (133 kB)
[K     |################################| 133 kB 14.3 MB/s eta 0:00:01
Collecting imageio>=2.3.0
  Downloading imageio-2.8.0-py3-none-any.whl (3.3 MB)
[K     |################################| 3.3 MB 8.7 MB/s eta 0:00:01
[?25hCollecting PyWavelets>=1.1.1
  Downloading PyWavelets-1.1.1-cp36-cp36m-manylinux1_x86_64.whl (4.4 MB)
[K     |################################| 4.

In [2]:
%%time
!pip install git+git://github.com/waspinator/coco.git@2.1.0

Collecting git+git://github.com/waspinator/coco.git@2.1.0
  Cloning git://github.com/waspinator/coco.git (to revision 2.1.0) to /tmp/pip-req-build-6yvzz32g
  Running command git clone -q git://github.com/waspinator/coco.git /tmp/pip-req-build-6yvzz32g
  Running command git checkout -q cd69b031a5dafdd40c0b2c52bd4a9c5d7f11382e
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25ldone
[?25h  Created wheel for pycocotools: filename=pycocotools-2.1-cp36-cp36m-linux_x86_64.whl size=299494 sha256=2ffbb51925e96d569db6f814205b6ee01a1a35bd769f457aeebd5447e3cd7c15
  Stored in directory: /tmp/pip-ephem-wheel-cache-b75p9_u6/wheels/6a/fc/7d/91a76da8191691d4dc7c7144306007ce6472cb24f51051603e
Successfully built pycocotools
Installing collected packages: pycocotools
  Attempting uninstall: pycocotools
    Found existing installation: pycocotools 2.0
    Uninstalling pycocotools-2.0:
      Successfully uninstalled pycocotools-2.0
Successfully install

In [3]:
from pycocotools.coco import COCO
import pycococreatortools.pycococreatortools as creator

In [4]:
import pycocotools.mask as m

In [None]:
!pip install fastai

In [25]:
from pathlib import Path
from fastai.vision import get_files
import PIL.Image as Image
import numpy as np
import matplotlib.pyplot as plt
import datetime
import json
from scipy.ndimage.measurements import label
import os
import shutil

In [45]:
def OCTDataToCOCO(im_path):
    INFO = {
        "description": "OCT dataset",
        "url": "tba",
        "version": "0.1.0",
        "year": 2020,
        "contributor": "abalajiaus",
        "date_created": datetime.datetime.utcnow().isoformat(' ')
    }

    LICENSES = [
        {
            "id": 1,
            "name": "VASCLAB",
            "url": "VASCLAB"
        }
    ]

    CATEGORIES = [
        {
            'id': 1,
            'name': 'lumen',
            'supercategory': 'feature',
        }

    ]
    coco = {
            "info": INFO,
            "licenses": LICENSES,
            "categories": CATEGORIES,
            "images": [],
            "annotations": []}
    
    label_path = im_path.parent/'labels'
    i=1
    anno_id = 0 #anno_id has to be unique
    for image_path in get_files(im_path, extensions='.jpg'):
        image = Image.open(image_path)
        image_id = image_path.name
        anno_path = label_path/image_id
        gt = Image.open(anno_path)

        assert (anno_path.stem == image_path.stem) # make sure label and image are corresponding
        assert (gt.size==image.size)
        gt = (np.array(gt)>200).astype(int)[:,:,0]
        assert (np.array_equal(gt, gt.astype(bool))) #make sure label is binary

        lab, num_instances = label(gt)

        image_info = creator.create_image_info(i,
                                                str(image_path),
                                                image.size)
        
        image_info['annotations'] = []
        
        for n in range(num_instances):
            tl=(lab==(n+1)).astype(int)
            category_info = {'id': 1, 'is_crowd': 0} #play with 1 or 0 here
            annotation_info = creator.create_annotation_info(anno_id,
                                                             i,
                                                             category_info,
                                                             tl,
                                                             image_size=image.size,
                                                             tolerance=1)

            if annotation_info is not None:
                annotation_info['bbox_mode']=0
                coco['annotations'].append(annotation_info)
                image_info['annotations'].append(annotation_info)
                anno_id+=1
        
        
        image_info['image_id'] = i
        image_info['sem_seg_file_name'] = str(anno_path)
        coco['images'].append(image_info)
        
        #if num_instances>1:
        #    pdb.set_trace()    
            
        #    return (image, image_info, l, tl, annotation_info)
        #    
        '''if annotation_info is not None:
            coco['annotations'].append(annotation_info)'''
        i+=1
        #if i%1000==0: print(i)
    return coco

## Reformat data structure to include train, validation, test


In [48]:
original_data_path = Path('/workspace/oct_ca_seg/data_oct/')

In [33]:
def make_data_dirs(coco_path, override=False):
    if override:
        shutil.rmtree(coco_path)
    
    os.mkdir(coco_path)
    for each in ['train', 'valid', 'test']:
        each_dir = coco_path/each
        os.mkdir(each_dir)
        os.mkdir(each_dir/'images')
        os.mkdir(each_dir/'labels')    

In [81]:
COCO_path = Path('/workspace/oct_ca_seg/COCOdata/')
make_data_dirs(COCO_path, override=True)
train = COCO_path/'train'
valid = COCO_path/'valid'
test = COCO_path/'test'

In [82]:
%%time
for d in get_files(original_data_path, recurse=True, extensions='.jpg'):
    #print(d.stem, d.parent.name)
    stem = int(d.stem)
    parent = d.parent.name
    

    if stem <8500: t = train
    elif stem > 8500 and stem <11000: t = valid
    elif stem >11000 and stem <12011: t = test
    
    dest = t/(parent + '/' + d.name)
    shutil.copy(d, dest)

CPU times: user 2.13 s, sys: 1.31 s, total: 3.44 s
Wall time: 3.47 s


In [83]:
8500+2500+1011

12011

In [84]:
%%time
trains = OCTDataToCOCO(train/'images')

CPU times: user 2min 38s, sys: 1.01 s, total: 2min 39s
Wall time: 2min 41s


In [85]:
len(trains['images']), len(trains['annotations'])

(8502, 8502)

In [86]:
%%time
valids = OCTDataToCOCO(valid/'images')

CPU times: user 44.1 s, sys: 347 ms, total: 44.4 s
Wall time: 45 s


In [87]:
len(valids['images']), len(valids['annotations'])

(2499, 2499)

In [88]:
%%time
tests = OCTDataToCOCO(test/'images')

CPU times: user 17.4 s, sys: 132 ms, total: 17.5 s
Wall time: 17.8 s


In [89]:
len(tests['images']), len(tests['annotations'])

(1010, 1010)

In [19]:
valids['images'][10].keys()

dict_keys(['id', 'file_name', 'width', 'height', 'date_captured', 'license', 'coco_url', 'flickr_url', 'annotations', 'image_id', 'sem_seg_file_name'])

In [90]:
with open(train/'images/annotations.json', 'w') as output_json_file:
    json.dump(trains, output_json_file)

In [91]:
with open(valid/'images/annotations.json', 'w') as output_json_file:
    json.dump(valids, output_json_file)

In [92]:
with open(test/'images/annotations.json', 'w') as output_json_file:
    json.dump(tests, output_json_file)

Difference is because some annotations are blank and thus arent added.

# Visualise COCO dataset

https://github.com/waspinator/pycococreator/blob/master/examples/shapes/visualize_coco.ipynb for reference

In [None]:
train

In [None]:
anno_file = train/'images/annotations.json'

In [None]:
coco_dataset = COCO(anno_file)

In [None]:
categories = coco_dataset.loadCats(coco_dataset.getCatIds())

In [None]:
categories

In [None]:
category_ids = coco_dataset.getCatIds(catNms=['lumen'])

In [None]:
category_ids

In [None]:
image_ids = coco_dataset.getImgIds(catIds=category_ids)

In [None]:
image_ids[0:5]

In [None]:
import pylab
import skimage.io as io

In [None]:
image_data = coco_dataset.loadImgs(image_ids[np.random.randint(0, len(image_ids))])[0]

# load and display instance annotations
image = io.imread(image_data['file_name'])

plt.imshow(image); plt.axis('off')
pylab.rcParams['figure.figsize'] = (8.0, 10.0)
annotation_ids = coco_dataset.getAnnIds(imgIds=image_data['id'], catIds=category_ids, iscrowd=None)
annotations = coco_dataset.loadAnns(annotation_ids)
coco_dataset.showAnns(annotations)