# Prerequisite

In [None]:
## ref coco api notebook: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoDemo.ipynb
!pip install pycocotools

In [None]:
# download annotation files
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip annotations_trainval2017.zip

# COCO to YOLO by category

* [Yolo Detect Your Custom Objects](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects)

In [1]:
import os
from pycocotools.coco import COCO
from multiprocessing.dummy import Pool # use threads for I/O bound tasks
from urllib.request import urlretrieve

In [2]:
# Dataset root and split name; together they select the annotation file to load.
dataDir = '.'
dataType = 'val2017'
annFile = f'{dataDir}/annotations/instances_{dataType}.json'

In [3]:
# Load the instance annotations through the COCO API (this also builds its index).
coco = COCO(annotation_file=annFile)

loading annotations into memory...
Done (t=0.63s)
creating index...
index created!


In [4]:
# Print the name of every category present in the loaded annotations.
cats = coco.loadCats(coco.getCatIds())
nms = [entry['name'] for entry in cats]
print('COCO categories: \n' + ' '.join(nms) + '\n')

COCO categories: 
person bicycle car motorcycle airplane bus train truck boat traffic light fire hydrant stop sign parking meter bench bird cat dog horse sheep cow elephant bear zebra giraffe backpack umbrella handbag tie suitcase frisbee skis snowboard sports ball kite baseball bat baseball glove skateboard surfboard tennis racket bottle wine glass cup fork knife spoon bowl banana apple sandwich orange broccoli carrot hot dog pizza donut cake chair couch potted plant bed dining table toilet tv laptop mouse remote keyboard cell phone microwave oven toaster sink refrigerator book clock vase scissors teddy bear hair drier toothbrush



In [5]:
# Pick the category to export and collect the metadata of every image containing it.
# Caveat noted by the original author: only a single category id can be used to
# get imgIds, see https://github.com/cocodataset/cocoapi/issues/114
catIds = coco.getCatIds(catNms=['bear'])
imgIds = coco.getImgIds(catIds=catIds)
images = coco.loadImgs(ids=imgIds)
len(images)

49

In [6]:
# Directory that receives the downloaded images and the generated label files.
fp_download = './train'
# Create it from Python rather than shelling out to `mkdir -p`, so the cell does
# not depend on that command being available; exist_ok makes re-runs a no-op.
os.makedirs(fp_download, exist_ok=True)

In [7]:
# Parallel lists, one entry per selected image: source URL, local target path,
# and the category filter (the same catIds list object is reused for every entry).
urls = [img['coco_url'] for img in images]
files = [os.path.join(fp_download, img['file_name']) for img in images]
catIdsList = [catIds for _ in images]

In [8]:
%%capture
Pool(4).starmap(urlretrieve, zip(urls, files)) # download 4 files at a time

In [9]:
# Sanity check: count the entries in the download directory.
!ls {fp_download}/ | wc -l

49


In [10]:
def yolo_annotation(img, catIds):
    """Write the YOLO label file for one COCO image and return its path.

    The label file is placed in ``fp_download`` and named after the image file
    with its extension replaced by ``.txt``. Each annotation of the image that
    matches ``catIds`` becomes one line ``<class> <x> <y> <w> <h>``, where x/y
    are the box centre and w/h the box size, all divided by the image width or
    height respectively.

    Args:
        img: COCO image record; the keys 'file_name', 'id', 'width' and
            'height' are read.
        catIds: category id filter forwarded to ``coco.getAnnIds``.

    Returns:
        Path of the label file that was written.
    """
    # splitext swaps whatever extension the image has. A plain
    # replace(".jpg", ".txt") leaves other names untouched, and the labels
    # would then be written into the image file itself.
    stem, _ = os.path.splitext(img['file_name'])
    fp = os.path.join(fp_download, stem + ".txt")
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)
    dw = 1. / img['width']
    dh = 1. / img['height']

    # "w" rather than "a": the file is rebuilt from scratch on every call, so
    # re-running the conversion does not duplicate label lines.
    with open(fp, "w") as f:
        for ann in anns:
            # bbox is used as [x_min, y_min, width, height] in pixels.
            xmin = ann["bbox"][0]
            ymin = ann["bbox"][1]
            xmax = ann["bbox"][2] + ann["bbox"][0]
            ymax = ann["bbox"][3] + ann["bbox"][1]
            x, y, w, h = (xmin+xmax)/2*dw, (ymin+ymax)/2*dh, (xmax-xmin)*dw, (ymax-ymin)*dh
            # TODO: remove the hard-coded class id.
            output = f"0 {x:.5f} {y:.5f} {w:.5f} {h:.5f} \n"
            f.write(output)
    return fp

In [11]:
# Convert 4 files at a time. The with-block releases the worker threads once
# every label file has been written, instead of leaving an unclosed pool behind.
with Pool(4) as pool:
    result = pool.starmap(yolo_annotation, zip(images, catIdsList))

In [12]:
# Sanity check: count the .txt files in the download directory.
!ls {fp_download}/*.txt | wc -l

49


In [13]:
## Generate train.txt file
fp = os.path.join(fp_download, 'train.txt')

# "w" rather than "a": the list is rebuilt on every run, so re-executing the
# cell does not append a second copy of every entry.
with open(fp, "w") as f:
    for file in files:
        # normpath drops a leading "./" (if any) instead of blindly cutting the
        # first two characters, which would mangle paths without that prefix.
        output = f"data/{os.path.normpath(file)}\n"
        f.write(output)

In [15]:
# Print the generated train.txt for inspection.
!cat {fp_download}/train.txt

data/train/000000318080.jpg
data/train/000000243075.jpg
data/train/000000519688.jpg
data/train/000000560266.jpg
data/train/000000312586.jpg
data/train/000000132622.jpg
data/train/000000521231.jpg
data/train/000000308753.jpg
data/train/000000308476.jpg
data/train/000000020247.jpg
data/train/000000471450.jpg
data/train/000000104603.jpg
data/train/000000203546.jpg
data/train/000000000285.jpg
data/train/000000092839.jpg
data/train/000000079144.jpg
data/train/000000194216.jpg
data/train/000000390826.jpg
data/train/000000414261.jpg
data/train/000000186422.jpg
data/train/000000235064.jpg
data/train/000000417465.jpg
data/train/000000042296.jpg
data/train/000000519611.jpg
data/train/000000547519.jpg
data/train/000000096960.jpg
data/train/000000307658.jpg
data/train/000000194506.jpg
data/train/000000513484.jpg
data/train/000000071756.jpg
data/train/000000573391.jpg
data/train/000000205776.jpg
data/train/000000127955.jpg
data/train/000000217948.jpg
data/train/000

# Just for Debug

In [None]:
# Run the converter on the first image only; the cell shows the returned label-file path.
yolo_annotation(images[0],catIds)

In [None]:
!ls download_images/*.txt

In [None]:
!cat download_images/000000318080.txt

In [None]:
import skimage.io as io
import matplotlib.pyplot as plt
# Read the image from fp_download, where the download step stored it; the
# previously hard-coded "download_images" directory is never created here.
I = io.imread(os.path.join(fp_download, '000000318080.jpg'))
plt.imshow(I)
plt.show()