In [1]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import random
import json
import os
import shutil
from pathlib import Path
import os.path
import subprocess
import os
import glob
pylab.rcParams['figure.figsize'] = (8.0, 10.0)
def coco_to_yolo_bb(x1, y1, w, h, image_w, image_h):
    return [((2*x1 + w)/(2*image_w)) , ((2*y1 + h)/(2*image_h)), w/image_w, h/image_h]

In [2]:
def getCatData():
    # display nightowls categories and supercategories
    cats = coco.loadCats(coco.getCatIds())
    cat_map = {}
    for i,cat in enumerate(cats):
        cat_map[cat['id']] = i
    nms=[cat['name'] for cat in cats]
    catIds = coco.getCatIds()
    return catIds, cat_map

In [3]:
def get_imgs_from_each_category(catIds, numImgs=None):
    imgIds = []
    for catId in catIds:
        imgIds_current_batch = coco.getImgIds(catIds=[catId])[:numImgs]
        if(len(imgIds_current_batch) < 30):
            print('ERROR! Not 30 images in this category!')
        imgIds = imgIds + imgIds_current_batch
    return imgIds

In [4]:

"""
getImgIds() -> list[imgId] 
loadImgs(ids : [imgId]) -> list[img]
getAnnIds(imgIds: list[imgId]) -> list[annId]
loadAnns(ids: list[annId]) -> list[ann]

*One imgId can be multiple annIds
*One annId is one ann


#imgIds need to have annotations and those annotations bboxes must not be negative
#write_label_files need
    #filename, which are in imgs
    #catIds, which are in anns
    #bboxes, which are in anns
"""
def get_random_img_ids(numImgs):
    imgIds = coco.getImgIds()
    random.shuffle(imgIds)
    #I think NO is the one I needed to filter for
    imgIds = list(filter(imgId_has_annotation, imgIds))
    imgIds = list(filter(imgId_has_positive_bboxes, imgIds))
    return imgIds[:numImgs]

def get_img_ids_per_cat(numImgs_per_cat):
    catIds = coco.getCatIds()
    imgIds_total = []
    for catId in catIds:
        imgIds_cat = coco.getImgIds(catIds=[catId])
        #imgIds = coco.getImgIds()
        random.shuffle(imgIds_cat)
        imgIds_current = []
        for imgId in imgIds_cat:
            if imgId_has_annotation(imgId) and imgId_has_positive_bboxes(imgId):
                imgIds_current.append(imgId)
            if(len(imgIds_current) == numImgs_per_cat):
                break
        imgIds_total = imgIds_total + imgIds_current
    return imgIds_total

def imgId_has_positive_bboxes(imgId):
    annIds = coco.getAnnIds(imgIds=imgId)
    anns = coco.loadAnns(annIds)
    for ann in anns:
        negative_bbox = False 
        for bbox_val in ann['bbox']:
            if bbox_val < 0:
                negative_bbox = True
        if negative_bbox == True:
            return False
    return True

def imgId_has_annotation(imgId):
    annIds = coco.getAnnIds(imgIds=imgId)
    if(len(annIds) > 0):
        return True
    else:
        return False

#def write_label_files(numImgs_per_cat):
def write_label_files(numImgs, cat_map, catIds, label_dir1):
    """
    Some images:
    *Do not have annotations.
    *Have negative bboxes
    """
    #imgIds = get_img_ids_per_cat(numImgs_per_cat)
    imgIds = get_random_img_ids(numImgs)
    print(len(imgIds) == 98)

    for imgId in imgIds:
        img = coco.loadImgs(imgId)[0] #If only 1 imgId, len() = 1
        label_dir = Path(label_dir1)
                        #'/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_val1/dataset/labels'
        filename = img['file_name'].split('.')[0] + '.txt'
        with open(label_dir/filename, 'w') as file:
            annIds = coco.getAnnIds(imgIds = imgId, catIds=catIds)
            #assert len(annIds) != 0
            anns = coco.loadAnns(annIds)
            #if not anns:
                #print('no anns for this file')
            #print(len(anns))
            #print('-----------------------------')
            #anns = remove_negative_bbox_anns(anns)
            for i,ann in enumerate(anns):
                x_center, y_center, width, height = coco_to_yolo_bb(*ann['bbox'], img['width'], img['height'])
                #print(label_dir/filename)
                #print(x_center, y_center, width, height)
                #print('\n')
                file.write(f'{cat_map[ann["category_id"]]} {x_center} {y_center} {width} {height}\n')
    return

def copy_images_to_dataset_dir(src_image_dir, dest_image_dir, image_extension, src_label_dir):
    """
    x number of labels are written to the labels dir by write_label_files(x)
    This function copies the corresponding images into the sibling images dir, completing the dataset folder for YOLO training
    """
    fs = os.listdir(src_label_dir)
    for f in fs:
        if f == '.DS_Store':
            continue
        src = str(src_image_dir/f.split('.')[0]) + image_extension
        dest = str(dest_image_dir/ f.split('.')[0]) + image_extension
        if os.path.isfile(src):
            shutil.copyfile(src, dest)
        else:
            print(f'{src} is not a file')
    return

def remove_files(img_dir, label_dir):
    files = glob.glob(f'{img_dir}*')
    for f in files:
        os.remove(f)
    files = glob.glob(f'{label_dir}*')
    for f in files:
        os.remove(f)



In [5]:

def produce_nightowls_training_set():
    """
    This was done on purple!!
    """
    exit()
    label_dir =  '/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_val1/dataset/labels/'
    src_img_dir = '/Users/azakaria/Documents/yolo datasets/nightowls/val2017'
    img_dir = '/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_val1/dataset/images/'
    annotations_json_file = '/Users/azakaria/Documents/yolo datasets/nightowls/instances_val2017.json'

    coco=COCO('/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_train1/nightowls_validation')#


    coco=COCO('/Users/azakaria/Documents/yolo datasets/nightowls/nightowls_training.json')
    catIds, cat_map = getCatData()
    write_label_files(480, cat_map, catIds, '/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_train1/dataset/labels') 
    copy_images_to_dataset_dir(Path('/Users/azakaria/Documents/yolo datasets/nightowls/nightowls_validation'),Path('/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_val1/dataset/images/'), '.png', '/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_train1/dataset/labels') #dataset/images
#endregion

def produce_nightowls_validation_set(numImgs):
    src_img_dir = '/Users/azakaria/Documents/yolo datasets/nightowls/nightowls_validation'
    img_dir = '/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_val1/images/'
    label_dir='/Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_val1/labels/'
    #annotations_json_file = '/Users/azakaria/Documents/yolo datasets/nightowls/nightowls_validation.json'
    #coco=COCO(annotations_json_file)

    print('Files removed')
    remove_files(img_dir,label_dir)

    catIds, cat_map = getCatData()
    write_label_files(numImgs, cat_map, catIds, label_dir)
    copy_images_to_dataset_dir(Path(src_img_dir),Path(img_dir), '.png', label_dir) #dataset/images

    os.system(f'ls {img_dir} | wc -l ; ls {label_dir} | wc -l')

def produce_coco_training_set(numImgs):
    label_dir =  '/Users/azakaria/Code/openmpf-yolo-training/coco/coco_train1/labels/'
    src_img_dir = '/Users/azakaria/Documents/yolo datasets/coco/train2017'
    img_dir = '/Users/azakaria/Code/openmpf-yolo-training/coco/coco_train1/images/'
    annotations_json_file = '/Users/azakaria/Documents/yolo datasets/coco/instances_train2017.json'

    remove_files(img_dir,label_dir)
    print('Files removed')

    #coco=COCO(annotations_json_file)
    catIds, cat_map = getCatData()
    write_label_files(numImgs, cat_map, catIds, label_dir) 
    copy_images_to_dataset_dir(Path(src_img_dir),Path(img_dir), '.jpg', label_dir) #dataset/images

    os.system(f'ls {img_dir} | wc -l ; ls {label_dir} | wc -l')
    #produce_coco_training_set(480)

def produce_coco_validation_set(numImgs):
    label_dir =  '/Users/azakaria/Code/openmpf-yolo-training/coco/coco_val1/labels/'
    src_img_dir = '/Users/azakaria/Documents/yolo datasets/coco/val2017'
    img_dir = '/Users/azakaria/Code/openmpf-yolo-training/coco/coco_val1/images/'
    annotations_json_file = '/Users/azakaria/Documents/yolo datasets/coco/instances_val2017.json'

    remove_files(img_dir,label_dir)

    print('Files removed')

        #coco=COCO(annotations_json_file)
    catIds, cat_map = getCatData()
    write_label_files(numImgs, cat_map, catIds, label_dir) 
    copy_images_to_dataset_dir(Path(src_img_dir),Path(img_dir), '.jpg', label_dir) #dataset/images

    os.system(f'ls {img_dir} | wc -l ; ls {label_dir} | wc -l')

if __name__ == "__main__":
    annotations_json_file = '/Users/azakaria/Documents/yolo datasets/nightowls/nightowls_validation.json'
    coco=COCO(annotations_json_file)
    produce_nightowls_validation_set(196)

    annotations_json_file = '/Users/azakaria/Documents/yolo datasets/coco/instances_train2017.json'
    coco=COCO(annotations_json_file)
    produce_coco_training_set(480)

    annotations_json_file = '/Users/azakaria/Documents/yolo datasets/coco/instances_val2017.json'
    coco=COCO(annotations_json_file)
    produce_coco_validation_set(98)

loading annotations into memory...
Done (t=0.29s)
creating index...
index created!
Files removed
False
     196
     196
loading annotations into memory...
Done (t=17.95s)
creating index...
index created!
Files removed
False
     480
     480
loading annotations into memory...
Done (t=0.63s)
creating index...
index created!
Files removed
True
      98
      98


In [10]:
#These work
os.system('/usr/local/bin/docker exec 5dcf6e04dbba rm -rf /usr/src/datasets/nightowls_val1/')
os.system('/usr/local/bin/docker exec 5dcf6e04dbba rm -rf /usr/src/datasets/nightowls_train1/')
os.system('/usr/local/bin/docker exec 5dcf6e04dbba rm -rf /usr/src/datasets/coco_val1/')
os.system('/usr/local/bin/docker exec 5dcf6e04dbba rm -rf /usr/src/datasets/coco_train1/')

os.system('docker cp /Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_val1/ 5dcf6e04dbba:/usr/src/datasets/nightowls_val1/')
os.system('docker cp /Users/azakaria/Code/openmpf-yolo-training/nightowls/nightowls_train1/ 5dcf6e04dbba:/usr/src/datasets/nightowls_train1/')
os.system('docker cp /Users/azakaria/Code/openmpf-yolo-training/coco/coco_val1/ 5dcf6e04dbba:/usr/src/datasets/coco_val1/')
os.system('docker cp /Users/azakaria/Code/openmpf-yolo-training/coco/coco_train1/ 5dcf6e04dbba:/usr/src/datasets/coco_train1/')

0