In [36]:
import ijroi
from os.path import join, splitext
from os import listdir
import numpy as np

In [17]:
IJ_ROI_DIR = join('..', 'data', 'bounding_boxes_299')

CLASSES = ['Type_1', 'Type_2', 'Type_3']
TRAINING_DIR = join('..', 'data', 'train_299')

In [47]:
tagged_images = {splitext(f)[0] for f in listdir(IJ_ROI_DIR)}
tagged, untagged = {}, {}

for class_ in CLASSES:
    files = {splitext(f)[0] for f in listdir(os.path.join(TRAINING_DIR, class_))}
    tagged[class_] = files & tagged_images
    untagged[class_] = files - tagged_images

print('Number of tagged images:', len(tagged_images))
print('Number of untagged images:', sum(len(untagged[x]) for x in untagged))

Number of tagged images: 306
Number of untagged images: 1026


In [40]:
def convert_from_roi(fname):
    """Convert a roi file to a numpy array [x, y, w, h].
    
    Parameters
    ----------
    fname : string
        If ends with `.roi`, we assume a full path is given
    
    """
    if not fname.endswith('.roi'):
        fname = '%s.roi' % join(IJ_ROI_DIR, fname)

    with open(fname, 'rb') as f:
        roi = ijroi.read_roi(f)
        top, left = roi[0]
        bottom, right = roi[2]
        height, width = bottom - top, right - left

        return np.array([top, left, width, height])

In [39]:
{f: convert_from_roi(f) for f in tagged_images}

{'0': array([ 75, 106, 120, 129], dtype=int16),
 '1': array([130, 103, 137,  94], dtype=int16),
 '10': array([156,  67, 133, 147], dtype=int16),
 '100': array([ 20, 107, 171, 177], dtype=int16),
 '1001': array([125,  57, 174, 183], dtype=int16),
 '1002': array([ 61,  19, 193, 202], dtype=int16),
 '1005': array([  5,   9, 294, 286], dtype=int16),
 '101': array([  0,   1, 299, 298], dtype=int16),
 '1010': array([ 11,  38, 276, 259], dtype=int16),
 '1011': array([ 50,  12, 190, 181], dtype=int16),
 '1013': array([126,  74,  94,  92], dtype=int16),
 '1014': array([ 40,  71, 181, 161], dtype=int16),
 '1016': array([  0,   0, 299, 244], dtype=int16),
 '1017': array([ 58,   0, 241, 246], dtype=int16),
 '1018': array([  0, 177, 299, 122], dtype=int16),
 '1019': array([ 95,  51, 179, 168], dtype=int16),
 '102': array([ 56,  55, 150, 134], dtype=int16),
 '1021': array([ 55,  41, 244, 236], dtype=int16),
 '1022': array([ 45,  35, 225, 214], dtype=int16),
 '1023': array([ 25,   5, 243, 247], dtype