In [1]:
from os import listdir
from xml.etree import ElementTree
from numpy import zeros
from numpy import asarray
from mrcnn.utils import Dataset
from mrcnn.visualize import display_instances
from mrcnn.utils import extract_bboxes
from mrcnn.config import Config
from mrcnn.model import MaskRCNN

from matplotlib import pyplot

Using TensorFlow backend.


# Kangaroo detection with Mask R-CNN

In [2]:
def extract_bounding_boxes(filename):
    """Read bounding boxes and image size from an XML annotation file.

    Args:
        filename: Path to the XML annotation (anything
            ``ElementTree.parse`` accepts).

    Returns:
        A tuple ``(boxes, width, height)``. ``boxes`` holds one
        ``[xmin, ymin, xmax, ymax]`` list of ints per ``bndbox`` element
        found anywhere in the document; ``width`` and ``height`` are the
        integer values of ``size/width`` and ``size/height``.
    """
    root = ElementTree.parse(filename).getroot()

    boxes = []
    for bndbox in root.findall('.//bndbox'):
        corners = [int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        boxes.append(corners)

    width = int(root.find('.//size/width').text)
    height = int(root.find('.//size/height').text)
    return boxes, width, height

In [3]:
class Kangaroo(Dataset):
    """Kangaroo images paired with bounding-box XML annotations.

    Images are read from ``<dataset_path>/images/`` and annotations from
    ``<dataset_path>/annots/``; an image named ``<id>.<ext>`` is paired with
    ``<id>.xml``. Bounding boxes are turned into rectangular masks.
    """

    def extract_bounding_boxes(self, filename):
        """Read bounding boxes and image size from an XML annotation file.

        Args:
            filename: Path to the XML annotation.

        Returns:
            A tuple ``(boxes, width, height)``. ``boxes`` holds one
            ``[xmin, ymin, xmax, ymax]`` list of ints per ``bndbox``
            element; ``width``/``height`` come from the ``size`` element.
        """
        root = ElementTree.parse(filename).getroot()
        boxes = [
            [int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            for box in root.findall('.//bndbox')
        ]
        width = int(root.find('.//size/width').text)
        height = int(root.find('.//size/height').text)
        return boxes, width, height

    def load_data(self, dataset_path, train=True, split_at=150):
        """Register the 'kangaroo' class and the images of one split.

        Args:
            dataset_path: Directory containing ``images/`` and ``annots/``.
            train: If true, keep images whose numeric id is below
                ``split_at``; otherwise keep those at or above it.
            split_at: Numeric image id at which the test split starts.
                Defaults to 150, the previously hard-coded threshold.
        """
        self.add_class('dataset', 1, 'kangaroo')
        images = dataset_path + '/images/'
        annots = dataset_path + '/annots/'

        # listdir() returns entries in arbitrary order; sort so that image
        # indices are the same on every run and every machine.
        for filename in sorted(listdir(images)):
            # Strip the extension whatever its length (.jpg, .jpeg, ...).
            img_id = filename.rpartition('.')[0]
            try:
                number = int(img_id)
            except ValueError:
                # Not an "<integer>.<ext>" file (e.g. .DS_Store); it cannot
                # be assigned to a split, so skip it instead of aborting.
                continue
            # TODO: use separate directories for train and test images instead
            if bool(train) != (number < split_at):
                continue
            self.add_image(
                'dataset',
                image_id=img_id,
                path=images + filename,
                annotation=annots + img_id + '.xml',
            )

    def load_mask(self, img_id):
        """Build one rectangular mask per bounding box of an image.

        Args:
            img_id: Index into ``self.image_info``.

        Returns:
            A tuple ``(masks, class_ids)``. ``masks`` is a uint8 array of
            shape ``[height, width, n_boxes]`` with 1 inside each box and 0
            elsewhere; ``class_ids`` is an int32 array holding the
            'kangaroo' class index once per box.
        """
        info = self.image_info[img_id]
        boxes, w, h = self.extract_bounding_boxes(info['annotation'])
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        for i, (xmin, ymin, xmax, ymax) in enumerate(boxes):
            # Rows are indexed by y, columns by x.
            masks[ymin:ymax, xmin:xmax, i] = 1
        # Every box is the same class, so look the index up once; skip the
        # lookup entirely when there are no boxes.
        ids = [self.class_names.index('kangaroo')] * len(boxes) if boxes else []
        return masks, asarray(ids, dtype='int32')

In [4]:
class ModelConfig(Config):
    """Mask R-CNN configuration for the single-class kangaroo dataset."""
    # TODO: make these values configurable via arguments
    NAME = 'kangaroo'
    # 1 (background) + 1 (kangaroo)
    NUM_CLASSES = 1 + 1
    # The base Config reads STEPS_PER_EPOCH; a bare STEPS attribute is not a
    # setting it knows about, so the library default would be used instead.
    # NOTE(review): 131 presumably matches the number of training images - confirm.
    STEPS_PER_EPOCH = 131
    # Kept as an alias for any code that still reads the old attribute name.
    STEPS = STEPS_PER_EPOCH

In [5]:
# Build the training and test splits from the same dataset directory.
train_set, test_set = Kangaroo(), Kangaroo()
train_set.load_data('data//kangaroo')
train_set.prepare()
test_set.load_data('data//kangaroo', train=False)
test_set.prepare()

# Show the effective configuration before building the model.
config = ModelConfig()
config.display()

# Layers left out when loading the pre-trained weights.
# NOTE(review): presumably excluded because their shapes depend on
# NUM_CLASSES - confirm.
excluded_layers = ["mrcnn_class_logits", "mrcnn_bbox_fc",  "mrcnn_bbox", "mrcnn_mask"]

# NOTE(review): the recorded run of this cell ends in
# "AttributeError: 'str' object has no attribute 'decode'". That is commonly
# caused by h5py >= 3 combined with an older Keras weight loader - confirm,
# and pin h5py < 3 in the environment if so.
model = MaskRCNN(mode='training', model_dir='./', config=config)
model.load_weights('mask_rcnn_coco.h5', by_name=True, exclude=excluded_layers)

# Train only the 'heads' layers for 5 epochs at the configured learning rate.
model.train(
    train_set,
    test_set,
    learning_rate=config.LEARNING_RATE,
    epochs=5,
    layers='heads',
)


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     2
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE         

AttributeError: 'str' object has no attribute 'decode'

In [None]:
# Sanity check: rebuild both splits, then overlay the masks of one training
# image to confirm the annotations line up with the picture.
train_set, test_set = Kangaroo(), Kangaroo()
train_set.load_data('data//kangaroo')
train_set.prepare()
test_set.load_data('data//kangaroo', train=False)
test_set.prepare()

# Index of the training image to inspect (position in the dataset, not the
# file name).
img = 0
image = train_set.load_image(img)
mask, ids = train_set.load_mask(img)

# Compute boxes from the masks and draw everything on the image.
bbox = extract_bboxes(mask)
display_instances(image, bbox, mask, ids, class_names=train_set.class_names)