# Creating a Mask RCNN model

We first install the Mask RCNN library.

In [0]:
# Clone the Mask_RCNN repository from GitHub into ./Mask_RCNN.
!git clone https://github.com/matterport/Mask_RCNN/

In [0]:
cd Mask_RCNN/

In [0]:
# Install the package found in the current directory (the cloned Mask_RCNN repository)
# into the user site-packages.
!pip install . --user

In [0]:
import os
# Send signal 9 to this very process, i.e. kill the running kernel.
# Presumably done so the runtime restarts and picks up the package installed above — confirm.
# Every variable defined so far is lost; continue with the cells below afterwards.
os.kill(os.getpid(), 9)

Now, we download the necessary files. 

In [0]:
# Download the weights file and save it as mask_rcnn_coco.h5 in the current directory;
# the training cell loads it as the starting (MS COCO) weights.
!wget https://www.dropbox.com/s/0j983oudb5i6uwc/mask_rcnn_coco.h5?dl=1 -O mask_rcnn_coco.h5

We download the dataset. If you are working with Google Colab, you have several options to download the dataset in this notebook, see the available options in the [LabelDetection documentation](https://github.com/ancasag/LabelDetection).

In [None]:

# Unpack dataset.zip from the current directory; later cells read dataset/train and dataset/test.
!unzip dataset.zip

We load the necessary code and start the training process.

In [0]:
# Class names to detect; the dataset class registers them with ids 1..len(listClasses)
# and the configuration derives NUM_CLASSES from this list.
listClasses = ['apple','banana','orange']
# Image count used to derive STEPS_PER_EPOCH in the training configuration.
# Presumably the number of images in dataset/train — keep it in sync with your data.
numImg = 100

In [0]:
from os import listdir
from xml.etree import ElementTree
from numpy import zeros
from numpy import asarray
from mrcnn.utils import Dataset
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
 
# class that defines and loads the dataset
class NewDataset(Dataset):
    """Detection dataset backed by an ``images/`` folder and per-image XML annotations.

    Every annotated bounding box is converted into a filled rectangular mask,
    one mask channel per box.
    """

    # load the dataset definitions
    def load_dataset(self, dataset_dir):
        """Register the classes and the images found under ``dataset_dir``.

        dataset_dir: folder containing ``images/`` and ``annots/`` sub-folders;
            the annotation of ``images/<stem>.<ext>`` is ``annots/<stem>.xml``.
        """
        # function-scope import so this class does not depend on another cell
        from os.path import splitext

        # one class per entry of the notebook-level listClasses, ids starting at 1
        for i, cl in enumerate(listClasses):
            self.add_class("dataset", i + 1, cl)
        # define data locations
        images_dir = dataset_dir + '/images/'
        annotations_dir = dataset_dir + '/annots/'
        # find all images
        for filename in listdir(images_dir):
            # drop the extension whatever its length (.jpg, .jpeg, .png, ...)
            image_id = splitext(filename)[0]
            img_path = images_dir + filename
            ann_path = annotations_dir + image_id + '.xml'
            self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path)

    # extract bounding boxes from an annotation file
    def extract_boxes(self, filename):
        """Parse one XML annotation file.

        Returns ``(boxes, width, height)`` where each box is
        ``[xmin, ymin, xmax, ymax, class_name]`` and width/height are read from
        the ``size`` element of the file.
        """
        # load and parse the file
        tree = ElementTree.parse(filename)
        # get the root of the document
        root = tree.getroot()
        # extract each bounding box
        boxes = list()
        for objeto in root.findall('.//object'):
            name = objeto.find('name').text
            # search inside the current object only: searching from the root
            # would pair this object's name with every box of the file
            for box in objeto.findall('.//bndbox'):
                xmin = int(box.find('xmin').text)
                ymin = int(box.find('ymin').text)
                xmax = int(box.find('xmax').text)
                ymax = int(box.find('ymax').text)
                boxes.append([xmin, ymin, xmax, ymax, name])
        # extract image dimensions
        width = int(root.find('.//size/width').text)
        height = int(root.find('.//size/height').text)
        return boxes, width, height

    # load the masks for an image
    def load_mask(self, image_id):
        """Build the masks of one image from its annotation file.

        Returns ``(masks, class_ids)``: ``masks`` is a uint8 array of shape
        ``(height, width, number_of_boxes)`` with the box area set to 1, and
        ``class_ids`` is an int32 array holding, for each box, the position of
        its class name in ``self.class_names``. A class name missing from
        ``self.class_names`` makes the ``index`` lookup fail.
        """
        # get details of image
        info = self.image_info[image_id]
        # load XML from the annotation file registered for this image
        boxes, w, h = self.extract_boxes(info['annotation'])
        # create one array for all masks, each on a different channel
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        # create masks
        class_ids = list()
        for i, box in enumerate(boxes):
            row_s, row_e = box[1], box[3]
            col_s, col_e = box[0], box[2]
            masks[row_s:row_e, col_s:col_e, i] = 1
            class_ids.append(self.class_names.index(box[4]))
        return masks, asarray(class_ids, dtype='int32')

    # load an image reference
    def image_reference(self, image_id):
        """Return the file path registered for ``image_id``."""
        info = self.image_info[image_id]
        return info['path']
 
# define a configuration for the model
class NewConfig(Config):
    """Training configuration: ResNet-50 backbone, images resized to 512x512 squares."""
    # define the name of the configuration
    NAME = "model_cfg"
    BACKBONE = "resnet50"
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    # number of classes: one per entry of listClasses, plus one for the background
    NUM_CLASSES = len(listClasses) + 1
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4
    # number of training steps per epoch: numImg images split into batches of
    # GPU_COUNT * IMAGES_PER_GPU, never less than 1 so that a dataset smaller
    # than one batch does not produce zero steps
    STEPS_PER_EPOCH = max(1, numImg // (GPU_COUNT * IMAGES_PER_GPU))
 
# prepare train set from dataset/train (images/ and annots/ sub-folders)
train_set = NewDataset()
train_set.load_dataset('dataset/train')
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))
# prepare config and print its values
config = NewConfig()
config.display()
# define the model in training mode; model_dir is the current directory, and the
# evaluation cell later looks for checkpoints under ./model_cfg*/
model = MaskRCNN(mode='training', model_dir='./', config=config)
# load weights (mscoco) and exclude the output layers
model.load_weights('mask_rcnn_coco.h5', by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",  "mrcnn_bbox", "mrcnn_mask"])
# train weights (output layers or 'heads')
# NOTE(review): train_set is passed twice; presumably the second positional argument is
# the validation dataset, in which case validation runs on the training data — confirm.
model.train(train_set, train_set, learning_rate=config.LEARNING_RATE, epochs=2, layers='heads')
# unfreeze the body of the network and train *all* layers at a tenth of the learning rate
# NOTE(review): epochs=5 follows epochs=2 above and the evaluation cell loads a *_0005.h5
# checkpoint, so `epochs` looks like a cumulative epoch index rather than a count — confirm.
model.train(train_set, train_set, epochs=5,layers="all", learning_rate=config.LEARNING_RATE / 10)

Evaluation.


In [0]:
# Presumably the number of images in dataset/test — confirm against your data.
numTest = 12

In [0]:
from mrcnn.model import load_image_gt
from mrcnn.model import mold_image
from numpy import expand_dims
from mrcnn.utils import compute_ap
from numpy import mean

# define the prediction configuration
class PredictionConfig(Config):
    """Inference configuration: same network settings as training, one image per batch."""
    # define the name of the configuration
    NAME = "model_cfg"
    BACKBONE = "resnet50"
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    # number of classes: one per entry of listClasses, plus one for the background
    NUM_CLASSES = len(listClasses) + 1
    GPU_COUNT = 1
    # evaluate_model calls detect() with one image at a time
    IMAGES_PER_GPU = 1
    # BATCH_SIZE is deliberately not assigned here: mrcnn's Config.__init__
    # recomputes it as IMAGES_PER_GPU * GPU_COUNT, so a class-level value is
    # overwritten, and detect() requires exactly BATCH_SIZE images per call.
  

# calculate the mAP for a model on a given dataset
def evaluate_model(dataset, model, cfg):
    """Return the mean average precision of ``model`` over every image of ``dataset``.

    dataset: prepared dataset whose ``image_ids`` are iterated.
    model: model created in inference mode, exposing ``detect``.
    cfg: configuration handed to ``load_image_gt``.
    """
    APs = list()
    for image_id in dataset.image_ids:
        # load image, bounding boxes and masks for the image id
        image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(dataset, cfg, image_id, use_mini_mask=False)
        # make prediction: detect() takes a list of raw images and molds
        # (resizes and mean-centres) them itself, so the image must not be
        # molded beforehand or its pixels would be centred twice
        yhat = model.detect([image], verbose=0)
        # extract results for first sample
        r = yhat[0]
        # calculate statistics, including AP
        AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
        # store
        APs.append(AP)
    # calculate the mean AP across all images
    mAP = mean(APs)
    return mAP



In [0]:
import glob

cfg = PredictionConfig()
model = MaskRCNN(mode='inference', model_dir='./', config=cfg)
# Locate the checkpoint numbered 0005 written by the training cell. Several
# model_cfg* folders can exist when training was run more than once, so the
# matches are sorted and the last one is used (presumably the most recent run,
# assuming the folder name ends with a timestamp — confirm).
checkpoints = sorted(glob.glob('model_cfg**/*_0005.h5'))
if not checkpoints:
    raise FileNotFoundError("No checkpoint matches 'model_cfg**/*_0005.h5'; run the training cell first.")
modelFile = checkpoints[-1]
model.load_weights(modelFile, by_name=True)
# build the test dataset and report the mean average precision on it
test_set = NewDataset()
test_set.load_dataset('dataset/test')
test_set.prepare()
test_mAP = evaluate_model(test_set, model, cfg)
print("Test mAP: %.3f" % test_mAP)

At the end you will have a new folder whose name starts with `model_cfg`, where you can find the weights of the model. Those weights can be included in the application to be used with new images. 

-----------------


## Data distillation

After training a model with the annotated images, it is possible to apply a data distillation procedure to create a model using the unlabelled images. You can only apply this technique if there were unlabelled images in your dataset. 

In [0]:
# Clone the ensembleObjectDetection repository; the cells below import modules
# from its TestTimeAugmentation folder.
!git clone https://github.com/ancasag/ensembleObjectDetection.git

In [0]:
cd ensembleObjectDetection/TestTimeAugmentation

In [0]:
# Install third-party packages, presumably required by the TestTimeAugmentation
# modules imported in the next cell — confirm.
# NOTE(review): no versions are pinned, so the installed releases depend on the run date.
!pip install clodsa
!pip install gluoncv
!pip install mxnet
!pip install keras_retinanet

In [0]:
import testTimeAugmentation
import function
import os
import shutil
import argparse
import ensembleOptions
from mainTTA import tta
from imutils import paths

In [0]:
# Folder with the unlabelled images; later cells move its *.jpg and *.xml files
# into dataset/train.
pathImg = '/content/dataset/unlabelled/'

In [0]:
# Names of the augmentation techniques to apply.
# NOTE(review): not referenced by any other cell visible here; presumably meant to be
# passed to `tta` (imported above) — confirm.
myTechniques = [ "histo","hflip","none"]

In [0]:
# Strategy name for combining the detections.
# NOTE(review): not referenced by any other cell visible here; presumably meant to be
# passed to `tta` (imported above) — confirm.
option = "consensus"

In [0]:
# Imported here so the cell does not rely on an earlier cell having imported glob.
import glob

# Locate the checkpoint numbered 0005 produced by the first training. Matches are
# sorted and the last one is used so the choice is deterministic when several
# model_cfg* folders exist (presumably the most recent run — confirm).
checkpoints = sorted(glob.glob('/content/model_cfg**/*_0005.h5'))
if not checkpoints:
    raise FileNotFoundError("No checkpoint matches '/content/model_cfg**/*_0005.h5'; run the training cell first.")
modelFile = checkpoints[-1]
# Wrap the trained weights and the class-names file in the predictor object.
maskRcnn = testTimeAugmentation.MaskRCNNPred(modelFile, '/content/dataset/classes.names')
# NOTE(review): no cell visible in this notebook calls `tta` with maskRcnn, myTechniques,
# pathImg and option, yet a later cell moves *.xml files out of the unlabelled folder.
# Presumably a call that generates those annotations is missing here — confirm.

In [0]:
cd /content

In [0]:
# Move the .jpg files of the unlabelled folder into the training images folder.
!mv /content/dataset/unlabelled/*.jpg /content/dataset/train/images/

In [0]:
# Move the .xml annotation files of the unlabelled folder into the training annotations
# folder; this expects such files to have been generated there beforehand.
!mv /content/dataset/unlabelled/*.xml /content/dataset/train/annots/

Restart environment to free memory.

In [0]:
import os
# Send signal 9 to this very process, i.e. kill the running kernel, so the runtime
# restarts with its memory released. Every variable and import defined so far is
# lost, which is why the cells below define them again.
os.kill(os.getpid(), 9)

In [0]:
# Class names to detect; redefined because the kernel was restarted above.
listClasses = ['apple','banana','orange']
# Image count used to derive STEPS_PER_EPOCH in the training configuration.
# NOTE(review): the unlabelled images were moved into dataset/train above, so the real
# number of training images may now be larger than this value — confirm.
numImg = 100

In [0]:
from os import listdir
from xml.etree import ElementTree
from numpy import zeros
from numpy import asarray
from mrcnn.utils import Dataset
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
import glob
 
# class that defines and loads the dataset
class NewDataset(Dataset):
    """Detection dataset backed by an ``images/`` folder and per-image XML annotations.

    Every annotated bounding box is converted into a filled rectangular mask,
    one mask channel per box.
    """

    # load the dataset definitions
    def load_dataset(self, dataset_dir):
        """Register the classes and the images found under ``dataset_dir``.

        dataset_dir: folder containing ``images/`` and ``annots/`` sub-folders;
            the annotation of ``images/<stem>.<ext>`` is ``annots/<stem>.xml``.
        """
        # function-scope import so this class does not depend on another cell
        from os.path import splitext

        # one class per entry of the notebook-level listClasses, ids starting at 1
        for i, cl in enumerate(listClasses):
            self.add_class("dataset", i + 1, cl)
        # define data locations
        images_dir = dataset_dir + '/images/'
        annotations_dir = dataset_dir + '/annots/'
        # find all images
        for filename in listdir(images_dir):
            # drop the extension whatever its length (.jpg, .jpeg, .png, ...)
            image_id = splitext(filename)[0]
            img_path = images_dir + filename
            ann_path = annotations_dir + image_id + '.xml'
            self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path)

    # extract bounding boxes from an annotation file
    def extract_boxes(self, filename):
        """Parse one XML annotation file.

        Returns ``(boxes, width, height)`` where each box is
        ``[xmin, ymin, xmax, ymax, class_name]`` and width/height are read from
        the ``size`` element of the file.
        """
        # load and parse the file
        tree = ElementTree.parse(filename)
        # get the root of the document
        root = tree.getroot()
        # extract each bounding box
        boxes = list()
        for objeto in root.findall('.//object'):
            name = objeto.find('name').text
            # search inside the current object only: searching from the root
            # would pair this object's name with every box of the file
            for box in objeto.findall('.//bndbox'):
                xmin = int(box.find('xmin').text)
                ymin = int(box.find('ymin').text)
                xmax = int(box.find('xmax').text)
                ymax = int(box.find('ymax').text)
                boxes.append([xmin, ymin, xmax, ymax, name])
        # extract image dimensions
        width = int(root.find('.//size/width').text)
        height = int(root.find('.//size/height').text)
        return boxes, width, height

    # load the masks for an image
    def load_mask(self, image_id):
        """Build the masks of one image from its annotation file.

        Returns ``(masks, class_ids)``: ``masks`` is a uint8 array of shape
        ``(height, width, number_of_boxes)`` with the box area set to 1, and
        ``class_ids`` is an int32 array holding, for each box, the position of
        its class name in ``self.class_names``. A class name missing from
        ``self.class_names`` makes the ``index`` lookup fail.
        """
        # get details of image
        info = self.image_info[image_id]
        # load XML from the annotation file registered for this image
        boxes, w, h = self.extract_boxes(info['annotation'])
        # create one array for all masks, each on a different channel
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        # create masks
        class_ids = list()
        for i, box in enumerate(boxes):
            row_s, row_e = box[1], box[3]
            col_s, col_e = box[0], box[2]
            masks[row_s:row_e, col_s:col_e, i] = 1
            class_ids.append(self.class_names.index(box[4]))
        return masks, asarray(class_ids, dtype='int32')

    # load an image reference
    def image_reference(self, image_id):
        """Return the file path registered for ``image_id``."""
        info = self.image_info[image_id]
        return info['path']
 
# define a configuration for the model
class NewConfig(Config):
    """Training configuration: ResNet-50 backbone, images resized to 512x512 squares."""
    # define the name of the configuration
    NAME = "model_cfg"
    BACKBONE = "resnet50"
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    # number of classes: one per entry of listClasses, plus one for the background
    # (derived from the list so that editing listClasses cannot leave this value stale)
    NUM_CLASSES = len(listClasses) + 1
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4
    # number of training steps per epoch: numImg images split into batches of
    # GPU_COUNT * IMAGES_PER_GPU, never less than 1 so that a dataset smaller
    # than one batch does not produce zero steps
    STEPS_PER_EPOCH = max(1, numImg // (GPU_COUNT * IMAGES_PER_GPU))
 
# prepare train set from dataset/train, which now also holds the files moved
# from the unlabelled folder
train_set = NewDataset()
train_set.load_dataset('dataset/train')
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))
# prepare config and print its values
config = NewConfig()
config.display()
# define the model in training mode; model_dir is the current directory, and the
# evaluation cell later looks for checkpoints under ./model_cfg*/
model = MaskRCNN(mode='training', model_dir='./', config=config)
# load weights (mscoco) and exclude the output layers
model.load_weights('mask_rcnn_coco.h5', by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",  "mrcnn_bbox", "mrcnn_mask"])
# train weights (output layers or 'heads')
# NOTE(review): train_set is passed twice; presumably the second positional argument is
# the validation dataset, in which case validation runs on the training data — confirm.
model.train(train_set, train_set, learning_rate=config.LEARNING_RATE, epochs=2, layers='heads')
# unfreeze the body of the network and train *all* layers at a tenth of the learning rate
# NOTE(review): epochs=7 follows epochs=2 above and the evaluation cell loads a *_0007.h5
# checkpoint, so `epochs` looks like a cumulative epoch index rather than a count — confirm.
model.train(train_set, train_set, epochs=7,layers="all", learning_rate=config.LEARNING_RATE / 10)

In [0]:
from mrcnn.model import load_image_gt
from mrcnn.model import mold_image
from numpy import expand_dims
from mrcnn.utils import compute_ap
from numpy import mean

# define the prediction configuration
class PredictionConfig(Config):
    """Inference configuration: same network settings as training, one image per batch."""
    # define the name of the configuration
    NAME = "model_cfg"
    BACKBONE = "resnet50"
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    # number of classes: one per entry of listClasses, plus one for the background
    NUM_CLASSES = len(listClasses) + 1
    GPU_COUNT = 1
    # evaluate_model calls detect() with one image at a time
    IMAGES_PER_GPU = 1
    # BATCH_SIZE is deliberately not assigned here. The previous `BATCH_SIZE = numTest`
    # raised NameError because numTest is not redefined after the kernel restart, and
    # it had no effect anyway: mrcnn's Config.__init__ recomputes BATCH_SIZE as
    # IMAGES_PER_GPU * GPU_COUNT.
  

# calculate the mAP for a model on a given dataset
def evaluate_model(dataset, model, cfg):
    """Return the mean average precision of ``model`` over every image of ``dataset``.

    dataset: prepared dataset whose ``image_ids`` are iterated.
    model: model created in inference mode, exposing ``detect``.
    cfg: configuration handed to ``load_image_gt``.
    """
    APs = list()
    for image_id in dataset.image_ids:
        # load image, bounding boxes and masks for the image id
        image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(dataset, cfg, image_id, use_mini_mask=False)
        # make prediction: detect() takes a list of raw images and molds
        # (resizes and mean-centres) them itself, so the image must not be
        # molded beforehand or its pixels would be centred twice
        yhat = model.detect([image], verbose=0)
        # extract results for first sample
        r = yhat[0]
        # calculate statistics, including AP
        AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
        # store
        APs.append(AP)
    # calculate the mean AP across all images
    mAP = mean(APs)
    return mAP

In [0]:
import glob

cfg = PredictionConfig()
model = MaskRCNN(mode='inference', model_dir='./', config=cfg)
# Locate the checkpoint numbered 0007 written by the retraining cell. Several
# model_cfg* folders can exist when training was run more than once, so the
# matches are sorted and the last one is used (presumably the most recent run,
# assuming the folder name ends with a timestamp — confirm).
checkpoints = sorted(glob.glob('model_cfg**/*_0007.h5'))
if not checkpoints:
    raise FileNotFoundError("No checkpoint matches 'model_cfg**/*_0007.h5'; run the training cell first.")
modelFile = checkpoints[-1]
model.load_weights(modelFile, by_name=True)
# build the test dataset and report the mean average precision on it
test_set = NewDataset()
test_set.load_dataset('dataset/test')
test_set.prepare()
test_mAP = evaluate_model(test_set, model, cfg)
print("Test mAP: %.3f" % test_mAP)

-------------------------------

# Using the model in LabelDetection

If you want to use the trained model with LabelDetection, you must download the following files:
- `model_cfg**/*_0007.h5`
- `dataset/classes.names`