In [1]:
from preprocessing import crop_image as ci
from preprocessing.load_file import check_num_images
import csv
from PIL import Image
from os import listdir
import random
from matplotlib import pyplot

# split into train and test set
from numpy import zeros
from numpy import asarray
from numpy import expand_dims
from numpy import mean
from Mask_RCNN.mrcnn.utils import Dataset
from Mask_RCNN.mrcnn.visualize import display_instances
from Mask_RCNN.mrcnn.utils import extract_bboxes
from Mask_RCNN.mrcnn.config import Config
from Mask_RCNN.mrcnn.model import MaskRCNN
from Mask_RCNN.mrcnn.utils import compute_ap
from Mask_RCNN.mrcnn.model import load_image_gt
from Mask_RCNN.mrcnn.model import mold_image

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
data_file_name = "../full_data_train.csv"
path_training = "../train/sampled_images/" # path from current folder to training images
#path_testing = "data/test/" # path from current folder to testing images
#path_testing = "train/images/" # path from current folder to testing images

percent_training = 0.8

In [3]:
# class that defines and loads the buildings dataset
class BuildingsDataset(Dataset):
    # load the dataset definitions
    def load_dataset(self, path, data_file_name, percent_training, is_train=True):
        # define one class
        self.add_class("dataset", 1, "building")
        # find all images
        count = 0
        m_training = round(len(listdir(path)) * percent_training)
        for filename in listdir(path):
            if(filename[-3:] != "png"):
                continue
            # extract image id
            image_id = filename[:-4]
            # skip all images after m_training if we are building the train set
            if is_train and count >= m_training:
                count += 1
                continue
            # skip all images before m_training if we are building the test/val set
            if not is_train and count < m_training:
                count += 1
                continue
            count += 1 
            img_path = path + filename
            # add to dataset
            self.add_image('dataset', image_id=image_id, path=img_path, filename=filename, data_file_name=data_file_name)
            
    # Load and process the images for the mini-batch
    def extract_boxes(self, filename, data_file_name):
        boxes = []
        ### Find the rows associated with the file
        with open(data_file_name) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            for row in csv_reader:
                if (row[0] == ""):
                    continue
                if (row[17] == filename):
                    xy = row[22]
                    points = ci.process_xy(xy)
                    (xmin, xmax, ymin, ymax) = ci.get_corners(points)
                    coordinates = [xmin, ymin, xmax, ymax]
                    boxes.append(coordinates)
        return (boxes, 1024, 1024)
                
    # load the masks for an image
    def load_mask(self, image_id):
        # get details of image
        info = self.image_info[image_id]
        filename = info['filename']
        data_file_name = info['data_file_name']
        boxes, w, h = self.extract_boxes(filename, data_file_name)
        # create one array for all masks, each on a different channel
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        # create masks
        class_ids = []
        for i in range(len(boxes)):
            box = boxes[i]
            row_s, row_e = box[1], box[3]
            col_s, col_e = box[0], box[2]
            masks[row_s:row_e, col_s:col_e, i] = 1
            class_ids.append(self.class_names.index('building'))
        return masks, asarray(class_ids, dtype='int32')
 
    # load an image reference
    def image_reference(self, image_id):
        info = self.image_info[image_id]
        return info['path']

In [4]:
# train set
train_set = BuildingsDataset()
train_set.load_dataset(path_training, data_file_name, percent_training, True)
train_set.prepare()

print('Train: %d' % len(train_set.image_ids))

# test/val set
test_set = BuildingsDataset()
test_set.load_dataset(path_training, data_file_name, percent_training, False)
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))

Train: 97
Test: 23


### Testing BuildingsDataset Object

In [None]:
# load an image
image_id = 1
image = test_set.load_image(image_id)
print(image.shape)
# load image mask
mask, class_ids = test_set.load_mask(image_id)
print(mask.shape)

In [None]:
# plot image
pyplot.imshow(image)
# plot mask
pyplot.imshow(mask[:, :, 1], cmap='gray', alpha=0.5)
pyplot.show()

In [None]:
# extract bounding boxes from the masks
bbox = extract_bboxes(mask)
# display image with masks and bounding boxes
display_instances(image, bbox, mask, class_ids, train_set.class_names)

In [None]:
# define a configuration for the model
class BuildingConfig(Config):
	# Give the configuration a recognizable name
	NAME = "building_cfg"
	# Number of classes (background + building)
	NUM_CLASSES = 1 + 1
	# Number of training steps per epoch
	STEPS_PER_EPOCH = len(train_set.image_ids)
 
# prepare config
config = BuildingConfig()

In [None]:
# define the model
model = MaskRCNN(mode='training', model_dir='./', config=config)

model.keras_model.metrics_tensors = []

In [None]:
# load weights (mscoco) and exclude the output layers
model.load_weights('../mask_rcnn_coco.h5', by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",  "mrcnn_bbox", "mrcnn_mask"])

In [None]:
# train weights (output layers or 'heads')
model.train(train_set, test_set, learning_rate=config.LEARNING_RATE, epochs=5, layers='heads')

### Testing the model

Precision refers to the percentage of the correctly predicted bounding boxes (IoU > 0.5) out of all bounding boxes predicted. Recall is the percentage of the correctly predicted bounding boxes (IoU > 0.5) out of all objects in the photo.

As we make more predictions, the recall percentage will increase, but precision will drop or become erratic as we start making false positive predictions. The recall (x) can be plotted against the precision (y) for each number of predictions to create a curve or line. We can maximize the value of each point on this line and calculate the average value of the precision or AP for each value of recall.

The average or mean of the average precision (AP) across all of the images in a dataset is called the mean average precision, or mAP.

In [5]:
# define the prediction configuration
class PredictionConfig(Config):
	# define the name of the configuration
	NAME = "building_cfg"
	# number of classes (background + kangaroo)
	NUM_CLASSES = 1 + 1
	# simplify GPU config
	GPU_COUNT = 1
	IMAGES_PER_GPU = 1
    
# calculate the mAP for a model on a given dataset
def evaluate_model(dataset, model, cfg):
    APs = list()
    for image_id in dataset.image_ids:
        # load image, bounding boxes and masks for the image id
        image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(dataset, cfg, image_id, use_mini_mask=False)
        # convert pixel values (e.g. center)
        scaled_image = mold_image(image, cfg)
        # convert image into one sample
        sample = expand_dims(scaled_image, 0)
        # make prediction
        yhat = model.detect(sample, verbose=0)
        # extract results for first sample
        r = yhat[0]
        if(gt_bbox.shape[0] == 0):
            continue
        # calculate statistics, including AP
        AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
        # store
        APs.append(AP)
    # calculate the mean AP across all images
    mAP = mean(APs)
    return mAP

In [6]:
# create config
cfg = PredictionConfig()
# define the model
model = MaskRCNN(mode='inference', model_dir='./', config=cfg)



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
box_ind is deprecated, use box_indices instead


Instructions for updating:
Use `tf.cast` instead.


In [7]:
# load model weights from the model that has the best accuracy
model.load_weights('building_cfg20200530T2056/mask_rcnn_building_cfg_0005.h5', by_name=True)

In [8]:
# evaluate model on test dataset
test_mAP = evaluate_model(test_set, model, cfg)
print("Test mAP: %.3f" % test_mAP)


Test mAP: 0.220
