In [3]:
# fit a mask rcnn on the kangaroo dataset
from os import listdir
from numpy import zeros
from numpy import asarray
from mrcnn.utils import Dataset
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
import json
import os
import sys
import json
import datetime
import numpy as np
import skimage.draw
import cv2
from mrcnn.visualize import display_instances
import matplotlib.pyplot as plt

In [3]:
class LoadDataset(Dataset):
    # load the dataset definitions
    def load_dataset(self, dataset_dir):
        self.add_class("dataset",1, "house")
        self.add_class("dataset",2,"water_body")
        #self.add_class("dataset",3,"vegetation")
        self.add_class("dataset",3,"open_land")
        
        
        current_path = dataset_dir
        images_dir = os.path.join(current_path, 'images')
        annotations_dir = os.path.join(current_path, 'annots')
        
        # find all images
        for filename in listdir(images_dir):
            # extract image id
            image_id = filename.split('.')[0]
            image_file = os.path.join(images_dir, filename)    
            annotation_file = os.path.join(annotations_dir, image_id + '.json')
            with open(annotation_file, 'r') as f:            
                data = json.load(f)
            annotations = data['shapes']  # don't need the dict keys
            for a in annotations:
                class_id = [int(self.get_coord(a)[1])]
                polygons = [self.get_coord(a)[0]]
                image = skimage.io.imread(image_file)
                height, width = image.shape[:2]
                # add to dataset
                self.add_image(
                    "dataset",  ## for a single class just add the name here
                    image_id= image_id,  # use file name as a unique image id
                    path=image_file,
                    width=width, height=height,
                    polygons=polygons,
                    class_ids=class_id)
        '''
    input = dictionary
    this function will retrun x,y coordinates of a annotation in folllowing format
    shape_attribute = {
       'x' = [101,102]
       'y' = [112,115]
    }
    '''
    def get_coord(self,json_dict):
        shape_attribute = {}
        map_dict = {'house': '1',
                    'water_body': '2',
                    'open_land': '3'}
        class_label = json_dict['label']
        class_id = map_dict[class_label]
        x = [point[0] for point in json_dict['points']]
        y = [point[1] for point in json_dict['points']]
        shape_attribute['x'] = x
        shape_attribute['y'] = y
        return shape_attribute,class_id
    
    def load_mask(self, image_id):
        """Generate instance masks for an image.
       Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a balloon dataset image, delegate to parent class.
        image_info = self.image_info[image_id]

        class_ids = image_info['class_ids']
        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['y'], p['x'])
            rr[rr > mask.shape[0]-1] = mask.shape[0]-1
            cc[cc > mask.shape[1]-1] = mask.shape[1]-1
            mask[rr, cc, i] = 1

        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID only, we return an array of 1s
        #class_ids=np.array([self.class_names.index(shapes[0])])
#         print("info['class_ids']=", info['class_ids'])
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids
    
    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "damage":
            return info["path"]
        else:
            super(self.__class__, self).image_reference(image_id)
    

In [4]:
# define a configuration for the model
class DataConfig(Config):
	# define the name of the configuration
	NAME = "veg_cfg"
	# number of classes (background + kangaroo)
	NUM_CLASSES = 1 + 3
	# number of training steps per epoch
	STEPS_PER_EPOCH = 50
    
config = DataConfig()

In [5]:
#preparing trainset
train_set = LoadDataset()
train_set.load_dataset('vegetation')
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))
#preparing testset
test_set = LoadDataset()
test_set.load_dataset('vegetation_test')
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))

Train: 2383
Test: 329


In [6]:
#example of a train image
print(train_set.image_info[0])
print('*'*100)
print(train_set.image_info[1])
print('*'*100)
print(train_set.image_info[2])

{'class_ids': [2], 'width': 960, 'source': 'dataset', 'polygons': [{'y': [14.666666666666666, 30.666666666666668, 489.3333333333333, 774.6666666666666, 760.0, 666.6666666666666, 581.3333333333334, 494.6666666666667, 497.3333333333333, 341.3333333333333, 9.333333333333334, 13.333333333333334, 77.33333333333333, 106.66666666666667, 192.0, 677.3333333333334, 718.6666666666666, 957.3333333333334, 950.6666666666666], 'x': [954.6666666666667, 773.3333333333334, 729.3333333333334, 552.0, 469.33333333333337, 454.66666666666663, 486.66666666666663, 474.66666666666663, 534.6666666666666, 548.0, 652.0, 108.0, 129.33333333333331, 225.33333333333331, 225.33333333333331, 61.33333333333334, 12.0, 4.0, 952.0]}], 'path': 'vegetation/images/000000034.jpg', 'height': 960, 'id': '000000034'}
****************************************************************************************************
{'class_ids': [1], 'width': 960, 'source': 'dataset', 'polygons': [{'y': [0.8474576271186441, 38.983050847457626, 45

In [7]:
# prepare config
config = DataConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     2
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 2
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                16
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'rpn_class_loss': 1.0, 'mrcnn_class_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE         

In [8]:
# define the model
model = MaskRCNN(mode='training', model_dir='./', config=config)
# load weights (mscoco) and exclude the output layers
model.load_weights('mask_rcnn_coco.h5', by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",  "mrcnn_bbox", "mrcnn_mask"])
# train weights (output layers or 'heads')
model.train(train_set, test_set, learning_rate=config.LEARNING_RATE, epochs=7, layers='heads')







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
box_ind is deprecated, use box_indices instead

Starting at epoch 0. LR=0.001

Checkpoint Path: ./veg_cfg20191030T2044/mask_rcnn_veg_cfg_{epoch:04d}.h5
Selecting layers to train
fpn_c5p5               (Conv2D)
fpn_c4p4               (Conv2D)
fpn_c3p3               (Conv2D)
fpn_c2p2               (Conv2D)
fpn_p5                 (Conv2D)
fpn_p2                 (Conv2D)
fpn_p3                 (Conv2D)
fpn_p4                 (Conv2D)
In model:  rpn_model
    rpn_conv_shared        (Conv2D)
    rpn_class_raw          (Conv2D)
    rpn_bbox_pred          (Conv2D)
mrcnn_mask_conv1       (TimeDistributed)
mrcnn_mask_bn1         (TimeDistributed)
mrcnn_mask_conv2       (TimeDistributed)
mrcnn_mask_bn2         (TimeDistributed)
mrcnn_class_conv1      (TimeDistributed)
mrcnn_class_bn1        (TimeDistributed)
mrcnn_mask_conv3       (TimeDistributed)
mrcnn_mask_bn3       

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "




Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [9]:
import gc
gc.collect()

112

# Evaluate

In [4]:
import os
from os import listdir
from xml.etree import ElementTree
from numpy import zeros
from numpy import asarray
from numpy import expand_dims
from numpy import mean
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
from mrcnn.utils import Dataset
from mrcnn.utils import compute_ap
from mrcnn.model import load_image_gt
from mrcnn.model import mold_image
from tqdm import tqdm_notebook as tqdm

# class that defines and loads the kangaroo dataset
class LoadDataset(Dataset):
    def load_dataset(self, dataset_dir):
        self.add_class("dataset",1, "house")
        self.add_class("dataset",2,"water_body")
        #self.add_class("dataset",3,"vegetation")
        self.add_class("dataset",3,"open_land")
        
        
        current_path = dataset_dir
        images_dir = os.path.join(current_path, 'images')
        annotations_dir = os.path.join(current_path, 'annots')
        
        # find all images
        for filename in listdir(images_dir):
            # extract image id
            image_id = filename.split('.')[0]
            image_file = os.path.join(images_dir, filename)    
            annotation_file = os.path.join(annotations_dir, image_id + '.json')
            with open(annotation_file, 'r') as f:            
                data = json.load(f)
            annotations = data['shapes']  # don't need the dict keys
            for a in annotations:
                class_id = [int(self.get_coord(a)[1])]
                polygons = [self.get_coord(a)[0]]
                image = skimage.io.imread(image_file)
                height, width = image.shape[:2]
                # add to dataset
                self.add_image(
                    "dataset",  ## for a single class just add the name here
                    image_id= image_id,  # use file name as a unique image id
                    path=image_file,
                    width=width, height=height,
                    polygons=polygons,
                    class_ids=class_id)
        '''
    input = dictionary
    this function will retrun x,y coordinates of a annotation in folllowing format
    shape_attribute = {
       'x' = [101,102]
       'y' = [112,115]
    }
    '''
    def get_coord(self,json_dict):
        shape_attribute = {}
        map_dict = {'house': '1',
                    'water_body': '2',
                    'open_land': '3'}
        class_label = json_dict['label']
        class_id = map_dict[class_label]
        x = [point[0] for point in json_dict['points']]
        y = [point[1] for point in json_dict['points']]
        shape_attribute['x'] = x
        shape_attribute['y'] = y
        return shape_attribute,class_id
    
    def load_mask(self, image_id):
        """Generate instance masks for an image.
       Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a balloon dataset image, delegate to parent class.
        image_info = self.image_info[image_id]

        class_ids = image_info['class_ids']
        # Convert polygons to a bitmap mask of shape
        # [height, width, instance_count]
        info = self.image_info[image_id]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
                        dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1
            rr, cc = skimage.draw.polygon(p['y'], p['x'])
            rr[rr > mask.shape[0]-1] = mask.shape[0]-1
            cc[cc > mask.shape[1]-1] = mask.shape[1]-1
            mask[rr, cc, i] = 1

        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID only, we return an array of 1s
        #class_ids=np.array([self.class_names.index(shapes[0])])
#         print("info['class_ids']=", info['class_ids'])
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids
    
    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "damage":
            return info["path"]
        else:
            super(self.__class__, self).image_reference(image_id)

# define the prediction configuration
class PredictionConfig(Config):
	# define the name of the configuration
	NAME = "veg_cfg"
	# number of classes (background + kangaroo)
	NUM_CLASSES = 1 + 3
	# simplify GPU config
	GPU_COUNT = 1
	IMAGES_PER_GPU = 1

# calculate the mAP for a model on a given dataset
def evaluate_model(dataset, model, cfg):
	APs = list()
	for image_id in tqdm(dataset.image_ids):
		# load image, bounding boxes and masks for the image id
		image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(dataset, cfg, image_id, use_mini_mask=False)
		# convert pixel values (e.g. center)
		scaled_image = mold_image(image, cfg)
		# convert image into one sample
		sample = expand_dims(scaled_image, 0)
		# make prediction
		yhat = model.detect(sample, verbose=0)
		# extract results for first sample
		r = yhat[0]
		# calculate statistics, including AP
		AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'])
		# store
		APs.append(AP)
	# calculate the mean AP across all images
	mAP = mean(APs)
	return mAP

#preparing trainset
train_set = LoadDataset()
train_set.load_dataset('vegetation')
train_set.prepare()
print('Train: %d' % len(train_set.image_ids))
#preparing testset
test_set = LoadDataset()
test_set.load_dataset('vegetation_test')
test_set.prepare()
print('Test: %d' % len(test_set.image_ids))
# create config
cfg = PredictionConfig()
# define the model
model = MaskRCNN(mode='inference', model_dir='./', config=cfg)
# load model weights
model.load_weights('mask_rcnn_veg_cfg_0007.h5', by_name=True)
# evaluate model on training dataset
train_mAP = evaluate_model(train_set, model, cfg)
print("Train mAP: %.3f" % train_mAP)
# evaluate model on test dataset
test_mAP = evaluate_model(test_set, model, cfg)
print("Test mAP: %.3f" % test_mAP)


Train: 2383
Test: 329







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
box_ind is deprecated, use box_indices instead


Instructions for updating:
Use `tf.cast` instead.


HBox(children=(IntProgress(value=0, max=2383), HTML(value='')))


Train mAP: 0.149


HBox(children=(IntProgress(value=0, max=329), HTML(value='')))


Test mAP: 0.163
