In [None]:
%matplotlib notebook
import concurrent.futures
import importlib
import importlib.util
import itertools
import os
import random
import sys
from pprint import pprint
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

from skimage.io import imread, imsave
from skimage.transform import resize
import imgaug.augmenters as iaa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import keras.backend as K
from skimage.color import label2rgb
from keras import Model
from keras.optimizers import Nadam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.utils import to_categorical
from pycocotools.coco import COCO
from collections import Counter
from sklearn.utils import compute_class_weight

# from abyss_deep_learning.keras.detection import MaskRcnnDataset
from mrcnn.model import MaskRCNN, data_generator
from mrcnn.utils import Dataset as MrcnnDatasetBase

from skimage.morphology import remove_small_holes

In [None]:
def load_config(path):
    '''Load a configuration object from a Python source file.

    The file at `path` is executed as a module named "maskrcnn_config" and must
    define a class called `Config` that can be constructed without arguments.

    Args:
        path: Filesystem path of the Python config file.

    Returns:
        A new instance of the `Config` class defined in that file.

    Raises:
        ImportError: If no import spec/loader can be derived from `path`
            (for example, the path has no recognised Python source suffix).
    '''
    spec = importlib.util.spec_from_file_location("maskrcnn_config", path)
    # spec_from_file_location returns None when it cannot pick a loader; fail
    # here with a clear message instead of an AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError("Cannot load a config module from {!r}".format(path))
    config_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config_module)
    return config_module.Config()

In [None]:
class DetectionTask(CocoInterface, DatasetTaskBase):
    '''Dataset task for detection targets backed by a COCO annotation set.

    On construction it collects the set of captions produced by the translator,
    optionally pre-loads every item's targets into an in-memory cache, and
    computes class-distribution statistics.
    '''

    def __init__(self, coco, translator=None, **kwargs):
        '''Assumes that the data can be anything, but each data has 0 or more targets.

        Args:
            coco: Passed through to `CocoInterface.__init__`.
            translator: (AnnotationTranslator or None)
                After loading the data target run it through this translator.
                Must be an instance of a subclass of
                abyss.datasets.translators.AnnotationTranslator.
                Should be used for example remapping captions.
                Defaults to a plain `AnnotationTranslator()`.

        kwargs:
          * cached: (Boolean)
              Load the targets of every data id up front (in a process pool)
              and keep them in memory instead of loading them every time.
          * preprocess_targets: (Callable)
              Stored as `self._preprocess_targets`; defaults to `_noop`.
          * force_balance: (Boolean) #TODO
              Sample ids such that all classes are balanced to the smallest class.
        '''
        CocoInterface.__init__(self, coco, **kwargs)
        assert isinstance(translator, (AnnotationTranslator, type(None)))
        self.translator = translator or AnnotationTranslator()
        # A set has no order, so sorting before building it achieved nothing;
        # consumers that need a stable order sort the set themselves.
        self.captions = {
            caption
            for annotation in self.coco.loadAnns(self.coco.getAnnIds(imgIds=[]))
            for caption in self.translator.translate(annotation)
            if self.translator.filter(annotation)}
        self.num_classes = len(self.captions)
        self.stats = dict()
        self._targets = dict()

        # NOTE(review): `_noop` is not defined in this notebook as shown;
        # it has to be provided before this class is instantiated.
        self._preprocess_targets = kwargs.get('preprocess_targets', _noop)

        if kwargs.get('cached', False):
            with concurrent.futures.ProcessPoolExecutor() as executor:
                for data_id, targets in zip(
                        self.data_ids, executor.map(self.load_targets, self.data_ids)):
                    self._targets[data_id] = targets

        self._calc_class_stats()

    def load_targets(self, data_id, **kwargs):
        '''Return the targets for `data_id`, served from the cache when present.

        Raises:
            NotImplementedError: When `data_id` is not cached. The uncached
                path was left unfinished: the draft called `load_image_gt` and
                `mold_image` with names (`dataset`, `config`, `image_id`,
                `augment`) that do not exist in this scope, never returned a
                value, and ended in a mis-indented tuple expression that
                stopped the whole cell from parsing.
        '''
        if data_id in self._targets:
            return self._targets[data_id]

        # TODO: build the detection targets for `data_id` here. Whatever is
        # returned must be an iterable of captions per item, because
        # `_calc_class_stats` flattens the results into one label list.
        raise NotImplementedError(
            "DetectionTask.load_targets has no uncached implementation yet "
            "(requested data_id={!r})".format(data_id))

    def _calc_class_stats(self):
        '''Populate `self.stats` once; later calls are no-ops.

        Keys written: 'unlabeled' (fraction of items with no targets),
        'images_per_class' (label -> count, sorted by label), 'class_weights'
        (index in sorted caption order -> balanced weight rounded to 3 places)
        and 'trivial_accuracy'.
        '''
        if self.stats:
            return

        targets = [self.load_targets(data_id) for data_id in self.data_ids]
        num_unlabeled = sum(1 for target in targets if not target)
        # Guard the empty dataset instead of dividing by zero.
        self.stats['unlabeled'] = num_unlabeled / len(targets) if targets else 0.0

        labels = [caption for captions in targets for caption in captions]
        self.stats['images_per_class'] = dict(
            sorted(Counter(labels).items(), key=lambda x: x[0]))

        if not labels:
            # Nothing to balance; compute_class_weight cannot handle empty y.
            self.stats['class_weights'] = {}
            self.stats['trivial_accuracy'] = float('nan')
            return

        # Sort the classes so that weight index i refers to the i-th caption in
        # the same order used for 'images_per_class'; iterating the raw set
        # gave an arbitrary order. Keyword arguments are required by current
        # scikit-learn and accepted by older releases.
        classes = np.array(sorted(self.captions))
        class_weights = compute_class_weight('balanced', classes=classes, y=labels)
        class_weights = {i: float(np.round(v, 3)) for i, v in enumerate(class_weights)}
        self.stats['class_weights'] = class_weights
        a = np.array(list(class_weights.values()))
        self.stats['trivial_accuracy'] = np.mean(a / np.max(a))

    @property
    def class_weights(self):
        '''Returns the class weights that will balance the backprop update over the class distribution.'''
        return self.stats['class_weights']

    def print_class_stats(self):
        '''Prints statistics about the class/image distribution.'''
        self._calc_class_stats()
        print("{:s} class stats {:s}".format('=' * 8, '=' * 8))
        print("data count per class:")
        print(" ", self.stats['images_per_class'])
        print("class weights:")
        print(" ", self.class_weights)
        print("trivial result accuracy:\n  {:.2f} or {:.2f}".format(
            self.stats['trivial_accuracy'], 1 - self.stats['trivial_accuracy']))


In [None]:
class MrcnnCocoDataset(CocoDataset, ImageDatatype, DetectionTask):
    '''COCO-backed image dataset that yields (data, detection targets) pairs.'''
    # TODO:
    #   *  Class statistics readout
    #   *  Support for computing class weights given current dataset config
    #   *  Support for forcing class balance by selecting IDs evenly
    #   *  Generator data order optimization
    #   *  Support for visualising data sample or prediction with same format
    def __init__(self, json_path, **kwargs):
        '''Initialise each base in turn; `kwargs` is forwarded to all of them.

        Args:
            json_path: Passed to `CocoDataset.__init__`.
        '''
        CocoDataset.__init__(self, json_path, **kwargs)
        ImageDatatype.__init__(self, self.coco, **kwargs)
        # This class derives from DetectionTask; ClassificationTask is neither
        # a base class nor defined in this notebook.
        DetectionTask.__init__(self, self.coco, **kwargs)

    def sample(self, image_id=None, **kwargs):
        '''Return `(data, targets)` for one item.

        Args:
            image_id: Id of the item to load; a random id from `self.data_ids`
                is used when this is None.
            **kwargs: Forwarded to `load_data` and `load_targets`.
        '''
        # Compare against None so that a legitimate id of 0 is not replaced.
        if image_id is None:
            image_id = random.choice(list(self.data_ids))
        return (self.load_data(image_id, **kwargs), self.load_targets(image_id, **kwargs))

    def generator(self, data_ids=None, shuffle_ids=False, endless=False, **kwargs):
        '''Yield `(data, targets)` pairs.

        Args:
            data_ids: Ids to iterate over; all of `self.data_ids` when empty or None.
            shuffle_ids: Shuffle the id order once before iterating.
            endless: Cycle over the ids forever instead of stopping after one pass.
            **kwargs: Forwarded to `load_data` and `load_targets`.
        '''
        # Always work on a private copy so shuffling never reorders the
        # caller's list.
        ids = list(data_ids) if data_ids else list(self.data_ids)
        if shuffle_ids:
            random.shuffle(ids)
        iterator = itertools.cycle if endless else iter
        for data_id in iterator(ids):
            yield self.load_data(data_id, **kwargs), self.load_targets(data_id, **kwargs)



# Setup Variables

In [None]:
# Notebook-wide switches and paths.
image_dir, categories = None, None
num_classes = 2
use_balanced_set, use_class_weights = False, True
config_file = "/home/docker/src/abyss/deep-learning/configs/MaskRCNN_default_config.py"

# Augmentation pipeline: horizontal flip, brightness scaling (applied 90% of
# the time), then a random scale / translate / rotate affine warp.
aug_config = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Sometimes(0.9, iaa.Multiply((0.8, 1.2))),
    iaa.Affine(
        scale=(0.85, 1.15),
        translate_percent=dict(x=(-0.15, 0.15), y=(-0.15, 0.15)),
        rotate=(-45, 45),
        shear=0.0,
        order=1,
        cval=0,
        mode='constant',
    ),
])

# Setup Data

In [None]:
database_dir = "/data/acfr/collated/2017-summer-lettuce"
dataset_name = "weeks2to6"

########## Don't modify below
# Annotation file per split.
# NOTE(review): 'test' points at val.json, the same file as 'val' - confirm
# this is intentional and not a missing test split.
dataset_files = {
    'train': os.path.join(database_dir, "{:s}/train.json".format(dataset_name)),
    'val': os.path.join(database_dir, "{:s}/val.json".format(dataset_name)),
    'test': os.path.join(database_dir, "{:s}/val.json".format(dataset_name))
}
# Single container for everything derived from the splits.
dataset = {
    'names': list(dataset_files.keys()),
    'classes': [], # MUST FILL IN
    'class_weights': {name: None for name in dataset_files.keys()},
    'ids' : {},
    'gens': {},
    'data': {},
    'coco': {},
    'config': load_config(config_file)
}

for name, path in dataset_files.items():
    # NOTE(review): the import of `MaskRcnnDataset` is commented out in the
    # import cell, so this name must be provided some other way.
    coco = MaskRcnnDataset(path)
    ids = coco.image_ids
    # Only the training split is augmented; the val/test generators must yield
    # unmodified images so that evaluation and plots reflect the real data.
    gen = data_generator(
        coco, dataset['config'], shuffle=True,
        augmentation=aug_config if name == 'train' else None,
        detection_targets=False)
    print("{:s}: {:d} images".format(name, len(ids)))
    dataset['coco'][name] = coco
    dataset['ids'][name] = ids
    dataset['gens'][name] = gen

dataset['name'] = dataset_name.replace("/", "-")
dataset['classes'] = sorted([cat['id'] for cat in dataset['coco']['train'].coco.cats.values()])
dataset['config'].NAME = dataset['name']
# Never let the step count drop to zero when the split is smaller than a batch.
dataset['config'].STEPS_PER_EPOCH = max(
    1, len(dataset['ids']['train']) // dataset['config'].BATCH_SIZE)
# NOTE(review): mrcnn counts the background class in NUM_CLASSES - confirm the
# annotation categories already account for it.
dataset['config'].NUM_CLASSES = len(dataset['classes'])
# mrcnn's Config derives IMAGE_META_SIZE from NUM_CLASSES inside its
# constructor, so it has to be refreshed after NUM_CLASSES is overridden.
if hasattr(dataset['config'], 'IMAGE_META_SIZE'):
    dataset['config'].IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + dataset['config'].NUM_CLASSES
dataset['config'].display()

num_classes = dataset['config'].NUM_CLASSES


In [None]:
%%timeit -n5 -r1
# Time one `sample()` call on the training split (5 loops, 1 repeat) and print
# the image shape, the shape of the first target element and the second element.
image, target = dataset['coco']['train'].sample()
print(image.shape, target[0].shape, target[1])


In [None]:
# Pull a single batch from the training generator and report what it contains.
inputs, targets = next(iter(dataset['gens']['train']))
images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks = inputs
print("images.shape", images.shape)
print("image_meta.shape", image_meta.shape)
print("rpn_match.shape", rpn_match.shape)
print("rpn_bbox.shape", rpn_bbox.shape)
print("gt_class_ids.shape", gt_class_ids.shape)
print("gt_boxes.shape", gt_boxes.shape)
print("gt_masks.shape", gt_masks.shape)
print("images min/max", np.min(images), np.max(images))

# Grid of raw samples: one column per split, `num_rows` samples each.
split_names = dataset['names']
num_rows = 2
# Derive the column count from the splits instead of hard-coding 3.
num_cols = len(split_names)
print("Left to right: ground truth samples from " + ', '.join(split_names))
plt.figure()
for row in range(num_rows):
    for col, name in enumerate(split_names):
        plt.subplot(num_rows, num_cols, num_cols * row + col + 1)
        image, targets = dataset['coco'][name].sample()
        # Preview the augmentation on the training column only.
        if name == 'train':
            image = aug_config.augment_image(image)
        plt.imshow(image)
        plt.title(name)
        plt.axis('off')



In [None]:
# coco = dataset['coco']['train'].coco
# for ann_id, ann in coco.anns.items():
#     try:
#         mask = coco.annToMask(ann)
#     except:
#         print("masking failed on", ann_id)
#     if ann.get('annotation_type', None) == 'magnetic_lasso':
#         print("mag", ann_id)
#         plt.figure()
#         plt.imshow(mask)

In [None]:
if use_class_weights:
    # Class ids in the fixed order used to index both dictionaries below.
    class_ids = np.array(dataset['classes'])
    for name, ds in dataset['coco'].items():
        print("{:s} {:s} class stats {:s}".format('=' * 8, name, '=' * 8))
        y = [ann['category_id'] for ann in ds.coco.anns.values() if 'segmentation' in ann]
        if not y:
            # compute_class_weight cannot work on an empty label list.
            print("  no annotations with a segmentation; skipping")
            continue
        counts = Counter(y)
        # Index by position in dataset['classes'] so the fractions line up with
        # the class weights even when a class is absent from this split.
        spread = {
            i: float(np.round(counts.get(class_id, 0) / len(y), 2))
            for i, class_id in enumerate(dataset['classes'])}
        # Keyword arguments are required by current scikit-learn and accepted
        # by older releases.
        class_weights = compute_class_weight('balanced', classes=class_ids, y=y)
        class_weights = {i: float(np.round(v, 3)) for i, v in enumerate(class_weights)}
        dataset['class_weights'][name] = class_weights

        print("class weights:")
        print(" ", class_weights)
        print("class cover fractions:\n  ", spread )


In [None]:
class Experiment(object):
    '''Holds a Mask R-CNN model together with the config and log directory used to build it.'''

    def __init__(self, config, model_dir):
        '''Store the settings; no model is built until `create` is called.

        Args:
            config: Configuration object handed to `MaskRCNN`. It is mutated by `create`.
            model_dir: Directory handed to `MaskRCNN` as `model_dir`.
        '''
        self.epoch = 0
        self.model = None
        self.config = config
        self.model_dir = model_dir
        self.compiled = False

    def create(self, model_path=None, train=False, fresh_heads=False, gpu_count=1):
        '''Build a fresh `MaskRCNN` and load weights into it.

        Args:
            model_path: Weights file to load; '/data/models/mask_rcnn_coco.h5' when falsy.
            train: Build the model in "training" mode, otherwise "inference".
                Inference forces one image per GPU on the shared config.
            fresh_heads: Skip loading the class-logits, bbox and mask head
                layers so they start from their initial weights.
            gpu_count: Written to `config.GPU_COUNT`.

        Returns:
            The newly built model (also available as `self.model`).
        '''
        if not model_path:
            model_path = '/data/models/mask_rcnn_coco.h5'

        if not train:
            self.config.IMAGES_PER_GPU = 1
        self.config.GPU_COUNT = gpu_count
        # Keep the batch size consistent with the two values it is derived from.
        self.config.BATCH_SIZE = self.config.IMAGES_PER_GPU * gpu_count

        # Drop the previous model before clearing the Keras session.
        self.model = None
        K.clear_session()
        self.model = MaskRCNN(
            mode=("training" if train else "inference"),
            config=self.config, model_dir=self.model_dir)

        exclude = [
            "mrcnn_class_logits", "mrcnn_bbox_fc",
            "mrcnn_bbox", "mrcnn_mask"] if fresh_heads else []
        self.model.load_weights(model_path, by_name=True, exclude=exclude)
        return self.model

    def train(self, learning_rate, epochs, layers,
              train_dataset=None, val_dataset=None, **kwargs):
        '''Run `MaskRCNN.train` on the current model.

        Args:
            learning_rate, epochs, layers: Forwarded positionally to `MaskRCNN.train`.
            train_dataset: Training dataset; defaults to the notebook-level
                `dataset['coco']['train']`.
            val_dataset: Validation dataset; defaults to the notebook-level
                `dataset['coco']['val']`.
            **kwargs: Forwarded to `MaskRCNN.train`.

        Returns:
            Whatever `MaskRCNN.train` returns.

        Raises:
            RuntimeError: If `create` has not been called yet.
        '''
        if self.model is None:
            raise RuntimeError("Experiment.create() must be called before train()")
        if train_dataset is None:
            train_dataset = dataset['coco']['train']
        if val_dataset is None:
            val_dataset = dataset['coco']['val']
        return self.model.train(
            train_dataset, val_dataset,
            learning_rate, epochs, layers,
            **kwargs
        )


# Train MRCNN heads

In [None]:
model_path = None # None for COCO pretrained weights
logdir = os.path.join("/data/log/maskrcnn", dataset['name'])
# Checkpoint filename template; the placeholders are left for Keras to fill in.
best_path = os.path.join(logdir, "models/best.{epoch:03d}-{val_loss:.4f}.h5")
# Create the log directory and the `models/` sub-directory that `best_path`
# points into, without shelling out.
os.makedirs(os.path.dirname(best_path), exist_ok=True)

In [None]:
# Training-time overrides on the shared config object.
config = dataset['config']
config.USE_MINI_MASK = True
config.WEIGHT_DECAY = 1e-4
# Never let the step count drop to zero when the validation split is smaller
# than one batch.
config.VALIDATION_STEPS = max(1, len(dataset['ids']['val']) // config.BATCH_SIZE)

In [None]:
# Drop any previous experiment before building a new model.
exp = None
exp = Experiment(dataset['config'], logdir)
exp.create(model_path=model_path, train=True, fresh_heads=True, gpu_count=1)
# Read the model from the experiment rather than from `create`'s return value,
# so `model` is always the model that was just built.
model = exp.model

In [None]:
# Stage 1: train the 'heads' layer group with early stopping, then save the weights.
exp.train(
    learning_rate=2.5e-3,
    epochs=50,
    layers='heads',
    augmentation=aug_config,
    no_augmentation_sources=None,
    custom_callbacks=[
        EarlyStopping(min_delta=0.05, patience=2, verbose=1),
    ],
)
exp.model.keras_model.save_weights(os.path.join(logdir, 'heads.h5'))

In [None]:
# Stage 2: rebuild the model from the saved heads weights and train all layers.
saved_model_path = os.path.join(logdir, 'heads.h5')

exp = None
exp = Experiment(dataset['config'], logdir)
exp.create(model_path=saved_model_path, train=True, fresh_heads=False)
# Read the model from the experiment rather than from `create`'s return value.
model = exp.model

# NOTE(review): 12 is hard-coded; presumably the epoch at which the heads run
# stopped - confirm before relying on the resumed epoch numbering.
if exp.model.epoch == 0:
    exp.model.epoch = 12

callbacks = [
    ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=3, cooldown=10, verbose=1),
    EarlyStopping(
        monitor='val_loss', min_delta=0.0, patience=20, verbose=1, mode='auto')
]
# Reuse the optimizer's learning rate when one is attached to the Keras model;
# otherwise fall back to 1e-4.
try:
    lr = K.get_value(exp.model.keras_model.optimizer.lr)
except AttributeError:
    lr = 1e-4


exp.train(
    lr, 200, 'all',
    augmentation=aug_config,
    custom_callbacks=callbacks,
    no_augmentation_sources=None)
exp.model.keras_model.save_weights(os.path.join(logdir, 'final.h5'))

In [None]:
# Rebuild the model in inference mode with full-size masks.
exp = None
config.USE_MINI_MASK = False
config.IMAGES_PER_GPU = 1
exp = Experiment(dataset['config'], logdir)
# Load the weights this notebook actually writes ('final.h5'); nothing in the
# notebook produces a 'final2.h5'.
exp.create(model_path=os.path.join(logdir, 'final.h5'), train=False, fresh_heads=False)
# Read the model from the experiment rather than from `create`'s return value.
model = exp.model

# Visualisation

In [None]:
from mrcnn.utils import expand_mask
from mrcnn.visualize import display_images, display_instances
from abyss_deep_learning.keras.segmentation import jaccard_index

def plot_test(gen, model, num_images=1, show=False):
    '''Run detection on batches from `gen` and compare against the ground truth.

    Only the first image of each batch is used. Predictions are matched
    one-to-one to ground-truth instances by maximising the total mask IoU.

    Args:
        gen: Iterable yielding `(inputs, targets)` where `inputs` unpacks to
            (images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks).
        model: Object with a `detect(images, verbose=...)` method.
        num_images: Number of batches to process (at least one is processed).
        show: Plot ground truth (left) next to the predictions (right).

    Returns:
        A list with one `(num_predictions, num_ground_truths)` IoU matrix per
        processed image.
    '''
    from scipy.optimize import linear_sum_assignment

    cfg = dataset['config']
    # Loop-invariant, so build it once.
    class_names = ['BG'] + [cat['name'] for cat in dataset['coco']['train'].coco.cats.values()]

    ious_list = []
    for count, (inputs, _targets) in enumerate(gen, start=1):
        images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks = inputs

        # The generator yields mean-subtracted images. `detect` subtracts the
        # mean itself, so undo the subtraction before detecting or displaying.
        image = np.clip(np.rint(images[0] + cfg.MEAN_PIXEL), 0, 255).astype(np.uint8)

        # Padding rows are all zeros. `any` keeps real boxes that touch the
        # top/left border (a zero coordinate), which `all` would discard.
        valid = np.any(gt_boxes[0], axis=1)
        class_ids = gt_class_ids[0, valid]
        masks = gt_masks[0, ..., valid].transpose((1, 2, 0))
        boxes = gt_boxes[0, valid, ...]

        # Mini-masks are box-relative crops and must be expanded to image size;
        # full-size masks must be left alone.
        if cfg.USE_MINI_MASK:
            labels = expand_mask(boxes, masks, image.shape).astype(np.uint8)
        else:
            labels = masks.astype(np.uint8)

        r = model.detect([image], verbose=True)[0]
        num_pred = len(r['class_ids'])
        num_gt = len(class_ids)
        print("GTs = {:d}, Pred = {:d}".format(num_gt, num_pred))

        # Pre-sized so the matrix stays 2-D when there are no predictions or
        # no ground-truth instances.
        ious = np.zeros((num_pred, num_gt), dtype=np.float64)
        for pred in range(num_pred):
            for gt in range(num_gt):
                ious[pred, gt] = jaccard_index(r['masks'][..., pred], labels[..., gt])

        # IoU of each prediction with its assigned ground truth; predictions
        # left unassigned keep 0.0.
        matched_ious = np.zeros(num_pred, dtype=np.float64)
        if num_pred and num_gt:
            pred_idx, gt_idx = linear_sum_assignment(1 - ious)
            matched_ious[pred_idx] = ious[pred_idx, gt_idx]
        r['ious'] = matched_ious

        print("IoUs", r['ious'])
        print("Scores", r['scores'])
        ious_list.append(ious)

        if show:
            plt.figure()
            ax = plt.subplot(1, 2, 1)
            display_instances(
                image,
                boxes,
                labels,
                class_ids,
                class_names, ax=ax)
            ax = plt.subplot(1, 2, 2, sharex=ax, sharey=ax)
            display_instances(
                image,
                r['rois'],
                r['masks'],
                r['class_ids'],
                class_names, ax=ax)

        if count >= num_images:
            break
    return ious_list

ious = plot_test(dataset['gens']['test'], exp.model, num_images=1, show=True)

In [None]:
import mrcnn.visualize as viz
# evaluate_coco(model, dataset_val, coco_val, eval_type="segm", limit=0, image_ids=None)
# Pass the experiment's current model to mrcnn's `display_weight_stats`.
viz.display_weight_stats(exp.model)

In [None]:
coco = dataset['coco']['val']
# NOTE(review): image id 1 is hard-coded - confirm it exists in the val split.
image = coco.load_image(1)
# Get activations of a few sample layers
activations = exp.model.run_graph([image], [
    ("res2c_out",          exp.model.keras_model.get_layer("res2c_out").output),
    ("res3c_out",          exp.model.keras_model.get_layer("res3c_out").output),
    ("res4c_out",          exp.model.keras_model.get_layer("res4c_out").output),
    ("res5c_out",          exp.model.keras_model.get_layer("res5c_out").output),
    ("rpn_bbox",           exp.model.keras_model.get_layer("rpn_bbox").output),
    ("roi",                exp.model.keras_model.get_layer("ROI").output),
])

# One panel per backbone stage, two panels per row.
plt.figure()
layer_names = ["res2c_out", "res3c_out", "res4c_out", "res5c_out"]
# Round up so an odd number of layers still gets enough rows.
num_rows = (len(layer_names) + 1) // 2
for i, layer in enumerate(layer_names):
    plt.subplot(num_rows, 2, i + 1)
    # Sum over axis 3 and show the first item of the batch.
    plt.imshow(activations[layer].sum(axis=3)[0])
    plt.title(layer)
plt.tight_layout()

In [None]:
# # Backbone feature map
# display_images(np.transpose(activations["res2c_out"][0,:,:,:4], [2, 0, 1]), cols=4)
# display_images(np.transpose(activations["res3c_out"][0,:,:,:4], [2, 0, 1]), cols=4)
# display_images(np.transpose(activations["res4c_out"][0,:,:,:4], [2, 0, 1]), cols=4)
# display_images(np.transpose(activations["res5c_out"][0,:,:,:4], [2, 0, 1]), cols=4)
