In [1]:
import sys
from collections import OrderedDict
from tqdm.notebook import tqdm
# Extend the module search path; presumably this is where the `awsdet` package
# imported in the next cell lives — confirm.
# NOTE(review): absolute, machine-specific path.
sys.path.append('/workspace/shared_workspace/deep-learning-models/models/vision/detection/')
import tensorflow as tf
import horovod.tensorflow as hvd
# Initialise Horovod for this process.
hvd.init()

In [2]:
from awsdet.datasets.coco import CocoDataset
from awsdet.datasets.data_generator import DataGenerator
from awsdet.datasets.loader.build_loader import build_dataloader

from awsdet.models.backbones import keras_backbone
from awsdet.models.necks import fpn
from awsdet.models.anchor_heads.rpn_head import RPNHead
from awsdet.core.bbox.bbox_target import ProposalTarget
from awsdet.models.roi_extractors.roi_align import PyramidROIAlign
from awsdet.models.bbox_heads.bbox_head import BBoxHead

In [3]:
# Where the COCO data is read from and which split to use.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
dataset_dir = '/workspace/shared_workspace/data/coco/coco/'
subset = 'train'
# Build the dataset object with train=True.
coco_dataset = CocoDataset(dataset_dir, subset, train=True)

loading annotations into memory...
Done (t=13.03s)
creating index...
index created!


In [4]:
# Batch size handed to the loader; train_step also passes it to parse_losses
# for loss scaling.
local_batch_size=2
# Only the first element of build_dataloader's return value is kept
# (presumably the iterable data pipeline — confirm against build_dataloader).
coco_tdf = build_dataloader(coco_dataset, local_batch_size)[0]

In [5]:
# Stand-alone instances of each detector component.
# NOTE(review): the Model classes defined in later cells construct their own
# copies, and no later visible cell reads these module-level names.
backbone = keras_backbone.KerasBackbone('ResNet50V1')
neck = fpn.FPN()
rpn_head = RPNHead()
bbox_target = ProposalTarget()
# Two ROI extractors with different pooled sizes: 7x7 and 14x14.
bbox_roi_extractor = PyramidROIAlign(pool_shape=[7, 7], pool_type='avg')
mask_roi_extractor = PyramidROIAlign(pool_shape=[14, 14], pool_type='avg')

In [6]:
import tensorflow as tf
from tensorflow.keras import layers

class MaskHead(tf.keras.Model):
    """Mask prediction head applied independently to each entry of a list of pooled ROIs.

    Each entry goes through four (3x3 conv, 256 filters -> batch norm -> ReLU)
    blocks, a 2x2 stride-2 transposed convolution with ReLU, and a final 1x1
    convolution with a sigmoid that emits ``num_classes`` channels.

    Args:
        num_classes: number of output channels of the final 1x1 convolution.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes
        # Convolution block 1
        self.conv_1 = layers.Conv2D(256, (3, 3), padding="same", name="mask_conv_1")
        self.bn_1 = layers.BatchNormalization(name='mask_bn_1')
        self.activation_1 = layers.ReLU()
        # Convolution block 2
        self.conv_2 = layers.Conv2D(256, (3, 3), padding="same", name="mask_conv_2")
        self.bn_2 = layers.BatchNormalization(name='mask_bn_2')
        self.activation_2 = layers.ReLU()
        # Convolution block 3
        self.conv_3 = layers.Conv2D(256, (3, 3), padding="same", name="mask_conv_3")
        self.bn_3 = layers.BatchNormalization(name='mask_bn_3')
        self.activation_3 = layers.ReLU()
        # Convolution block 4
        self.conv_4 = layers.Conv2D(256, (3, 3), padding="same", name="mask_conv_4")
        self.bn_4 = layers.BatchNormalization(name='mask_bn_4')
        self.activation_4 = layers.ReLU()
        # Stride-2 transposed conv doubles the spatial size (14x14 input -> 28x28).
        self.deconv = layers.Conv2DTranspose(256, (2, 2), strides=2, activation="relu",
                                             name="mask_deconv")
        self.masks = layers.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid", name="mask")

    def call(self, inputs, training=True):
        """Predict masks for every element of ``inputs``.

        Args:
            inputs: iterable of pooled ROI feature tensors (one per image,
                presumably — confirm against the ROI extractor).
            training: forwarded to the batch-normalisation layers so they use
                batch statistics when True and moving averages when False.

        Returns:
            A list with one sigmoid mask tensor per element of ``inputs``.
        """
        # Grouped locally (not stored on self) so layer tracking is unchanged.
        conv_blocks = (
            (self.conv_1, self.bn_1, self.activation_1),
            (self.conv_2, self.bn_2, self.activation_2),
            (self.conv_3, self.bn_3, self.activation_3),
            (self.conv_4, self.bn_4, self.activation_4),
        )
        masks = []
        for pooled_rois in inputs:
            x = pooled_rois
            for conv, batch_norm, activation in conv_blocks:
                x = conv(x)
                # Pass the flag explicitly instead of relying on Keras to
                # infer it from the enclosing call context.
                x = batch_norm(x, training=training)
                x = activation(x)
            x = self.deconv(x)
            masks.append(self.masks(x))
        return masks

In [7]:
# Pull one batch for interactive inspection; a fresh iterator is created each
# time this cell runs.
img, img_metas, gt_boxes, gt_class_ids = next(iter(coco_tdf))

Starting new loop for GPU: 0


In [16]:
# Module-level flag; Model.call takes its own `training` parameter, which
# shadows this name inside the method.
training=True
class Model(tf.keras.Model):
    """Debugging assembly of backbone, FPN neck, RPN head, box head and mask head.

    NOTE(review): a later cell defines another class that is also named
    ``Model``; whichever cell ran last is the one bound to the name.
    """

    def __init__(self):
        super().__init__()
        self.backbone = keras_backbone.KerasBackbone('ResNet50V1')
        self.neck = fpn.FPN()
        self.rpn_head = RPNHead()
        self.bbox_target = ProposalTarget()
        self.bbox_roi_extractor = PyramidROIAlign(pool_shape=[7, 7], pool_type='avg')
        self.mask_roi_extractor = PyramidROIAlign(pool_shape=[14, 14], pool_type='avg')
        # 81 is MaskHead's num_classes; presumably also the class count for
        # BBoxHead — confirm against BBoxHead's signature.
        self.bbox_head = BBoxHead(81, (7, 7))
        self.mask_head = MaskHead(81)

    def call(self, img, img_metas, gt_boxes, gt_class_ids, training=True):
        """Run the forward pass and return the proposals alongside the ground truth.

        Args:
            img: image batch fed to the backbone.
            img_metas: per-image metadata passed to the RPN, target builder and
                ROI extractors.
            gt_boxes: ground-truth boxes.
            gt_class_ids: ground-truth class ids.
            training: when True, ROIs come from ``bbox_target.build_targets``
                and the RPN/RCNN losses are evaluated; when False the raw
                proposals are used as ROIs and no loss is computed.

        Returns:
            ``(proposals_list, gt_boxes, gt_class_ids, img_metas)``. Masks and
            losses are computed but intentionally not returned.
        """
        C2, C3, C4, C5 = self.backbone(img, training=training)
        P2, P3, P4, P5, P6 = self.neck((C2, C3, C4, C5), training=training)
        # The RPN sees all five pyramid levels; the RCNN heads skip P6.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        rcnn_feature_maps = [P2, P3, P4, P5]
        rpn_class_logits, rpn_probs, rpn_deltas = self.rpn_head(rpn_feature_maps, training=training)
        proposals_list = self.rpn_head.get_proposals(
                    rpn_probs, rpn_deltas, img_metas, training=training)
        if training:
            rois_list, gt_assignment, rcnn_target_matchs, \
            rcnn_target_deltas, inside_weights, outside_weights = \
                self.bbox_target.build_targets(proposals_list, gt_boxes, gt_class_ids, img_metas)
        else:
            rois_list = proposals_list
        bbox_pooled_regions_list = self.bbox_roi_extractor(
            (rois_list, rcnn_feature_maps, img_metas), training=training)
        rcnn_class_logits, rcnn_probs, rcnn_deltas = \
            self.bbox_head(bbox_pooled_regions_list, training=training)
        mask_pooled_regions_list = self.mask_roi_extractor(
            (rois_list, rcnn_feature_maps, img_metas), training=training)
        # Forward the flag like every other sub-module so the mask head's
        # batch-norm layers follow the requested mode.
        masks = self.mask_head(mask_pooled_regions_list, training=training)
        if training:
            # The RCNN targets only exist in training mode; evaluating the
            # losses unconditionally raised NameError when training=False.
            rpn_inputs = (rpn_class_logits, rpn_deltas, gt_boxes, gt_class_ids, img_metas)
            rpn_class_loss, rpn_bbox_loss = self.rpn_head.loss(rpn_inputs)
            rcnn_inputs = (rcnn_class_logits, rcnn_deltas, rcnn_target_matchs,
                    rcnn_target_deltas, inside_weights, outside_weights)
            rcnn_class_loss, rcnn_bbox_loss = self.bbox_head.loss(rcnn_inputs)
            # NOTE(review): built to exercise the loss code paths; not returned.
            losses_dict = {
                    'rpn_class_loss': rpn_class_loss,
                    'rpn_bbox_loss': rpn_bbox_loss,
                    'rcnn_class_loss': rcnn_class_loss,
                    'rcnn_bbox_loss': rcnn_bbox_loss
                }
        return proposals_list, gt_boxes, gt_class_ids, img_metas

In [17]:
# Instantiate the model and push one batch through it; Model.call defaults to
# training=True.
model = Model()
# Keep the iterator so later cells continue with the next batch.
train_tdf = iter(coco_tdf)
proposals_list, gt_boxes, gt_class_ids, img_metas = model(*next(train_tdf))

Starting new loop for GPU: 0


In [18]:
# Run the next batch from the same iterator through the model, overwriting the
# previous outputs.
proposals_list, gt_boxes, gt_class_ids, img_metas = model(*next(train_tdf))

In [47]:
import numpy as np
from awsdet.models.utils.misc import calc_pad_shapes, trim_zeros
from awsdet.core.bbox import geometry, transforms

In [53]:
# Work on a single image of the batch.
idx = 1
# trim_zeros returns the kept rows plus a mask; presumably it drops all-zero
# padding rows — confirm in awsdet.models.utils.misc.
proposals, _ = trim_zeros(proposals_list[idx])
gt_box, non_zeros = trim_zeros(gt_boxes[idx])
gt_box = tf.cast(gt_box, proposals.dtype)
# Keep only the class ids selected by the same mask as the boxes.
gt_labels = tf.boolean_mask(gt_class_ids[idx], non_zeros)
# Perturb the ground-truth boxes with truncated-normal noise
# (mean = noise_mean, stddev = 0.1).
noise_mean = 5.0
noisy_gt_boxes = tf.add(gt_box, 
                    tf.random.truncated_normal(tf.shape(gt_box), 
                                               noise_mean, 0.1, dtype=proposals.dtype))
# Append the perturbed ground-truth boxes to the proposals.
proposals_gt = tf.concat([proposals, noisy_gt_boxes], axis=0)
# Overlap between each candidate box and each ground-truth box
# (presumably IoU, candidates on axis 0 — confirm in geometry.compute_overlaps).
iou = geometry.compute_overlaps(proposals_gt, gt_box)
# Reduce over axis 1: the best overlap per candidate and the index attaining it.
max_overlaps = tf.reduce_max(iou, axis=1)
gt_assignment = tf.argmax(iou, axis=1)

In [54]:
# Distinct values present in gt_assignment (the argmax indices computed above).
np.unique(gt_assignment.numpy())

array([0, 1, 2, 3])

In [None]:
# Module-level flag; Model.call takes its own `training` parameter, which
# shadows this name inside the method.
training=True
class Model(tf.keras.Model):
    """Backbone + FPN neck + RPN head + box head, returning a dict of losses.

    NOTE(review): an earlier cell defines another class that is also named
    ``Model``; running this cell rebinds the name to this definition.
    """

    def __init__(self):
        super().__init__()
        self.backbone = keras_backbone.KerasBackbone('ResNet50V1')
        self.neck = fpn.FPN()
        self.rpn_head = RPNHead()
        self.bbox_target = ProposalTarget()
        self.bbox_roi_extractor = PyramidROIAlign(pool_shape=[14, 14], pool_type='avg')
        # The second argument matches the ROI extractor's 14x14 pool shape.
        self.bbox_head = BBoxHead(81, (14, 14))

    def call(self, img, img_metas, gt_boxes, gt_class_ids, training=True):
        """Run the detector and return losses (training) or head outputs (inference).

        Args:
            img: image batch fed to the backbone.
            img_metas: per-image metadata passed to the RPN, target builder and
                ROI extractor.
            gt_boxes: ground-truth boxes.
            gt_class_ids: ground-truth class ids.
            training: selects target building and loss computation.

        Returns:
            When ``training`` is True, a dict with keys ``rpn_class_loss``,
            ``rpn_bbox_loss``, ``rcnn_class_loss`` and ``rcnn_bbox_loss``.
            When False, a dict with ``rois_list``, ``rcnn_class_logits``,
            ``rcnn_probs`` and ``rcnn_deltas`` (the RCNN targets needed for the
            losses do not exist in that mode).
        """
        C2, C3, C4, C5 = self.backbone(img, training=training)
        P2, P3, P4, P5, P6 = self.neck((C2, C3, C4, C5), training=training)
        # The RPN sees all five pyramid levels; the RCNN head skips P6.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        rcnn_feature_maps = [P2, P3, P4, P5]
        rpn_class_logits, rpn_probs, rpn_deltas = self.rpn_head(rpn_feature_maps, training=training)
        proposals_list = self.rpn_head.get_proposals(
                    rpn_probs, rpn_deltas, img_metas, training=training)
        if training:
            targets = self.bbox_target.build_targets(
                proposals_list, gt_boxes, gt_class_ids, img_metas)
            # Another cell of this notebook unpacks six values from
            # build_targets (gt_assignment in second position) while this cell
            # used to unpack five. Take the ROIs from the front and the four
            # loss inputs from the back so both arities work.
            rois_list = targets[0]
            rcnn_target_matchs, rcnn_target_deltas, inside_weights, outside_weights = \
                targets[-4:]
        else:
            rois_list = proposals_list
        pooled_regions_list = self.bbox_roi_extractor(
            (rois_list, rcnn_feature_maps, img_metas), training=training)
        rcnn_class_logits, rcnn_probs, rcnn_deltas = \
            self.bbox_head(pooled_regions_list, training=training)
        if not training:
            # Without targets the loss code below would hit undefined names,
            # so hand back the raw head outputs instead.
            return {
                'rois_list': rois_list,
                'rcnn_class_logits': rcnn_class_logits,
                'rcnn_probs': rcnn_probs,
                'rcnn_deltas': rcnn_deltas,
            }
        rpn_inputs = (rpn_class_logits, rpn_deltas, gt_boxes, gt_class_ids, img_metas)
        rpn_class_loss, rpn_bbox_loss = self.rpn_head.loss(rpn_inputs)
        rcnn_inputs = (rcnn_class_logits, rcnn_deltas, rcnn_target_matchs,
                rcnn_target_deltas, inside_weights, outside_weights)
        rcnn_class_loss, rcnn_bbox_loss = self.bbox_head.loss(rcnn_inputs)
        losses_dict = {
                'rpn_class_loss': rpn_class_loss,
                'rpn_bbox_loss': rpn_bbox_loss,
                'rcnn_class_loss': rcnn_class_loss,
                'rcnn_bbox_loss': rcnn_bbox_loss
            }
        return losses_dict

In [None]:
def parse_losses(losses, local_batch_size):
    """Collapse named losses to scalars and combine them into one total.

    Args:
        losses: mapping from loss name to either a tensor or a list of tensors.
        local_batch_size: divisor applied to every entry whose name contains
            'loss' but not 'reg_loss'.

    Returns:
        ``(total_loss, log_vars)``: ``log_vars`` is an OrderedDict holding the
        reduced (unscaled) value of every input entry plus the total under the
        key 'loss'.

    Raises:
        TypeError: if a value is neither a tensor nor a list.
    """
    def _as_scalar(name, value):
        # A tensor is averaged; a list is averaged element-wise and then summed.
        if tf.is_tensor(value):
            return tf.reduce_mean(value)
        if isinstance(value, list):
            return tf.add_n([tf.reduce_mean(part) for part in value])
        raise TypeError(
            '{} is not a tensor or list of tensors'.format(name))

    log_vars = OrderedDict(
        (name, _as_scalar(name, value)) for name, value in losses.items())

    # horovod averages (not sums) gradients by default over workers, so every
    # term except the regularisation loss is divided by the local batch size.
    contributions = [
        scalar if 'reg_loss' in name else scalar / local_batch_size
        for name, scalar in log_vars.items()
        if 'loss' in name
    ]
    total_loss = sum(contributions)
    log_vars['loss'] = total_loss
    return total_loss, log_vars



In [None]:
# Build the model and run one batch through it before training
# (presumably to create its variables ahead of tracing train_step — confirm).
model = Model()
train_tdf = iter(coco_tdf)
# `loss` holds whatever Model.call returns for this batch.
loss = model(*next(train_tdf))
# Plain SGD with momentum, no Nesterov.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.7, nesterov=False)

In [None]:
@tf.function(experimental_relax_shapes=True)
def train_step(img, img_metas, gt_boxes, gt_class_ids):
    """Run one optimisation step on a single batch and return the total loss.

    Reads the module-level ``model``, ``optimizer`` and ``local_batch_size``.

    Args:
        img: image batch.
        img_metas: per-image metadata.
        gt_boxes: ground-truth boxes.
        gt_class_ids: ground-truth class ids.

    Returns:
        The scalar total loss produced by ``parse_losses``.
    """
    with tf.GradientTape() as tape:
        # Be explicit about the mode rather than relying on the default.
        losses = model(img, img_metas, gt_boxes, gt_class_ids, training=True)
        # tf.add_n rejects an empty list, so only add the regularisation term
        # when the model actually registered regularisation losses.
        if model.losses:
            losses['reg_loss'] = tf.add_n(model.losses)
        loss, _ = parse_losses(losses, local_batch_size)
    # Horovod is initialised in this notebook and the loss scaling assumes
    # gradients are averaged across workers; wrapping the tape performs that
    # all-reduce (it is a pass-through with a single worker).
    # NOTE(review): multi-worker runs also need an initial
    # hvd.broadcast_variables call, which is outside this function.
    tape = hvd.DistributedGradientTape(tape)
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return loss

In [None]:
# Run 10000 training steps, showing the latest loss in the progress-bar label.
progressbar = tqdm(range(10000))

for i in progressbar:
    # NOTE(review): next() raises StopIteration if the iterator runs out before
    # 10000 batches have been drawn.
    loss = train_step(*next(train_tdf))
    progressbar.set_description("Loss: {0:.3f}".format(loss))