In [1]:
import sys
from collections import OrderedDict
from tqdm.notebook import tqdm
# Extend the module search path before the awsdet imports in the next cell.
# NOTE(review): hard-coded absolute path, presumably the directory that
# contains the awsdet package — confirm / adjust for your environment.
sys.path.append('/workspace/shared_workspace/deep-learning-models/models/vision/detection/')
import tensorflow as tf
import horovod.tensorflow as hvd
# Initialise Horovod right after importing it.
hvd.init()

In [2]:
from awsdet.datasets.coco import CocoDataset
from awsdet.datasets.data_generator import DataGenerator
from awsdet.datasets.loader.build_loader import build_dataloader

from awsdet.models.backbones import keras_backbone
from awsdet.models.necks import fpn
from awsdet.models.anchor_heads.rpn_head import RPNHead
from awsdet.core.bbox.bbox_target import ProposalTarget
from awsdet.models.roi_extractors.roi_align import PyramidROIAlign
from awsdet.models.bbox_heads.bbox_head import BBoxHead

In [3]:
# Root directory of the COCO data and the split to load.
dataset_dir = '/workspace/shared_workspace/data/coco/coco/'
subset = 'train'
# Build the dataset object for that split with the train flag switched on.
coco_dataset = CocoDataset(dataset_dir, subset, train=True)

loading annotations into memory...
Done (t=13.15s)
creating index...
index created!


In [4]:
# Batch size handed to the data loader (presumably per worker, going by the name).
local_batch_size=2
# Only the first element of build_dataloader's return value is used; it is
# wrapped in an iterator so batches can be pulled with next().
# NOTE(review): presumably that element is a tf.data.Dataset — confirm.
coco_tdf = iter(build_dataloader(coco_dataset, local_batch_size)[0])

In [5]:
# Instantiate each stage of the detector as a standalone object so the forward
# pass can be stepped through one cell at a time below.
backbone = keras_backbone.KerasBackbone('ResNet50V1')
neck = fpn.FPN()
rpn_head = RPNHead()
bbox_target = ProposalTarget()
bbox_roi_extractor = PyramidROIAlign(pool_shape=[7, 7], pool_type='avg')
# NOTE(review): 81 is presumably the class count (80 COCO categories plus
# background) and (7, 7) the pooled size matching pool_shape above — confirm
# against BBoxHead's signature.
bbox_head = BBoxHead(81, (7, 7))

In [6]:
# Pull one batch from the loader iterator and unpack its four components.
imgs, img_meta, gt_bboxes, gt_labels = next(coco_tdf)

Starting new loop for GPU: 0


In [7]:
# Run the backbone in training mode; its four outputs are named C2..C5 here.
C2, C3, C4, C5 = backbone(imgs, training=True)

In [8]:
# Feed the four backbone outputs through the FPN neck, which returns five
# outputs (named P2..P6 here).
P2, P3, P4, P5, P6 = neck([C2, C3, C4, C5], training=True)

In [9]:
# The RPN head is given all five pyramid outputs, while the RoI extractor
# further down only receives P2..P5 (P6 is left out).
rpn_feature_maps = [P2, P3, P4, P5, P6]
rcnn_feature_maps = [P2, P3, P4, P5]

In [10]:
# RPN head forward pass in training mode; the three outputs are unpacked as
# class logits, probabilities and box deltas.
rpn_class_logits, rpn_probs, rpn_deltas = rpn_head(rpn_feature_maps, training=True)

In [11]:
# Turn the RPN probabilities and deltas into region proposals, using the
# image metadata (presumably one entry per image, going by the name).
proposals_list = rpn_head.get_proposals(
                    rpn_probs, rpn_deltas, img_meta, training=True)

In [12]:
# Build the second-stage training targets from the proposals and the ground
# truth. gt_assignment is returned but not used by any later cell shown here.
rois_list, gt_assignment, rcnn_target_matchs, \
    rcnn_target_deltas, inside_weights, outside_weights = \
    bbox_target.build_targets(proposals_list, gt_bboxes, gt_labels, img_meta)

In [13]:
# Run the RoI extractor on the selected RoIs and the P2..P5 feature maps.
bbox_pooled_regions_list = bbox_roi_extractor(
            (rois_list, rcnn_feature_maps, img_meta), training=True)

In [14]:
# Box head forward pass on the pooled regions; outputs are unpacked as class
# logits, probabilities and box deltas.
rcnn_class_logits, rcnn_probs, rcnn_deltas = \
                bbox_head(bbox_pooled_regions_list, training=True)

In [15]:
# Bundle everything rpn_head.loss is called with in the next cell.
rpn_inputs = (rpn_class_logits, rpn_deltas, gt_bboxes, gt_labels, img_meta)

In [16]:
# First-stage losses: one classification term and one box term.
rpn_class_loss, rpn_bbox_loss = rpn_head.loss(rpn_inputs)

In [17]:
# Bundle the box-head outputs with the targets and weights produced by
# build_targets; this tuple is what bbox_head.loss is called with next.
rcnn_inputs = (rcnn_class_logits, rcnn_deltas, rcnn_target_matchs,
                rcnn_target_deltas, inside_weights, outside_weights)

In [18]:
# Second-stage losses: one classification term and one box term.
rcnn_class_loss, rcnn_bbox_loss = bbox_head.loss(rcnn_inputs)

In [19]:
# Collect the four loss terms under descriptive keys.
losses_dict = {
                'rpn_class_loss': rpn_class_loss,
                'rpn_bbox_loss': rpn_bbox_loss,
                'rcnn_class_loss': rcnn_class_loss,
                'rcnn_bbox_loss': rcnn_bbox_loss
            }

In [20]:
# Display the loss values computed for this single batch.
losses_dict

{'rpn_class_loss': <tf.Tensor: shape=(), dtype=float32, numpy=1.2592322>,
 'rpn_bbox_loss': <tf.Tensor: shape=(), dtype=float32, numpy=2.4125695>,
 'rcnn_class_loss': <tf.Tensor: shape=(), dtype=float32, numpy=9.188705>,
 'rcnn_bbox_loss': <tf.Tensor: shape=(), dtype=float32, numpy=0.018615035>}

In [21]:
# now build as a model to train

class FRCNN(tf.keras.Model):
    """Two-stage detector wired together from the awsdet building blocks.

    ``call`` chains backbone, FPN neck, RPN head, proposal-target assignment,
    RoI align and the box head, and returns a single training loss rather
    than predictions. Every sub-module is invoked with ``training=True``.
    """

    def __init__(self):
        """Create the sub-modules with fixed settings.

        Uses a ResNet50V1 backbone, 7x7 average RoI align and
        ``BBoxHead(81, (7, 7))``.
        """
        super().__init__()
        self.backbone = keras_backbone.KerasBackbone('ResNet50V1')
        self.neck = fpn.FPN()
        self.rpn_head = RPNHead()
        self.bbox_target = ProposalTarget()
        self.bbox_roi_extractor = PyramidROIAlign(pool_shape=[7, 7], pool_type='avg')
        self.bbox_head = BBoxHead(81, (7, 7))

    @tf.function
    def call(self, inputs):
        """Run the full training forward pass and return the total loss.

        Args:
            inputs: 4-tuple ``(imgs, img_meta, gt_bboxes, gt_labels)``.

        Returns:
            Scalar tensor: the RPN and RCNN loss terms, each divided by the
            number of images in ``imgs``, plus the regularisation term.
        """
        imgs, img_meta, gt_bboxes, gt_labels = inputs

        # Feature extraction: backbone followed by the FPN neck.
        C2, C3, C4, C5 = self.backbone(imgs, training=True)
        P2, P3, P4, P5, P6 = self.neck([C2, C3, C4, C5], training=True)
        # The RPN sees all five pyramid outputs; the RoI extractor only P2..P5.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        rcnn_feature_maps = [P2, P3, P4, P5]

        # First stage: region proposals.
        rpn_class_logits, rpn_probs, rpn_deltas = self.rpn_head(
            rpn_feature_maps, training=True)
        proposals_list = self.rpn_head.get_proposals(
            rpn_probs, rpn_deltas, img_meta, training=True)

        # Second stage: targets, pooled features and box-head outputs.
        # gt_assignment is returned by build_targets but not needed here.
        rois_list, gt_assignment, rcnn_target_matchs, \
            rcnn_target_deltas, inside_weights, outside_weights = \
            self.bbox_target.build_targets(
                proposals_list, gt_bboxes, gt_labels, img_meta)
        bbox_pooled_regions_list = self.bbox_roi_extractor(
            (rois_list, rcnn_feature_maps, img_meta), training=True)
        rcnn_class_logits, rcnn_probs, rcnn_deltas = \
            self.bbox_head(bbox_pooled_regions_list, training=True)

        # Per-stage losses.
        rpn_inputs = (rpn_class_logits, rpn_deltas, gt_bboxes, gt_labels, img_meta)
        rpn_class_loss, rpn_bbox_loss = self.rpn_head.loss(rpn_inputs)
        rcnn_inputs = (rcnn_class_logits, rcnn_deltas, rcnn_target_matchs,
                       rcnn_target_deltas, inside_weights, outside_weights)
        rcnn_class_loss, rcnn_bbox_loss = self.bbox_head.loss(rcnn_inputs)

        losses_dict = {
            'rpn_class_loss': rpn_class_loss,
            'rpn_bbox_loss': rpn_bbox_loss,
            'rcnn_class_loss': rcnn_class_loss,
            'rcnn_bbox_loss': rcnn_bbox_loss
        }
        # tf.add_n rejects an empty list, so fall back to a zero term when no
        # sub-layer has registered a regularisation loss.
        reg_losses = self.losses
        losses_dict['reg_loss'] = (
            tf.add_n(reg_losses) if reg_losses else tf.constant(0.0))

        local_batch_size = tf.cast(tf.shape(imgs)[0], tf.float32)
        return self.parse_losses(losses_dict, local_batch_size)

    def parse_losses(self, losses, local_batch_size):
        """Reduce a dict of loss terms to one total loss.

        Args:
            losses: mapping of loss name to a tensor or a list of tensors.
            local_batch_size: divisor applied to every term except those
                whose name contains ``'reg_loss'``.

        Returns:
            Sum of all (mean-reduced) terms whose name contains ``'loss'``.

        Raises:
            TypeError: if a value is neither a tensor nor a list.
        """
        log_vars = OrderedDict()
        for loss_name, loss_value in losses.items():
            if tf.is_tensor(loss_value):
                log_vars[loss_name] = tf.reduce_mean(loss_value)
            elif isinstance(loss_value, list):
                log_vars[loss_name] = tf.add_n(
                    [tf.reduce_mean(_loss) for _loss in loss_value])
            else:
                raise TypeError(
                    '{} is not a tensor or list of tensors'.format(loss_name))

        loss_list = []
        for _key, _value in log_vars.items():
            if 'loss' not in _key:
                continue
            if 'reg_loss' in _key:
                # The regularisation term is added unscaled.
                loss_list.append(_value)
            else:
                # horovod averages (not sums) gradients by default over workers
                loss_list.append(_value / local_batch_size)
        return sum(loss_list)
        

In [28]:
# Fresh model instance plus an SGD optimizer with momentum (Nesterov disabled).
model = FRCNN()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.7, nesterov=False)

In [29]:
@tf.function
def train_step(inputs):
    """Run one optimisation step on a single batch and return its loss.

    The forward pass is recorded on a gradient tape; the gradients with
    respect to the model's trainable variables are then applied with the
    notebook-level ``optimizer``.
    """
    with tf.GradientTape() as grad_tape:
        step_loss = model(inputs)
    # Fetch the variable list once, after the forward pass, and reuse it for
    # both the gradient computation and the update.
    weights = model.trainable_variables
    grads = grad_tape.gradient(step_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    return step_loss

In [None]:
# Train for a fixed number of steps, showing the latest loss on the bar.
num_train_steps = 1000
progressbar = tqdm(range(num_train_steps))

for batch in progressbar:
    step_inputs = next(coco_tdf)
    loss = train_step(step_inputs)
    description = "Loss: {0:.4f}".format(loss)
    progressbar.set_description(description)

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))