In [1]:
import sys
from collections import OrderedDict
from tqdm.notebook import tqdm
sys.path.append('/workspace/shared_workspace/deep-learning-models/models/vision/detection/')
import tensorflow as tf
import horovod.tensorflow as hvd
hvd.init()

In [2]:
from awsdet.datasets.coco import CocoDataset
from awsdet.datasets.data_generator import DataGenerator
from awsdet.datasets.loader.build_loader import build_dataloader

from awsdet.models.backbones import keras_backbone
from awsdet.models.necks import fpn
from awsdet.models.anchor_heads.rpn_head import RPNHead
from awsdet.core.bbox.bbox_target import ProposalTarget
from awsdet.core.mask.mask_target import MaskTarget
from awsdet.models.roi_extractors.roi_align import PyramidROIAlign
from awsdet.models.bbox_heads.bbox_head import BBoxHead
from awsdet.models.mask_heads.mask_head import MaskHead

In [3]:
# Root directory of the COCO data and the split to load.
# NOTE(review): this is a hard-coded absolute path; the notebook only runs where
# this exact directory exists.
dataset_dir = '/workspace/shared_workspace/data/coco/coco/'
subset = 'train'
# mask=True / train=True are passed through to CocoDataset; presumably they enable
# mask annotations and training-time behaviour — confirm against CocoDataset.
coco_dataset = CocoDataset(dataset_dir, subset, mask=True, train=True)

loading annotations into memory...
Done (t=18.65s)
creating index...
index created!


In [4]:
# Batch size handed to the data loader; parse_losses/train_step later read this
# same notebook-level name to scale the loss.
local_batch_size=2
# Only element [0] of build_dataloader's result is kept as the dataset to iterate.
coco_tdf = build_dataloader(coco_dataset, local_batch_size)[0]

In [5]:
# Stand-alone instances of the detector components, created at notebook level.
# NOTE(review): no later cell visible in this notebook reads these names; the
# Model classes defined further down construct their own copies in __init__.
backbone = keras_backbone.KerasBackbone('ResNet50V1')
neck = fpn.FPN()
rpn_head = RPNHead()
bbox_target = ProposalTarget()
# Two ROI extractors that differ only in pooled size: 7x7 and 14x14.
bbox_roi_extractor = PyramidROIAlign(pool_shape=[7, 7], pool_type='avg')
mask_roi_extractor = PyramidROIAlign(pool_shape=[14, 14], pool_type='avg')

In [6]:
# Pull a single batch from a throw-away iterator for inspection; each batch
# unpacks into five elements.
img, img_metas, gt_boxes, gt_class_ids, gt_masks = next(iter(coco_tdf))

Starting new loop for GPU: 0


In [7]:
# Persistent iterator over the dataset; later cells advance it with next(train_tdf).
train_tdf = iter(coco_tdf)

In [8]:
# Notebook-level flag. Model.call takes its own `training` argument and does not
# read this name.
training=True
class Model(tf.keras.Model):
    """Work-in-progress two-stage detector used to inspect mask-target inputs.

    The forward pass runs backbone -> FPN -> RPN -> proposal targets -> ROI
    pooling -> box head, and returns the intermediate values needed to develop
    the mask-target / mask-loss code. The mask branch and the loss computation
    are not wired in yet; the draft is kept as comments inside `call`.
    """

    def __init__(self):
        super().__init__()
        self.backbone = keras_backbone.KerasBackbone('ResNet50V1')
        self.neck = fpn.FPN()
        self.rpn_head = RPNHead()
        self.bbox_target = ProposalTarget()
        # Box and mask branches pool ROIs at different resolutions (7x7 vs 14x14).
        self.bbox_roi_extractor = PyramidROIAlign(pool_shape=[7, 7], pool_type='avg')
        self.mask_roi_extractor = PyramidROIAlign(pool_shape=[14, 14], pool_type='avg')
        # 81 is presumably the class count (80 COCO categories + background) — confirm.
        self.bbox_head = BBoxHead(81, (7, 7))
        self.mask_head = MaskHead(81)
        self.mask_target = MaskTarget()

    def call(self, img, img_metas, gt_boxes, gt_class_ids, gt_masks, training=True):
        """Run the detector up to the box head.

        Args:
            img: batch of input images, passed to the backbone.
            img_metas: per-image metadata forwarded to the RPN, target builder
                and ROI extractor.
            gt_boxes: ground-truth boxes (only read when `training` is true).
            gt_class_ids: ground-truth class ids (only read when `training` is true).
            gt_masks: ground-truth masks; returned unchanged.
            training: when true, proposals are sampled/labelled by
                `bbox_target.build_targets`; otherwise raw proposals are used.

        Returns:
            Tuple `(rois_list, rcnn_target_matchs, gt_masks, fg_assignments,
            img_metas)`. `rcnn_target_matchs` and `fg_assignments` are `None`
            when `training` is false, because no targets are built in that mode.
        """
        C2, C3, C4, C5 = self.backbone(img, training=training)
        P2, P3, P4, P5, P6 = self.neck((C2, C3, C4, C5), training=training)
        # The RPN consumes all five pyramid levels; the ROI heads skip P6.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        rcnn_feature_maps = [P2, P3, P4, P5]
        rpn_class_logits, rpn_probs, rpn_deltas = self.rpn_head(rpn_feature_maps, training=training)
        proposals_list = self.rpn_head.get_proposals(
            rpn_probs, rpn_deltas, img_metas, training=training)
        if training:
            (rois_list, rcnn_target_matchs, rcnn_target_deltas,
             inside_weights, outside_weights, fg_assignments) = \
                self.bbox_target.build_targets(proposals_list, gt_boxes, gt_class_ids, img_metas)
        else:
            rois_list = proposals_list
            # No targets exist at inference time. Bind the names so the return
            # statement below does not raise UnboundLocalError.
            rcnn_target_matchs = None
            fg_assignments = None
        bbox_pooled_regions_list = self.bbox_roi_extractor(
            (rois_list, rcnn_feature_maps, img_metas), training=training)
        # Box-head outputs are not returned yet; they feed the loss code drafted below.
        rcnn_class_logits, rcnn_probs, rcnn_deltas = \
            self.bbox_head(bbox_pooled_regions_list, training=training)

        # TODO: mask branch and losses, kept from the original draft (disabled):
        #
        # mask_pooled_regions_list = self.mask_roi_extractor(
        #     (rois_list, rcnn_feature_maps, img_metas), training=training)
        #
        # rcnn_masks = self.mask_head(mask_pooled_regions_list)
        #
        # rpn_inputs = (rpn_class_logits, rpn_deltas, gt_boxes, gt_class_ids, img_metas)
        # rpn_class_loss, rpn_bbox_loss = self.rpn_head.loss(rpn_inputs)
        # rcnn_inputs = (rcnn_class_logits, rcnn_deltas, rcnn_target_matchs,
        #         rcnn_target_deltas, inside_weights, outside_weights)
        # #rcnn_class_loss, rcnn_bbox_loss = self.bbox_head.loss(rcnn_inputs)
        # pred_masks, target_masks = self.mask_target(rois_list, rcnn_masks, gt_masks,
        #                                            fg_assignments, rcnn_target_matchs,
        #                                            img_metas)
        # losses_dict = {
        #         'rpn_class_loss': rpn_class_loss,
        #         'rpn_bbox_loss': rpn_bbox_loss,
        #         'rcnn_class_loss': rcnn_class_loss,
        #         'rcnn_bbox_loss': rcnn_bbox_loss
        #     }
        return rois_list, rcnn_target_matchs, gt_masks, fg_assignments, img_metas

In [9]:
# Instantiate whichever Model class is currently defined in the kernel.
model = Model()

In [10]:
# One training-mode forward pass on the next batch. This rebinds the notebook-level
# gt_masks and img_metas that the earlier sample-batch cell had set.
rois_list, rcnn_target_matchs, gt_masks, fg_assignments, img_metas = model(*next(train_tdf), training=True)

Starting new loop for GPU: 0


In [29]:
# Image index for each of 2 x 512 ROIs: 512 zeros followed by 512 ones.
tf.repeat(tf.range(2), repeats=512)

<tf.Tensor: shape=(1024,), dtype=int32, numpy=array([0, 0, 0, ..., 1, 1, 1], dtype=int32)>

In [34]:
@tf.function
def batch_count(batch_size, repeats):
    """Return the image index of every ROI in a flattened batch.

    Produces `[0]*repeats + [1]*repeats + ... + [batch_size-1]*repeats` as a
    1-D int32 tensor.

    Implemented with `tf.range` + `tf.repeat` rather than a Python loop, so the
    graph holds a single op chain regardless of batch size, `batch_size` may be
    a tensor as well as a Python int, and `batch_size == 0` yields an empty
    tensor instead of failing on an empty `tf.concat`.

    Args:
        batch_size: number of images in the batch.
        repeats: number of ROIs per image.

    Returns:
        1-D int32 tensor of length `batch_size * repeats`.
    """
    return tf.repeat(tf.range(batch_size), repeats)

In [40]:
# Image index of every ROI whose target label is non-zero (2 images x 512 ROIs,
# hard-coded). tf.where returns indices of shape [K, 1]; squeeze only axis 1 so a
# single match still yields a length-1 vector instead of a scalar.
tf.gather(batch_count(2, 512), tf.squeeze(tf.where(rcnn_target_matchs!=0), axis=1))

<tf.Tensor: shape=(16,), dtype=int32, numpy=array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)>

In [15]:
# Flat positions of the ROIs with a non-zero target label. Squeezing only axis 1
# keeps a 1-D result even when exactly one ROI matches.
tf.squeeze(tf.where(rcnn_target_matchs!=0), axis=1)

<tf.Tensor: shape=(16,), dtype=int64, numpy=
array([  0,   1,   2,   3, 512, 513, 514, 515, 516, 517, 518, 519, 520,
       521, 522, 523])>

In [16]:
# Boxes of the ROIs with a non-zero target label, taken from the per-image ROI
# lists concatenated along axis 0. axis=1 on the squeeze keeps the index vector
# 1-D even for a single match, so the result always has shape [K, 4].
tf.gather(tf.concat(rois_list, axis=0), tf.squeeze(tf.where(rcnn_target_matchs!=0), axis=1))

<tf.Tensor: shape=(16, 4), dtype=float32, numpy=
array([[221.06992 , 456.7647  , 345.9264  , 549.0271  ],
       [224.29211 , 474.85352 , 339.93106 , 527.5802  ],
       [237.96452 , 793.42017 , 310.80106 , 818.81604 ],
       [715.0024  ,   5.963801, 770.4071  ,  28.432322],
       [201.72339 , 579.794   , 569.4519  , 806.60846 ],
       [246.35315 , 589.04987 , 517.69244 , 812.0991  ],
       [155.88126 , 616.3534  , 591.2737  , 789.6814  ],
       [173.56662 , 613.30304 , 549.78296 , 833.6649  ],
       [253.29303 , 638.742   , 555.3841  , 796.4214  ],
       [243.05148 , 627.30914 , 598.4955  , 765.806   ],
       [165.04272 , 597.0479  , 596.3755  , 752.92847 ],
       [314.59863 , 611.3273  , 683.7969  , 830.27606 ],
       [158.93147 , 600.4821  , 480.85242 , 802.0304  ],
       [208.2381  , 621.34973 , 814.2215  , 769.47485 ],
       [132.95966 , 661.50824 , 701.4819  , 774.52545 ],
       [152.20798 , 617.7173  , 627.2467  , 802.4598  ]], dtype=float32)>

In [35]:
# (image, roi) index pairs of the non-zero target labels, after viewing the flat
# label vector as 2 images x 512 ROIs.
tf.where(tf.not_equal(tf.reshape(rcnn_target_matchs, [2, 512]), 0))

<tf.Tensor: shape=(5, 2), dtype=int64, numpy=
array([[0, 0],
       [1, 0],
       [1, 1],
       [1, 2],
       [1, 3]])>

In [32]:
# Pooled mask features of the ROIs with a non-zero target label.
# NOTE(review): no cell visible in this notebook defines `mask_pooled_regions_list`
# at notebook level (it only appears in the disabled draft inside Model.call), so
# this cell fails on a fresh kernel — confirm where it should come from.
# axis=1 on the squeeze keeps the index vector 1-D even for a single match.
tf.gather(tf.concat(mask_pooled_regions_list, axis=0),
          tf.squeeze(tf.where(rcnn_target_matchs!=0), axis=1))

<tf.Tensor: shape=(5, 14, 14, 256), dtype=float32, numpy=
array([[[[-1.20300312e+01,  2.58238935e+00, -6.35264635e+00, ...,
           3.42369032e+00,  8.05296707e+00, -9.55788612e-01],
         [-1.21715355e+01,  1.67522812e+00, -5.89326096e+00, ...,
           2.65098667e+00,  8.39896774e+00, -1.28212869e-01],
         [-1.34285612e+01,  1.07657516e+00, -6.58809137e+00, ...,
           2.73185372e+00,  8.24378395e+00,  5.99050462e-01],
         ...,
         [-1.19593067e+01,  2.56814814e+00, -7.01596260e+00, ...,
           2.34046650e+00,  8.26298523e+00, -1.06420197e-01],
         [-1.03611870e+01,  2.82687163e+00, -6.25982761e+00, ...,
           2.74056482e+00,  8.06267834e+00, -1.78712666e-01],
         [-1.01709194e+01,  2.56229544e+00, -5.93952036e+00, ...,
           2.64149570e+00,  8.19792557e+00, -6.38368487e-01]],

        [[-1.04787083e+01,  2.95771599e+00, -6.02308846e+00, ...,
           2.85633206e+00,  6.98382330e+00, -3.49115133e-01],
         [-7.99249935e+00,  2.

In [23]:
# Indices (shape [K, 1]) of the non-zero entries in the flat target-label vector.
tf.where(tf.not_equal(rcnn_target_matchs, 0))

<tf.Tensor: shape=(5, 1), dtype=int64, numpy=
array([[  0],
       [512],
       [513],
       [514],
       [515]])>

In [74]:
@tf.function
def mask_loss(target_masks, pred_masks, rcnn_target_matchs, img_metas):
    batch_size = tf.shape(img_metas)[0]
    splits = tf.cast(tf.repeat(tf.shape(rcnn_target_matchs)[0]/batch_size, batch_size), tf.int32)
    mask_indices = tf.where(tf.stack(tf.split(rcnn_target_matchs, splits))!=0)
    targets = tf.gather_nd(target_masks, mask_indices)
    predictions = tf.gather_nd(pred_masks, mask_indices)
    targets = tf.reshape(targets, [-1])
    predictions = tf.reshape(predictions, [-1])
    predictions = tf.transpose(tf.stack([1-predictions, predictions]))
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False) \
                    (targets, predictions)
    return batch_size

In [78]:
%time
mask_loss(target_masks, pred_masks, rcnn_target_matchs, img_metas)

CPU times: user 16 µs, sys: 2 µs, total: 18 µs
Wall time: 37 µs


<tf.Tensor: shape=(), dtype=int32, numpy=2>

In [18]:
# NOTE(review): this unpacks four values named like raw inputs, but neither
# Model.call defined in this notebook returns such a 4-tuple (one returns five
# values and requires gt_masks, the other returns a dict of losses). The cell
# appears to depend on a Model variant that is no longer present — confirm or remove.
proposals_list, gt_boxes, gt_class_ids, img_metas = model(*next(train_tdf))

In [47]:
import numpy as np
from awsdet.models.utils.misc import calc_pad_shapes, trim_zeros
from awsdet.core.bbox import geometry, transforms

In [53]:
# Experiment on a single image of the batch: match proposals (plus jittered
# ground-truth boxes) against the ground-truth boxes.
idx = 1
# trim_zeros returns a pair; only the first element (the kept rows) is used here.
proposals, _ = trim_zeros(proposals_list[idx])
# For the ground truth, the second element is reused below as a boolean mask.
gt_box, non_zeros = trim_zeros(gt_boxes[idx])
gt_box = tf.cast(gt_box, proposals.dtype)
gt_labels = tf.boolean_mask(gt_class_ids[idx], non_zeros)
# Offset added to every ground-truth coordinate: truncated normal with mean 5.0
# and stddev 0.1, i.e. a shift of about 5 units with small jitter.
# NOTE(review): no random seed is set, so this cell is not reproducible run to run.
noise_mean = 5.0
noisy_gt_boxes = tf.add(gt_box, 
                    tf.random.truncated_normal(tf.shape(gt_box), 
                                               noise_mean, 0.1, dtype=proposals.dtype))
# Append the shifted ground-truth boxes to the proposals as extra candidates.
proposals_gt = tf.concat([proposals, noisy_gt_boxes], axis=0)
# Pairwise overlaps, candidates along axis 0 and ground-truth boxes along axis 1
# (presumably IoU, going by the variable name — confirm in geometry.compute_overlaps).
iou = geometry.compute_overlaps(proposals_gt, gt_box)
# Best overlap per candidate, and the index of the ground-truth box achieving it.
max_overlaps = tf.reduce_max(iou, axis=1)
gt_assignment = tf.argmax(iou, axis=1)

In [54]:
# Distinct ground-truth indices that at least one candidate was assigned to.
np.unique(gt_assignment.numpy())

array([0, 1, 2, 3])

In [None]:
# Notebook-level flag. Model.call takes its own `training` argument and does not
# read this name.
training=True
class Model(tf.keras.Model):
    """Two-stage detector (backbone, FPN, RPN, box head) that returns its training losses.

    Running this cell rebinds the name `Model`, replacing any class of the same
    name defined earlier in the kernel.
    """

    def __init__(self):
        super().__init__()
        self.backbone = keras_backbone.KerasBackbone('ResNet50V1')
        self.neck = fpn.FPN()
        self.rpn_head = RPNHead()
        self.bbox_target = ProposalTarget()
        # The pooled ROI size and the box head's expected input size must agree (14x14).
        self.bbox_roi_extractor = PyramidROIAlign(pool_shape=[14, 14], pool_type='avg')
        # 81 is presumably the class count (80 COCO categories + background) — confirm.
        self.bbox_head = BBoxHead(81, (14, 14))

    def call(self, img, img_metas, gt_boxes, gt_class_ids, training=True):
        """Run a training forward pass and return the RPN and RCNN losses.

        Args:
            img: batch of input images, passed to the backbone.
            img_metas: per-image metadata forwarded to the RPN, target builder,
                ROI extractor and RPN loss.
            gt_boxes: ground-truth boxes.
            gt_class_ids: ground-truth class ids.
            training: must be true; the losses need the targets that are only
                built in training mode.

        Returns:
            Dict with keys 'rpn_class_loss', 'rpn_bbox_loss', 'rcnn_class_loss'
            and 'rcnn_bbox_loss'.

        Raises:
            NotImplementedError: if `training` is false. Previously this path
                failed later with UnboundLocalError, because the RCNN targets
                were never assigned.
        """
        C2, C3, C4, C5 = self.backbone(img, training=training)
        P2, P3, P4, P5, P6 = self.neck((C2, C3, C4, C5), training=training)
        # The RPN consumes all five pyramid levels; the ROI head skips P6.
        rpn_feature_maps = [P2, P3, P4, P5, P6]
        rcnn_feature_maps = [P2, P3, P4, P5]
        rpn_class_logits, rpn_probs, rpn_deltas = self.rpn_head(rpn_feature_maps, training=training)
        proposals_list = self.rpn_head.get_proposals(
            rpn_probs, rpn_deltas, img_metas, training=training)
        if training:
            rois_list, rcnn_target_matchs, rcnn_target_deltas, inside_weights, outside_weights = \
                self.bbox_target.build_targets(proposals_list, gt_boxes, gt_class_ids, img_metas)
        else:
            # Everything below computes losses from the sampled targets, which
            # only exist in training mode.
            raise NotImplementedError(
                'Model.call only supports training=True: it returns losses, '
                'which require targets built from ground truth.')
        pooled_regions_list = self.bbox_roi_extractor(
            (rois_list, rcnn_feature_maps, img_metas), training=training)
        rcnn_class_logits, rcnn_probs, rcnn_deltas = \
            self.bbox_head(pooled_regions_list, training=training)
        rpn_inputs = (rpn_class_logits, rpn_deltas, gt_boxes, gt_class_ids, img_metas)
        rpn_class_loss, rpn_bbox_loss = self.rpn_head.loss(rpn_inputs)
        rcnn_inputs = (rcnn_class_logits, rcnn_deltas, rcnn_target_matchs,
                       rcnn_target_deltas, inside_weights, outside_weights)
        rcnn_class_loss, rcnn_bbox_loss = self.bbox_head.loss(rcnn_inputs)
        losses_dict = {
            'rpn_class_loss': rpn_class_loss,
            'rpn_bbox_loss': rpn_bbox_loss,
            'rcnn_class_loss': rcnn_class_loss,
            'rcnn_bbox_loss': rcnn_bbox_loss
        }
        return losses_dict

In [None]:
def parse_losses(losses, local_batch_size):
    """Reduce named loss terms to scalars and combine them into one total.

    Args:
        losses: mapping from loss name to a tensor or a list of tensors.
        local_batch_size: divisor applied to every term whose name contains
            'loss' but not 'reg_loss'.

    Returns:
        `(total_loss, log_vars)`, where `log_vars` is an OrderedDict holding the
        reduced value of every input term plus the total under the key 'loss'.

    Raises:
        TypeError: if a value is neither a tensor nor a list.
    """
    log_vars = OrderedDict()
    for name, value in losses.items():
        if tf.is_tensor(value):
            reduced = tf.reduce_mean(value)
        elif isinstance(value, list):
            # A list contributes the sum of the means of its members.
            reduced = tf.add_n([tf.reduce_mean(part) for part in value])
        else:
            raise TypeError(
                '{} is not a tensor or list of tensors'.format(name))
        log_vars[name] = reduced

    # horovod averages (not sums) gradients by default over workers, so the
    # non-regularisation terms are divided by the local batch size here.
    contributions = [
        term if 'reg_loss' in key else term / local_batch_size
        for key, term in log_vars.items()
        if 'loss' in key
    ]
    total_loss = sum(contributions)
    log_vars['loss'] = total_loss
    return total_loss, log_vars



In [None]:
# Fresh model and data iterator for the training run.
model = Model()
train_tdf = iter(coco_tdf)
# One forward pass on the first batch before training; presumably this is here to
# create the model's variables — confirm. With the loss-returning Model defined
# above, `loss` holds the dict of loss terms rather than a scalar.
loss = model(*next(train_tdf))
# Plain SGD with momentum; the hyperparameters are hard-coded for this experiment.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.7, nesterov=False)

In [None]:
@tf.function(experimental_relax_shapes=True)
def train_step(img, img_metas, gt_boxes, gt_class_ids):
    """Run one optimisation step on a single batch and return the total loss.

    Uses the notebook-level `model`, `optimizer`, `local_batch_size` and
    `parse_losses`.
    """
    with tf.GradientTape() as tape:
        loss_terms = model(img, img_metas, gt_boxes, gt_class_ids)
        # Add the layers' regularisation losses as one extra named term.
        loss_terms['reg_loss'] = tf.add_n(model.losses)
        total_loss, _ = parse_losses(loss_terms, local_batch_size)
    trainable = model.trainable_variables
    grads = tape.gradient(total_loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))
    return total_loss

In [None]:
# Progress bar over a fixed budget of 10000 optimisation steps (hard-coded).
progressbar = tqdm(range(10000))

for i in progressbar:
    # Each step pulls the next batch from the shared iterator and applies one update.
    loss = train_step(*next(train_tdf))
    # Show the most recent total loss in the bar's description.
    progressbar.set_description("Loss: {0:.3f}".format(loss))