In [1]:
import sys
sys.path.append('..')
# from awsdet.models.backbones import resnet
# from awsdet.models.builder import build_backbone
from awsdet import models
from awsdet import datasets
from awsdet import core
from awsdet import training
from awsdet.training.schedulers import WarmupScheduler
#from configs.mask_rcnn_params import default_config
from configs.mrcnn_config import config
import tensorflow as tf

import horovod.tensorflow as hvd
# Initialise Horovod first so rank information is available below.
hvd.init()

# Pin this process to exactly one GPU on its own host. The index must be the
# *local* rank: the global ``hvd.rank()`` keeps growing across hosts and would
# run past the end of the local GPU list on any multi-node job. On a single
# host both ranks are equal, so behaviour there is unchanged.
devices = tf.config.list_physical_devices('GPU')
if devices:  # leave device visibility untouched on a machine without GPUs
    tf.config.set_visible_devices([devices[hvd.local_rank()]], 'GPU')
logical_devices = tf.config.list_logical_devices('GPU')
# Turn on the graph-level automatic mixed precision rewrite and XLA JIT.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
tf.config.optimizer.set_jit(True)

In [2]:
# Assemble the two-stage detector; every component is described by the
# matching entry of the imported `config` object.
detector_kwargs = {
    'backbone': config.backbone_cfg,
    'neck': config.fpn_cfg,
    'rpn_head': config.rpn_head_cfg,
    'roi_head': config.roi_head_cfg,
    'train_cfg': config.train_config,
}
detector = models.TwoStageDetector(**detector_kwargs)

In [3]:
# Build the training dataset from its config entry, apply .repeat(), and wrap
# the result in a Python iterator so batches can be pulled with next().
train_dataset = datasets.build_dataset(config.train_data)()
train_tdf = iter(train_dataset.repeat())

Instructions for updating:
Use fn_output_signature instead


In [4]:
# Call the detector once in inference mode on the features (element 0) of the
# next batch. Presumably this creates the model variables so that weights can
# be assigned in the following cell -- TODO confirm.
first_batch = next(train_tdf)
result = detector(first_batch[0], training=False)

[2021-01-12 20:19:10.998 ip-172-31-38-50:272 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2021-01-12 20:19:11.367 ip-172-31-38-50:272 INFO profiler_config_parser.py:102] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.


In [6]:
# Copy pretrained values from the backbone checkpoint into the detector's
# first layer.
chkp = tf.compat.v1.train.NewCheckpointReader(config.backbone_checkpoint)
# Checkpoint key for a variable = last two '/'-separated components of its
# name, with the ':<index>' suffix removed.
checkpoint_keys = [
    '/'.join(var.name.split('/')[-2:]).split(':')[0]
    for var in detector.layers[0].weights
]
weights = [chkp.get_tensor(key) for key in checkpoint_keys]
detector.layers[0].set_weights(weights)

In [7]:
# Pull the next (features, labels) batch from the training iterator.
features, labels = next(train_tdf)

# Training-mode forward pass: labels are supplied alongside the features.
# The returned mapping's keys are listed in the next cell.
result = detector(features, labels, training=True)

In [8]:
# Show which entries the training-mode forward pass returned.
result.keys()

dict_keys(['class_outputs', 'box_outputs', 'class_targets', 'box_targets', 'box_rois', 'total_loss_bbox', 'class_loss', 'box_loss', 'mask_outputs', 'mask_targets', 'selected_class_targets', 'mask_loss', 'total_rpn_loss', 'rpn_score_loss', 'rpn_box_loss'])

In [9]:
# Piecewise-constant learning rate: 0.005 until step 100000, 0.0005 until
# step 200000, and 0.00005 afterwards.
piecewise_lr = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[100000, 200000],
    values=[0.005, 0.0005, 0.00005],
)

# Wrap the step schedule in the project's warm-up scheduler.
# NOTE(review): 0.0005 and 1000 look like the warm-up start rate and the
# number of warm-up steps -- confirm against WarmupScheduler's signature.
schedule = WarmupScheduler(piecewise_lr, 0.0005, 1000)

# SGD with momentum, wrapped for dynamic loss scaling.
sgd = tf.keras.optimizers.SGD(learning_rate=schedule, momentum=0.9)
optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(sgd, 'dynamic')

In [10]:
# Smoke test: run a single optimisation step on one batch before starting the
# long training loop.
features, labels = next(train_tdf)
# NOTE(review): fp16=True presumably makes train_step use the optimizer's
# loss scaling -- confirm in detector.train_step.
loss = detector.train_step(features, labels, optimizer, fp16=True)

In [11]:
# Display the loss values returned by the single training step.
loss

{'bbox_loss': <tf.Tensor: shape=(), dtype=float32, numpy=4.6802087>,
 'mask_loss': <tf.Tensor: shape=(), dtype=float32, numpy=1.365921>,
 'rpn_loss': <tf.Tensor: shape=(), dtype=float32, numpy=2.9666545>,
 'l2_regularization_loss': <tf.Tensor: shape=(), dtype=float32, numpy=0.70246184>,
 'total_loss': <tf.Tensor: shape=(), dtype=float32, numpy=9.715246>}

In [12]:
from tqdm.notebook import tqdm
from statistics import mean

In [13]:
# Train for 25000 steps; the progress bar shows the mean total loss over the
# 50 most recent steps.
loss_history = []
pbar = tqdm(range(25000))
for i in pbar:
    features, labels = next(train_tdf)
    loss = detector.train_step(features, labels, optimizer, fp16=True)
    # Keep the total loss as a host-side value so the history stays small.
    step_total = loss['total_loss'].numpy()
    loss_history.append(step_total)
    recent_losses = loss_history[-50:]
    loss_mean = mean(recent_losses)
    pbar.set_description("Loss: {0:.4f}".format(loss_mean))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25000.0), HTML(value='')))




In [2]:
# Build each sub-module of the detector on its own, from the same config
# entries used for the full TwoStageDetector, so the stages can be exercised
# one at a time in the cells below.
backbone = models.build_backbone(config.backbone_cfg)
fpn = models.build_neck(config.fpn_cfg)
# Fresh repeating iterator over the training dataset.
train_tdf = iter(datasets.build_dataset(config.train_data)().repeat())
# NOTE(review): the two lines below are disabled and reference names
# (anchor_cfg, rpn_loss_cfg) that are not defined in any visible cell.
#anchors = core.build_anchors(anchor_cfg)
#rpn_loss = training.build_loss(rpn_loss_cfg)
rpn_head = models.build_head(config.rpn_head_cfg)
roi_head = models.build_head(config.roi_head_cfg)

Instructions for updating:
Use fn_output_signature instead


In [3]:
# Run the backbone once on the 'images' entry of a batch's features.
# Presumably this creates the backbone variables before the checkpoint is
# loaded in the next cell -- TODO confirm.
result = backbone(next(train_tdf)[0]['images'])

[2021-01-12 15:39:14.551 ip-172-31-38-50:16265 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2021-01-12 15:39:14.709 ip-172-31-38-50:16265 INFO profiler_config_parser.py:102] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.


In [4]:
# Copy pretrained values from the backbone checkpoint into the standalone
# backbone.
chkp = tf.compat.v1.train.NewCheckpointReader(config.backbone_checkpoint)
# Checkpoint key for a variable = last two '/'-separated components of its
# name, with the ':<index>' suffix removed.
checkpoint_keys = [
    '/'.join(var.name.split('/')[-2:]).split(':')[0]
    for var in backbone.weights
]
weights = [chkp.get_tensor(key) for key in checkpoint_keys]
backbone.set_weights(weights)

In [5]:
# Walk one batch through the first stages by hand:
# images -> backbone -> FPN -> RPN head.
features, labels = next(train_tdf)
backbone_maps = backbone(features['images'])
fpn_maps = fpn(backbone_maps)
# The RPN head returns three values; only `proposals` is used later, by the
# roi_head call.
scores_outputs, box_outputs, proposals = rpn_head(fpn_maps, features['image_info'], training=True)
# NOTE(review): disabled alternative call below references `pmaps`, which is
# not defined in any visible cell.
#scores_outputs, box_outputs, proposals = rpn_head(pmaps, features['image_info'], gt_boxes=None, gt_labels=None, training=False)

In [6]:
# List the label entries that come with each batch.
labels.keys()

dict_keys(['flat_gt_masks', 'cropped_gt_masks', 'score_targets_2', 'box_targets_2', 'score_targets_3', 'box_targets_3', 'score_targets_4', 'box_targets_4', 'score_targets_5', 'box_targets_5', 'score_targets_6', 'box_targets_6', 'gt_boxes', 'gt_classes'])

In [7]:
# Second stage: feed the FPN maps, image info and the first element of the
# RPN proposals to the ROI head, together with the ground-truth boxes,
# classes and cropped masks from the batch labels.
roi_ground_truth = {
    'gt_bboxes': labels['gt_boxes'],
    'gt_labels': labels['gt_classes'],
    'gt_masks': labels['cropped_gt_masks'],
}
model_outputs = roi_head(fpn_maps, features['image_info'], proposals[0], **roi_ground_truth)

In [9]:
# List the entries returned by the ROI head.
model_outputs.keys()

dict_keys(['class_outputs', 'box_outputs', 'class_targets', 'box_targets', 'box_rois', 'total_loss_bbox', 'class_loss', 'box_loss', 'mask_outputs', 'mask_targets', 'selected_class_targets', 'mask_loss'])

In [10]:
# Inspect the box-branch loss reported by the ROI head.
model_outputs['total_loss_bbox']

<tf.Tensor: shape=(), dtype=float32, numpy=5.3265376>

In [11]:
# Inspect the mask-branch loss reported by the ROI head.
model_outputs['mask_loss']

<tf.Tensor: shape=(), dtype=float32, numpy=0.9917703>

In [43]:
# Pull the next (features, labels) batch from the training iterator.
features, labels = next(train_tdf)

In [44]:
# Run one forward pass and discard the result.
# NOTE(review): `model` is not defined in any cell visible here -- presumably
# it was built in a cell that has since been removed; this breaks
# Restart & Run All until that definition is restored.
_ = model(features)

In [45]:
# Copy pretrained values from the backbone checkpoint into the model's first
# layer.
# NOTE(review): `model` is not defined in any cell visible here -- confirm it
# exists before running.
chkp = tf.compat.v1.train.NewCheckpointReader(config.backbone_checkpoint)
backbone_layer = model.layers[0]
# Read the variable names from the very layer that receives the weights, so
# the list handed to set_weights() is in that layer's own variable order.
# Previously the names came from the separate standalone `backbone` object.
# Checkpoint key = last two '/'-separated components of the variable name,
# with the ':<index>' suffix removed.
checkpoint_keys = [
    '/'.join(var.name.split('/')[-2:]).split(':')[0]
    for var in backbone_layer.weights
]
weights = [chkp.get_tensor(key) for key in checkpoint_keys]
backbone_layer.set_weights(weights)

In [46]:
# Pull the next (features, labels) batch from the training iterator.
features, labels = next(train_tdf)

In [47]:
# Single training-mode forward pass: keep the first returned element as
# `loss` and discard the second.
loss, _ = model.forward_train(features, labels)

In [48]:
@tf.function
def train_step(model, features, labels, opt):
    """Run one optimisation step and return the losses of the forward pass.

    Args:
        model: Object exposing ``forward_train(features, labels)`` and
            ``trainable_variables``.
        features: Batch features passed through to ``model.forward_train``.
        labels: Batch labels passed through to ``model.forward_train``.
        opt: Optimizer exposing ``apply_gradients``.

    Returns:
        The first element returned by ``model.forward_train``, unchanged.
        The training loop in this notebook indexes it with ``[0]``, so it is
        a sequence of loss tensors.
    """
    with tf.GradientTape() as tape:
        loss, _ = model.forward_train(features, labels)
    # Differentiate only the first entry. Handing the whole sequence to
    # tape.gradient makes TensorFlow sum the gradients of every entry, and the
    # values displayed in this notebook show entry 0 already equal to the sum
    # of the remaining entries -- so the update would be about twice as large
    # as intended.
    total_loss = loss[0]
    variables = model.trainable_variables
    gradients = tape.gradient(total_loss, variables)
    opt.apply_gradients(zip(gradients, variables))
    return loss

In [49]:
from tqdm.notebook import tqdm
from statistics import mean

In [52]:
# Train for 25000 steps with the standalone train_step function; the progress
# bar shows the mean of the first loss entry over the 50 most recent steps.
loss_history = []
pbar = tqdm(range(25000))
for i in pbar:
    features, labels = next(train_tdf)
    loss = train_step(model, features, labels, optimizer)
    # train_step returns a sequence of losses; entry 0 is the one tracked.
    tracked_loss = loss[0].numpy()
    loss_history.append(tracked_loss)
    recent_mean = mean(loss_history[-50:])
    pbar.set_description("Loss: {0:.5f}".format(recent_mean))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25000.0), HTML(value='')))




In [35]:
# Display the current `loss` value.
# NOTE(review): this cell's execution count (35) is lower than that of the
# training loop above (52), so the output shown here may come from an earlier
# run rather than from the loop.
loss

(<tf.Tensor: shape=(), dtype=float32, numpy=0.7070601>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.6845154>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.02254466>)