Implement giou/iou rpn bounding box regression loss.

generalized-iou · May 18, 2019 · dcfab5f · dcfab5f
1 parent a65373c
commit dcfab5f
Show file tree

Hide file tree

Showing 5 changed files with 69 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -20,22 +20,24 @@ This repository is a fork of [roytseng-tw/Detectron.pytorch](https://github.com/
 
 ### Losses
 
-The loss can be chosen with the `MODEL.LOSS_TYPE` option in the configuration file. The valid options are currently: `[iou|giou|sl1]`. At this moment, we apply bounding box loss only on final bounding box refinement layer, just as in the paper.
+The type of bounding box loss is set in the configuration file as follows. `MODEL.LOSS_TYPE` configures the final bounding box refinement loss, and `MODEL.RPN_LOSS_TYPE` configures the RPN bounding box loss. The valid options are currently: `[iou|giou|smooth_l1]` (the default is `smooth_l1`).
 
 ```
 MODEL:
   LOSS_TYPE: 'iou'
+  RPN_LOSS_TYPE: 'iou'
 ```
 
 Please take a look at `compute_iou` function of [lib/utils/net.py](lib/utils/net.py) for our GIoU and IoU loss implementation in PyTorch.
 
 ### Normalizers
 
-We also implement a normalizer of final bounding box refinement loss. This can be specified with the `MODEL.LOSS_BBOX_WEIGHT` parameter in the configuration file. The default value is `1.0`. We use `MODEL.LOSS_BBOX_WEIGHT` of `10.` for IoU and GIoU experiments.
+We also implement a normalizer (loss weight) for the bounding box refinement losses. It is set with the `MODEL.LOSS_BBOX_WEIGHT` parameter for the final bounding box refinement loss and the `MODEL.RPN_LOSS_BBOX_WEIGHT` parameter for the RPN bounding box loss. Both default to `1.0`. We use a `MODEL.LOSS_BBOX_WEIGHT` of `10.` for the IoU and GIoU experiments in the paper.
 
 ```
 MODEL:
   LOSS_BBOX_WEIGHT: 10.
+  RPN_LOSS_BBOX_WEIGHT: 1.
 ```
 
 ### Network Configurations
@@ -64,7 +66,9 @@ Here are the trained models using the configurations in this repository.
 
  - [Faster RCNN + SmoothL1](https://giou.stanford.edu/rcnn_weights/faster_sl1.pth)
  - [Faster RCNN + IoU](https://giou.stanford.edu/rcnn_weights/faster_iou.pth)
+ - [Faster RCNN + RPN IoU loss + IoU](https://giou.stanford.edu/rcnn_weights/faster_rpn_iou.pth)
  - [Faster RCNN + GIoU](https://giou.stanford.edu/rcnn_weights/faster_giou.pth)
+ - [Faster RCNN + RPN GIoU loss + GIoU](https://giou.stanford.edu/rcnn_weights/faster_rpn_giou.pth)
  - [Mask RCNN + SmoothL1](https://giou.stanford.edu/rcnn_weights/mask_sl1.pth)
  - [Mask RCNN + IoU](https://giou.stanford.edu/rcnn_weights/mask_iou.pth)
  - [Mask RCNN + GIoU](https://giou.stanford.edu/rcnn_weights/mask_giou.pth)
diff --git a/configs/baselines/e2e_faster_rcnn_R-50-FPN_rpn_giou_1x.yaml b/configs/baselines/e2e_faster_rcnn_R-50-FPN_rpn_giou_1x.yaml
@@ -0,0 +1,38 @@
+MODEL:
+  TYPE: generalized_rcnn
+  CONV_BODY: FPN.fpn_ResNet50_conv5_body
+  FASTER_RCNN: True
+  LOSS_TYPE: 'giou'
+  RPN_LOSS_TYPE: 'giou'
+  LOSS_BBOX_WEIGHT: 10.
+  RPN_LOSS_BBOX_WEIGHT: 1.
+RESNETS:
+  IMAGENET_PRETRAINED_WEIGHTS: 'data/pretrained_model/resnet50_caffe.pth'
+NUM_GPUS: 8
+SOLVER:
+  WEIGHT_DECAY: 0.0001
+  LR_POLICY: steps_with_decay
+  BASE_LR: 0.02
+  GAMMA: 0.1
+  MAX_ITER: 90000
+  STEPS: [0, 60000, 80000]
+FPN:
+  FPN_ON: True
+  MULTILEVEL_ROIS: True
+  MULTILEVEL_RPN: True
+FAST_RCNN:
+  ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head
+  ROI_XFORM_METHOD: RoIAlign
+  ROI_XFORM_RESOLUTION: 7
+  ROI_XFORM_SAMPLING_RATIO: 2
+TRAIN:
+  SCALES: (800,)
+  MAX_SIZE: 1333
+  BATCH_SIZE_PER_IM: 512
+  RPN_PRE_NMS_TOP_N: 2000  # Per FPN level
+TEST:
+  SCALE: 800
+  MAX_SIZE: 1333
+  NMS: 0.5
+  RPN_PRE_NMS_TOP_N: 1000  # Per FPN level
+  RPN_POST_NMS_TOP_N: 1000
diff --git a/lib/core/config.py b/lib/core/config.py
@@ -405,9 +405,11 @@
 
 # The type of bounding box regression loss to train on
 __C.MODEL.LOSS_TYPE = 'smooth_l1'
+__C.MODEL.RPN_LOSS_TYPE = 'smooth_l1'
 
 # The weight of the final bounding box refinement loss
 __C.MODEL.LOSS_BBOX_WEIGHT = 1.
+__C.MODEL.RPN_LOSS_BBOX_WEIGHT = 1.
 
 # Use a class agnostic bounding box regressor instead of the default per-class
 # regressor

diff --git a/lib/modeling/FPN.py b/lib/modeling/FPN.py
@@ -452,13 +452,28 @@ def fpn_rpn_losses(**kwargs):
         # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
         # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
         # normalizes by IMS_PER_BATCH
-        loss_rpn_bbox_fpn = net_utils.smooth_l1_loss(
+        sl1_loss_rpn_bbox_fpn = net_utils.smooth_l1_loss(
             kwargs['rpn_bbox_pred_fpn' + slvl], rpn_bbox_targets_fpn,
             rpn_bbox_inside_weights_fpn, rpn_bbox_outside_weights_fpn,
             beta=1/9)
+        iou_loss_rpn_bbox_fpn, giou_loss_rpn_bbox_fpn = net_utils.compute_iou(
+            kwargs['rpn_bbox_pred_fpn' + slvl].permute(0, 2, 3, 1).reshape(-1, 4),
+            rpn_bbox_targets_fpn.permute(0, 2, 3, 1).reshape(-1, 4),
+            rpn_bbox_inside_weights_fpn.permute(0, 2, 3, 1).reshape(-1, 4),
+            rpn_bbox_outside_weights_fpn.permute(0, 2, 3, 1).reshape(-1, 4),
+            batch_size=cfg.TRAIN.IMS_PER_BATCH)
+
+        if cfg.MODEL.RPN_LOSS_TYPE == 'smooth_l1':
+            loss_rpn_bbox_fpn = sl1_loss_rpn_bbox_fpn
+        elif cfg.MODEL.RPN_LOSS_TYPE == 'iou':
+            loss_rpn_bbox_fpn = iou_loss_rpn_bbox_fpn
+        elif cfg.MODEL.RPN_LOSS_TYPE == 'giou':
+            loss_rpn_bbox_fpn = giou_loss_rpn_bbox_fpn
+        else:
+            raise ValueError('Invalid loss type: ' + cfg.MODEL.RPN_LOSS_TYPE)
 
         losses_cls.append(loss_rpn_cls_fpn)
-        losses_bbox.append(loss_rpn_bbox_fpn)
+        losses_bbox.append(loss_rpn_bbox_fpn * cfg.MODEL.RPN_LOSS_BBOX_WEIGHT)
 
     return losses_cls, losses_bbox
 

diff --git a/lib/utils/net.py b/lib/utils/net.py
@@ -36,10 +36,13 @@ def bbox_transform(deltas, weights):
 
 
 def compute_iou(output, target, bbox_inside_weights, bbox_outside_weights,
-                transform_weights=None):
+                transform_weights=None, batch_size=None):
     if transform_weights is None:
         transform_weights = (1., 1., 1., 1.)
 
+    if batch_size is None:
+        batch_size = output.size(0)
+
     x1, y1, x2, y2 = bbox_transform(output, transform_weights)
     x1g, y1g, x2g, y2g = bbox_transform(target, transform_weights)
 
@@ -65,8 +68,8 @@ def compute_iou(output, target, bbox_inside_weights, bbox_outside_weights,
     area_c = (xc2 - xc1) * (yc2 - yc1) + 1e-7
     miouk = iouk - ((area_c - unionk) / area_c)
     iou_weights = bbox_inside_weights.view(-1, 4).mean(1) * bbox_outside_weights.view(-1, 4).mean(1)
-    iouk = ((1 - iouk) * iou_weights).sum(0) / output.size(0)
-    miouk = ((1 - miouk) * iou_weights).sum(0) / output.size(0)
+    iouk = ((1 - iouk) * iou_weights).sum(0) / batch_size
+    miouk = ((1 - miouk) * iou_weights).sum(0) / batch_size
 
     return iouk, miouk