flip W<->H for Faster R-CNN train code

chainer · Jun 1, 2017 · 083aa13 · 083aa13
1 parent 5271e99
commit 083aa13
Show file tree

Hide file tree

Showing 4 changed files with 9 additions and 9 deletions.
diff --git a/chainercv/links/model/faster_rcnn/faster_rcnn_train_chain.py b/chainercv/links/model/faster_rcnn/faster_rcnn_train_chain.py
@@ -101,7 +101,7 @@ def __call__(self, imgs, bboxes, labels, scale):
             raise ValueError('Currently only batch size 1 is supported.')
 
         _, _, H, W = imgs.shape
-        img_size = (W, H)
+        img_size = (H, W)
 
         features = self.faster_rcnn.extractor(imgs, test=not self.train)
         rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(

diff --git a/chainercv/links/model/faster_rcnn/utils/anchor_target_creator.py b/chainercv/links/model/faster_rcnn/utils/anchor_target_creator.py
@@ -57,7 +57,7 @@ def __call__(self, bbox, anchor, img_size):
                 :math:`(R, 4)`.
             anchor (array): Coordinates of anchors. Its shape is
                 :math:`(S, 4)`.
-            img_size (tuple of ints): A tuple :obj:`W, H`, which
+            img_size (tuple of ints): A tuple :obj:`H, W`, which
                 is a tuple of height and width of an image.
 
         Returns:
@@ -74,10 +74,10 @@ def __call__(self, bbox, anchor, img_size):
         bbox = cuda.to_cpu(bbox)
         anchor = cuda.to_cpu(anchor)
 
-        img_W, img_H = img_size
+        img_H, img_W = img_size
 
         n_anchor = len(anchor)
-        inside_index = _get_inside_index(anchor, img_W, img_H)
+        inside_index = _get_inside_index(anchor, img_H, img_W)
         anchor = anchor[inside_index]
         argmax_ious, label = self._create_label(
             inside_index, anchor, bbox)
@@ -156,7 +156,7 @@ def _unmap(data, count, index, fill=0):
     return ret
 
 
-def _get_inside_index(anchor, W, H):
+def _get_inside_index(anchor, H, W):
     # Calc indices of anchors which are located completely inside the image
     # whose size is specified.
     xp = cuda.get_array_module(anchor)

diff --git a/chainercv/utils/testing/generate_random_bbox.py b/chainercv/utils/testing/generate_random_bbox.py
@@ -6,7 +6,7 @@ def generate_random_bbox(n, img_size, min_length, max_length):
 
     Args:
         n (int): The number of bounding boxes.
-        img_size (tuple): A tuple of length 2. The width and the height
+        img_size (tuple): A tuple of length 2. The height and the width
             of the image on which bounding boxes are located.
         min_length (float): The minimum length of edges of bounding boxes.
         max_length (float): The maximum length of edges of bounding boxes.
@@ -22,7 +22,7 @@ def generate_random_bbox(n, img_size, min_length, max_length):
         :math:`min\_length \\leq y_{max} - y_{min} < max\_length`.
 
     """
-    W, H = img_size
+    H, W = img_size
     x_min = np.random.uniform(0, W - max_length, size=(n,))
     y_min = np.random.uniform(0, H - max_length, size=(n,))
     x_max = x_min + np.random.uniform(min_length, max_length, size=(n,))

diff --git a/examples/faster_rcnn/train.py b/examples/faster_rcnn/train.py
@@ -57,12 +57,12 @@ def transform(in_data):
         img = faster_rcnn.prepare(img)
         _, o_H, o_W = img.shape
         scale = o_H / H
-        bbox = transforms.resize_bbox(bbox, (W, H), (o_W, o_H))
+        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
 
         # horizontally flip
         img, params = transforms.random_flip(
             img, x_random=True, return_param=True)
-        bbox = transforms.flip_bbox(bbox, (o_W, o_H), params['x_flip'])
+        bbox = transforms.flip_bbox(bbox, (o_H, o_W), params['x_flip'])
 
         return img, bbox, label, scale
     train_data = TransformDataset(train_data, transform)