add YOLOv2Tiny

chainer · Oct 18, 2018 · a2e8cb7 · a2e8cb7
1 parent 5f0c195
commit a2e8cb7
Show file tree

Hide file tree

Showing 3 changed files with 197 additions and 74 deletions.
diff --git a/chainercv/links/__init__.py b/chainercv/links/__init__.py
@@ -20,4 +20,5 @@
 from chainercv.links.model.ssd import SSD512  # NOQA
 from chainercv.links.model.vgg import VGG16  # NOQA
 from chainercv.links.model.yolo import YOLOv2  # NOQA
+from chainercv.links.model.yolo import YOLOv2Tiny  # NOQA
 from chainercv.links.model.yolo import YOLOv3  # NOQA
diff --git a/chainercv/links/model/yolo/__init__.py b/chainercv/links/model/yolo/__init__.py
@@ -1,6 +1,8 @@
 from chainercv.links.model.yolo.yolo_base import YOLOBase  # NOQA
+from chainercv.links.model.yolo.yolo_v2 import DarknetExtractor  # NOQA
 from chainercv.links.model.yolo.yolo_v2 import Darknet19Extractor  # NOQA
 from chainercv.links.model.yolo.yolo_v2 import YOLOv2  # NOQA
+from chainercv.links.model.yolo.yolo_v2 import YOLOv2Tiny  # NOQA
 from chainercv.links.model.yolo.yolo_v3 import Darknet53Extractor  # NOQA
 from chainercv.links.model.yolo.yolo_v3 import ResidualBlock  # NOQA
 from chainercv.links.model.yolo.yolo_v3 import YOLOv3  # NOQA
diff --git a/chainercv/links/model/yolo/yolo_v2.py b/chainercv/links/model/yolo/yolo_v2.py
@@ -29,60 +29,7 @@ def _reorg(x):
     return F.reshape(x, (n, c * 4, h // 2, w // 2))
 
 
-class Darknet19Extractor(chainer.ChainList):
-    """A Darknet19 based feature extractor for YOLOv2.
-
-    This is a feature extractor for :class:`~chainercv.links.model.yolo.YOLOv2`
-    """
-
-    insize = 416
-    grid = 13
-
-    def __init__(self):
-        super(Darknet19Extractor, self).__init__()
-
-        # Darknet19
-        for k, n_conv in enumerate((1, 1, 3, 3, 5, 5)):
-            for i in range(n_conv):
-                if i % 2 == 0:
-                    self.append(
-                        Conv2DBNActiv(32 << k, 3, pad=1, activ=_leaky_relu))
-                else:
-                    self.append(
-                        Conv2DBNActiv(32 << (k - 1), 1, activ=_leaky_relu))
-
-        # additional links
-        self.append(Conv2DBNActiv(1024, 3, pad=1, activ=_leaky_relu))
-        self.append(Conv2DBNActiv(1024, 3, pad=1, activ=_leaky_relu))
-        self.append(Conv2DBNActiv(64, 1, activ=_leaky_relu))
-        self.append(Conv2DBNActiv(1024, 3, pad=1, activ=_leaky_relu))
-
-    def __call__(self, x):
-        """Compute a feature map from a batch of images.
-
-        Args:
-            x (ndarray): An array holding a batch of images.
-                The images should be resized to :math:`416\\times 416`.
-
-        Returns:
-            Variable:
-        """
-
-        h = x
-        for i, link in enumerate(self):
-            h = link(h)
-            if i == 12:
-                tmp = h
-            elif i == 19:
-                h, tmp = tmp, h
-            elif i == 20:
-                h = F.concat((_reorg(h), tmp))
-            if i in {0, 1, 4, 7, 12}:
-                h = _maxpool(h)
-        return h
-
-
-class YOLOv2(YOLOBase):
+class YOLOv2Base(YOLOBase):
     """YOLOv2.
 
     This is a model of YOLOv2 [#]_.
@@ -113,24 +60,8 @@ class YOLOv2(YOLOBase):
 
     """
 
-    _models = {
-        'voc0712': {
-            'param': {'n_fg_class': 20},
-            'url': 'https://chainercv-models.preferred.jp/'
-            'yolo_v2_voc0712_converted_2018_05_03.npz',
-            'cv2': True
-        },
-    }
-
-    _anchors = (
-        (1.73145, 1.3221),
-        (4.00944, 3.19275),
-        (8.09892, 5.05587),
-        (4.84053, 9.47112),
-        (10.0071, 11.2364))
-
     def __init__(self, n_fg_class=None, pretrained_model=None):
-        super(YOLOv2, self).__init__()
+        super(YOLOv2Base, self).__init__()
 
         param, path = utils.prepare_pretrained_model(
             {'n_fg_class': n_fg_class}, pretrained_model, self._models)
@@ -139,7 +70,7 @@ def __init__(self, n_fg_class=None, pretrained_model=None):
         self.use_preset('visualize')
 
         with self.init_scope():
-            self.extractor = Darknet19Extractor()
+            self.extractor = self._extractor()
             self.subnet = Convolution2D(
                 len(self._anchors) * (4 + 1 + self.n_fg_class), 1)
 
@@ -153,11 +84,11 @@ def __init__(self, n_fg_class=None, pretrained_model=None):
             chainer.serializers.load_npz(path, self, strict=False)
 
     def to_cpu(self):
-        super(YOLOv2, self).to_cpu()
+        super(YOLOv2Base, self).to_cpu()
         self._default_bbox = cuda.to_cpu(self._default_bbox)
 
     def to_gpu(self, device=None):
-        super(YOLOv2, self).to_gpu(device)
+        super(YOLOv2Base, self).to_gpu(device)
         self._default_bbox = cuda.to_gpu(self._default_bbox, device)
 
     def __call__(self, x):
@@ -233,3 +164,192 @@ def _decode(self, loc, obj, conf):
         score = self.xp.hstack(score).astype(np.float32)
 
         return bbox, label, score
+
+
+class Darknet19Extractor(chainer.ChainList):
+    """A Darknet19 based feature extractor for YOLOv2.
+
+    This is a feature extractor for :class:`~chainercv.links.model.yolo.YOLOv2`
+    """
+
+    insize = 416
+    grid = 13
+
+    def __init__(self):
+        super(Darknet19Extractor, self).__init__()
+
+        # Darknet19
+        for k, n_conv in enumerate((1, 1, 3, 3, 5, 5)):
+            for i in range(n_conv):
+                if i % 2 == 0:
+                    self.append(
+                        Conv2DBNActiv(32 << k, 3, pad=1, activ=_leaky_relu))
+                else:
+                    self.append(
+                        Conv2DBNActiv(32 << (k - 1), 1, activ=_leaky_relu))
+
+        # additional links
+        self.append(Conv2DBNActiv(1024, 3, pad=1, activ=_leaky_relu))
+        self.append(Conv2DBNActiv(1024, 3, pad=1, activ=_leaky_relu))
+        self.append(Conv2DBNActiv(64, 1, activ=_leaky_relu))
+        self.append(Conv2DBNActiv(1024, 3, pad=1, activ=_leaky_relu))
+
+    def __call__(self, x):
+        """Compute a feature map from a batch of images.
+
+        Args:
+            x (ndarray): An array holding a batch of images.
+                The images should be resized to :math:`416\\times 416`.
+
+        Returns:
+            Variable:
+        """
+
+        h = x
+        for i, link in enumerate(self):
+            h = link(h)
+            if i == 12:
+                tmp = h
+            elif i == 19:
+                h, tmp = tmp, h
+            elif i == 20:
+                h = F.concat((_reorg(h), tmp))
+            if i in {0, 1, 4, 7, 12}:
+                h = _maxpool(h)
+        return h
+
+
+class YOLOv2(YOLOv2Base):
+    """YOLOv2.
+
+    This is a model of YOLOv2 [#]_.
+    This model uses :class:`~chainercv.links.model.yolo.Darknet19Extractor` as
+    its feature extractor.
+
+    .. [#] Joseph Redmon, Ali Farhadi.
+       YOLO9000: Better, Faster, Stronger. CVPR 2017.
+
+    Args:
+        n_fg_class (int): The number of classes excluding the background.
+        pretrained_model (string): The weight file to be loaded.
+            This can take :obj:`'voc0712'`, `filepath` or :obj:`None`.
+            The default value is :obj:`None`.
+
+            * :obj:`'voc0712'`: Load weights trained on trainval split of \
+                PASCAL VOC 2007 and 2012. \
+                The weight file is downloaded and cached automatically. \
+                :obj:`n_fg_class` must be :obj:`20` or :obj:`None`. \
+                These weights were converted from the darknet model \
+                provided by `the original implementation \
+                <https://pjreddie.com/darknet/yolov2/>`_. \
+                The conversion code is \
+                `chainercv/examples/yolo/darknet2npz.py`.
+            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
+                must be specified properly.
+            * :obj:`None`: Do not load weights.
+
+    """
+
+    _extractor = Darknet19Extractor
+
+    _models = {
+        'voc0712': {
+            'param': {'n_fg_class': 20},
+            'url': 'https://chainercv-models.preferred.jp/'
+            'yolo_v2_voc0712_converted_2018_05_03.npz',
+            'cv2': True
+        },
+    }
+
+    _anchors = (
+        (1.73145, 1.3221),
+        (4.00944, 3.19275),
+        (8.09892, 5.05587),
+        (4.84053, 9.47112),
+        (10.0071, 11.2364))
+
+
+class DarknetExtractor(chainer.ChainList):
+    """A Darknet based feature extractor for YOLOv2Tiny.
+
+    This is a feature extractor for
+    :class:`~chainercv.links.model.yolo.YOLOv2Tiny`
+    """
+
+    insize = 416
+    grid = 13
+
+    def __init__(self):
+        super(DarknetExtractor, self).__init__()
+
+        # Darknet
+        for k in range(6):
+            self.append(Conv2DBNActiv(16 << k, 3, pad=1, activ=_leaky_relu))
+
+        # additional link
+        self.append(Conv2DBNActiv(1024, 3, pad=1, activ=_leaky_relu))
+
+    def __call__(self, x):
+        """Compute a feature map from a batch of images.
+
+        Args:
+            x (ndarray): An array holding a batch of images.
+                The images should be resized to :math:`416\\times 416`.
+
+        Returns:
+            Variable:
+        """
+
+        h = x
+        for i, link in enumerate(self):
+            h = link(h)
+            if i < 5:
+                h = _maxpool(h)
+        return h
+
+
+class YOLOv2Tiny(YOLOv2Base):
+    """YOLOv2 tiny.
+
+    This is a model of YOLOv2 tiny a.k.a. Tiny YOLO.
+    This model uses :class:`~chainercv.links.model.yolo.DarknetExtractor` as
+    its feature extractor.
+
+    Args:
+        n_fg_class (int): The number of classes excluding the background.
+        pretrained_model (string): The weight file to be loaded.
+            This can take :obj:`'voc0712'`, `filepath` or :obj:`None`.
+            The default value is :obj:`None`.
+
+            * :obj:`'voc0712'`: Load weights trained on trainval split of \
+                PASCAL VOC 2007 and 2012. \
+                The weight file is downloaded and cached automatically. \
+                :obj:`n_fg_class` must be :obj:`20` or :obj:`None`. \
+                These weights were converted from the darknet model \
+                provided by `the original implementation \
+                <https://pjreddie.com/darknet/yolov2/>`_. \
+                The conversion code is \
+                `chainercv/examples/yolo/darknet2npz.py`.
+            * `filepath`: A path of npz file. In this case, :obj:`n_fg_class` \
+                must be specified properly.
+            * :obj:`None`: Do not load weights.
+
+    """
+
+    _extractor = DarknetExtractor
+
+    _models = {
+        'voc0712': {
+            'param': {'n_fg_class': 20},
+            'url': 'https://chainercv-models.preferred.jp/'
+            'yolo_v2_tiny_voc0712_converted_2018_10_19.npz',
+            'cv2': True
+        },
+    }
+
+    _anchors = (
+        (1.19, 1.08),
+        (4.41, 3.42),
+        (11.38, 6.63),
+        (5.11, 9.42),
+        (10.52, 16.62))