chainer · yuyu2172 · Aug 19, 2017 · Aug 15, 2017 · Aug 15, 2017 · Aug 15, 2017
diff --git a/chainercv/datasets/__init__.py b/chainercv/datasets/__init__.py
@@ -2,6 +2,10 @@
 from chainercv.datasets.camvid.camvid_dataset import camvid_label_colors  # NOQA
 from chainercv.datasets.camvid.camvid_dataset import camvid_label_names  # NOQA
 from chainercv.datasets.camvid.camvid_dataset import CamVidDataset  # NOQA
+from chainercv.datasets.cityscapes.cityscapes_semantic_segmentation_dataset import CityscapesSemanticSegmentationDataset  # NOQA
+from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_label_colors  # NOQA
+from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_label_names  # NOQA
+from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_labels  # NOQA
 from chainercv.datasets.cub.cub_keypoint_dataset import CUBKeypointDataset  # NOQA
 from chainercv.datasets.cub.cub_label_dataset import CUBLabelDataset  # NOQA
 from chainercv.datasets.cub.cub_utils import cub_label_names  # NOQA

diff --git a/chainercv/datasets/cityscapes/__init__.py b/chainercv/datasets/cityscapes/__init__.py
diff --git a/chainercv/datasets/cityscapes/cityscapes_semantic_segmentation_dataset.py b/chainercv/datasets/cityscapes/cityscapes_semantic_segmentation_dataset.py
@@ -0,0 +1,85 @@
+import glob
+import os
+
+import numpy as np
+
+from chainer import dataset
+from chainercv.datasets.cityscapes.cityscapes_utils import cityscapes_labels
+from chainercv.utils import read_image
+
+
+class CityscapesSemanticSegmentationDataset(dataset.DatasetMixin):
+
+    """Dataset class for a semantic segmentation task on `Cityscapes dataset`_.
+
+    .. _`Cityscapes dataset`: https://www.cityscapes-dataset.com
+
+    .. note::
+
+        Please download the data by yourself because Cityscapes dataset
+        doesn't allow to re-distribute their data.
+
+    Label files are collected with the pattern
+    ``<label_dir>/<split>/<city>/*_labelIds.png``. The image paired with a
+    label file is looked up at
+    ``<img_dir>/<split>/<city>/<prefix>_leftImg8bit.png``, where ``<prefix>``
+    is the label file name without its trailing ``_<type>_labelIds.png``.
+
+    Args:
+        img_dir (string): Path to the image dir. It should end with
+            ``leftImg8bit``.
+        label_dir (string): Path to the dir which contains labels. It should
+            end with either ``gtFine`` or ``gtCoarse``.
+        split ({'train', 'val'}): Select from dataset splits used in
+            Cityscapes dataset.
+        ignore_labels (bool): If :obj:`True`, the labels marked
+            ``ignoreInEval`` in the original
+            `cityscapesScripts <https://github.com/mcordts/cityscapesScripts>`_
+            are replaced with :obj:`-1` and every other label ID is replaced
+            with its train ID in the :meth:`get_example` method.
+            If :obj:`False`, the label IDs stored in the label file are
+            returned as they are.
+
+    """
+
+    def __init__(self, img_dir, label_dir, split='train', ignore_labels=True):
+        img_dir = os.path.join(img_dir, split)
+        self.ignore_labels = ignore_labels
+
+        # Only the directory named exactly ``split`` is searched, and the
+        # result is sorted so that the example order does not depend on the
+        # order in which the file system lists the files.
+        self.label_fns = sorted(glob.glob(
+            os.path.join(label_dir, split, '*', '*_labelIds.png')))
+
+        self.img_fns = []
+        for label_fn in self.label_fns:
+            city_dir, label_name = os.path.split(label_fn)
+            city = os.path.basename(city_dir)
+            # '<prefix>_<type>_labelIds.png' -> '<prefix>_leftImg8bit.png'
+            prefix = label_name.rsplit('_', 2)[0]
+            self.img_fns.append(os.path.join(
+                img_dir, city, '{}_leftImg8bit.png'.format(prefix)))
+
+    def __len__(self):
+        return len(self.img_fns)
+
+    def get_example(self, i):
+        """Returns the i-th example.
+
+        Returns a color image and a label image. The color image is in CHW
+        format and the label image is in HW format.
+
+        Args:
+            i (int): The index of the example.
+
+        Returns:
+            tuple of a color image and a label whose shapes are (3, H, W) and
+            (H, W) respectively. H and W are height and width of the image.
+            The dtype of the color image is :obj:`numpy.float32` and
+            the dtype of the label image is :obj:`numpy.int32`.
+
+        """
+        img = read_image(self.img_fns[i])
+        # Index 0 drops the leading axis so that the label is (H, W).
+        label_orig = read_image(
+            self.label_fns[i], dtype=np.int32, color=False)[0]
+        if self.ignore_labels:
+            # Start from "ignore" everywhere and fill in the train IDs of
+            # the labels that take part in evaluation.
+            label_out = np.full(label_orig.shape, -1, dtype=np.int32)
+            for label in cityscapes_labels:
+                if not label.ignoreInEval:
+                    label_out[label_orig == label.id] = label.trainId
+        else:
+            label_out = label_orig
+        img = img.astype(np.float32)
+        return img, label_out
diff --git a/chainercv/datasets/cityscapes/cityscapes_utils.py b/chainercv/datasets/cityscapes/cityscapes_utils.py
@@ -0,0 +1,54 @@
+# The values used here are copied from cityscapesScripts:
+# https://github.com/mcordts/cityscapesScripts
+
+from collections import namedtuple
+
+
+# Record describing a single Cityscapes class. The fields are, in order:
+# class name, label ID, train ID, category name, category ID, whether the
+# class has instances, whether it is ignored in evaluation, and a 3-tuple
+# of ints giving its color.
+Label = namedtuple('Label', [
+    'name',
+    'id',
+    'trainId',
+    'category',
+    'categoryId',
+    'hasInstances',
+    'ignoreInEval',
+    'color',
+])
+
+# Every entry whose ``ignoreInEval`` is True carries a train ID of 255
+# (or -1 for 'licenseplate'); the remaining entries are numbered 0..18.
+cityscapes_labels = (
+    Label('unlabeled', 0, 255, 'void', 0, False, True, (0, 0, 0)),
+    Label('egovehicle', 1, 255, 'void', 0, False, True, (0, 0, 0)),
+    Label('rectificationborder', 2, 255, 'void', 0, False, True, (0, 0, 0)),
+    Label('outofroi', 3, 255, 'void', 0, False, True, (0, 0, 0)),
+    Label('static', 4, 255, 'void', 0, False, True, (0, 0, 0)),
+    Label('dynamic', 5, 255, 'void', 0, False, True, (111, 74, 0)),
+    Label('ground', 6, 255, 'void', 0, False, True, (81, 0, 81)),
+    Label('road', 7, 0, 'flat', 1, False, False, (128, 64, 128)),
+    Label('sidewalk', 8, 1, 'flat', 1, False, False, (244, 35, 232)),
+    Label('parking', 9, 255, 'flat', 1, False, True, (250, 170, 160)),
+    Label('railtrack', 10, 255, 'flat', 1, False, True, (230, 150, 140)),
+    Label('building', 11, 2, 'construction', 2, False, False, (70, 70, 70)),
+    Label('wall', 12, 3, 'construction', 2, False, False, (102, 102, 156)),
+    Label('fence', 13, 4, 'construction', 2, False, False, (190, 153, 153)),
+    Label(
+        'guardrail', 14, 255, 'construction', 2, False, True, (180, 165, 180)),
+    Label('bridge', 15, 255, 'construction', 2, False, True, (150, 100, 100)),
+    Label('tunnel', 16, 255, 'construction', 2, False, True, (150, 120, 90)),
+    Label('pole', 17, 5, 'object', 3, False, False, (153, 153, 153)),
+    Label('polegroup', 18, 255, 'object', 3, False, True, (153, 153, 153)),
+    Label('trafficlight', 19, 6, 'object', 3, False, False, (250, 170, 30)),
+    Label('trafficsign', 20, 7, 'object', 3, False, False, (220, 220, 0)),
+    Label('vegetation', 21, 8, 'nature', 4, False, False, (107, 142, 35)),
+    Label('terrain', 22, 9, 'nature', 4, False, False, (152, 251, 152)),
+    Label('sky', 23, 10, 'sky', 5, False, False, (70, 130, 180)),
+    Label('person', 24, 11, 'human', 6, True, False, (220, 20, 60)),
+    Label('rider', 25, 12, 'human', 6, True, False, (255, 0, 0)),
+    Label('car', 26, 13, 'vehicle', 7, True, False, (0, 0, 142)),
+    Label('truck', 27, 14, 'vehicle', 7, True, False, (0, 0, 70)),
+    Label('bus', 28, 15, 'vehicle', 7, True, False, (0, 60, 100)),
+    Label('caravan', 29, 255, 'vehicle', 7, True, True, (0, 0, 90)),
+    Label('trailer', 30, 255, 'vehicle', 7, True, True, (0, 0, 110)),
+    Label('train', 31, 16, 'vehicle', 7, True, False, (0, 80, 100)),
+    Label('motorcycle', 32, 17, 'vehicle', 7, True, False, (0, 0, 230)),
+    Label('bicycle', 33, 18, 'vehicle', 7, True, False, (119, 11, 32)),
+    Label('licenseplate', -1, -1, 'vehicle', 7, False, True, (0, 0, 142)),
+)
+
+# Names of the classes that are not ignored in evaluation, in table order.
+cityscapes_label_names = tuple(
+    entry.name for entry in cityscapes_labels if not entry.ignoreInEval)
+
+# Colors of the same classes, aligned index-for-index with the names above.
+cityscapes_label_colors = tuple(
+    entry.color for entry in cityscapes_labels if not entry.ignoreInEval)
diff --git a/tests/datasets_tests/cityscapes_tests/test_cityscapes.py b/tests/datasets_tests/cityscapes_tests/test_cityscapes.py
@@ -0,0 +1,59 @@
+import os
+import shutil
+import tempfile
+import unittest
+
+import numpy as np
+from PIL import Image
+
+from chainer import testing
+from chainer.testing import attr
+from chainercv.datasets import cityscapes_label_names
+from chainercv.datasets import CityscapesSemanticSegmentationDataset
+from chainercv.utils import assert_is_semantic_segmentation_dataset
+
+
+@testing.parameterize(
+    {'split': 'train'},
+    {'split': 'val'}
+)
+class TestCityscapesSemanticSegmentationDataset(unittest.TestCase):
+
+    """Checks the dataset against a small synthetic Cityscapes-like tree."""
+
+    def setUp(self):
+        # Lay out ``<tmp>/leftImg8bit/<split>/aachen`` and
+        # ``<tmp>/gtFine/<split>/aachen`` with ten image/label pairs.
+        self.temp_dir = tempfile.mkdtemp()
+        img_dir = os.path.join(self.temp_dir, 'leftImg8bit')
+        label_dir = os.path.join(self.temp_dir, 'gtFine')
+        city_img_dir = os.path.join(img_dir, self.split, 'aachen')
+        city_label_dir = os.path.join(label_dir, self.split, 'aachen')
+        os.makedirs(city_img_dir)
+        os.makedirs(city_label_dir)
+
+        for i in range(10):
+            img = np.random.randint(0, 256, size=(128, 160, 3))
+            img = Image.fromarray(img.astype(np.uint8))
+            img.save(os.path.join(
+                city_img_dir,
+                'aachen_000000_0000{:02d}_leftImg8bit.png'.format(i)))
+
+            # Random label IDs in 0..33 so that both ignored and evaluated
+            # classes appear in the label images.
+            label = np.random.randint(0, 34, size=(128, 160))
+            label = Image.fromarray(label.astype(np.uint8))
+            label.save(os.path.join(
+                city_label_dir,
+                'aachen_000000_0000{:02d}_gtFine_labelIds.png'.format(i)))
+
+        self.dataset = CityscapesSemanticSegmentationDataset(
+            img_dir, label_dir, self.split)
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir)
+
+    @attr.slow
+    def test_cityscapes_semantic_segmentation_dataset(self):
+        assert_is_semantic_segmentation_dataset(
+            self.dataset, len(cityscapes_label_names), n_example=10)
+
+
+# Hands this module's name and path to chainer's test runner helper;
+# presumably this only runs the tests when the file is executed as a
+# script -- TODO confirm against the chainer.testing.run_module docs.
+testing.run_module(__name__, __file__)