From b1cb12d726078589bf99b31ab66f553a1ab717a9 Mon Sep 17 00:00:00 2001
From: Richard Calland <rcalland@users.noreply.github.com>
Date: Wed, 28 Jun 2017 15:30:17 +0900
Subject: [PATCH 1/8] Fetch the SVHN dataset

SVHN is very similar to MNIST. I added code that will download and create datasets for train and test samples the same way `get_mnist()` does.
---
 chainer/datasets/svhn.py | 73 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 chainer/datasets/svhn.py

diff --git a/chainer/datasets/svhn.py b/chainer/datasets/svhn.py
new file mode 100644
index 000000000000..d9c1d56dc1fa
--- /dev/null
+++ b/chainer/datasets/svhn.py
@@ -0,0 +1,73 @@
+import os
+
+import numpy
+from scipy import io
+
+
+from chainer.dataset import download
+from chainer.datasets import tuple_dataset
+
+def get_svhn(withlabel=True, scale=1., dtype=numpy.float32, label_dtype=numpy.int32):
+    """Gets the SVHN dataset.
+
+    `SVHN <http://ufldl.stanford.edu/housenumbers/>` is a dataset similar to MNIST but
+    composed of cropped images of house numbers. The functionality is identical to the MNIST dataset,
+    with the exception that there is no ``ndim`` argument.
+
+    Args:
+        withlabel (bool): If ``True``, it returns datasets with labels. In this
+            case, each example is a tuple of an image and a label. Otherwise,
+            the datasets only contain images.
+        scale (float): Pixel value scale. If it is 1 (default), pixels are
+            scaled to the interval ``[0, 1]``.
+        dtype: Data type of resulting image arrays.
+        label_dtype: Data type of the labels.
+
+    Returns:
+        A tuple of two datasets. If ``withlabel`` is ``True``, both datasets
+        are :class:`~chainer.datasets.TupleDataset` instances. Otherwise, both
+        datasets are arrays of images.
+
+    """
+    train_raw = _retrieve_svhn_training()
+    train = _preprocess_svhn(train_raw, withlabel, scale, dtype,
+                              label_dtype)
+    test_raw = _retrieve_svhn_test()
+    test = _preprocess_svhn(test_raw, withlabel, scale, dtype,
+                             label_dtype)
+    return train, test
+
+def _preprocess_svhn(raw, withlabel, scale, image_dtype, label_dtype):
+    images = raw["x"].transpose(3,2,0,1)
+    images = images.astype(image_dtype)
+    images *= scale / 255.
+
+    labels = raw["y"].astype(label_dtype).flatten()
+    # labels go from 1-10, but we want zero-ed 0-9
+    labels -= 1
+
+    if withlabel:
+        return tuple_dataset.TupleDataset(images, labels)
+    else:
+        return images
+
+def _retrieve_svhn_training():
+    return _retrieve_svhn("train.npz", "http://ufldl.stanford.edu/housenumbers/train_32x32.mat")
+
+def _retrieve_svhn_test():
+    return _retrieve_svhn("test.npz", "http://ufldl.stanford.edu/housenumbers/test_32x32.mat")
+
+def _retrieve_svhn(name, url):
+    root = download.get_dataset_directory('pfnet/chainer/svhn')
+    path = os.path.join(root, name)
+    return download.cache_or_load_file(
+        path, lambda path: _make_npz(path, url), numpy.load)
+
+def _make_npz(path, url):
+    _path = download.cached_download(url)
+    raw = io.loadmat(_path)
+    images = raw["X"].astype(numpy.uint8)
+    labels = raw["y"].astype(numpy.uint8)
+
+    numpy.savez_compressed(path, x=images, y=labels)
+    return {'x': images, 'y': labels}

From 71d804a312ebdbd2844a96b5f963c0fc357a2de3 Mon Sep 17 00:00:00 2001
From: Richard Calland <rcalland@users.noreply.github.com>
Date: Tue, 4 Jul 2017 15:59:13 +0900
Subject: [PATCH 2/8] Fix label ordering

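Made label ordering intuitive: SVHN stores the digit "0" as label 10, so map label 10 to 0 instead of subtracting 1 from every label, making each label equal the digit it represents.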
---
 chainer/datasets/svhn.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chainer/datasets/svhn.py b/chainer/datasets/svhn.py
index d9c1d56dc1fa..d87c99eaa32d 100644
--- a/chainer/datasets/svhn.py
+++ b/chainer/datasets/svhn.py
@@ -3,7 +3,6 @@
 import numpy
 from scipy import io
 
-
 from chainer.dataset import download
 from chainer.datasets import tuple_dataset
 
@@ -43,8 +42,9 @@ def _preprocess_svhn(raw, withlabel, scale, image_dtype, label_dtype):
     images *= scale / 255.
 
     labels = raw["y"].astype(label_dtype).flatten()
-    # labels go from 1-10, but we want zero-ed 0-9
-    labels -= 1
+    # labels go from 1-10, with the digit "0" having label 10.
+    # Set "0" to be label 0 to restore expected ordering
+    labels[labels==10] = 0
 
     if withlabel:
         return tuple_dataset.TupleDataset(images, labels)

From 8738dabe194bd9ec81d91e368abbb3f9ee64b630 Mon Sep 17 00:00:00 2001
From: Richard Calland <rcalland@users.noreply.github.com>
Date: Tue, 4 Jul 2017 16:00:21 +0900
Subject: [PATCH 3/8] Update __init__.py for svhn dataset

---
 chainer/datasets/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/chainer/datasets/__init__.py b/chainer/datasets/__init__.py
index b9e2f431f6a1..a43d90c376ae 100644
--- a/chainer/datasets/__init__.py
+++ b/chainer/datasets/__init__.py
@@ -2,6 +2,7 @@
 from chainer.datasets import dict_dataset  # NOQA
 from chainer.datasets import image_dataset  # NOQA
 from chainer.datasets import mnist  # NOQA
+from chainer.datasets import svhn # NOQA
 from chainer.datasets import ptb  # NOQA
 from chainer.datasets import sub_dataset  # NOQA
 from chainer.datasets import transform_dataset  # NOQA

From 02ae96e691c3d545059092d720e5dfb77d0ae6f1 Mon Sep 17 00:00:00 2001
From: rcalland <richard.calland@ipmu.jp>
Date: Mon, 10 Jul 2017 17:41:48 +0900
Subject: [PATCH 4/8] fixed flake8 errors

---
 chainer/datasets/__init__.py |  2 +-
 chainer/datasets/svhn.py     | 28 +++++++++++++++++++---------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/chainer/datasets/__init__.py b/chainer/datasets/__init__.py
index a43d90c376ae..f7b5b13e981d 100644
--- a/chainer/datasets/__init__.py
+++ b/chainer/datasets/__init__.py
@@ -2,7 +2,7 @@
 from chainer.datasets import dict_dataset  # NOQA
 from chainer.datasets import image_dataset  # NOQA
 from chainer.datasets import mnist  # NOQA
-from chainer.datasets import svhn # NOQA
+from chainer.datasets import svhn  # NOQA
 from chainer.datasets import ptb  # NOQA
 from chainer.datasets import sub_dataset  # NOQA
 from chainer.datasets import transform_dataset  # NOQA
diff --git a/chainer/datasets/svhn.py b/chainer/datasets/svhn.py
index d87c99eaa32d..bd1024acc0fd 100644
--- a/chainer/datasets/svhn.py
+++ b/chainer/datasets/svhn.py
@@ -6,11 +6,14 @@
 from chainer.dataset import download
 from chainer.datasets import tuple_dataset
 
-def get_svhn(withlabel=True, scale=1., dtype=numpy.float32, label_dtype=numpy.int32):
+
+def get_svhn(withlabel=True, scale=1., dtype=numpy.float32,
+             label_dtype=numpy.int32):
     """Gets the SVHN dataset.
 
-    `SVHN <http://ufldl.stanford.edu/housenumbers/>` is a dataset similar to MNIST but
-    composed of cropped images of house numbers. The functionality is identical to the MNIST dataset,
+    `SVHN <http://ufldl.stanford.edu/housenumbers/>`_ is a dataset
+    similar to MNIST but composed of cropped images of house numbers.
+    The functionality is identical to the MNIST dataset,
     with the exception that there is no ``ndim`` argument.
 
     Args:
@@ -30,32 +33,38 @@ def get_svhn(withlabel=True, scale=1., dtype=numpy.float32, label_dtype=numpy.in
     """
     train_raw = _retrieve_svhn_training()
     train = _preprocess_svhn(train_raw, withlabel, scale, dtype,
-                              label_dtype)
+                             label_dtype)
     test_raw = _retrieve_svhn_test()
     test = _preprocess_svhn(test_raw, withlabel, scale, dtype,
-                             label_dtype)
+                            label_dtype)
     return train, test
 
+
 def _preprocess_svhn(raw, withlabel, scale, image_dtype, label_dtype):
-    images = raw["x"].transpose(3,2,0,1)
+    images = raw["x"].transpose(3, 2, 0, 1)
     images = images.astype(image_dtype)
     images *= scale / 255.
 
     labels = raw["y"].astype(label_dtype).flatten()
     # labels go from 1-10, with the digit "0" having label 10.
     # Set "0" to be label 0 to restore expected ordering
-    labels[labels==10] = 0
+    labels[labels == 10] = 0
 
     if withlabel:
         return tuple_dataset.TupleDataset(images, labels)
     else:
         return images
 
+
 def _retrieve_svhn_training():
-    return _retrieve_svhn("train.npz", "http://ufldl.stanford.edu/housenumbers/train_32x32.mat")
+    url = "http://ufldl.stanford.edu/housenumbers/train_32x32.mat"
+    return _retrieve_svhn("train.npz", url)
+
 
 def _retrieve_svhn_test():
-    return _retrieve_svhn("test.npz", "http://ufldl.stanford.edu/housenumbers/test_32x32.mat")
+    url = "http://ufldl.stanford.edu/housenumbers/test_32x32.mat"
+    return _retrieve_svhn("test.npz", url)
+
 
 def _retrieve_svhn(name, url):
     root = download.get_dataset_directory('pfnet/chainer/svhn')
@@ -63,6 +72,7 @@ def _retrieve_svhn(name, url):
     return download.cache_or_load_file(
         path, lambda path: _make_npz(path, url), numpy.load)
 
+
 def _make_npz(path, url):
     _path = download.cached_download(url)
     raw = io.loadmat(_path)

From b8e0f3845871124fef438580b824fe99f52c8eea Mon Sep 17 00:00:00 2001
From: rcalland <richard.calland@ipmu.jp>
Date: Wed, 12 Jul 2017 11:24:35 +0900
Subject: [PATCH 5/8] added get_svhn to __init__.py

---
 chainer/datasets/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/chainer/datasets/__init__.py b/chainer/datasets/__init__.py
index f7b5b13e981d..a75b376dcecf 100644
--- a/chainer/datasets/__init__.py
+++ b/chainer/datasets/__init__.py
@@ -17,6 +17,7 @@
 from chainer.datasets.image_dataset import ImageDataset  # NOQA
 from chainer.datasets.image_dataset import LabeledImageDataset  # NOQA
 from chainer.datasets.mnist import get_mnist  # NOQA
+from chainer.datasets.svhn import get_svhn  # NOQA
 from chainer.datasets.ptb import get_ptb_words  # NOQA
 from chainer.datasets.ptb import get_ptb_words_vocabulary  # NOQA
 from chainer.datasets.sub_dataset import get_cross_validation_datasets  # NOQA

From 25410270f2659526e8e5ac72da53867d48a74c0c Mon Sep 17 00:00:00 2001
From: rcalland <richard.calland@ipmu.jp>
Date: Thu, 13 Jul 2017 17:43:30 +0900
Subject: [PATCH 6/8] fix alphabetical order of imports

---
 chainer/datasets/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chainer/datasets/__init__.py b/chainer/datasets/__init__.py
index a75b376dcecf..292f991386a1 100644
--- a/chainer/datasets/__init__.py
+++ b/chainer/datasets/__init__.py
@@ -2,9 +2,9 @@
 from chainer.datasets import dict_dataset  # NOQA
 from chainer.datasets import image_dataset  # NOQA
 from chainer.datasets import mnist  # NOQA
-from chainer.datasets import svhn  # NOQA
 from chainer.datasets import ptb  # NOQA
 from chainer.datasets import sub_dataset  # NOQA
+from chainer.datasets import svhn  # NOQA
 from chainer.datasets import transform_dataset  # NOQA
 from chainer.datasets import tuple_dataset  # NOQA
 
@@ -17,7 +17,6 @@
 from chainer.datasets.image_dataset import ImageDataset  # NOQA
 from chainer.datasets.image_dataset import LabeledImageDataset  # NOQA
 from chainer.datasets.mnist import get_mnist  # NOQA
-from chainer.datasets.svhn import get_svhn  # NOQA
 from chainer.datasets.ptb import get_ptb_words  # NOQA
 from chainer.datasets.ptb import get_ptb_words_vocabulary  # NOQA
 from chainer.datasets.sub_dataset import get_cross_validation_datasets  # NOQA
@@ -27,5 +26,6 @@
 from chainer.datasets.sub_dataset import split_dataset_n_random  # NOQA
 from chainer.datasets.sub_dataset import split_dataset_random  # NOQA
 from chainer.datasets.sub_dataset import SubDataset  # NOQA
+from chainer.datasets.svhn import get_svhn  # NOQA
 from chainer.datasets.transform_dataset import TransformDataset  # NOQA
 from chainer.datasets.tuple_dataset import TupleDataset  # NOQA

From 31fc9de93010e3e9048056896df4262b2cb124a1 Mon Sep 17 00:00:00 2001
From: rcalland <richard.calland@ipmu.jp>
Date: Mon, 31 Jul 2017 13:04:14 +0900
Subject: [PATCH 7/8] add check for scipy availability

---
 chainer/datasets/svhn.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/chainer/datasets/svhn.py b/chainer/datasets/svhn.py
index bd1024acc0fd..55c73f22043b 100644
--- a/chainer/datasets/svhn.py
+++ b/chainer/datasets/svhn.py
@@ -1,7 +1,11 @@
 import os
 
 import numpy
-from scipy import io
+try:
+    from scipy import io
+    _scipy_available = True
+except ImportError:
+    _scipy_available = False
 
 from chainer.dataset import download
 from chainer.datasets import tuple_dataset
@@ -31,6 +35,9 @@ def get_svhn(withlabel=True, scale=1., dtype=numpy.float32,
         datasets are arrays of images.
 
     """
+    if not _scipy_available:
+            raise RuntimeError('scipy is not available')
+
     train_raw = _retrieve_svhn_training()
     train = _preprocess_svhn(train_raw, withlabel, scale, dtype,
                              label_dtype)

From f46bacf20a2216af2a97b4619b206e91506ab1d8 Mon Sep 17 00:00:00 2001
From: rcalland <richard.calland@ipmu.jp>
Date: Mon, 31 Jul 2017 13:46:23 +0900
Subject: [PATCH 8/8] fix alignment issue

---
 chainer/datasets/svhn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chainer/datasets/svhn.py b/chainer/datasets/svhn.py
index 55c73f22043b..d27c34e009e8 100644
--- a/chainer/datasets/svhn.py
+++ b/chainer/datasets/svhn.py
@@ -36,7 +36,7 @@ def get_svhn(withlabel=True, scale=1., dtype=numpy.float32,
 
     """
     if not _scipy_available:
-            raise RuntimeError('scipy is not available')
+        raise RuntimeError('scipy is not available')
 
     train_raw = _retrieve_svhn_training()
     train = _preprocess_svhn(train_raw, withlabel, scale, dtype,