apache · piiswrong · Mar 23, 2018 · Mar 17, 2018 · Mar 19, 2018 · Mar 23, 2018
@@ -69,6 +69,51 @@ In the rest of this document, we list routines provided by the `gluon.data` pack
     ImageFolderDataset
 ```
 
+#### Vision Transforms
+
+```eval_rst
+.. currentmodule:: mxnet.gluon.data.vision.transforms
+```
+
+Transforms can be used to augment input data during training. You
+can compose multiple transforms sequentially, for example:
+
+```python
+from mxnet.gluon.data.vision import MNIST, transforms
+from mxnet import gluon
+transform = transforms.Compose([
+    transforms.Resize(300),
+    transforms.RandomResizedCrop(224),
+    transforms.RandomBrightness(0.1),
+    transforms.ToTensor(),
+    transforms.Normalize(0, 1)])
+data = MNIST(train=True).transform_first(transform)
+data_loader = gluon.data.DataLoader(data, batch_size=32, num_workers=1)
+for data, label in data_loader:
+    # do something with data and label
+    pass
+```
+
+```eval_rst
+.. autosummary::
+    :nosignatures:
+
+    Compose
+    Cast
+    ToTensor
+    Normalize
+    RandomResizedCrop
+    CenterCrop
+    Resize
+    RandomFlipLeftRight
+    RandomFlipTopBottom
+    RandomBrightness
+    RandomContrast
+    RandomSaturation
+    RandomHue
+    RandomColorJitter
+    RandomLighting
+```
+
 ## API Reference
 
 <script type="text/javascript" src='../../../_static/js/auto_module_index.js'></script>
@@ -84,6 +129,9 @@ In the rest of this document, we list routines provided by the `gluon.data` pack
 
 .. automodule:: mxnet.gluon.data.vision.datasets
     :members:
+
+.. automodule:: mxnet.gluon.data.vision.transforms
+    :members:
 
 ```
 

@@ -32,6 +32,22 @@ class Compose(Sequential):
     ----------
     transforms : list of transform Blocks.
         The list of transforms to be composed.
+
+
+    Inputs:
+        - **data**: input tensor with the shape that the first transform Block requires.
+
+    Outputs:
+        - **out**: output tensor with the shape that the last transform Block produces.
+
+    Examples
+    --------
+    >>> transformer = transforms.Compose([transforms.Resize(300),
+    ...                                   transforms.CenterCrop(256),
+    ...                                   transforms.ToTensor()])
+    >>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
+    >>> transformer(image)
+    <NDArray 3x256x256 @cpu(0)>
     """
     def __init__(self, transforms):
         super(Compose, self).__init__()
@@ -60,6 +76,13 @@ class Cast(HybridBlock):
     ----------
     dtype : str, default 'float32'
         The target data type, in string or `numpy.dtype`.
+
+
+    Inputs:
+        - **data**: input tensor with arbitrary shape.
+
+    Outputs:
+        - **out**: output tensor with the same shape as `data`.
     """
     def __init__(self, dtype='float32'):
         super(Cast, self).__init__()
@@ -75,6 +98,31 @@ class ToTensor(HybridBlock):
     Converts an image NDArray of shape (H x W x C) in the range
     [0, 255] to a float32 tensor NDArray of shape (C x H x W) in
     the range [0, 1).
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape and uint8 type.
+
+    Outputs:
+        - **out**: output tensor with (C x H x W) shape and float32 type.
+
+    Examples
+    --------
+    >>> transformer = vision.transforms.ToTensor()
+    >>> image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
+    >>> transformer(image)
+    [[[ 0.85490197  0.72156864]
+      [ 0.09019608  0.74117649]
+      [ 0.61960787  0.92941177]
+      [ 0.96470588  0.1882353 ]]
+     [[ 0.6156863   0.73725492]
+      [ 0.46666667  0.98039216]
+      [ 0.44705883  0.45490196]
+      [ 0.01960784  0.8509804 ]]
+     [[ 0.39607844  0.03137255]
+      [ 0.72156864  0.52941179]
+      [ 0.16470589  0.7647059 ]
+      [ 0.05490196  0.70588237]]]
+    <NDArray 3x4x2 @cpu(0)>
     """
     def __init__(self):
         super(ToTensor, self).__init__()
@@ -100,6 +148,13 @@ class Normalize(HybridBlock):
         The mean values.
     std : float or tuple of floats
         The standard deviation values.
+
+
+    Inputs:
+        - **data**: input tensor with (C x H x W) shape.
+
+    Outputs:
+        - **out**: output tensor with the same shape as `data`.
     """
     def __init__(self, mean, std):
         super(Normalize, self).__init__()
@@ -129,6 +184,13 @@ class RandomResizedCrop(Block):
     interpolation : int
         Interpolation method for resizing. By default uses bilinear
         interpolation. See OpenCV's resize function for available choices.
+
+
+    Inputs:
+        - **data**: input tensor with (Hi x Wi x C) shape.
+
+    Outputs:
+        - **out**: output tensor with (H x W x C) shape.
     """
     def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0),
                  interpolation=2):
@@ -153,6 +215,20 @@ class CenterCrop(Block):
     interpolation : int
         Interpolation method for resizing. By default uses bilinear
         interpolation. See OpenCV's resize function for available choices.
+
+
+    Inputs:
+        - **data**: input tensor with (Hi x Wi x C) shape.
+
+    Outputs:
+        - **out**: output tensor with (H x W x C) shape.
+
+    Examples
+    --------
+    >>> transformer = vision.transforms.CenterCrop(size=(1000, 500))
+    >>> image = mx.nd.random.uniform(0, 255, (2321, 3482, 3)).astype(dtype=np.uint8)
+    >>> transformer(image)
+    <NDArray 500x1000x3 @cpu(0)>
     """
     def __init__(self, size, interpolation=2):
         super(CenterCrop, self).__init__()
@@ -174,6 +250,20 @@ class Resize(Block):
     interpolation : int
         Interpolation method for resizing. By default uses bilinear
         interpolation. See OpenCV's resize function for available choices.
+
+
+    Inputs:
+        - **data**: input tensor with (Hi x Wi x C) shape.
+
+    Outputs:
+        - **out**: output tensor with (H x W x C) shape.
+
+    Examples
+    --------
+    >>> transformer = vision.transforms.Resize(size=(1000, 500))
+    >>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
+    >>> transformer(image)
+    <NDArray 500x1000x3 @cpu(0)>
     """
     def __init__(self, size, interpolation=2):
         super(Resize, self).__init__()
@@ -188,6 +278,12 @@ def forward(self, x):
 class RandomFlipLeftRight(HybridBlock):
     """Randomly flip the input image left to right with a probability
     of 0.5.
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with same shape as `data`.
     """
     def __init__(self):
         super(RandomFlipLeftRight, self).__init__()
@@ -199,6 +295,12 @@ def hybrid_forward(self, F, x):
 class RandomFlipTopBottom(HybridBlock):
     """Randomly flip the input image top to bottom with a probability
     of 0.5.
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with same shape as `data`.
     """
     def __init__(self):
         super(RandomFlipTopBottom, self).__init__()
@@ -210,6 +312,19 @@ def hybrid_forward(self, F, x):
 class RandomBrightness(HybridBlock):
     """Randomly jitters image brightness with a factor
     chosen from `[max(0, 1 - brightness), 1 + brightness]`.
+
+    Parameters
+    ----------
+    brightness : float
+        How much to jitter brightness. brightness factor is randomly
+        chosen from `[max(0, 1 - brightness), 1 + brightness]`.
+
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with same shape as `data`.
     """
     def __init__(self, brightness):
         super(RandomBrightness, self).__init__()
@@ -222,6 +337,19 @@ def hybrid_forward(self, F, x):
 class RandomContrast(HybridBlock):
     """Randomly jitters image contrast with a factor
     chosen from `[max(0, 1 - contrast), 1 + contrast]`.
+
+    Parameters
+    ----------
+    contrast : float
+        How much to jitter contrast. contrast factor is randomly
+        chosen from `[max(0, 1 - contrast), 1 + contrast]`.
+
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with same shape as `data`.
     """
     def __init__(self, contrast):
         super(RandomContrast, self).__init__()
@@ -234,6 +362,19 @@ def hybrid_forward(self, F, x):
 class RandomSaturation(HybridBlock):
     """Randomly jitters image saturation with a factor
     chosen from `[max(0, 1 - saturation), 1 + saturation]`.
+
+    Parameters
+    ----------
+    saturation : float
+        How much to jitter saturation. saturation factor is randomly
+        chosen from `[max(0, 1 - saturation), 1 + saturation]`.
+
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with same shape as `data`.
     """
     def __init__(self, saturation):
         super(RandomSaturation, self).__init__()
@@ -246,6 +387,19 @@ def hybrid_forward(self, F, x):
 class RandomHue(HybridBlock):
     """Randomly jitters image hue with a factor
     chosen from `[max(0, 1 - hue), 1 + hue]`.
+
+    Parameters
+    ----------
+    hue : float
+        How much to jitter hue. hue factor is randomly
+        chosen from `[max(0, 1 - hue), 1 + hue]`.
+
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with same shape as `data`.
     """
     def __init__(self, hue):
         super(RandomHue, self).__init__()
@@ -273,6 +427,13 @@ class RandomColorJitter(HybridBlock):
     hue : float
         How much to jitter hue. hue factor is randomly
         chosen from `[max(0, 1 - hue), 1 + hue]`.
+
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with same shape as `data`.
     """
     def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
         super(RandomColorJitter, self).__init__()
@@ -289,6 +450,13 @@ class RandomLighting(HybridBlock):
     ----------
     alpha : float
         Intensity of the image.
+
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with same shape as `data`.
     """
     def __init__(self, alpha):
         super(RandomLighting, self).__init__()