Skip to content

Commit

Permalink
[MXNet-111] Doc for gluon.data.transforms (apache#10155)
Browse files Browse the repository at this point in the history
* add transform section

* more examples for vision.transforms

* add overall examples for gluon.vision.transforms

* Update data.md
  • Loading branch information
yzhliu authored and Jin Huang committed Mar 30, 2018
1 parent 45a6bbe commit 8d3e747
Show file tree
Hide file tree
Showing 2 changed files with 216 additions and 0 deletions.
48 changes: 48 additions & 0 deletions docs/api/python/gluon/data.md
Expand Up @@ -69,6 +69,51 @@ In the rest of this document, we list routines provided by the `gluon.data` pack
ImageFolderDataset
```

#### Vision Transforms

```eval_rst
.. currentmodule:: mxnet.gluon.data.vision.transforms
```

Transforms can be used to augment input data during training. You
can compose multiple transforms sequentially, for example:

```python
from mxnet.gluon.data.vision import MNIST, transforms
from mxnet import gluon
transform = transforms.Compose([
transforms.Resize(300),
transforms.RandomResizedCrop(224),
transforms.RandomBrightness(0.1),
transforms.ToTensor(),
transforms.Normalize(0, 1)])
data = MNIST(train=True).transform_first(transform)
data_loader = gluon.data.DataLoader(data, batch_size=32, num_workers=1)
for data, label in data_loader:
    # do something with data and label
    pass
```

```eval_rst
.. autosummary::
:nosignatures:
Compose
Cast
ToTensor
Normalize
RandomResizedCrop
CenterCrop
Resize
RandomFlipLeftRight
RandomFlipTopBottom
RandomBrightness
RandomContrast
RandomSaturation
RandomHue
RandomColorJitter
RandomLighting
```

## API Reference

<script type="text/javascript" src='../../../_static/js/auto_module_index.js'></script>
Expand All @@ -84,6 +129,9 @@ In the rest of this document, we list routines provided by the `gluon.data` pack
.. automodule:: mxnet.gluon.data.vision.datasets
:members:
.. automodule:: mxnet.gluon.data.vision.transforms
:members:
```

Expand Down
168 changes: 168 additions & 0 deletions python/mxnet/gluon/data/vision/transforms.py
Expand Up @@ -32,6 +32,22 @@ class Compose(Sequential):
----------
transforms : list of transform Blocks.
The list of transforms to be composed.
Inputs:
- **data**: input tensor with the shape that the first transform Block requires.
Outputs:
- **out**: output tensor with the shape that the last transform Block produces.
Examples
--------
>>> transformer = transforms.Compose([transforms.Resize(300),
... transforms.CenterCrop(256),
... transforms.ToTensor()])
>>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
>>> transformer(image)
<NDArray 3x256x256 @cpu(0)>
"""
def __init__(self, transforms):
super(Compose, self).__init__()
Expand Down Expand Up @@ -60,6 +76,13 @@ class Cast(HybridBlock):
----------
dtype : str, default 'float32'
The target data type, in string or `numpy.dtype`.
Inputs:
- **data**: input tensor with arbitrary shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, dtype='float32'):
super(Cast, self).__init__()
Expand All @@ -75,6 +98,31 @@ class ToTensor(HybridBlock):
Converts an image NDArray of shape (H x W x C) in the range
[0, 255] to a float32 tensor NDArray of shape (C x H x W) in
the range [0, 1].
Inputs:
- **data**: input tensor with (H x W x C) shape and uint8 type.
Outputs:
- **out**: output tensor with (C x H x W) shape and float32 type.
Examples
--------
>>> transformer = vision.transforms.ToTensor()
>>> image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
>>> transformer(image)
[[[ 0.85490197 0.72156864]
[ 0.09019608 0.74117649]
[ 0.61960787 0.92941177]
[ 0.96470588 0.1882353 ]]
[[ 0.6156863 0.73725492]
[ 0.46666667 0.98039216]
[ 0.44705883 0.45490196]
[ 0.01960784 0.8509804 ]]
[[ 0.39607844 0.03137255]
[ 0.72156864 0.52941179]
[ 0.16470589 0.7647059 ]
[ 0.05490196 0.70588237]]]
<NDArray 3x4x2 @cpu(0)>
"""
def __init__(self):
super(ToTensor, self).__init__()
Expand All @@ -100,6 +148,13 @@ class Normalize(HybridBlock):
The mean values.
std : float or tuple of floats
The standard deviation values.
Inputs:
- **data**: input tensor with (C x H x W) shape.
Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, mean, std):
super(Normalize, self).__init__()
Expand Down Expand Up @@ -129,6 +184,13 @@ class RandomResizedCrop(Block):
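interpolation : int
Interpolation method for resizing, given as an OpenCV interpolation flag.
NOTE(review): the default of 2 is bicubic in OpenCV's numbering, while
bilinear is 1 -- confirm which default is intended.
See OpenCV's resize function for available choices.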
Inputs:
- **data**: input tensor with (Hi x Wi x C) shape.
Outputs:
- **out**: output tensor with (H x W x C) shape.
"""
def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0),
interpolation=2):
Expand All @@ -153,6 +215,20 @@ class CenterCrop(Block):
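interpolation : int
Interpolation method for resizing, given as an OpenCV interpolation flag.
NOTE(review): the default of 2 is bicubic in OpenCV's numbering, while
bilinear is 1 -- confirm which default is intended.
See OpenCV's resize function for available choices.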
Inputs:
- **data**: input tensor with (Hi x Wi x C) shape.
Outputs:
- **out**: output tensor with (H x W x C) shape.
Examples
--------
>>> transformer = vision.transforms.CenterCrop(size=(1000, 500))
>>> image = mx.nd.random.uniform(0, 255, (2321, 3482, 3)).astype(dtype=np.uint8)
>>> transformer(image)
<NDArray 500x1000x3 @cpu(0)>
"""
def __init__(self, size, interpolation=2):
super(CenterCrop, self).__init__()
Expand All @@ -174,6 +250,20 @@ class Resize(Block):
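interpolation : int
Interpolation method for resizing, given as an OpenCV interpolation flag.
NOTE(review): the default of 2 is bicubic in OpenCV's numbering, while
bilinear is 1 -- confirm which default is intended.
See OpenCV's resize function for available choices.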
Inputs:
- **data**: input tensor with (Hi x Wi x C) shape.
Outputs:
- **out**: output tensor with (H x W x C) shape.
Examples
--------
>>> transformer = vision.transforms.Resize(size=(1000, 500))
>>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
>>> transformer(image)
<NDArray 500x1000x3 @cpu(0)>
"""
def __init__(self, size, interpolation=2):
super(Resize, self).__init__()
Expand All @@ -188,6 +278,12 @@ def forward(self, x):
class RandomFlipLeftRight(HybridBlock):
"""Randomly flip the input image left to right with a probability
of 0.5.
Inputs:
- **data**: input tensor with (H x W x C) shape.
Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self):
super(RandomFlipLeftRight, self).__init__()
Expand All @@ -199,6 +295,12 @@ def hybrid_forward(self, F, x):
class RandomFlipTopBottom(HybridBlock):
"""Randomly flip the input image top to bottom with a probability
of 0.5.
Inputs:
- **data**: input tensor with (H x W x C) shape.
Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self):
super(RandomFlipTopBottom, self).__init__()
Expand All @@ -210,6 +312,19 @@ def hybrid_forward(self, F, x):
class RandomBrightness(HybridBlock):
"""Randomly jitters image brightness with a factor
chosen from `[max(0, 1 - brightness), 1 + brightness]`.
Parameters
----------
brightness : float
How much to jitter brightness. The brightness factor is randomly
chosen from `[max(0, 1 - brightness), 1 + brightness]`.
Inputs:
- **data**: input tensor with (H x W x C) shape.
Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, brightness):
super(RandomBrightness, self).__init__()
Expand All @@ -222,6 +337,19 @@ def hybrid_forward(self, F, x):
class RandomContrast(HybridBlock):
"""Randomly jitters image contrast with a factor
chosen from `[max(0, 1 - contrast), 1 + contrast]`.
Parameters
----------
contrast : float
How much to jitter contrast. The contrast factor is randomly
chosen from `[max(0, 1 - contrast), 1 + contrast]`.
Inputs:
- **data**: input tensor with (H x W x C) shape.
Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, contrast):
super(RandomContrast, self).__init__()
Expand All @@ -234,6 +362,19 @@ def hybrid_forward(self, F, x):
class RandomSaturation(HybridBlock):
"""Randomly jitters image saturation with a factor
chosen from `[max(0, 1 - saturation), 1 + saturation]`.
Parameters
----------
saturation : float
How much to jitter saturation. The saturation factor is randomly
chosen from `[max(0, 1 - saturation), 1 + saturation]`.
Inputs:
- **data**: input tensor with (H x W x C) shape.
Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, saturation):
super(RandomSaturation, self).__init__()
Expand All @@ -246,6 +387,19 @@ def hybrid_forward(self, F, x):
class RandomHue(HybridBlock):
"""Randomly jitters image hue with a factor
chosen from `[max(0, 1 - hue), 1 + hue]`.
Parameters
----------
hue : float
How much to jitter hue. The hue factor is randomly
chosen from `[max(0, 1 - hue), 1 + hue]`.
Inputs:
- **data**: input tensor with (H x W x C) shape.
Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, hue):
super(RandomHue, self).__init__()
Expand Down Expand Up @@ -273,6 +427,13 @@ class RandomColorJitter(HybridBlock):
hue : float
How much to jitter hue. hue factor is randomly
chosen from `[max(0, 1 - hue), 1 + hue]`.
Inputs:
- **data**: input tensor with (H x W x C) shape.
Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
super(RandomColorJitter, self).__init__()
Expand All @@ -289,6 +450,13 @@ class RandomLighting(HybridBlock):
----------
alpha : float
Intensity of the image.
Inputs:
- **data**: input tensor with (H x W x C) shape.
Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, alpha):
super(RandomLighting, self).__init__()
Expand Down

0 comments on commit 8d3e747

Please sign in to comment.