Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNet-111] Doc for gluon.data.transforms #10155

Merged
merged 4 commits into from Mar 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 48 additions & 0 deletions docs/api/python/gluon/data.md
Expand Up @@ -69,6 +69,51 @@ In the rest of this document, we list routines provided by the `gluon.data` pack
ImageFolderDataset
```

#### Vision Transforms

```eval_rst
.. currentmodule:: mxnet.gluon.data.vision.transforms
```

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add an example. Something like

from mxnet.gluon.data.vision import MNIST, transforms
transform = transforms.Compose([
    transforms.Resize(300),
    transforms.RandomResizedCrop(224),
    transforms.RandomBrightness(0.1),
    transforms.ToTensor(),
    transforms.Normalize([0,0,0], [1,1,1])])

data = MNIST(train=True).transform_first(transform)

data_loader = gluon.data.DataLoader(
    data, batch_size=64, num_workers=4)

for data, label in data_loader:
    ...

Transforms can be used to augment input data during training. You
can compose multiple transforms sequentially, for example:

```python
from mxnet.gluon.data.vision import MNIST, transforms
from mxnet import gluon
transform = transforms.Compose([
transforms.Resize(300),
transforms.RandomResizedCrop(224),
transforms.RandomBrightness(0.1),
transforms.ToTensor(),
transforms.Normalize(0, 1)])
data = MNIST(train=True).transform_first(transform)
data_loader = gluon.data.DataLoader(data, batch_size=32, num_workers=1)
for data, label in data_loader:
    # do something with data and label
    pass
```

```eval_rst
.. autosummary::
:nosignatures:

Compose
Cast
ToTensor
Normalize
RandomResizedCrop
CenterCrop
Resize
RandomFlipLeftRight
RandomFlipTopBottom
RandomBrightness
RandomContrast
RandomSaturation
RandomHue
RandomColorJitter
RandomLighting
```

## API Reference

<script type="text/javascript" src='../../../_static/js/auto_module_index.js'></script>
Expand All @@ -84,6 +129,9 @@ In the rest of this document, we list routines provided by the `gluon.data` pack

.. automodule:: mxnet.gluon.data.vision.datasets
:members:

.. automodule:: mxnet.gluon.data.vision.transforms
:members:

```

Expand Down
168 changes: 168 additions & 0 deletions python/mxnet/gluon/data/vision/transforms.py
Expand Up @@ -32,6 +32,22 @@ class Compose(Sequential):
----------
transforms : list of transform Blocks.
The list of transforms to be composed.


Inputs:
- **data**: input tensor with the shape that the first transform Block requires.

Outputs:
- **out**: output tensor with the shape that the last transform Block produces.

Examples
--------
>>> transformer = transforms.Compose([transforms.Resize(300),
... transforms.CenterCrop(256),
... transforms.ToTensor()])
>>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
>>> transformer(image)
<NDArray 3x256x256 @cpu(0)>
"""
def __init__(self, transforms):
super(Compose, self).__init__()
Expand Down Expand Up @@ -60,6 +76,13 @@ class Cast(HybridBlock):
----------
dtype : str, default 'float32'
The target data type, in string or `numpy.dtype`.


Inputs:
- **data**: input tensor with arbitrary shape.

Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, dtype='float32'):
super(Cast, self).__init__()
Expand All @@ -75,6 +98,31 @@ class ToTensor(HybridBlock):
Converts an image NDArray of shape (H x W x C) in the range
[0, 255] to a float32 tensor NDArray of shape (C x H x W) in
the range [0, 1).

Inputs:
- **data**: input tensor with (H x W x C) shape and uint8 type.

Outputs:
- **out**: output tensor with (C x H x W) shape and float32 type.

Examples
--------
>>> transformer = vision.transforms.ToTensor()
>>> image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
>>> transformer(image)
[[[ 0.85490197 0.72156864]
[ 0.09019608 0.74117649]
[ 0.61960787 0.92941177]
[ 0.96470588 0.1882353 ]]
[[ 0.6156863 0.73725492]
[ 0.46666667 0.98039216]
[ 0.44705883 0.45490196]
[ 0.01960784 0.8509804 ]]
[[ 0.39607844 0.03137255]
[ 0.72156864 0.52941179]
[ 0.16470589 0.7647059 ]
[ 0.05490196 0.70588237]]]
<NDArray 3x4x2 @cpu(0)>
"""
def __init__(self):
super(ToTensor, self).__init__()
Expand All @@ -100,6 +148,13 @@ class Normalize(HybridBlock):
The mean values.
std : float or tuple of floats
The standard deviation values.


Inputs:
- **data**: input tensor with (C x H x W) shape.

Outputs:
- **out**: output tensor with the same shape as `data`.
"""
def __init__(self, mean, std):
super(Normalize, self).__init__()
Expand Down Expand Up @@ -129,6 +184,13 @@ class RandomResizedCrop(Block):
interpolation : int
Interpolation method for resizing. By default uses bilinear
interpolation. See OpenCV's resize function for available choices.


Inputs:
- **data**: input tensor with (Hi x Wi x C) shape.

Outputs:
- **out**: output tensor with (H x W x C) shape.
"""
def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0),
interpolation=2):
Expand All @@ -153,6 +215,20 @@ class CenterCrop(Block):
interpolation : int
Interpolation method for resizing. By default uses bilinear
interpolation. See OpenCV's resize function for available choices.


Inputs:
- **data**: input tensor with (Hi x Wi x C) shape.

Outputs:
- **out**: output tensor with (H x W x C) shape.

Examples
--------
>>> transformer = vision.transforms.CenterCrop(size=(1000, 500))
>>> image = mx.nd.random.uniform(0, 255, (2321, 3482, 3)).astype(dtype=np.uint8)
>>> transformer(image)
<NDArray 500x1000x3 @cpu(0)>
"""
def __init__(self, size, interpolation=2):
super(CenterCrop, self).__init__()
Expand All @@ -174,6 +250,20 @@ class Resize(Block):
interpolation : int
Interpolation method for resizing. By default uses bilinear
interpolation. See OpenCV's resize function for available choices.


Inputs:
- **data**: input tensor with (Hi x Wi x C) shape.

Outputs:
- **out**: output tensor with (H x W x C) shape.

Examples
--------
>>> transformer = vision.transforms.Resize(size=(1000, 500))
>>> image = mx.nd.random.uniform(0, 255, (224, 224, 3)).astype(dtype=np.uint8)
>>> transformer(image)
<NDArray 500x1000x3 @cpu(0)>
"""
def __init__(self, size, interpolation=2):
super(Resize, self).__init__()
Expand All @@ -188,6 +278,12 @@ def forward(self, x):
class RandomFlipLeftRight(HybridBlock):
"""Randomly flip the input image left to right with a probability
of 0.5.

Inputs:
- **data**: input tensor with (H x W x C) shape.

Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self):
super(RandomFlipLeftRight, self).__init__()
Expand All @@ -199,6 +295,12 @@ def hybrid_forward(self, F, x):
class RandomFlipTopBottom(HybridBlock):
"""Randomly flip the input image top to bottom with a probability
of 0.5.

Inputs:
- **data**: input tensor with (H x W x C) shape.

Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self):
super(RandomFlipTopBottom, self).__init__()
Expand All @@ -210,6 +312,19 @@ def hybrid_forward(self, F, x):
class RandomBrightness(HybridBlock):
"""Randomly jitters image brightness with a factor
chosen from `[max(0, 1 - brightness), 1 + brightness]`.

Parameters
----------
brightness : float
How much to jitter brightness. brightness factor is randomly
chosen from `[max(0, 1 - brightness), 1 + brightness]`.


Inputs:
- **data**: input tensor with (H x W x C) shape.

Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, brightness):
super(RandomBrightness, self).__init__()
Expand All @@ -222,6 +337,19 @@ def hybrid_forward(self, F, x):
class RandomContrast(HybridBlock):
"""Randomly jitters image contrast with a factor
chosen from `[max(0, 1 - contrast), 1 + contrast]`.

Parameters
----------
contrast : float
How much to jitter contrast. contrast factor is randomly
chosen from `[max(0, 1 - contrast), 1 + contrast]`.


Inputs:
- **data**: input tensor with (H x W x C) shape.

Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, contrast):
super(RandomContrast, self).__init__()
Expand All @@ -234,6 +362,19 @@ def hybrid_forward(self, F, x):
class RandomSaturation(HybridBlock):
"""Randomly jitters image saturation with a factor
chosen from `[max(0, 1 - saturation), 1 + saturation]`.

Parameters
----------
saturation : float
How much to jitter saturation. saturation factor is randomly
chosen from `[max(0, 1 - saturation), 1 + saturation]`.


Inputs:
- **data**: input tensor with (H x W x C) shape.

Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, saturation):
super(RandomSaturation, self).__init__()
Expand All @@ -246,6 +387,19 @@ def hybrid_forward(self, F, x):
class RandomHue(HybridBlock):
"""Randomly jitters image hue with a factor
chosen from `[max(0, 1 - hue), 1 + hue]`.

Parameters
----------
hue : float
How much to jitter hue. hue factor is randomly
chosen from `[max(0, 1 - hue), 1 + hue]`.


Inputs:
- **data**: input tensor with (H x W x C) shape.

Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, hue):
super(RandomHue, self).__init__()
Expand Down Expand Up @@ -273,6 +427,13 @@ class RandomColorJitter(HybridBlock):
hue : float
How much to jitter hue. hue factor is randomly
chosen from `[max(0, 1 - hue), 1 + hue]`.


Inputs:
- **data**: input tensor with (H x W x C) shape.

Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
super(RandomColorJitter, self).__init__()
Expand All @@ -289,6 +450,13 @@ class RandomLighting(HybridBlock):
----------
alpha : float
Intensity of the image.


Inputs:
- **data**: input tensor with (H x W x C) shape.

Outputs:
- **out**: output tensor with same shape as `data`.
"""
def __init__(self, alpha):
super(RandomLighting, self).__init__()
Expand Down