import numpy
import chainer
from chainer.backends import cuda
from chainer.backends import intel64
from chainer import configuration
from chainer import function_node
from chainer.utils import argument
from chainer.utils import type_check


class Dropout(function_node.FunctionNode):

    """Dropout regularization."""

    def __init__(self, dropout_ratio, mask=None):
        if not 0.0 <= dropout_ratio < 1.0:
            raise ValueError('dropout_ratio must be in the range [0, 1)')
        self.dropout_ratio = dropout_ratio
        self.mask = mask

    def check_type_forward(self, in_types):
        type_check.expect(in_types.size() == 1)
        type_check.expect(in_types[0].dtype.kind == 'f')

    def forward(self, x):
        # Use the iDeep kernel only when no precomputed mask is given;
        # iDeep generates its own mask internally.
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(x)
                and self.mask is None):
            return self._forward_ideep(x)

        if self.mask is not None:
            y = x[0] * self.mask
        else:
            # Inverted dropout: survivors are pre-scaled by 1 / (1 - ratio)
            # so that no rescaling is needed at test time.
            scale = x[0].dtype.type(1. / (1 - self.dropout_ratio))
            xp = cuda.get_array_module(*x)
            if xp == numpy:
                flag = xp.random.rand(*x[0].shape) >= self.dropout_ratio
                self.mask = scale * flag
                y = x[0] * self.mask
            else:
                # On GPU, fuse mask creation and multiplication into a
                # single elementwise kernel.
                rand = xp.random.rand(*x[0].shape, dtype=numpy.float32)
                self.mask, y = cuda.elementwise(
                    'T x, R r, T scale, T ratio', 'T mask, T y',
                    '''
                    mask = (r >= ratio) * scale;
                    y = x * mask;
                    ''',
                    'dropout_fwd',
                )(x[0], rand, scale, self.dropout_ratio)
        return y,

    def _forward_ideep(self, x):
        mask, y = intel64.ideep.dropout.Forward(
            intel64.ideep.array(x[0]),
            self.dropout_ratio)
        self.mask = mask
        return y,

    def backward(self, x, gy):
        return DropoutGrad(self.mask).apply(gy)
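

# Note: ``Dropout.forward`` above implements "inverted dropout". A pure-NumPy
# sketch of the same computation (illustrative only, not used by this module):
#
#     flag = numpy.random.rand(*x.shape) >= ratio  # keep with prob. 1 - ratio
#     mask = flag / (1.0 - ratio)                  # pre-scale the survivors
#     y = x * mask                                 # E[y] == x elementwise
#
# Because survivors are pre-scaled here, activations need no rescaling at
# test time.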


class DropoutGrad(function_node.FunctionNode):

    """Computes the gradient of the Dropout function."""

    def __init__(self, mask):
        self.mask = mask

    def forward(self, inputs):
        if (intel64.should_use_ideep('>=auto')
                and intel64.inputs_all_ready(inputs)):
            return self._forward_ideep(inputs)

        y = inputs[0] * self.mask
        return y,

    def _forward_ideep(self, inputs):
        return intel64.ideep.dropout.Backward(
            intel64.ideep.array(self.mask),
            intel64.ideep.array(inputs[0])),

    def backward(self, indexes, gy):
        return DropoutGrad(self.mask).apply(gy)
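

# Gradient sketch: since y = x * mask with a fixed mask, dx = gy * mask.
# ``DropoutGrad.backward`` returns another ``DropoutGrad``, so the same mask
# is reused at every differentiation order and double backprop is supported.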


def dropout(x, ratio=.5, **kwargs):
    """dropout(x, ratio=.5, *, mask=None, return_mask=False)

    Drops elements of input variable randomly.

    This function drops input elements randomly with probability ``ratio`` and
    scales the remaining elements by factor ``1 / (1 - ratio)``. In testing
    mode (i.e., ``chainer.config.train`` is set to ``False``), it does nothing
    and just returns ``x``.

    .. warning::

       ``train`` argument is not supported anymore since v2.
       Instead, use ``chainer.using_config('train', boolean)``.
       See :func:`chainer.using_config`.

    Args:
        x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`):
            Input variable. A :math:`(s_1, s_2, ..., s_N)` -shaped float
            array.
        ratio (float):
            Dropout ratio. The ``ratio`` must be ``0.0 <= ratio < 1.0``.
        mask (`ndarray` or None):
            The mask to be used for dropout.
            You do not have to specify this value, unless you need to make
            results deterministic.
            If ``mask`` is not specified or set to ``None``, a mask will be
            generated randomly according to the given ``ratio``.
            If ``mask`` is specified, ``ratio`` will be ignored.
            The shape and dtype must be the same as ``x`` and it should be on
            the same device.
            Note that iDeep will not be used for this function if ``mask`` is
            specified, as iDeep does not support it.
        return_mask (bool):
            If ``True``, the mask used for dropout is returned together with
            the output variable.
            The returned mask can later be reused by passing it to the
            ``mask`` argument.

    Returns:
        ~chainer.Variable or tuple:
            When ``return_mask`` is ``False`` (default), returns the output
            variable.
            When ``True``, returns the tuple of the output variable and
            mask (`ndarray`). The mask will be on the same device as the
            input. The mask will become ``None`` when ``chainer.config.train``
            is set to ``False``.

    See the paper by G. Hinton: `Improving neural networks by preventing \
    co-adaptation of feature detectors <https://arxiv.org/abs/1207.0580>`_.

    .. admonition:: Example

        >>> x = np.array([[-1, 0], [2, -3], [-2, 1]], np.float32)
        >>> with chainer.using_config('train', True):
        ...     y = F.dropout(x)
        >>> y.data
        array([[-2.,  0.],
               [ 4., -6.],
               [-0.,  2.]], dtype=float32)
        >>> with chainer.using_config('train', True):
        ...     y = F.dropout(x, ratio=0.0) \
        ... # dropout returns original input if ratio=0.0
        >>> (x == y.data).all()
        True
        >>> with chainer.using_config('train', False):
        ...     y = F.dropout(x) \
        ... # dropout in test mode returns original input
        >>> (x == y.data).all()
        True

    """
    mask = None
    return_mask = False
    if kwargs:
        argument.check_unexpected_kwargs(
            kwargs, train='train argument is not supported anymore. '
            'Use chainer.using_config')
        mask, return_mask = argument.parse_kwargs(
            kwargs, ('mask', mask), ('return_mask', return_mask))
        argument.assert_kwargs_empty(kwargs)

    if configuration.config.train:
        func = Dropout(ratio, mask)
        out, = func.apply((x,))
        mask = func.mask
    else:
        out = chainer.as_variable(x)
        mask = None

    if return_mask:
        return out, mask
    return out
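

# --- Illustrative usage sketch (not part of the original module) ---
# A minimal demo of the public API above: deterministic mask reuse via
# ``return_mask``/``mask`` and test-mode passthrough. Guarded so that
# importing this module stays side-effect free.
if __name__ == '__main__':
    import numpy as np
    import chainer.functions as F

    x = np.arange(1, 13, dtype=np.float32).reshape(3, 4)

    with chainer.using_config('train', True):
        # Draw a fresh mask and keep it for reuse.
        y1, m = F.dropout(x, ratio=0.5, return_mask=True)
        # Passing the mask back makes the result deterministic.
        y2 = F.dropout(x, ratio=0.5, mask=m)
        assert (y1.data == y2.data).all()

    with chainer.using_config('train', False):
        # In test mode dropout is the identity and no mask is used.
        y3 = F.dropout(x)
        assert (y3.data == x).all()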