import warnings
import weakref
import six
from chainer.backends import cuda
from chainer import configuration
# for backward compatibility
from chainer.function_hook import FunctionHook # NOQA
from chainer import function_node
from chainer import variable
def no_backprop_mode():
"""Make a context manager which disables back-propagation.
    In this context, Chainer does not build a computational graph, which has
    the benefit of reduced memory consumption. However, a
    :class:`~chainer.Variable` created in this context does not hold a
    reference to the :class:`~chainer.FunctionNode` that created it, so no
    gradients are accumulated by :func:`~chainer.Variable.backward`.
In the following example, ``y`` is created in this context, which means
that calling :func:`~chainer.Variable.backward` on ``y`` has no effect on
the gradients of ``x``.
>>> x = chainer.Variable(np.array([1,], np.float32))
>>> with chainer.no_backprop_mode():
... y = x + 1
>>> y.backward()
>>> x.grad is None
True
.. seealso::
See :func:`force_backprop_mode` for details on how to override this
context.
"""
return configuration.using_config('enable_backprop', False)
def force_backprop_mode():
"""Make a context manager which enables back-propagation.
    When you want to enable back-propagation inside a :func:`no_backprop_mode`
    context, call this method. A :class:`~chainer.Variable` created in this
    context always has a computational graph unless overridden by deeper
    contexts. If you call this method outside of a :func:`no_backprop_mode`
    context, it changes nothing.
In the following example, ``y`` has a computational graph and calling
:func:`~chainer.Variable.backward` on ``y`` will compute and accumulate the
gradients of the variables in the graph, in this case only ``x``.
>>> x = chainer.Variable(np.array([1,], np.float32))
>>> with chainer.no_backprop_mode():
... with chainer.force_backprop_mode():
... y = x + 1
>>> y.backward()
>>> x.grad
array([1.], dtype=float32)
.. seealso::
See :func:`no_backprop_mode` for details on disabled back-propagation
mode.
"""
return configuration.using_config('enable_backprop', True)
class FunctionAdapter(function_node.FunctionNode):
"""Adapter class to wrap Function with FunctionNode.
    While :class:`~chainer.FunctionNode` provides the interface of
    new-style differentiable functions, the old-style
    :class:`~chainer.Function` can still be used for backward
    compatibility.
    This class provides an adapter between the two interfaces; it adds the
    :class:`~chainer.FunctionNode` interface to any
    :class:`~chainer.Function` object by delegation.
.. note::
The ownership of :class:`FunctionAdapter` and :class:`~chainer.Function`
is a bit tricky.
        At initialization, the :class:`FunctionAdapter` is owned by the
        :class:`~chainer.Function` object.
        Once the function is applied to variables, the ownership is reversed;
        the adapter becomes the owner of the :class:`~chainer.Function`
        object, and the :class:`~chainer.Function` object keeps only a weak
        reference to the adapter.
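
    A minimal sketch of the delegation (``Twice`` is a hypothetical
    example, not part of Chainer):

    >>> class Twice(chainer.Function):
    ...     def forward(self, inputs):
    ...         x, = inputs
    ...         return x * 2,
    >>> f = Twice()
    >>> isinstance(f.node, chainer.FunctionAdapter)
    True
    >>> f.node.function is f
    True
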
Args:
function (~chainer.Function): The function object to wrap.
.. versionadded:: 3.0.0
"""
_function = None
_weak_function = None
def __init__(self, function):
super(FunctionAdapter, self).__init__()
self._weak_function = weakref.ref(function)
function._owned_node = self
@property
def function(self):
"""The :class:`Function` object that this adapter is wrapping."""
func = self._function
if func is not None:
return func
weak_func = self._weak_function
return weak_func and weak_func()
@property
def label(self):
return self._function.label
@property
def _impl_name(self):
return self._function.__class__.__name__
def check_type_forward(self, in_types):
self._function.check_type_forward(in_types)
def forward(self, inputs):
# Retain all inputs by default in old-style functions.
self.retain_inputs(six.moves.range(len(inputs)))
return self._function.forward(inputs)
def backward(self, target_input_indexes, grad_outputs):
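        # Unwrap the input variable nodes and the gradient variables into raw
        # arrays, since the wrapped old-style Function.backward operates on
        # arrays rather than Variable objects.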
in_data = tuple([input.data for input in self.inputs])
grad_out_data = tuple([None if grad is None else grad.data
for grad in grad_outputs])
with cuda.get_device_from_array(*(in_data + grad_out_data)):
gxs = self._function.backward(in_data, grad_out_data)
for x, gx in six.moves.zip(self.inputs, gxs):
variable._check_grad_type(self, x, gx)
ret = []
for i in target_input_indexes:
if gxs[i] is None:
g = None
else:
# Intentionally not passing requires_grad=False so that
# backprop routines can raise an error when a further backprop
# is attempted against this gradient variable.
g = variable.Variable(gxs[i])
g.node._old_style_grad_generator = self._function.label
ret.append(g)
return tuple(ret)
class Function(object):
"""Old-style interface of a differentiable function.
    This class provides an interface to implement an old-style differentiable
    function (i.e., a function whose application is recorded in the
    computational graph). A subclass of :class:`Function` that implements
    :meth:`forward` and :meth:`backward` can be used to run the forward
    computation and automatically induce the backpropagation procedure.
There is another way to implement such a function: subclassing
:class:`~chainer.FunctionNode`. There are mainly two
differences between them.
    1. The *differentiable backprop* is available for
       :class:`~chainer.FunctionNode`, while it is not for :class:`Function`,
       because the :meth:`backward` of the latter directly operates on the
       arrays instead of :class:`Variable` objects, and thus cannot record
       the history of the computation.
    2. The information passed to :meth:`backward` is different. A
       :class:`~chainer.FunctionNode` is told which inputs it has to compute
       the gradients w.r.t., so that it can omit unnecessary computations,
       while a :class:`Function` always has to compute gradients w.r.t. all
       the input nodes.
The :class:`~chainer.FunctionNode` also accepts the
current gradient values of the input nodes so that the accumulation
work can be merged with the gradient computation if an efficient kernel
is available.
This class uses :class:`~chainer.FunctionAdapter` to convert
the interface to that of :class:`~chainer.FunctionNode` and
adds the :class:`~chainer.FunctionNode` object to the
computational graph.
See :class:`~chainer.FunctionNode` for the details of
building the computational graph in Chainer.
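
    A minimal sketch of an old-style function (``SquareOldStyle`` is a
    hypothetical example, not part of Chainer):

    >>> class SquareOldStyle(chainer.Function):
    ...     def forward(self, inputs):
    ...         x, = inputs
    ...         return x * x,
    ...     def backward(self, inputs, grad_outputs):
    ...         x, = inputs
    ...         gy, = grad_outputs
    ...         return 2 * x * gy,
    >>> x = chainer.Variable(np.array([3.], np.float32))
    >>> y = SquareOldStyle()(x)
    >>> y.backward()
    >>> x.grad
    array([6.], dtype=float32)
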
"""
_node = None
_owned_node = None
def __call__(self, *inputs):
"""Applies forward propagation with chaining backward references.
This method creates a new :class:`~chainer.FunctionAdapter`
object and runs the forward propagation using it.
See :class:`~chainer.FunctionNode` for the detailed
behavior of building the computational graph.
Args:
inputs: Tuple of input :class:`Variable`, :class:`numpy.ndarray` or
:class:`cupy.ndarray` objects.
                If the input is a :class:`numpy.ndarray` or a
                :class:`cupy.ndarray`, it is automatically wrapped with
                :class:`Variable`.
Returns:
One :class:`Variable` object or a tuple of multiple
:class:`Variable` objects.
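
        Plain arrays are wrapped automatically. For example (reusing the
        hypothetical ``SquareOldStyle`` function from the class docstring):

        >>> y = SquareOldStyle()(np.array([2.], np.float32))
        >>> y.data
        array([4.], dtype=float32)
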
"""
node = self.node
# Swap the ownership
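        # (the adapter node now holds a strong reference to this Function,
        # while the Function keeps only a weak reference to the node)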
node._function = self
node._weak_function = None
self._node = weakref.ref(node)
self._owned_node = None
ret = node.apply(inputs)
if len(ret) == 1:
return ret[0]
else:
return tuple(ret)
@property
def inputs(self):
"""The input nodes of the function."""
return self.node.inputs
@property
def outputs(self):
"""Weak references to the output nodes of the function."""
return self.node.outputs
@property
def node(self):
"""The :class:`FunctionAdapter` object that wraps this Function.
If the Function does not have a node object, this property
automatically creates a new one.
"""
noderef = self._node
nd = (noderef and noderef()) or self._owned_node
if nd is not None:
return nd
nd = FunctionAdapter(self)
self._owned_node = nd
return nd
@property
def local_function_hooks(self):
"""Ordered Dictionary of registered function hooks.
See :attr:`FunctionNode.local_function_hooks` for the detail.
"""
return self.node.local_function_hooks
@property
def label(self):
"""Short text that represents the function.
The default implementation returns its type name.
Each function should override it to give more information.
"""
return self.__class__.__name__
@property
def output_data(self):
"""A tuple of the retained output arrays.
It has the same length as the :attr:`outputs`. Elements that are not
retained are set to ``None``.
"""
return self.node.output_data
@property
def rank(self):
"""The topological ordinal of the corresponding function node."""
return self.node.rank
@property
def stack(self):
return self.node.stack
def check_type_forward(self, in_types):
"""Checks types of input data before forward propagation.
        This method is called before :meth:`forward` is called. You need to
        validate the types of the input data in this method using
        :ref:`the type checking utilities <type-check-utils>`.
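
        For example (a hypothetical sketch extending the ``SquareOldStyle``
        example from the class docstring, using
        :mod:`chainer.utils.type_check`):

        >>> from chainer.utils import type_check
        >>> class SquareOldStyle(chainer.Function):
        ...     def check_type_forward(self, in_types):
        ...         type_check.expect(in_types.size() == 1)
        ...         type_check.expect(in_types[0].dtype == np.float32)
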
Args:
in_types (~chainer.utils.type_check.TypeInfoTuple): The type
information of input data for :meth:`forward`.
"""
pass
def forward(self, inputs):
"""Applies forward propagation to input arrays.
        It delegates the procedure to :meth:`forward_cpu` or
        :meth:`forward_gpu` by default; which one is selected depends on the
        types of the input arrays.
Implementations of :class:`Function` must implement either CPU/GPU
methods or this method.
Args:
inputs: Tuple of input array(s).
Returns:
Tuple of output array(s).
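
        A device-agnostic implementation may also override this method
        directly, dispatching with
        :func:`chainer.backends.cuda.get_array_module` (``Tanh`` here is a
        hypothetical sketch, not the actual Chainer implementation):

        >>> class Tanh(chainer.Function):
        ...     def forward(self, inputs):
        ...         xp = chainer.backends.cuda.get_array_module(*inputs)
        ...         x, = inputs
        ...         return xp.tanh(x),
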
.. warning::
Implementations of :class:`Function` must take care that the
return value must be a tuple even if it returns only one array.
"""
if any(isinstance(x, cuda.ndarray) for x in inputs):
return self.forward_gpu(inputs)
else:
return self.forward_cpu(inputs)
def forward_cpu(self, inputs):
"""Applies forward propagation to input arrays on CPU.
Args:
inputs: Tuple of :class:`numpy.ndarray` object(s).
Returns:
tuple: Tuple of :class:`numpy.ndarray` object(s).
.. warning::
Implementations of :class:`Function` must take care that the
return value must be a tuple even if it returns only one array.
"""
raise NotImplementedError()
def forward_gpu(self, inputs):
"""Applies forward propagation to input arrays on GPU.
Args:
inputs: Tuple of :class:`cupy.ndarray` object(s).
Returns:
tuple: Tuple of :class:`cupy.ndarray` object(s).
.. warning::
Implementations of :class:`Function` must take care that the
return value must be a tuple even if it returns only one array.
"""
raise NotImplementedError()
def backward(self, inputs, grad_outputs):
"""Applies backprop to output gradient arrays.
        It delegates the procedure to :meth:`backward_cpu` or
        :meth:`backward_gpu` by default; which one is selected depends on the
        types of the input arrays and the output gradient arrays.
        Implementations of :class:`Function` must implement either the
        CPU/GPU methods or this method if the function is intended to support
        backprop.
Args:
inputs: Tuple of input arrays.
grad_outputs: Tuple of output gradient arrays.
Returns:
            tuple: Tuple of input gradient arrays. Some or all of them can be
                ``None``, if the function is not differentiable on the
                corresponding inputs.
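
        For example, a gradient entry can be ``None`` for an input that is
        treated as a constant (``Scale`` is a hypothetical sketch):

        >>> class Scale(chainer.Function):
        ...     def forward(self, inputs):
        ...         x, c = inputs  # ``c`` is treated as a constant
        ...         return x * c,
        ...     def backward(self, inputs, grad_outputs):
        ...         gy, = grad_outputs
        ...         return gy * inputs[1], None  # no gradient w.r.t. ``c``
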
.. warning::
Implementations of :class:`Function` must take care that the
return value must be a tuple even if it returns only one array.
"""
if any(isinstance(x, cuda.ndarray) for x in inputs + grad_outputs):
return self.backward_gpu(inputs, grad_outputs)
else:
return self.backward_cpu(inputs, grad_outputs)
def backward_cpu(self, inputs, grad_outputs):
"""Applies backprop to output gradient arrays on CPU.
Args:
inputs: Tuple of input :class:`numpy.ndarray` object(s).
grad_outputs: Tuple of output gradient :class:`numpy.ndarray`
object(s).
Returns:
tuple: Tuple of input gradient :class:`numpy.ndarray` object(s).
Some or all of them can be ``None``, if the function is not
differentiable on corresponding inputs.
.. warning::
Implementations of :class:`Function` must take care that the
return value must be a tuple even if it returns only one array.
"""
return tuple(None for _ in inputs)
def backward_gpu(self, inputs, grad_outputs):
"""Applies backprop to output gradient arrays on GPU.
Args:
inputs: Tuple of input :class:`cupy.ndarray`
object(s).
grad_outputs: Tuple of output gradient
:class:`cupy.ndarray` object(s).
Returns:
tuple: Tuple of input gradient :class:`cupy.ndarray`
object(s). Some or all of them can be ``None``, if the function is
not differentiable on corresponding inputs.
.. warning::
Implementations of :class:`Function` must take care that the
return value must be a tuple even if it returns only one array.
"""
return tuple(None for _ in inputs)
def unchain(self):
"""Purges in/out nodes and this function itself from the graph.
See :meth:`FunctionNode.unchain() <chainer.FunctionNode.unchain>`
for the detail.
"""
self.node.unchain()
def add_hook(self, hook, name=None):
"""Registers a function hook.
See :meth:`FunctionNode.add_hook` for the detail.
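
        For example (a hypothetical sketch, assuming ``f`` is a
        :class:`Function` instance):

        >>> f.add_hook(chainer.function_hooks.TimerHook(), name='timer')
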
Args:
hook(~chainer.FunctionHook):
Function hook to be registered.
            name(str): Name of the function hook. The name must be unique
                among the function hooks registered to the function. If
                ``None``, the default name of the function hook is used.
"""
self.node.add_hook(hook, name)
def delete_hook(self, name):
"""Unregisters the specified function hook.
Args:
name(str): the name of the function hook
to be unregistered.
"""
self.node.delete_hook(name)
def retain_inputs(self, indexes):
"""Lets specified input variable nodes keep data arrays.
By calling this method from :meth:`forward`, the function can specify
which inputs are required for backprop.
If this method is not called, the function keeps all input arrays. If
you want to release all input arrays, call this method by passing an
empty sequence. *Note that this behavior is different from that of*
:meth:`FunctionNode.retain_inputs() \
<chainer.FunctionNode.retain_inputs>`.
Note that **this method must not be called from the outside of**
:meth:`forward`.
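
        A minimal sketch (``SquarePlusBias`` is a hypothetical example) of a
        function that only needs its first input in :meth:`backward`:

        >>> class SquarePlusBias(chainer.Function):
        ...     def forward(self, inputs):
        ...         self.retain_inputs((0,))  # backward only needs ``x``
        ...         x, b = inputs
        ...         return x * x + b,
        ...     def backward(self, inputs, grad_outputs):
        ...         x = inputs[0]
        ...         gy, = grad_outputs
        ...         return 2 * x * gy, gy
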
Args:
indexes (iterable of int): Indexes of input variables that the
function will require for backprop.
"""
self.node.retain_inputs(indexes)
def retain_outputs(self, indexes, retain_after_backward=False):
"""Lets specified output variable nodes keep data arrays.
        By calling this method from :meth:`forward`, the function can specify
        which outputs are required for backprop. If this method is not called,
        no output variable is marked to keep its data array at the point of
        returning from :meth:`__call__`. The retained arrays are stored in
        :attr:`output_data`.
.. note::
            It is STRONGLY RECOMMENDED to use this method if the function
            requires some or all output arrays in backprop. The function can
            also use output arrays just by keeping references to them
            directly, but doing so might degrade the performance of later
            function applications to the output variables.
Note that **this method must not be called from the outside of**
:meth:`forward`.
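
        A minimal sketch (``Exp`` here is a hypothetical example, not the
        actual Chainer implementation) of a function whose backward uses its
        own output:

        >>> class Exp(chainer.Function):
        ...     def forward(self, inputs):
        ...         self.retain_outputs((0,))
        ...         xp = chainer.backends.cuda.get_array_module(*inputs)
        ...         return xp.exp(inputs[0]),
        ...     def backward(self, inputs, grad_outputs):
        ...         y = self.output_data[0]  # retained in forward
        ...         return y * grad_outputs[0],
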
Args:
            indexes (iterable of int): Indexes of output variables that the
                function will require for backprop.
            retain_after_backward (bool): This option has no effect. It is
                left only for backward compatibility.
"""
if retain_after_backward:
warnings.warn('retain_after_backward option has no effect',
DeprecationWarning)
self.node.retain_outputs(indexes)