Merge pull request #3096 from beam2d/double-grad-check
Add a second-order gradient check
unnonouno committed Aug 18, 2017
2 parents b9b12f9 + d22faa5 commit 383ead7
Showing 2 changed files with 111 additions and 13 deletions.
91 changes: 79 additions & 12 deletions chainer/gradient_check.py
@@ -239,18 +239,7 @@ def check_backward(func, x_data, y_grad, params=(),
     # Note that `func` may not be a `Function` object.
     y = identity.Identity().apply(y)

-    if y_grad is not None:
-        if len(y) != len(y_grad):
-            raise ValueError(
-                '`y_grad` must have the same length of output values')
-        for iy, igy in six.moves.zip(y, y_grad):
-            iy.grad = igy
-    else:
-        if len(y) != 1:
-            raise ValueError(
-                'When `y_grad` is `None`, the function must return a'
-                'zero-dimentional array')
-        y_grad = (1,)
+    y_grad = _set_y_grad(y, y_grad)

     # We only need to call `backward` for one result `Variable`.
     # `Variable.backward` method calls `Function.backward` of its creator.
@@ -337,3 +326,81 @@ def g():
         gx_accum += gpi.dot(pi)

     testing.assert_allclose(gx, gx_accum, atol=atol, rtol=rtol)
+
+
+def check_double_backward(func, x_data, y_grad, x_grad_grad, params=(),
+                          eps=1e-3, atol=1e-4, rtol=1e-3, no_grads=None,
+                          dtype=None):
"""Test twice differentiation of a given procedure.
This function automatically checks if the backward procedure of ``func``
is correctly implemented for further differentiation. It first computes the
gradient of ``func`` w.r.t. its inputs in the same way as
:func:`~chainer.gradient_check.check_backward`. This function then further
invokes the backward procedure against the gradient variables, starting
from the initial gradient given by ``x_grad_grad``. It also computes the
second gradient using :func:`~chainer.gradient_check.numerical_grad`. The
resulting gradients are compared to confirm if the second-order gradients
are approximately correct.
Note that this function **DOES NOT** check if the first-order
differentiation is correct; the numerical gradient assumes that the
first-order gradient given by the usual :meth:`chainer.Variable.backward`
is correct. The implementation of each differentiable function should be
tested by :func:`~chainer.gradient_check.check_backward` first, and then
should be tested by this function if neccessary.
For the details of the arguments, see
:func:`~chainer.gradient_check.check_backward`. The additional argument
``x_grad_grad`` is a (tuple of) :class:`~chainer.Variable` (s) that
includes the initial gradient corresponding to the first-order gradient of
each input. Note that the default error tolerance ``atol`` and ``rtol`` are
slightly larger than those of
:func:`~chainer.gradient_check.check_backward` because the numerical
gradients of the second order differentiation are less accurate than those
of the first order gradients.
"""
+    x_data = _as_tuple(x_data)
+    n_x = len(x_data)
+
+    def first_order_grad(*inputs):
+        xs = inputs[:n_x]
+        gys = inputs[n_x:]
+
+        y = _as_tuple(func(*xs))
+        # Let all elements of y share the same creator.
+        # See the comment in check_backward.
+        y = identity.Identity().apply(y)
+
+        _set_y_grad(y, gys)
+        y[0].backward()
+
+        ret = tuple([x.grad_var for x in xs])
+        for x in xs:
+            x.grad_var = None
+        return ret
+
+    inputs = x_data + _as_tuple(y_grad)
+    check_backward(first_order_grad, inputs, x_grad_grad, params=params,
+                   eps=eps, atol=atol, rtol=rtol, no_grads=no_grads,
+                   dtype=dtype)
+
+
+def _set_y_grad(y, y_grad):
+    if y_grad is not None:
+        if len(y) != len(y_grad):
+            raise ValueError(
+                '`y_grad` must have the same length of output values')
+        for iy, igy in six.moves.zip(y, y_grad):
+            if isinstance(igy, variable.Variable):
+                iy.grad_var = igy
+            else:
+                iy.grad = igy
+    else:
+        if len(y) != 1:
+            raise ValueError(
+                'When `y_grad` is `None`, the function must return a '
+                'zero-dimensional array')
+        y_grad = (1,)
+    return y_grad
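
For reference, a minimal usage sketch of the new API (illustrative only, not taken from this commit; the names ``f``, ``x``, ``gy`` and ``ggx`` are placeholders, and the checked function uses only basic arithmetic, which supports double backpropagation as exercised by the test below):

import numpy

from chainer import gradient_check


def f(x):
    # Plain Variable arithmetic, so the backward pass is itself differentiable.
    return x * x * x


x = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)
gy = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)
ggx = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)

# Compares the analytically backpropagated second-order gradients against
# numerical differentiation of the first-order gradient.
gradient_check.check_double_backward(f, x, gy, ggx)

Here ``gy`` matches the shape of the output of ``f`` and ``ggx`` matches the shape of its input, just as ``y_grad`` does in ``check_backward``.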
33 changes: 32 additions & 1 deletion tests/chainer_tests/test_gradient_check.py
@@ -323,7 +323,7 @@ def backward(self, inputs, grads):
         return grads


-# numpy.float16 is not tested because it is low precision.
+# numpy.float16 is not tested because of the low precision.
 @testing.parameterize(*testing.product({
     'dtype': [None, numpy.float32, numpy.float64],
 }))
@@ -367,4 +367,35 @@ def f(x, y):
         gradient_check.check_backward(f, (x1, x2), g1, no_grads=[False, True])


+class NewIdent(chainer.FunctionNode):
+
+    def forward(self, inputs):
+        return inputs
+
+    def backward(self, indexes, grad_outputs):
+        return NewIdent().apply(grad_outputs)
+
+
+class TestCheckDoubleBackward(unittest.TestCase):
+
+    def check_multiple_input_output(self, xp):
+        arrays = xp.ones((6, 1), dtype='f')
+        x1, x2, gy1, gy2, ggx1, ggx2 = arrays
+
+        def f(x, y):
+            w1 = x + y
+            w2 = w1 + y
+            return w1 * w1, w2 * w2
+
+        gradient_check.check_double_backward(f, (x1, x2), (gy1, gy2),
+                                             (ggx1, ggx2))
+
+    def test_multiple_input_output_cpu(self):
+        self.check_multiple_input_output(numpy)
+
+    @attr.gpu
+    def test_multiple_input_output_gpu(self):
+        self.check_multiple_input_output(cuda.cupy)
+
+
 testing.run_module(__name__, __file__)
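
The ``NewIdent`` helper above shows the property that ``check_double_backward`` relies on: ``FunctionNode.backward`` must be composed of differentiable operations so that it can itself be backpropagated. A sketch of the same pattern for a non-identity function (illustrative only; ``Square`` is not part of the commit, and it assumes the usual ``retain_inputs``/``get_retained_inputs`` methods of ``FunctionNode``):

import numpy

import chainer
from chainer import gradient_check


class Square(chainer.FunctionNode):

    def forward(self, inputs):
        x, = inputs
        self.retain_inputs((0,))
        return x * x,

    def backward(self, indexes, grad_outputs):
        x, = self.get_retained_inputs()
        gy, = grad_outputs
        # Built from Variable arithmetic, so this backward can be
        # differentiated again by check_double_backward.
        return 2 * x * gy,


x = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)
gy = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)
ggx = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)

gradient_check.check_double_backward(
    lambda x: Square().apply((x,)), x, gy, ggx)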
