Merge pull request #3096 from beam2d/double-grad-check
Add a second-order gradient check
unnonouno committed Aug 18, 2017
2 parents b9b12f9 + d22faa5 commit 383ead7
Showing 2 changed files with 111 additions and 13 deletions.
91 changes: 79 additions & 12 deletions chainer/gradient_check.py
@@ -239,18 +239,7 @@ def check_backward(func, x_data, y_grad, params=(),
     # Note that `func` may not be a `Function` object.
     y = identity.Identity().apply(y)

-    if y_grad is not None:
-        if len(y) != len(y_grad):
-            raise ValueError(
-                '`y_grad` must have the same length of output values')
-        for iy, igy in six.moves.zip(y, y_grad):
-            iy.grad = igy
-    else:
-        if len(y) != 1:
-            raise ValueError(
-                'When `y_grad` is `None`, the function must return a'
-                'zero-dimentional array')
-        y_grad = (1,)
+    y_grad = _set_y_grad(y, y_grad)

     # We only need to call `backward` for one result `Variable`.
     # `Variable.backward` method calls `Function.backward` of its creator.
@@ -337,3 +326,81 @@ def g():
         gx_accum += gpi.dot(pi)

     testing.assert_allclose(gx, gx_accum, atol=atol, rtol=rtol)
+
+
+def check_double_backward(func, x_data, y_grad, x_grad_grad, params=(),
+                          eps=1e-3, atol=1e-4, rtol=1e-3, no_grads=None,
+                          dtype=None):
"""Test twice differentiation of a given procedure.
This function automatically checks if the backward procedure of ``func``
is correctly implemented for further differentiation. It first computes the
gradient of ``func`` w.r.t. its inputs in the same way as
:func:`~chainer.gradient_check.check_backward`. This function then further
invokes the backward procedure against the gradient variables, starting
from the initial gradient given by ``x_grad_grad``. It also computes the
second gradient using :func:`~chainer.gradient_check.numerical_grad`. The
resulting gradients are compared to confirm if the second-order gradients
are approximately correct.
Note that this function **DOES NOT** check if the first-order
differentiation is correct; the numerical gradient assumes that the
first-order gradient given by the usual :meth:`chainer.Variable.backward`
is correct. The implementation of each differentiable function should be
tested by :func:`~chainer.gradient_check.check_backward` first, and then
should be tested by this function if neccessary.
For the details of the arguments, see
:func:`~chainer.gradient_check.check_backward`. The additional argument
``x_grad_grad`` is a (tuple of) :class:`~chainer.Variable` (s) that
includes the initial gradient corresponding to the first-order gradient of
each input. Note that the default error tolerance ``atol`` and ``rtol`` are
slightly larger than those of
:func:`~chainer.gradient_check.check_backward` because the numerical
gradients of the second order differentiation are less accurate than those
of the first order gradients.
"""
+    x_data = _as_tuple(x_data)
+    n_x = len(x_data)
+
+    def first_order_grad(*inputs):
+        xs = inputs[:n_x]
+        gys = inputs[n_x:]
+
+        y = _as_tuple(func(*xs))
+        # Let all elements of y share the same creator.
+        # See the comment in check_backward.
+        y = identity.Identity().apply(y)
+
+        _set_y_grad(y, gys)
+        y[0].backward()
+
+        ret = tuple([x.grad_var for x in xs])
+        for x in xs:
+            x.grad_var = None
+        return ret
+
+    inputs = x_data + _as_tuple(y_grad)
+    check_backward(first_order_grad, inputs, x_grad_grad, params=params,
+                   eps=eps, atol=atol, rtol=rtol, no_grads=no_grads,
+                   dtype=dtype)
+
+
+def _set_y_grad(y, y_grad):
+    if y_grad is not None:
+        if len(y) != len(y_grad):
+            raise ValueError(
+                '`y_grad` must have the same length of output values')
+        for iy, igy in six.moves.zip(y, y_grad):
+            if isinstance(igy, variable.Variable):
+                iy.grad_var = igy
+            else:
+                iy.grad = igy
+    else:
+        if len(y) != 1:
+            raise ValueError(
+                'When `y_grad` is `None`, the function must return a '
+                'zero-dimensional array')
+        y_grad = (1,)
+    return y_grad
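
For reference, a minimal usage sketch of the new API (illustrative only, not taken from this commit; the names ``f``, ``x``, ``gy`` and ``ggx`` are placeholders, and the checked function uses only basic arithmetic, which supports double backpropagation as exercised by the test below):

import numpy

from chainer import gradient_check


def f(x):
    # Plain Variable arithmetic, so the backward pass is itself differentiable.
    return x * x * x


x = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)
gy = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)
ggx = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)

# Compares the analytically backpropagated second-order gradients against
# numerical differentiation of the first-order gradient.
gradient_check.check_double_backward(f, x, gy, ggx)

Here ``gy`` matches the shape of the output of ``f`` and ``ggx`` matches the shape of its input, just as ``y_grad`` does in ``check_backward``.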
33 changes: 32 additions & 1 deletion tests/chainer_tests/test_gradient_check.py
@@ -323,7 +323,7 @@ def backward(self, inputs, grads):
         return grads


-# numpy.float16 is not tested because it is low precision.
+# numpy.float16 is not tested because of the low precision.
 @testing.parameterize(*testing.product({
     'dtype': [None, numpy.float32, numpy.float64],
 }))
@@ -367,4 +367,35 @@ def f(x, y):
         gradient_check.check_backward(f, (x1, x2), g1, no_grads=[False, True])


+class NewIdent(chainer.FunctionNode):
+
+    def forward(self, inputs):
+        return inputs
+
+    def backward(self, indexes, grad_outputs):
+        return NewIdent().apply(grad_outputs)
+
+
+class TestCheckDoubleBackward(unittest.TestCase):
+
+    def check_multiple_input_output(self, xp):
+        arrays = xp.ones((6, 1), dtype='f')
+        x1, x2, gy1, gy2, ggx1, ggx2 = arrays
+
+        def f(x, y):
+            w1 = x + y
+            w2 = w1 + y
+            return w1 * w1, w2 * w2
+
+        gradient_check.check_double_backward(f, (x1, x2), (gy1, gy2),
+                                             (ggx1, ggx2))
+
+    def test_multiple_input_output_cpu(self):
+        self.check_multiple_input_output(numpy)
+
+    @attr.gpu
+    def test_multiple_input_output_gpu(self):
+        self.check_multiple_input_output(cuda.cupy)
+
+
 testing.run_module(__name__, __file__)
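
The ``NewIdent`` helper above shows the property that ``check_double_backward`` relies on: ``FunctionNode.backward`` must be composed of differentiable operations so that it can itself be backpropagated. A sketch of the same pattern for a non-identity function (illustrative only; ``Square`` is not part of the commit, and it assumes the usual ``retain_inputs``/``get_retained_inputs`` methods of ``FunctionNode``):

import numpy

import chainer
from chainer import gradient_check


class Square(chainer.FunctionNode):

    def forward(self, inputs):
        x, = inputs
        self.retain_inputs((0,))
        return x * x,

    def backward(self, indexes, grad_outputs):
        x, = self.get_retained_inputs()
        gy, = grad_outputs
        # Built from Variable arithmetic, so this backward can be
        # differentiated again by check_double_backward.
        return 2 * x * gy,


x = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)
gy = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)
ggx = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float64)

gradient_check.check_double_backward(
    lambda x: Square().apply((x,)), x, gy, ggx)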
