Commit

Merge 1cdf7e8 into 383ead7
delta2323 committed Aug 18, 2017
2 parents 383ead7 + 1cdf7e8 commit 68c6f62
Showing 2 changed files with 113 additions and 18 deletions.
90 changes: 73 additions & 17 deletions chainer/functions/activation/relu.py
@@ -2,7 +2,7 @@

import chainer
from chainer import cuda
from chainer import function
from chainer import function_node
from chainer import utils
from chainer.utils import type_check

@@ -12,48 +12,103 @@
_mode = cudnn.cudnn.CUDNN_ACTIVATION_RELU


class ReLU(function.Function):
class ReLU(function_node.FunctionNode):

"""Rectified Linear Unit."""
# TODO(beam2d): Implement in-place version.

_use_cudnn = False

def check_type_forward(self, in_types):
type_check.expect(
in_types.size() == 1,
in_types[0].dtype.kind == 'f',
)

def forward_cpu(self, x):
self.retain_inputs(())
self.retain_outputs((0,))
return utils.force_array(numpy.maximum(x[0], 0, dtype=x[0].dtype)),

def forward_gpu(self, x):
if chainer.should_use_cudnn('==always') and x[0].flags.c_contiguous:
# cupy.activation_backward requires the input.
# So, we retain it for backward computation.
self.retain_inputs((0,))
self._use_cudnn = True
y = cudnn.activation_forward(x[0], _mode)
else:
self.retain_inputs(())
self._use_cudnn = False
y = cuda.cupy.maximum(x[0], 0)
self.retain_outputs((0,))
return y,

def backward_cpu(self, x, gy):
y = self.output_data[0]
return utils.force_array(gy[0] * (y > 0)),

def backward_gpu(self, x, gy):
y = self.output_data[0]
def backward(self, indexes, gy):
y = self.get_retained_outputs()[0]
if chainer.should_use_cudnn('==always') and self._use_cudnn:
gx = cudnn.activation_backward(x[0], y, gy[0], _mode)
x = self.get_retained_inputs()[0]
return ReLUGrad3().apply((x, y, gy[0]))
else:
gx = cuda.elementwise(
'T y, T gy', 'T gx',
'gx = y > 0 ? gy : (T)0',
'relu_bwd')(y, gy[0])
return ReLUGrad2().apply((y, gy[0]))
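
# A minimal sketch in plain NumPy (illustration only, not part of this module)
# of what the node above computes: forward is y = max(x, 0), and backward masks
# the upstream gradient with (y > 0).  Because backward() returns ReLUGrad2 /
# ReLUGrad3 nodes rather than raw arrays, this first-order gradient can itself
# be differentiated once more.
import numpy

x_demo = numpy.array([-1.0, 0.5, 2.0], dtype=numpy.float32)
gy_demo = numpy.array([0.1, 0.2, 0.3], dtype=numpy.float32)  # assumed upstream gradient
y_demo = numpy.maximum(x_demo, 0)      # forward:  [0. , 0.5, 2. ]
gx_demo = gy_demo * (y_demo > 0)       # backward: [0. , 0.2, 0.3]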


def _heaviside(x):
return utils.force_array((x.data > 0).astype(x.dtype))


class ReLUGrad2(function_node.FunctionNode):

def forward_cpu(self, inputs):
self.retain_inputs((0,))
b, c = inputs
y = (b > 0) * c
return utils.force_array(y, dtype=y.dtype),

def forward_gpu(self, inputs):
self.retain_inputs((0,))
b, c = inputs
gx = cuda.elementwise(
'T y, T gy', 'T gx',
'gx = y > 0 ? gy : (T)0',
'relu_bwd')(b, c)
return gx,

def backward(self, indexes, gy):
ret = []
if 0 in indexes:
ret.append(None)
if 1 in indexes:
b = self.get_retained_inputs()[0]
gc = gy[0] * _heaviside(b)
ret.append(gc)
return ret
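
# Note on the backward() contract used above: `indexes` lists which of this
# node's inputs need gradients, and backward() returns one entry per requested
# index, in order.  ReLUGrad2 therefore returns None for b (= y, which only
# supplies the 0/1 mask) and gy[0] * _heaviside(b) for c, the incoming
# first-order gradient.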


class ReLUGrad3(function_node.FunctionNode):

def forward_cpu(self, inputs):
self.retain_inputs((0,))
_, b, c = inputs
y = (b > 0) * c
return y,

def forward_gpu(self, inputs):
self.retain_inputs((1,))
a, b, c = inputs
assert chainer.should_use_cudnn('==always')
y = cudnn.activation_backward(a, b, c, _mode)
return y,

def backward(self, indexes, gy):
ret = []
if 0 in indexes:
ret.append(None)
if 1 in indexes:
ret.append(None)
if 2 in indexes:
b = self.get_retained_inputs()[0]
gc = gy[0] * _heaviside(b)
ret.append(gc)
return ret
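
# Quick NumPy check (illustration only) of why either retained input is a valid
# mask source: relu(x) > 0 exactly where x > 0, so the CPU path (which retains
# a = x) and the cuDNN path (which retains b = y) yield the same gradient mask.
import numpy

x_demo = numpy.array([-2.0, 0.0, 0.5, 3.0], dtype=numpy.float32)
assert numpy.array_equal(x_demo > 0, numpy.maximum(x_demo, 0) > 0)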


def relu(x):
"""Rectified Linear Unit function.
@@ -81,4 +136,5 @@ def relu(x):
(3, 2)
"""
return ReLU()(x)
y, = ReLU().apply((x,))
return y
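
# Usage sketch of the double backprop this commit enables (assuming the
# Chainer v3 API: Variable.backward(enable_double_backprop=True) and
# Variable.grad_var; the concrete values are illustrative only).
import numpy
import chainer
import chainer.functions as F

x = chainer.Variable(numpy.array([-1.0, 2.0], dtype=numpy.float32))
r = F.relu(x)
loss = F.sum(r * r)                  # same composition as the new double-backward test
loss.backward(enable_double_backprop=True)
gx = x.grad_var                      # gradient as a Variable, still on the graph
x.cleargrad()
F.sum(gx).backward()                 # second pass runs through ReLUGrad2 / ReLUGrad3
ggx = x.grad                         # -> [0., 2.]
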
@@ -25,6 +25,7 @@ def setUp(self):
if -0.1 < self.x[i] < 0.1:
self.x[i] = 0.5
self.gy = numpy.random.uniform(-1, 1, self.shape).astype(self.dtype)
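# The new ggx array below seeds gradient_check.check_double_backward; it acts
# as the gradient with respect to the first-order gradient gx.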
self.ggx = numpy.random.uniform(-1, 1, self.shape).astype(self.dtype)
self.check_backward_options = {}
if self.dtype == numpy.float16:
self.check_backward_options = {'dtype': numpy.float64}
@@ -60,7 +61,7 @@ def test_forward_gpu_no_cudnn(self):
def check_backward(self, x_data, y_grad, use_cudnn='always'):
with chainer.using_config('use_cudnn', use_cudnn):
gradient_check.check_backward(
functions.ReLU(), x_data, y_grad,
functions.relu, x_data, y_grad,
**self.check_backward_options)

@condition.retry(3)
@@ -83,6 +84,44 @@ def test_backward_gpu_non_contiguous(self):
def test_backward_cpu_no_cudnn(self):
self.check_backward(cuda.to_gpu(self.x), cuda.to_gpu(self.gy), 'never')

def check_double_backward(self, x_data, y_grad, x_grad_grad,
use_cudnn='always'):
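# relu alone is piecewise linear, so its second derivative is zero almost
# everywhere and a double-backward check on bare relu would only compare
# zeros; squaring the output makes the first-order gradient depend on x and
# gives the second-order check something non-trivial to verify.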
def f(x):
x = functions.relu(x)
return x * x

with chainer.using_config('use_cudnn', use_cudnn):
gradient_check.check_double_backward(
f, x_data, y_grad, x_grad_grad,
**self.check_backward_options)

@condition.retry(1)
def test_double_backward_cpu(self):
self.check_double_backward(self.x, self.gy, self.ggx)

@attr.gpu
@condition.retry(1)
def test_double_backward_gpu(self):
self.check_double_backward(cuda.to_gpu(self.x),
cuda.to_gpu(self.gy),
cuda.to_gpu(self.ggx))

@attr.gpu
@condition.retry(3)
def test_double_backward_gpu_non_contiguous(self):
self.check_double_backward(
cuda.cupy.asfortranarray(cuda.to_gpu(self.x)),
cuda.cupy.asfortranarray(cuda.to_gpu(self.gy)),
cuda.cupy.asfortranarray(cuda.to_gpu(self.ggx)))

@attr.gpu
@condition.retry(3)
def test_double_backward_cpu_no_cudnn(self):
self.check_double_backward(cuda.to_gpu(self.x),
cuda.to_gpu(self.gy),
cuda.to_gpu(self.ggx),
'never')


@testing.parameterize(*testing.product({
'use_cudnn': ['always', 'auto', 'never'],
