Commit 69be927

Merge pull request #3183 from unnonouno/new-embed_id
New EmbedID
beam2d committed Aug 23, 2017
2 parents adba7b8 + 0bfdec6 commit 69be927
Showing 2 changed files with 174 additions and 8 deletions.
52 changes: 44 additions & 8 deletions chainer/functions/connection/embed_id.py
@@ -3,11 +3,11 @@
 
 import chainer
 from chainer import cuda
-from chainer import function
+from chainer import function_node
 from chainer.utils import type_check
 
 
-class EmbedIDFunction(function.Function):
+class EmbedIDFunction(function_node.FunctionNode):
 
     def __init__(self, ignore_label=None):
         self.ignore_label = ignore_label
@@ -25,7 +25,9 @@ def check_type_forward(self, in_types):
         )
 
     def forward(self, inputs):
+        self.retain_inputs((0,))
         x, W = inputs
+        self._w_shape = W.shape
 
         if not type_check.same_types(*inputs):
             raise ValueError('numpy and cupy must not be used together\n'
@@ -48,11 +50,25 @@ def forward(self, inputs):
 
         return W.take(x, axis=0),
 
-    def backward(self, inputs, grad_outputs):
+    def backward(self, indexes, grad_outputs):
+        inputs = self.get_retained_inputs()
+        gW = EmbedIDGrad(
+            self._w_shape, self.ignore_label).apply(inputs + grad_outputs)[0]
+        return None, gW
+
+
+class EmbedIDGrad(function_node.FunctionNode):
+
+    def __init__(self, w_shape, ignore_label=None):
+        self.w_shape = w_shape
+        self.ignore_label = ignore_label
+
+    def forward(self, inputs):
+        self.retain_inputs((0,))
         xp = cuda.get_array_module(*inputs)
-        x, W = inputs
-        gy = grad_outputs[0]
-        gW = xp.zeros_like(W)
+        x, gy = inputs
+        self._gy_shape = gy.shape
+        gW = xp.zeros(self.w_shape, dtype=gy.dtype)
 
         if xp is numpy:
             # It is equivalent to `numpy.add.at(gW, x, gy)` but ufunc.at is
@@ -81,7 +97,27 @@ def backward(self, inputs, grad_outputs):
                 'embed_id_bwd_ignore_label')(
                     gy, xp.expand_dims(x, -1), gW.shape[1],
                     self.ignore_label, gW)
-        return None, gW
+        return gW,
+
+    def backward(self, indexes, grads):
+        xp = cuda.get_array_module(*grads)
+        x = self.get_retained_inputs()[0].data
+        ggW = grads[0]
+
+        if self.ignore_label is not None:
+            mask = x == self.ignore_label
+            # To prevent index out of bounds, we need to check if ignore_label
+            # is inside of W.
+            if not (0 <= self.ignore_label < self.w_shape[1]):
+                x = xp.where(mask, 0, x)
+
+        ggy = ggW[x]
+
+        if self.ignore_label is not None:
+            mask, zero, _ = xp.broadcast_arrays(
+                mask[..., None], xp.zeros((), 'f'), ggy.data)
+            ggy = chainer.functions.where(mask, zero, ggy)
+        return None, ggy
 
 
 def embed_id(x, W, ignore_label=None):
@@ -131,4 +167,4 @@ def embed_id(x, W, ignore_label=None):
            [ 0., 0., 0.]], dtype=float32)
 
     """
-    return EmbedIDFunction(ignore_label=ignore_label)(x, W)
+    return EmbedIDFunction(ignore_label=ignore_label).apply((x, W))[0]
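
The CPU branch of EmbedIDGrad.forward notes that it is equivalent to `numpy.add.at(gW, x, gy)`: the weight gradient is a scatter-add of each upstream gradient row into the row of W selected by the corresponding id. A minimal NumPy sketch of that accumulation, using made-up shapes (a 3-row weight matrix and a (2, 3) batch of ids), not code from this commit:

    import numpy as np

    x = np.array([[0, 1, 0], [1, 0, 1]], dtype=np.int32)   # ids, shape (2, 3)
    gy = np.ones(x.shape + (2,), dtype=np.float32)         # upstream grad, shape (2, 3, 2)
    gW = np.zeros((3, 2), dtype=gy.dtype)                   # grad of W, shape (3, 2)

    # Unbuffered scatter-add: every occurrence of an id accumulates its gy row.
    np.add.at(gW, x, gy)

    print(gW)
    # [[3. 3.]    id 0 appears three times
    #  [3. 3.]    id 1 appears three times
    #  [0. 0.]]   id 2 never appears

The diff's comment (truncated here) points at ufunc.at's performance as the reason the commit keeps a hand-written accumulation loop on CPU and a custom kernel on GPU instead.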
130 changes: 130 additions & 0 deletions tests/chainer_tests/functions_tests/connection_tests/test_embed_id.py
@@ -0,0 +1,130 @@
import unittest

import numpy

import chainer
from chainer import cuda
from chainer.functions.connection import embed_id
from chainer import gradient_check
from chainer import testing
from chainer.testing import attr
from chainer.testing import condition


@testing.parameterize(
{'x_data': [0, 1, 0], 'ignore_label': None},
{'x_data': [[0, 1, 0], [1, 0, 1]], 'ignore_label': None},
{'x_data': [0, 1, -1], 'ignore_label': -1},
{'x_data': [[0, 1, -1], [-1, 0, 1]], 'ignore_label': -1},
)
class TestEmbedID(unittest.TestCase):

def setUp(self):
self.x = numpy.array(self.x_data, dtype=numpy.int32)
self.W = numpy.random.uniform(-1, 1, (3, 2)).astype('f')
y_shape = self.x.shape + (2,)
self.gy = numpy.random.uniform(-1, 1, y_shape).astype(numpy.float32)
self.ggW = numpy.random.uniform(-1, 1, (3, 2)).astype('f')

def check_forward(self, x_data, W_data):
x = chainer.Variable(x_data)
W = chainer.Variable(W_data)
y = chainer.functions.embed_id(x, W, self.ignore_label)
self.assertEqual(y.data.dtype, numpy.float32)

y_expect = numpy.empty_like(self.gy)
for i in numpy.ndindex(self.x.shape):
if self.x[i] == -1:
y_expect[i] = 0
else:
y_expect[i] = self.W[int(self.x[i])]

testing.assert_allclose(y_expect, y.data, atol=0, rtol=0)

@condition.retry(3)
def test_forward_cpu(self):
self.check_forward(self.x, self.W)

@attr.gpu
@condition.retry(3)
def test_forward_gpu(self):
self.check_forward(cuda.to_gpu(self.x), cuda.to_gpu(self.W))

def check_backward(self, x_data, W_data, y_grad):
def f(x, W):
return chainer.functions.embed_id(x, W, self.ignore_label)

gradient_check.check_backward(f, (x_data, W_data), y_grad)

@condition.retry(3)
def test_backward_cpu(self):
self.check_backward(self.x, self.W, self.gy)

@attr.gpu
@condition.retry(3)
def test_backward_gpu(self):
self.check_backward(
cuda.to_gpu(self.x), cuda.to_gpu(self.W), cuda.to_gpu(self.gy))

def check_double_backward(self, x_data, W_data, gy_data, ggW_data):
def f(W):
y = chainer.functions.embed_id(
x_data, W, self.ignore_label)
return y * y

gradient_check.check_double_backward(
f, W_data, gy_data, ggW_data)

@condition.retry(3)
def test_double_backward_cpu(self):
self.check_double_backward(self.x, self.W, self.gy, self.ggW)

@attr.gpu
@condition.retry(3)
def test_double_backward_gpu(self):
self.check_double_backward(
cuda.to_gpu(self.x), cuda.to_gpu(self.W), cuda.to_gpu(self.gy),
cuda.to_gpu(self.ggW))


@testing.parameterize(
{'x_data': [0, 1, 0], 'ignore_label': None},
{'x_data': [[0, 1, 0], [1, 0, 1]], 'ignore_label': None},
{'x_data': [0, 1, -1], 'ignore_label': -1},
{'x_data': [[0, 1, -1], [-1, 0, 1]], 'ignore_label': -1},
{'x_data': [0, 1, 2], 'ignore_label': 2},
{'x_data': [[0, 1, 0], [1, 0, 1]], 'ignore_label': 1},
)
class TestEmbedIdGrad(unittest.TestCase):

n_unit = (4,)
w_shape = (4, 2)

def setUp(self):
self.x = numpy.array(self.x_data, dtype='i')
self.gy = numpy.random.uniform(
-1, 1, self.x.shape + (2,)).astype('f')
self.ggW = numpy.random.uniform(-1, 1, self.w_shape).astype('f')

    def check_backward(self, x, gy, ggW):
        def f(x, gy):
            emb = embed_id.EmbedIDGrad(
                self.w_shape, self.ignore_label)
            return emb.apply((x, gy))[0]

        gradient_check.check_backward(f, (x, gy), (ggW,))

@condition.retry(3)
def test_backward_cpu(self):
self.check_backward(self.x, self.gy, self.ggW)

@attr.gpu
@condition.retry(3)
def test_backward_gpu(self):
self.check_backward(
cuda.to_gpu(self.x), cuda.to_gpu(self.gy), cuda.to_gpu(self.ggW))


testing.run_module(__name__, __file__)
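
The check_double_backward tests above exercise the point of this change: because EmbedIDFunction.backward is now built from another FunctionNode (EmbedIDGrad), the gradient with respect to W is itself part of the computational graph and can be differentiated again. A rough usage sketch, assuming a Chainer release that ships chainer.grad; the data and shapes below are made up for illustration:

    import numpy as np
    import chainer
    import chainer.functions as F

    x = np.array([[0, 1, 0], [1, 0, 1]], dtype=np.int32)
    W = chainer.Variable(np.random.uniform(-1, 1, (3, 2)).astype(np.float32))

    y = F.embed_id(x, W)          # lookup, shape (2, 3, 2)
    loss = F.sum(y * y)

    # First-order gradient w.r.t. W, keeping the graph so it can be
    # differentiated again (this is what the new EmbedIDGrad node enables).
    gW, = chainer.grad([loss], [W], enable_double_backprop=True)

    # Second-order gradient: differentiate a scalar function of gW w.r.t. W.
    ggW, = chainer.grad([F.sum(gW * gW)], [W])
    print(ggW.shape)  # (3, 2)

With the old Function-based implementation the backward pass worked on plain arrays, so a second differentiation through embed_id was not possible.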
