Commit 69be927

Merge pull request #3183 from unnonouno/new-embed_id
New EmbedID
beam2d committed Aug 23, 2017
2 parents adba7b8 + 0bfdec6 commit 69be927
Showing 2 changed files with 174 additions and 8 deletions.
52 changes: 44 additions & 8 deletions chainer/functions/connection/embed_id.py
@@ -3,11 +3,11 @@
 
 import chainer
 from chainer import cuda
-from chainer import function
+from chainer import function_node
 from chainer.utils import type_check
 
 
-class EmbedIDFunction(function.Function):
+class EmbedIDFunction(function_node.FunctionNode):
 
     def __init__(self, ignore_label=None):
         self.ignore_label = ignore_label
@@ -25,7 +25,9 @@ def check_type_forward(self, in_types):
         )
 
     def forward(self, inputs):
+        self.retain_inputs((0,))
         x, W = inputs
+        self._w_shape = W.shape
 
         if not type_check.same_types(*inputs):
             raise ValueError('numpy and cupy must not be used together\n'
@@ -48,11 +50,25 @@ def forward(self, inputs):
 
         return W.take(x, axis=0),
 
-    def backward(self, inputs, grad_outputs):
+    def backward(self, indexes, grad_outputs):
+        inputs = self.get_retained_inputs()
+        gW = EmbedIDGrad(
+            self._w_shape, self.ignore_label).apply(inputs + grad_outputs)[0]
+        return None, gW
+
+
+class EmbedIDGrad(function_node.FunctionNode):
+
+    def __init__(self, w_shape, ignore_label=None):
+        self.w_shape = w_shape
+        self.ignore_label = ignore_label
+
+    def forward(self, inputs):
+        self.retain_inputs((0,))
         xp = cuda.get_array_module(*inputs)
-        x, W = inputs
-        gy = grad_outputs[0]
-        gW = xp.zeros_like(W)
+        x, gy = inputs
+        self._gy_shape = gy.shape
+        gW = xp.zeros(self.w_shape, dtype=gy.dtype)
 
         if xp is numpy:
             # It is equivalent to `numpy.add.at(gW, x, gy)` but ufunc.at is
@@ -81,7 +97,27 @@ def backward(self, inputs, grad_outputs):
                 'embed_id_bwd_ignore_label')(
                     gy, xp.expand_dims(x, -1), gW.shape[1],
                     self.ignore_label, gW)
-        return None, gW
+        return gW,
+
+    def backward(self, indexes, grads):
+        xp = cuda.get_array_module(*grads)
+        x = self.get_retained_inputs()[0].data
+        ggW = grads[0]
+
+        if self.ignore_label is not None:
+            mask = x == self.ignore_label
+            # To prevent index out of bounds, we need to check if ignore_label
+            # is inside of W.
+            if not (0 <= self.ignore_label < self.w_shape[1]):
+                x = xp.where(mask, 0, x)
+
+        ggy = ggW[x]
+
+        if self.ignore_label is not None:
+            mask, zero, _ = xp.broadcast_arrays(
+                mask[..., None], xp.zeros((), 'f'), ggy.data)
+            ggy = chainer.functions.where(mask, zero, ggy)
+        return None, ggy
 
 
 def embed_id(x, W, ignore_label=None):
@@ -131,4 +167,4 @@ def embed_id(x, W, ignore_label=None):
            [ 0., 0., 0.]], dtype=float32)
 
     """
-    return EmbedIDFunction(ignore_label=ignore_label)(x, W)
+    return EmbedIDFunction(ignore_label=ignore_label).apply((x, W))[0]
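
The CPU branch of EmbedIDGrad.forward notes that it is equivalent to `numpy.add.at(gW, x, gy)`: the weight gradient is a scatter-add of each upstream gradient row into the row of W selected by the corresponding id. A minimal NumPy sketch of that accumulation, using made-up shapes (a 3-row weight matrix and a (2, 3) batch of ids), not code from this commit:

    import numpy as np

    x = np.array([[0, 1, 0], [1, 0, 1]], dtype=np.int32)   # ids, shape (2, 3)
    gy = np.ones(x.shape + (2,), dtype=np.float32)         # upstream grad, shape (2, 3, 2)
    gW = np.zeros((3, 2), dtype=gy.dtype)                   # grad of W, shape (3, 2)

    # Unbuffered scatter-add: every occurrence of an id accumulates its gy row.
    np.add.at(gW, x, gy)

    print(gW)
    # [[3. 3.]    id 0 appears three times
    #  [3. 3.]    id 1 appears three times
    #  [0. 0.]]   id 2 never appears

The diff's comment (truncated here) points at ufunc.at's performance as the reason the commit keeps a hand-written accumulation loop on CPU and a custom kernel on GPU instead.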
130 changes: 130 additions & 0 deletions tests/chainer_tests/functions_tests/connection_tests/test_embed_id.py
@@ -0,0 +1,130 @@
import unittest

import numpy

import chainer
from chainer import cuda
from chainer.functions.connection import embed_id
from chainer import gradient_check
from chainer import testing
from chainer.testing import attr
from chainer.testing import condition


@testing.parameterize(
{'x_data': [0, 1, 0], 'ignore_label': None},
{'x_data': [[0, 1, 0], [1, 0, 1]], 'ignore_label': None},
{'x_data': [0, 1, -1], 'ignore_label': -1},
{'x_data': [[0, 1, -1], [-1, 0, 1]], 'ignore_label': -1},
)
class TestEmbedID(unittest.TestCase):

def setUp(self):
self.x = numpy.array(self.x_data, dtype=numpy.int32)
self.W = numpy.random.uniform(-1, 1, (3, 2)).astype('f')
y_shape = self.x.shape + (2,)
self.gy = numpy.random.uniform(-1, 1, y_shape).astype(numpy.float32)
self.ggW = numpy.random.uniform(-1, 1, (3, 2)).astype('f')

def check_forward(self, x_data, W_data):
x = chainer.Variable(x_data)
W = chainer.Variable(W_data)
y = chainer.functions.embed_id(x, W, self.ignore_label)
self.assertEqual(y.data.dtype, numpy.float32)

y_expect = numpy.empty_like(self.gy)
for i in numpy.ndindex(self.x.shape):
if self.x[i] == -1:
y_expect[i] = 0
else:
y_expect[i] = self.W[int(self.x[i])]

testing.assert_allclose(y_expect, y.data, atol=0, rtol=0)

@condition.retry(3)
def test_forward_cpu(self):
self.check_forward(self.x, self.W)

@attr.gpu
@condition.retry(3)
def test_forward_gpu(self):
self.check_forward(cuda.to_gpu(self.x), cuda.to_gpu(self.W))

def check_backward(self, x_data, W_data, y_grad):
def f(x, W):
return chainer.functions.embed_id(x, W, self.ignore_label)

gradient_check.check_backward(f, (x_data, W_data), y_grad)

@condition.retry(3)
def test_backward_cpu(self):
self.check_backward(self.x, self.W, self.gy)

@attr.gpu
@condition.retry(3)
def test_backward_gpu(self):
self.check_backward(
cuda.to_gpu(self.x), cuda.to_gpu(self.W), cuda.to_gpu(self.gy))

def check_double_backward(self, x_data, W_data, gy_data, ggW_data):
def f(W):
y = chainer.functions.embed_id(
x_data, W, self.ignore_label)
return y * y

gradient_check.check_double_backward(
f, W_data, gy_data, ggW_data)

@condition.retry(3)
def test_double_backward_cpu(self):
self.check_double_backward(self.x, self.W, self.gy, self.ggW)

@attr.gpu
@condition.retry(3)
def test_double_backward_gpu(self):
self.check_double_backward(
cuda.to_gpu(self.x), cuda.to_gpu(self.W), cuda.to_gpu(self.gy),
cuda.to_gpu(self.ggW))


@testing.parameterize(
{'x_data': [0, 1, 0], 'ignore_label': None},
{'x_data': [[0, 1, 0], [1, 0, 1]], 'ignore_label': None},
{'x_data': [0, 1, -1], 'ignore_label': -1},
{'x_data': [[0, 1, -1], [-1, 0, 1]], 'ignore_label': -1},
{'x_data': [0, 1, 2], 'ignore_label': 2},
{'x_data': [[0, 1, 0], [1, 0, 1]], 'ignore_label': 1},
)
class TestEmbedIdGrad(unittest.TestCase):

n_unit = (4,)
w_shape = (4, 2)

def setUp(self):
self.x = numpy.array(self.x_data, dtype='i')
self.gy = numpy.random.uniform(
-1, 1, self.x.shape + (2,)).astype('f')
self.ggW = numpy.random.uniform(-1, 1, self.w_shape).astype('f')

    def check_backward(self, x, gy, ggW):
        def f(x, gy):
            emb = embed_id.EmbedIDGrad(
                self.w_shape, self.ignore_label)
            return emb.apply((x, gy))[0]

        gradient_check.check_backward(f, (x, gy), (ggW,))

@condition.retry(3)
def test_backward_cpu(self):
self.check_backward(self.x, self.gy, self.ggW)

@attr.gpu
@condition.retry(3)
def test_backward_gpu(self):
self.check_backward(
cuda.to_gpu(self.x), cuda.to_gpu(self.gy), cuda.to_gpu(self.ggW))


testing.run_module(__name__, __file__)
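
The check_double_backward tests above exercise the point of this change: because EmbedIDFunction.backward is now built from another FunctionNode (EmbedIDGrad), the gradient with respect to W is itself part of the computational graph and can be differentiated again. A rough usage sketch, assuming a Chainer release that ships chainer.grad; the data and shapes below are made up for illustration:

    import numpy as np
    import chainer
    import chainer.functions as F

    x = np.array([[0, 1, 0], [1, 0, 1]], dtype=np.int32)
    W = chainer.Variable(np.random.uniform(-1, 1, (3, 2)).astype(np.float32))

    y = F.embed_id(x, W)          # lookup, shape (2, 3, 2)
    loss = F.sum(y * y)

    # First-order gradient w.r.t. W, keeping the graph so it can be
    # differentiated again (this is what the new EmbedIDGrad node enables).
    gW, = chainer.grad([loss], [W], enable_double_backprop=True)

    # Second-order gradient: differentiate a scalar function of gW w.r.t. W.
    ggW, = chainer.grad([F.sum(gW * gW)], [W])
    print(ggW.shape)  # (3, 2)

With the old Function-based implementation the backward pass worked on plain arrays, so a second differentiation through embed_id was not possible.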
