diff --git a/tests/chainer_tests/functions_tests/loss_tests/test_softmax_cross_entropy.py b/tests/chainer_tests/functions_tests/loss_tests/test_softmax_cross_entropy.py
index a2496caf7f62..73addc48894f 100644
--- a/tests/chainer_tests/functions_tests/loss_tests/test_softmax_cross_entropy.py
+++ b/tests/chainer_tests/functions_tests/loss_tests/test_softmax_cross_entropy.py
@@ -11,37 +11,10 @@
 from chainer.testing import attr
 
 
-@testing.parameterize(*(testing.product({
-    'shape': [None, (2, 3), (2, 3, 2), (2, 3, 2, 2)],
-    'cache_score': [True, False],
-    'normalize': [True, False],
-    'ignore_index': [None, (slice(None),), (0,), (0, 1), (0, 1, 0)],
-    'dtype': [numpy.float32],
-    'weight_apply': [False, True],
-    'enable_double_backprop': [False, True],
-    'label_dtype': [numpy.int32],
-}) + testing.product({
-    'shape': [None, (2, 3), (2, 3, 2), (2, 3, 2, 2)],
-    'cache_score': [False],
-    'normalize': [True],
-    'ignore_index': [(0, 1)],
-    'dtype': [numpy.float16, numpy.float32, numpy.float64],
-    'weight_apply': [False, True],
-    'enable_double_backprop': [False, True],
-    'label_dtype': [numpy.int8, numpy.int16, numpy.int32, numpy.int64],
-}) + testing.product({
-    'shape': [(0, 3), (0, 3, 2), (0, 3, 2, 2)],
-    'cache_score': [True, False],
-    'normalize': [True, False],
-    'ignore_index': [None],
-    'dtype': [numpy.float16, numpy.float32, numpy.float64],
-    'weight_apply': [False, True],
-    'enable_double_backprop': [False],
-    'label_dtype': [numpy.int32],
-})))
-class TestSoftmaxCrossEntropy(unittest.TestCase):
+class SoftmaxCrossEntropyTestBase(object):
 
     def setUp(self):
+        self.shape, self.ignore_index = self.shape_ignore
         if self.shape is None:
             if self.dtype == numpy.float16:
                 self.x = numpy.array([[-5, 1]], dtype=self.dtype)
@@ -56,37 +29,50 @@ def setUp(self):
         if (self.ignore_index is not None and
                 len(self.ignore_index) <= self.t.ndim):
             self.t[self.ignore_index] = -1
-        self.gy = numpy.random.uniform(-1, 1, ()).astype(self.x.dtype)
+        if self.reduce == 'mean':
+            self.gy = numpy.random.uniform(-1, 1, ()).astype(self.x.dtype)
+        else:
+            self.gy = numpy.random.uniform(
+                -1, 1, self.t.shape).astype(self.dtype)
         self.ggx = numpy.random.uniform(
             -1, 1, self.x.shape).astype(self.x.dtype)
-        if self.dtype == numpy.float16:
-            self.check_forward_options = {'atol': 1e-2, 'rtol': 1e-2}
-            self.check_backward_options = {
-                'dtype': numpy.float64, 'atol': 1e-2, 'rtol': 1e-2}
-        else:
-            self.check_forward_options = {'atol': 1e-3, 'rtol': 1e-3}
-            self.check_backward_options = {
-                'dtype': numpy.float64, 'atol': 5e-4, 'rtol': 5e-3}
+
         if self.weight_apply:
             self.class_weight = numpy.random.uniform(
                 0, 10, (self.x.shape[1],)).astype(self.dtype)
         else:
             self.class_weight = None
 
+        if self.dtype == numpy.float16:
+            self.check_forward_options = {'atol': 5e-4, 'rtol': 5e-3}
+            self.check_backward_options = {'atol': 5e-3, 'rtol': 5e-2}
+            self.check_double_backward_options = {'atol': 5e-3, 'rtol': 5e-2}
+        else:
+            self.check_forward_options = {}
+            self.check_backward_options = {}
+            self.check_double_backward_options = {}
+
     def check_forward(self, x_data, t_data, class_weight, use_cudnn='always'):
         x = chainer.Variable(x_data)
         t = chainer.Variable(t_data)
         with chainer.using_config('use_cudnn', use_cudnn):
             loss = functions.softmax_cross_entropy(
-                x, t, normalize=self.normalize,
+                x, t, normalize=self.normalize, reduce=self.reduce,
                 cache_score=self.cache_score, class_weight=class_weight,
                 enable_double_backprop=self.enable_double_backprop)
-        self.assertEqual(loss.data.shape, ())
+        self.assertEqual(loss.data.shape, self.gy.shape)
         self.assertEqual(loss.data.dtype, self.dtype)
         if not self.enable_double_backprop:
             assert (loss.creator.y is not None) == self.cache_score
-        loss_value = float(cuda.to_cpu(loss.data))
+        loss_value = cuda.to_cpu(loss.data)
 
+        if self.reduce == 'mean':
+            self.check_forward_with_reduce(
+                float(loss_value), t_data, class_weight)
+        else:
+            self.check_forward_without_reduce(loss_value, t_data, class_weight)
+
+    def check_forward_with_reduce(self, loss_value, t_data, class_weight):
         # Compute expected value
         loss_expect = 0.0
         count = 0
@@ -117,6 +103,23 @@ def check_forward(self, x_data, t_data, class_weight, use_cudnn='always'):
         testing.assert_allclose(
             loss_expect, loss_value, **self.check_forward_options)
 
+    def check_forward_without_reduce(self, loss_value, t_data, class_weight):
+        x = numpy.rollaxis(self.x, 1, self.x.ndim).reshape(
+            (self.t.size, self.x.shape[1]))
+        t = self.t.ravel()
+        l = loss_value.ravel()
+        for xi, ti, li in six.moves.zip(x, t, l):
+            if ti == -1:
+                continue
+            log_z = numpy.ufunc.reduce(numpy.logaddexp, xi)
+            if class_weight is None:
+                loss_expect = -(xi - log_z)[ti]
+            else:
+                loss_expect = -(xi - log_z)[ti] * class_weight[ti]
+
+            testing.assert_allclose(
+                loss_expect, li, **self.check_forward_options)
+
     def test_forward_cpu(self):
         self.check_forward(self.x, self.t, self.class_weight)
 
@@ -133,44 +136,108 @@ def test_forward_gpu_no_cudnn(self):
             None if not self.weight_apply else cuda.to_gpu(self.class_weight),
             'never')
 
-    def check_backward(self, x_data, t_data, class_weight, use_cudnn='always'):
+    def check_backward(self, x_data, t_data, g_data, class_weight,
+                       use_cudnn='always'):
         with chainer.using_config('use_cudnn', use_cudnn):
             func = functions.SoftmaxCrossEntropy(
-                cache_score=self.cache_score, class_weight=class_weight)
+                cache_score=self.cache_score, class_weight=class_weight,
+                reduce=self.reduce)
             gradient_check.check_backward(
-                func, (x_data, t_data), None,
+                func, (x_data, t_data), g_data,
                 dtype=numpy.float64, **self.check_backward_options)
 
     def test_backward_cpu(self):
-        self.check_backward(self.x, self.t, self.class_weight)
+        g_data = None
+        if self.reduce == 'no':
+            g_data = self.gy
+        self.check_backward(self.x, self.t, g_data, self.class_weight)
 
     @attr.gpu
     def test_backward_gpu(self):
+        g_data = None
+        if self.reduce == 'no':
+            g_data = cuda.to_gpu(self.gy)
+        weight = None
+        if self.weight_apply:
+            weight = cuda.to_gpu(self.class_weight)
         self.check_backward(
-            cuda.to_gpu(self.x), cuda.to_gpu(self.t),
-            None if not self.weight_apply else cuda.to_gpu(self.class_weight))
+            cuda.to_gpu(self.x), cuda.to_gpu(self.t), g_data, weight)
 
     @attr.gpu
     def test_backward_gpu_no_cudnn(self):
+        g_data = None
+        if self.reduce == 'no':
+            g_data = cuda.to_gpu(self.gy)
+        weight = None
+        if self.weight_apply:
+            weight = cuda.to_gpu(self.class_weight)
         self.check_backward(
-            cuda.to_gpu(self.x), cuda.to_gpu(self.t),
-            None if not self.weight_apply else cuda.to_gpu(self.class_weight),
-            'never')
+            cuda.to_gpu(self.x), cuda.to_gpu(self.t), g_data, weight, 'never')
+
+
+test_cases = testing.product({
+    # test each option flags
+    'reduce': ['mean', 'no'],
+    'cache_score': [True, False],
+    'normalize': [True, False],
+    'weight_apply': [True, False],
+    'shape_ignore': [(None, None),
+                     ((2, 3, 2, 2), (0, 1, 0))],
+    'dtype': [numpy.float32],
+    'label_dtype': [numpy.int32],
+}) + testing.product({
+    # test floating dtypes
+    'reduce': ['mean', 'no'],
+    'cache_score': [False],
+    'normalize': [True],
+    'weight_apply': [True],
+    'shape_ignore': [(None, None),
+                     ((2, 3), (slice(None),)),
+                     ((2, 3, 2), (0,)),
+                     ((2, 3, 2, 2), (0, 1, 0))],
+    'dtype': [numpy.float16, numpy.float64],
+    'label_dtype': [numpy.int32],
+}) + testing.product({
+    # test label dtypes
+    'reduce': ['mean', 'no'],
+    'cache_score': [False],
+    'normalize': [True],
+    'weight_apply': [True],
+    'shape_ignore': [(None, None),
+                     ((2, 3), (slice(None),)),
+                     ((2, 3, 2), (0,)),
+                     ((2, 3, 2, 2), (0, 1, 0))],
+    'dtype': [numpy.float32],
+    'label_dtype': [numpy.int8, numpy.int16, numpy.int64],
+})
+
+
+@testing.parameterize(*test_cases)
+@testing.fix_random()
+class TestSoftmaxCrossEntropyDisableDoubleBackprop(
+        SoftmaxCrossEntropyTestBase, unittest.TestCase):
+
+    enable_double_backprop = False
+
+
+@testing.parameterize(*test_cases)
+@testing.fix_random()
+class TestSoftmaxCrossEntropyEnableDoubleBackprop(
+        SoftmaxCrossEntropyTestBase, unittest.TestCase):
+
+    enable_double_backprop = True
 
     def check_double_backward(self, x_data, t_data, gy_data, ggx_data,
                               class_weight, use_cudnn='always'):
         def f(x):
             return functions.softmax_cross_entropy(
                 x, t_data, self.normalize, self.cache_score, class_weight,
-                enable_double_backprop=True)
-
-        if not self.enable_double_backprop:
-            return
+                reduce=self.reduce, enable_double_backprop=True)
 
         with chainer.using_config('use_cudnn', use_cudnn):
             gradient_check.check_double_backward(
-                f, x_data, gy_data, ggx_data,
-                **self.check_backward_options)
+                f, x_data, gy_data, ggx_data, dtype=numpy.float64,
+                **self.check_double_backward_options)
 
     def test_double_backward_cpu(self):
         self.check_double_backward(
@@ -302,157 +369,6 @@ def test_variable_assertion(self):
                enable_double_backprop=self.enable_double_backprop)
 
 
-@testing.parameterize(*(testing.product({
-    'shape': [None, (2, 3), (2, 3, 2), (2, 3, 2, 2)],
-    'cache_score': [True, False],
-    'normalize': [True, False],
-    'ignore_index': [None, (slice(None),), (0,), (0, 1), (0, 1, 0)],
-    'dtype': [numpy.float32],
-    'weight_apply': [False, True],
-    'use_cudnn': ['always', 'auto', 'never'],
-    'enable_double_backprop': [False, True],
-}) + testing.product({
-    'shape': [None, (2, 3), (2, 3, 2), (2, 3, 2, 2)],
-    'cache_score': [False],
-    'normalize': [True, False],
-    'ignore_index': [(0, 1)],
-    'dtype': [numpy.float16, numpy.float32, numpy.float64],
-    'weight_apply': [False, True],
-    'use_cudnn': ['always', 'auto', 'never'],
-    'enable_double_backprop': [False, True],
-})))
-class TestElementwiseSoftmaxCrossEntropy(unittest.TestCase):
-
-    def setUp(self):
-        if self.shape is None:
-            if self.dtype == numpy.float16:
-                self.x = numpy.array([[-5, 1]], dtype=self.dtype)
-            else:
-                self.x = numpy.array([[-1000, 1]], dtype=self.dtype)
-            self.t = numpy.array([0], dtype=numpy.int32)
-        else:
-            self.x = numpy.random.uniform(-1, 1, self.shape).astype(self.dtype)
-            out_shape = (self.shape[0],) + self.shape[2:]
-            self.t = numpy.random.randint(
-                0, self.shape[1], out_shape).astype(numpy.int32)
-            if (self.ignore_index is not None and
-                    len(self.ignore_index) <= self.t.ndim):
-                self.t[self.ignore_index] = -1
-        self.g = numpy.random.uniform(-1, 1, self.t.shape).astype(self.dtype)
-        self.ggx = numpy.random.uniform(-1, 1, self.x.shape).astype(self.dtype)
-        if self.dtype == numpy.float16:
-            self.check_forward_options = {'atol': 1e-2, 'rtol': 1e-2}
-            self.check_backward_options = {
-                'dtype': numpy.float64, 'atol': 1e-2, 'rtol': 1e-2}
-        else:
-            self.check_forward_options = {'atol': 1e-3, 'rtol': 1e-3}
-            self.check_backward_options = {
-                'dtype': numpy.float64, 'atol': 5e-4, 'rtol': 5e-3}
-        if self.weight_apply:
-            self.class_weight = numpy.random.uniform(
-                0, 10, (self.x.shape[1],)).astype(self.dtype)
-        else:
-            self.class_weight = None
-
-    def check_forward(self, x_data, t_data, class_weight):
-        x = chainer.Variable(x_data)
-        t = chainer.Variable(t_data)
-        loss = functions.softmax_cross_entropy(
-            x, t, cache_score=self.cache_score, normalize=self.normalize,
-            class_weight=class_weight, reduce='no',
-            enable_double_backprop=self.enable_double_backprop)
-        self.assertEqual(loss.shape, t_data.shape)
-        self.assertEqual(loss.data.dtype, self.dtype)
-        if not self.enable_double_backprop:
-            assert (loss.creator.y is not None) == self.cache_score
-        loss_value = cuda.to_cpu(loss.data)
-
-        x = numpy.rollaxis(self.x, 1, self.x.ndim).reshape(
-            (self.t.size, self.x.shape[1]))
-        t = self.t.ravel()
-        l = loss_value.ravel()
-        for xi, ti, li in six.moves.zip(x, t, l):
-            if ti == -1:
-                continue
-            log_z = numpy.ufunc.reduce(numpy.logaddexp, xi)
-            if class_weight is None:
-                loss_expect = -(xi - log_z)[ti]
-            else:
-                loss_expect = -(xi - log_z)[ti] * class_weight[ti]
-
-            testing.assert_allclose(
-                loss_expect, li, **self.check_forward_options)
-
-    def test_forward_cpu(self):
-        with chainer.using_config('use_cudnn', self.use_cudnn):
-            self.check_forward(self.x, self.t, self.class_weight)
-
-    @attr.gpu
-    def test_forward_gpu(self):
-        if not self.weight_apply:
-            weight = None
-        else:
-            weight = cuda.to_gpu(self.class_weight)
-        with chainer.using_config('use_cudnn', self.use_cudnn):
-            self.check_forward(
-                cuda.to_gpu(self.x), cuda.to_gpu(self.t), weight)
-
-    def check_backward(
-            self, x_data, t_data, g_data, class_weight):
-        func = functions.SoftmaxCrossEntropy(
-            cache_score=self.cache_score,
-            class_weight=class_weight, reduce='no')
-        gradient_check.check_backward(
-            func, (x_data, t_data), g_data,
-            **self.check_backward_options)
-
-    def test_backward_cpu(self):
-        with chainer.using_config('use_cudnn', self.use_cudnn):
-            self.check_backward(self.x, self.t, self.g, self.class_weight)
-
-    @attr.gpu
-    def test_backward_gpu(self):
-        if not self.weight_apply:
-            weight = None
-        else:
-            weight = cuda.to_gpu(self.class_weight)
-        with chainer.using_config('use_cudnn', self.use_cudnn):
-            self.check_backward(
-                cuda.to_gpu(self.x), cuda.to_gpu(self.t), cuda.to_gpu(self.g),
-                weight)
-
-    def check_double_backward(
-            self, x_data, t_data, g_data, ggx_data, class_weight):
-
-        def f(x):
-            return functions.softmax_cross_entropy(
-                x, t_data, self.normalize, self.cache_score, class_weight,
-                reduce='no', enable_double_backprop=True)
-
-        if not self.enable_double_backprop:
-            return
-
-        gradient_check.check_double_backward(
-            f, x_data, g_data, ggx_data,
-            **self.check_backward_options)
-
-    def test_double_backward_cpu(self):
-        with chainer.using_config('use_cudnn', self.use_cudnn):
-            self.check_double_backward(
-                self.x, self.t, self.g, self.ggx, self.class_weight)
-
-    @attr.gpu
-    def test_double_backward_gpu(self):
-        if not self.weight_apply:
-            weight = None
-        else:
-            weight = cuda.to_gpu(self.class_weight)
-        with chainer.using_config('use_cudnn', self.use_cudnn):
-            self.check_double_backward(
-                cuda.to_gpu(self.x), cuda.to_gpu(self.t), cuda.to_gpu(self.g),
-                cuda.to_gpu(self.ggx), weight)
-
-
 @testing.parameterize(*testing.product({
     'enable_double_backprop': [True, False],
 }))
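
For reference (this note and snippet are not part of the patch above), the per-element expectation that check_forward_without_reduce verifies can be reproduced with plain NumPy. The sketch below uses hypothetical inputs x (scores of shape (N, C)) and t (integer labels, with -1 meaning "ignore") and mirrors the logaddexp-based expected value in the test; the helper name is illustrative only.

import numpy


def expected_elementwise_loss(x, t, class_weight=None, ignore_label=-1):
    # Reference softmax cross entropy with reduce='no': one value per label.
    # x: (N, C) float scores, t: (N,) integer labels; ignored labels stay 0.
    log_z = numpy.logaddexp.reduce(x, axis=1)  # log-sum-exp over classes
    log_p = x - log_z[:, None]                 # log-softmax
    loss = numpy.zeros(t.shape, dtype=x.dtype)
    for i, ti in enumerate(t):
        if ti == ignore_label:
            continue
        loss[i] = -log_p[i, ti]
        if class_weight is not None:
            loss[i] *= class_weight[ti]
    return loss


# Hypothetical example: the second entry stays 0 because its label is -1.
x = numpy.array([[-1.0, 2.0, 0.5], [0.3, -0.7, 1.2]], dtype=numpy.float32)
t = numpy.array([1, -1], dtype=numpy.int32)
print(expected_elementwise_loss(x, t))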