From 8b2c890c7136b9f2ee496e920bceea238b27d9f5 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 2 Feb 2016 15:18:34 +0900 Subject: [PATCH 01/15] Add DEBUG variable --- chainer/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chainer/__init__.py b/chainer/__init__.py index 812defdb8d95..99ffe36103fb 100644 --- a/chainer/__init__.py +++ b/chainer/__init__.py @@ -29,4 +29,6 @@ OFF = flag.OFF AUTO = flag.AUTO +DEBUG = False + basic_math.install_variable_arithmetics() From 3dd7827925ddbe9342d100833159d477de02a691 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 2 Feb 2016 15:18:41 +0900 Subject: [PATCH 02/15] Check nan on forward/backward computation --- chainer/function.py | 7 +++++ chainer/variable.py | 6 ++++ tests/chainer_tests/test_function.py | 46 ++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/chainer/function.py b/chainer/function.py index 90091769c598..2c1f1578a144 100644 --- a/chainer/function.py +++ b/chainer/function.py @@ -1,6 +1,7 @@ import os import weakref +import chainer from chainer import cuda from chainer import flag from chainer.utils import type_check @@ -105,6 +106,12 @@ def __call__(self, *inputs): outputs = self.forward(in_data) assert type(outputs) == tuple + if chainer.DEBUG: + if any(cuda.get_array_module(out).isnan(out).any() + for out in outputs): + msg = 'NaN is detected on forward computation' + raise RuntimeError(msg) + out_v = flag.aggregate_flags([x.volatile for x in inputs]) ret = tuple([variable.Variable(y, volatile=out_v) for y in outputs]) diff --git a/chainer/variable.py b/chainer/variable.py index 60b156277741..932f4e4bff56 100644 --- a/chainer/variable.py +++ b/chainer/variable.py @@ -2,6 +2,7 @@ import numpy +import chainer from chainer import cuda from chainer import flag @@ -328,6 +329,11 @@ def add_cand(cand): gxs = func.backward(in_data, out_grad) assert len(gxs) == len(in_data) + if chainer.DEBUG: + if any(cuda.get_array_module(gx).isnan(gx).any() for gx in gxs): + msg = 'NaN is detected on backward computation' + raise RuntimeError(msg) + if not retain_grad: for y in outputs: if y is not None and y is not self: diff --git a/tests/chainer_tests/test_function.py b/tests/chainer_tests/test_function.py index a01fb439b98b..f5dfd1e8fa2f 100644 --- a/tests/chainer_tests/test_function.py +++ b/tests/chainer_tests/test_function.py @@ -316,4 +316,50 @@ def test_forward_invalid(self): f(v) +class TestFunctionDebug(unittest.TestCase): + + def setUp(self): + chainer.DEBUG = True + self.one = numpy.array([1], numpy.float32) + self.nan = numpy.array([float('nan')], numpy.float32) + self.f = chainer.Function() + + def tearDown(self): + chainer.DEBUG = False + + def check_debug_forward(self, x_data): + x = chainer.Variable(x_data) + with self.assertRaises(RuntimeError): + self.f(x) + + def test_debug_forward_cpu(self): + self.f.forward_cpu = mock.MagicMock(return_value=(self.nan,)) + self.check_debug_forward(self.one) + + @attr.gpu + def test_debug_forward_gpu(self): + self.f.forward_gpu = mock.MagicMock( + return_value=(cuda.to_gpu(self.nan),)) + self.check_debug_forward(cuda.to_gpu(self.one)) + + def check_debug_backward(self, x_data): + x = chainer.Variable(x_data) + with self.assertRaises(RuntimeError): + y = self.f(x) + y.backward() + + def test_debug_backward_cpu(self): + self.f.forward_cpu = mock.MagicMock(return_value=(self.one,)) + self.f.backward_cpu = mock.MagicMock(return_value=(self.nan,)) + self.check_debug_backward(self.one) + + @attr.gpu + def test_debug_backward_gpu(self): + self.f.forward_gpu = mock.MagicMock( + return_value=(cuda.to_gpu(self.one),)) + self.f.backward_gpu = mock.MagicMock( + return_value=(cuda.to_gpu(self.nan),)) + self.check_debug_backward(cuda.to_gpu(self.one)) + + testing.run_module(__name__, __file__) From b08d4901d23d62adff151cf594b78c491d8094cb Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 2 Feb 2016 16:19:29 +0900 Subject: [PATCH 03/15] Flake8 --- chainer/variable.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chainer/variable.py b/chainer/variable.py index 932f4e4bff56..1d40ea5c40ce 100644 --- a/chainer/variable.py +++ b/chainer/variable.py @@ -330,7 +330,8 @@ def add_cand(cand): assert len(gxs) == len(in_data) if chainer.DEBUG: - if any(cuda.get_array_module(gx).isnan(gx).any() for gx in gxs): + if any(cuda.get_array_module(gx).isnan(gx).any() + for gx in gxs): msg = 'NaN is detected on backward computation' raise RuntimeError(msg) From 7633bcb7a8daa93dd39af6ed0e8bb45ac1c98c0b Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 9 Feb 2016 19:22:53 +0900 Subject: [PATCH 04/15] Save stack --- chainer/function.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chainer/function.py b/chainer/function.py index 2c1f1578a144..3c4a7bd6127a 100644 --- a/chainer/function.py +++ b/chainer/function.py @@ -1,4 +1,5 @@ import os +import traceback import weakref import chainer @@ -99,6 +100,9 @@ def __call__(self, *inputs): """ in_data = tuple([x.data for x in inputs]) + if chainer.DEBUG: + self._stack = traceback.extract_stack() + if self.type_check_enable: self._check_data_type_forward(in_data) # Forward prop From bf219595003e7b3e100ef5b2569eb30e17c30480 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Mon, 29 Feb 2016 00:08:19 +0900 Subject: [PATCH 05/15] Fix test --- tests/chainer_tests/test_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/chainer_tests/test_function.py b/tests/chainer_tests/test_function.py index f5dfd1e8fa2f..13de992516ec 100644 --- a/tests/chainer_tests/test_function.py +++ b/tests/chainer_tests/test_function.py @@ -344,8 +344,8 @@ def test_debug_forward_gpu(self): def check_debug_backward(self, x_data): x = chainer.Variable(x_data) + y = self.f(x) with self.assertRaises(RuntimeError): - y = self.f(x) y.backward() def test_debug_backward_cpu(self): From 8c3111f8d063a0540002c7f51f11dc920e06614c Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Mon, 29 Feb 2016 00:31:42 +0900 Subject: [PATCH 06/15] Use a method to set/get debug status instead of using a variable --- chainer/__init__.py | 12 +++++++++++- chainer/function.py | 4 ++-- chainer/variable.py | 2 +- tests/chainer_tests/test_function.py | 4 ++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/chainer/__init__.py b/chainer/__init__.py index 99ffe36103fb..410b585640a3 100644 --- a/chainer/__init__.py +++ b/chainer/__init__.py @@ -29,6 +29,16 @@ OFF = flag.OFF AUTO = flag.AUTO -DEBUG = False +_debug = False + + +def get_debug(): + return _debug + + +def set_debug(debug): + global _debug + _debug = debug + basic_math.install_variable_arithmetics() diff --git a/chainer/function.py b/chainer/function.py index 3c4a7bd6127a..1359e4c50018 100644 --- a/chainer/function.py +++ b/chainer/function.py @@ -100,7 +100,7 @@ def __call__(self, *inputs): """ in_data = tuple([x.data for x in inputs]) - if chainer.DEBUG: + if chainer.get_debug(): self._stack = traceback.extract_stack() if self.type_check_enable: @@ -110,7 +110,7 @@ def __call__(self, *inputs): outputs = self.forward(in_data) assert type(outputs) == tuple - if chainer.DEBUG: + if chainer.get_debug(): if any(cuda.get_array_module(out).isnan(out).any() for out in outputs): msg = 'NaN is detected on forward computation' diff --git a/chainer/variable.py b/chainer/variable.py index 1d40ea5c40ce..49d890dd2f7d 100644 --- a/chainer/variable.py +++ b/chainer/variable.py @@ -329,7 +329,7 @@ def add_cand(cand): gxs = func.backward(in_data, out_grad) assert len(gxs) == len(in_data) - if chainer.DEBUG: + if chainer.get_debug(): if any(cuda.get_array_module(gx).isnan(gx).any() for gx in gxs): msg = 'NaN is detected on backward computation' diff --git a/tests/chainer_tests/test_function.py b/tests/chainer_tests/test_function.py index 13de992516ec..8cac2bd0d689 100644 --- a/tests/chainer_tests/test_function.py +++ b/tests/chainer_tests/test_function.py @@ -319,13 +319,13 @@ def test_forward_invalid(self): class TestFunctionDebug(unittest.TestCase): def setUp(self): - chainer.DEBUG = True + chainer.set_debug(True) self.one = numpy.array([1], numpy.float32) self.nan = numpy.array([float('nan')], numpy.float32) self.f = chainer.Function() def tearDown(self): - chainer.DEBUG = False + chainer.set_debug(False) def check_debug_forward(self, x_data): x = chainer.Variable(x_data) From b9cc3fe662bb469fb3b2383e9e1d66e99972a180 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Mon, 29 Feb 2016 00:35:07 +0900 Subject: [PATCH 07/15] Write docstring --- chainer/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/chainer/__init__.py b/chainer/__init__.py index 410b585640a3..6f2b18c9a162 100644 --- a/chainer/__init__.py +++ b/chainer/__init__.py @@ -33,10 +33,20 @@ def get_debug(): + """Get the debug mode. + + Returns: + bool: Return True if Chainer is in debug mode. + """ return _debug def set_debug(debug): + """Set the debug mode. + + Args: + debug (bool): New debug mode. + """ global _debug _debug = debug From 792f36cc0b8c3d73dfa3b543602b808d3d92b1b9 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Mon, 29 Feb 2016 17:16:26 +0900 Subject: [PATCH 08/15] Show stacktrace when an error is occured on bakward --- chainer/function.py | 7 +++++ chainer/variable.py | 13 ++++++++- tests/chainer_tests/test_variable.py | 40 ++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/chainer/function.py b/chainer/function.py index 1359e4c50018..16e033742938 100644 --- a/chainer/function.py +++ b/chainer/function.py @@ -143,6 +143,13 @@ def label(self): """ return self.__class__.__name__ + @property + def stack(self): + if hasattr(self, '_stack'): + return self._stack + else: + return None + def _check_data_type_forward(self, in_data): in_type = type_check.get_types(in_data, 'in_types', False) try: diff --git a/chainer/variable.py b/chainer/variable.py index 49d890dd2f7d..27e7ba9691e2 100644 --- a/chainer/variable.py +++ b/chainer/variable.py @@ -1,4 +1,5 @@ import heapq +import traceback import numpy @@ -10,11 +11,21 @@ def _check_grad_type(func, x, gx): def make_message(message): if func: - detail = '''Function {0} ({1}) has a bug. + detail = 'Function `{0}` ({1}) has a bug.\n'.format( + type(func).__name__, func.label) + + stack = func.stack + if stack: + detail += 'Stacktrace of the function is below:\n' + for line in traceback.format_list(func._stack): + detail += line + + detail += ''' Please report this error to the issue tracker with the stack trace, the information of your environment, and your script: https://github.com/pfnet/chainer/issues/new. '''.format(type(func).__name__, func.label) + else: detail = '' diff --git a/tests/chainer_tests/test_variable.py b/tests/chainer_tests/test_variable.py index e0e575b57c95..cec53f1fc461 100644 --- a/tests/chainer_tests/test_variable.py +++ b/tests/chainer_tests/test_variable.py @@ -1,3 +1,4 @@ +import inspect import unittest import numpy as np @@ -523,4 +524,43 @@ def test_shape_mismatch_gpu(self): self.check_shape_mismatch(cuda.to_gpu(self.x)) +class TestVariableBackwardErrorTraceback(unittest.TestCase): + + def setUp(self): + self.x = np.array([1], np.float32) + chainer.set_debug(True) + + def tearDown(self): + chainer.set_debug(False) + + def check_traceback(self, x_data): + xp = cuda.get_array_module(x_data) + + class DummyFunction(chainer.Function): + label = 'dummy_function' + + def forward(self, inputs): + return xp.array(1, np.float32), + + def backward(self, inputs, grads): + return xp.array([1, 2], np.float32), + + x = chainer.Variable(x_data) + line = inspect.currentframe(0).f_lineno + 1 + y = DummyFunction()(x) # `line` is THIS line + try: + y.backward() + self.fail() + except ValueError as e: + self.assertIn('Stacktrace', str(e)) + self.assertIn('line %d' % line, str(e)) + + def test_traceback_cpu(self): + self.check_traceback(self.x) + + @attr.gpu + def test_traceback_gpu(self): + self.check_traceback(cuda.to_gpu(self.x)) + + testing.run_module(__name__, __file__) From a2156622eb6495dba84a2285967c7f2e5ff45305 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Mon, 29 Feb 2016 18:28:43 +0900 Subject: [PATCH 09/15] Remove argument of currentframe method --- tests/chainer_tests/test_variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/chainer_tests/test_variable.py b/tests/chainer_tests/test_variable.py index cec53f1fc461..041afd63482a 100644 --- a/tests/chainer_tests/test_variable.py +++ b/tests/chainer_tests/test_variable.py @@ -546,7 +546,7 @@ def backward(self, inputs, grads): return xp.array([1, 2], np.float32), x = chainer.Variable(x_data) - line = inspect.currentframe(0).f_lineno + 1 + line = inspect.currentframe().f_lineno + 1 y = DummyFunction()(x) # `line` is THIS line try: y.backward() From 65b376c1d76040f5d94997031f99f68750c3436e Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 1 Mar 2016 11:51:18 +0900 Subject: [PATCH 10/15] Rename method --- chainer/__init__.py | 2 +- chainer/function.py | 4 ++-- chainer/variable.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/chainer/__init__.py b/chainer/__init__.py index 6f2b18c9a162..47894ebf7d47 100644 --- a/chainer/__init__.py +++ b/chainer/__init__.py @@ -32,7 +32,7 @@ _debug = False -def get_debug(): +def is_debug(): """Get the debug mode. Returns: diff --git a/chainer/function.py b/chainer/function.py index 16e033742938..514d999906dc 100644 --- a/chainer/function.py +++ b/chainer/function.py @@ -100,7 +100,7 @@ def __call__(self, *inputs): """ in_data = tuple([x.data for x in inputs]) - if chainer.get_debug(): + if chainer.is_debug(): self._stack = traceback.extract_stack() if self.type_check_enable: @@ -110,7 +110,7 @@ def __call__(self, *inputs): outputs = self.forward(in_data) assert type(outputs) == tuple - if chainer.get_debug(): + if chainer.is_debug(): if any(cuda.get_array_module(out).isnan(out).any() for out in outputs): msg = 'NaN is detected on forward computation' diff --git a/chainer/variable.py b/chainer/variable.py index 27e7ba9691e2..bdcfc801b4fc 100644 --- a/chainer/variable.py +++ b/chainer/variable.py @@ -340,7 +340,7 @@ def add_cand(cand): gxs = func.backward(in_data, out_grad) assert len(gxs) == len(in_data) - if chainer.get_debug(): + if chainer.is_debug(): if any(cuda.get_array_module(gx).isnan(gx).any() for gx in gxs): msg = 'NaN is detected on backward computation' From 9bfa861f06fdce87ba91b42085e1c55a47003492 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 1 Mar 2016 11:51:54 +0900 Subject: [PATCH 11/15] Fix docstring --- chainer/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainer/__init__.py b/chainer/__init__.py index 47894ebf7d47..e16d8f1b870d 100644 --- a/chainer/__init__.py +++ b/chainer/__init__.py @@ -36,7 +36,7 @@ def is_debug(): """Get the debug mode. Returns: - bool: Return True if Chainer is in debug mode. + bool: Return ``True`` if Chainer is in debug mode. """ return _debug From 533a432f9a55ddc070f948f49f9cead4b0255ffc Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 1 Mar 2016 12:04:13 +0900 Subject: [PATCH 12/15] Add debug mode section to the document --- docs/source/reference/core.rst | 1 + docs/source/reference/core/debug.rst | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 docs/source/reference/core/debug.rst diff --git a/docs/source/reference/core.rst b/docs/source/reference/core.rst index 54e4dbefeb7c..7b7cb67f4bcb 100644 --- a/docs/source/reference/core.rst +++ b/docs/source/reference/core.rst @@ -12,4 +12,5 @@ Core functionalities core/link core/optimizer core/serializer + core/debug core/function_set diff --git a/docs/source/reference/core/debug.rst b/docs/source/reference/core/debug.rst new file mode 100644 index 000000000000..f10ce1218996 --- /dev/null +++ b/docs/source/reference/core/debug.rst @@ -0,0 +1,7 @@ +Debug mode +========== + +.. currentmodule:: chainer + +.. autofunction:: is_debug +.. autofunction:: set_debug From cf43eb198b4f74cb73cc5da9106135dbbdd0b282 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 1 Mar 2016 12:04:37 +0900 Subject: [PATCH 13/15] Wrote notice about thread safety --- chainer/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/chainer/__init__.py b/chainer/__init__.py index e16d8f1b870d..475f03d0484a 100644 --- a/chainer/__init__.py +++ b/chainer/__init__.py @@ -35,6 +35,11 @@ def is_debug(): """Get the debug mode. + note:: + + This method is not thread safe. Don't call the method in + multi-threading environment. + Returns: bool: Return ``True`` if Chainer is in debug mode. """ @@ -44,6 +49,11 @@ def is_debug(): def set_debug(debug): """Set the debug mode. + note:: + + This method is not thread safe. Don't call the method in + multi-threading environment. + Args: debug (bool): New debug mode. """ From 3a92b04061828acbac9d66e15e8e4b99437b1229 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 1 Mar 2016 12:11:12 +0900 Subject: [PATCH 14/15] Write brief description of debug mode --- docs/source/reference/core/debug.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/reference/core/debug.rst b/docs/source/reference/core/debug.rst index f10ce1218996..46f86f58f29c 100644 --- a/docs/source/reference/core/debug.rst +++ b/docs/source/reference/core/debug.rst @@ -1,6 +1,12 @@ Debug mode ========== +In debug mode, Chainer checks values of variables on runtime and shows more +detailed error messages. +It helps you to debug your programs. +Instead it requires additional overhead time. + + .. currentmodule:: chainer .. autofunction:: is_debug From 9bb2f177bb9463482d7f4cb89efcce3fcc9b44f1 Mon Sep 17 00:00:00 2001 From: Yuya Unno Date: Tue, 1 Mar 2016 12:17:18 +0900 Subject: [PATCH 15/15] Fix note about multi-threading environment --- chainer/__init__.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/chainer/__init__.py b/chainer/__init__.py index 475f03d0484a..b8a3a74100ce 100644 --- a/chainer/__init__.py +++ b/chainer/__init__.py @@ -35,11 +35,6 @@ def is_debug(): """Get the debug mode. - note:: - - This method is not thread safe. Don't call the method in - multi-threading environment. - Returns: bool: Return ``True`` if Chainer is in debug mode. """ @@ -51,8 +46,8 @@ def set_debug(debug): note:: - This method is not thread safe. Don't call the method in - multi-threading environment. + This method changes global state. When you use this method on + multi-threading environment, it may affects other threads. Args: debug (bool): New debug mode.