Merge branch 'master' into fix-sce-test
toslunar committed Jun 29, 2018
2 parents 653cada + 000181f commit 7c5e89f
Showing 256 changed files with 13,602 additions and 2,051 deletions.
9 changes: 6 additions & 3 deletions .travis.yml
@@ -59,12 +59,15 @@ script:
- flake8
- autopep8 -r . --diff | tee check_autopep8
- test ! -s check_autopep8
- cd tests
- CHAINER_TEST_GPU_LIMIT=0 pytest -m "not slow and not cudnn and not ideep" chainer_tests
- pushd tests
- pytest -m "not slow and not gpu and not cudnn and not ideep" chainer_tests
- popd
- if [[ $TRAVIS_OS_NAME == "linux" ]]; then
cd ..;
READTHEDOCS=True python setup.py develop;
fi
- pushd docs
- make html
- popd

sudo: false

3 changes: 1 addition & 2 deletions appveyor.yml
@@ -66,6 +66,5 @@ test_script:
# Run the project tests
- "%CMD_IN_ENV% pip install -U -e .[appveyor]"
# Avoid interruption confirmation of cmd.exe
- "echo SET CHAINER_TEST_GPU_LIMIT=0 > tmp.bat"
- "echo python -m pytest --timeout=60 -m \"not cudnn and not ideep and not slow\" tests >> tmp.bat"
- "echo python -m pytest --timeout=60 -m \"not gpu and not cudnn and not ideep and not slow\" tests > tmp.bat"
- "call tmp.bat < nul"
2 changes: 2 additions & 0 deletions chainer/__init__.py
@@ -147,6 +147,8 @@ def is_arrays_compatible(arrays):
global_config.use_ideep = os.environ.get('CHAINER_USE_IDEEP', 'never')
global_config.lazy_grad_sum = bool(int(
os.environ.get('CHAINER_LAZY_GRAD_SUM', '0')))
global_config.cudnn_fast_batch_normalization = bool(int(
os.environ.get('CHAINER_CUDNN_FAST_BATCH_NORMALIZATION', '0')))

_chainer_dtype = os.environ.get('CHAINER_DTYPE', 'float32')
if _chainer_dtype not in ('float16', 'float32', 'float64'):
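For reference, a minimal sketch of how the `cudnn_fast_batch_normalization` entry added above can be toggled at run time. It relies only on Chainer's standard config mechanism; whether a given cuDNN build actually provides the faster batch-normalization path is not assumed here.

    import chainer

    # CHAINER_CUDNN_FAST_BATCH_NORMALIZATION sets the process-wide default
    # (read once at import time); using_config overrides it for a block.
    with chainer.using_config('cudnn_fast_batch_normalization', True):
        pass  # forward/backward passes that may call cuDNN batch normalization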
2 changes: 1 addition & 1 deletion chainer/_version.py
@@ -1 +1 @@
__version__ = '5.0.0b1'
__version__ = '5.0.0b2'
66 changes: 66 additions & 0 deletions chainer/backends/cuda.py
@@ -28,8 +28,12 @@
operations.
"""

import binascii
import functools
import itertools
import os
import threading
import time
import warnings

import numpy
@@ -424,6 +428,35 @@ def copy(array, out=None, out_device=None, stream=None):
return out


def copyto(dst, src):
    """Copies the elements of an ndarray to those of another one.

    This function can copy the CPU/GPU arrays to the destination arrays on
    another device.

    Args:
        dst (numpy.ndarray or cupy.ndarray): Destination array.
        src (numpy.ndarray or cupy.ndarray): Source array.

    """
    if isinstance(dst, numpy.ndarray):
        numpy.copyto(dst, to_cpu(src))
    elif isinstance(dst, ndarray):
        if isinstance(src, numpy.ndarray):
            if dst.flags.c_contiguous or dst.flags.f_contiguous:
                dst.set(src)
            else:
                cupy.copyto(dst, to_gpu(src, device=dst.device))
        elif isinstance(src, ndarray):
            cupy.copyto(dst, src)
        else:
            raise TypeError('cannot copy from non-array object of type {}'
                            .format(type(src)))
    else:
        raise TypeError('cannot copy to non-array object of type {}'.format(
            type(dst)))
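A minimal usage sketch of the `copyto` helper defined above. It assumes a working CuPy installation with at least one GPU; without CuPy only the NumPy-to-NumPy branch is reachable.

    import numpy
    from chainer.backends import cuda

    src = numpy.arange(6, dtype=numpy.float32).reshape(2, 3)
    dst = cuda.cupy.empty((2, 3), dtype=numpy.float32)  # array on the current GPU

    cuda.copyto(dst, src)       # host -> device copy (uses dst.set for contiguous dst)
    cuda.copyto(src, 2 * dst)   # device -> host copy back into the NumPy array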


# ------------------------------------------------------------------------------
# Function result memoization
# ------------------------------------------------------------------------------
@@ -645,3 +678,36 @@ def should_use_cudnn_tensor_core(dtype):
if use_tensor_core is None:
use_tensor_core = cudnn.is_tensor_core_available(dtype)
return use_tensor_core


# ------------------------------------------------------------------------------
# cupy.cudnn utility
# ------------------------------------------------------------------------------

def get_cudnn_dropout_states():
    if not cudnn_enabled:
        raise RuntimeError('cuDNN is not enabled.')

    thread_id = threading.current_thread().ident
    return get_cudnn_dropout_states_core(thread_id)


_dropout_states_count = itertools.count()


@memoize(for_each_device=True)
def get_cudnn_dropout_states_core(thread_id):
    states_id = next(_dropout_states_count)
    seed = os.getenv('CHAINER_SEED')
    if seed is None:
        try:
            seed_str = binascii.hexlify(os.urandom(8))
            seed = numpy.uint64(int(seed_str, 16))
        except NotImplementedError:
            seed = numpy.uint64(time.clock() * 1000000)
    else:
        seed = numpy.uint64(seed)

    seed += numpy.uint64(states_id)
    handle = cudnn.get_handle()
    return cudnn.DropoutStates(handle, seed)
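Usage note (a sketch only; it needs a CUDA build of Chainer with cuDNN available): the dropout state is memoized per device and per calling thread, and since the seed is read from `CHAINER_SEED` when the state is first created, the variable has to be set beforehand to get deterministic behavior.

    import os
    os.environ['CHAINER_SEED'] = '0'  # must be set before the states are created

    from chainer.backends import cuda

    # Returns (and caches) a cudnn.DropoutStates object for the current device
    # and thread; raises RuntimeError when cuDNN is not enabled.
    states = cuda.get_cudnn_dropout_states()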
15 changes: 7 additions & 8 deletions chainer/computational_graph.py
@@ -61,16 +61,16 @@ class ComputationalGraph(object):
Args:
nodes (list): List of nodes. Each node is either
:class:`VariableNode` object or :class:`Function` object.
:class:`VariableNode` object or :class:`FunctionNode` object.
edges (list): List of edges. Each edge consists of pair of nodes.
variable_style (dict): Dot node style for variable.
function_style (dict): Dot node style for function.
rankdir (str): Direction of the graph that must be
TB (top to bottom), BT (bottom to top), LR (left to right)
or RL (right to left).
remove_variable (bool): If ``True``, :class:`~chainer.Variable`\\ s are
remove_variable (bool): If ``True``, :class:`VariableNode`\\ s are
removed from the resulting computational graph. Only
:class:`~chainer.Function`\\ s are shown in the output.
:class:`FunctionNode`\\ s are shown in the output.
show_name (bool): If ``True``, the ``name`` attribute of each node is
added to the label of the node. Default is ``True``.
@@ -162,8 +162,7 @@ def dump(self, format='dot'):
"""
if format == 'dot':
return self._to_dot()
else:
NotImplementedError('Currently, only dot format is supported.')
raise NotImplementedError('Currently, only dot format is supported.')


def _skip_variable(nodes, edges):
@@ -200,7 +199,7 @@ def build_computational_graph(
outputs(list): nodes from which the graph is constructed.
Each element of outputs must be either :class:`~chainer.Variable`
object, :class:`~chainer.variable.VariableNode` object, or
:class:`~chainer.Function` object.
:class:`~chainer.FunctionNode` object.
remove_split(bool): It must be ``True``. This argument is left for
backward compatibility.
variable_style(dict): Dot node style for variable.
@@ -209,9 +208,9 @@
rankdir (str): Direction of the graph that must be
TB (top to bottom), BT (bottom to top), LR (left to right)
or RL (right to left).
remove_variable (bool): If ``True``, :class:`~chainer.Variable`\\ s are
remove_variable (bool): If ``True``, :class:`VariableNode`\\ s are
removed from the resulting computational graph. Only
:class:`~chainer.Function`\\ s are shown in the output.
:class:`FunctionNode`\\ s are shown in the output.
show_name (bool): If ``True``, the ``name`` attribute of each node is
added to the label of the node. Default is ``True``.
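A short sketch of how the documented arguments are typically used (the variable names here are illustrative only):

    import numpy as np
    import chainer
    import chainer.functions as F
    from chainer import computational_graph as cg

    x = chainer.Variable(np.random.rand(3, 4).astype(np.float32))
    y = F.relu(x) + 1

    # Build the graph from the output variable; remove_variable=True keeps
    # only the FunctionNode vertices in the rendered graph.
    g = cg.build_computational_graph([y], remove_variable=True)
    dot_text = g.dump()  # 'dot' is the only supported format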
26 changes: 15 additions & 11 deletions chainer/distribution.py
@@ -8,11 +8,13 @@ class Distribution(object):
`Distribution` is a base class for dealing with probability distributions.
This class provides the following capabilities.
1. Sampling random points.
2. Evaluating a probability-related function at a given realization value.
(e.g., probability density function, probability mass function)
3. Obtaining properties of distributions.
(e.g., mean, variance)
2. Evaluating a probability-related function at a given realization \
value. (e.g., probability density function, probability mass function)
3. Obtaining properties of distributions. (e.g., mean, variance)
Note that every method and property that computes them from
`chainer.Variable` can basically be differentiated.
@@ -50,13 +52,13 @@ class Distribution(object):
>>> cov = np.random.normal(size=shape + (d, d)).astype(np.float32)
>>> cov = np.matmul(cov, np.rollaxis(cov, -1, -2))
>>> l = np.linalg.cholesky(cov)
>>> dist = D.MultivariateNormal(loc, l)
>>> dist.event_shape
>>> dist = D.MultivariateNormal(loc, l) # doctest: +SKIP
>>> dist.event_shape # doctest: +SKIP
(2,)
>>> dist.batch_shape
>>> dist.batch_shape # doctest: +SKIP
(4, 3)
>>> sample = dist.sample(sample_shape=(6, 5))
>>> sample.shape
>>> sample = dist.sample(sample_shape=(6, 5)) # doctest: +SKIP
>>> sample.shape # doctest: +SKIP
(6, 5, 4, 3, 2)
Every probability-related function takes realization value whose shape is
@@ -345,6 +347,8 @@ def register_kl(Dist1, Dist2):
calculate a KL divergence value between an instance of `Dist1` and
an instance of `Dist2` is registered.
.. code-block:: python
from chainer import distributions
@distributions.register_kl(Dist1, Dist2)
def _kl_dist1_dist2(dist1, dist2):
@@ -379,7 +383,7 @@ def kl_divergence(dist1, dist2):
Returns:
~chainer.Variable: Output variable representing kl divergence
:math:`D_{KL}(p||q)`.
:math:`D_{KL}(p||q)`.
Using `register_kl`, we can define behavior of `kl_divergence` for any two
distributions.
@@ -415,7 +419,7 @@ def cross_entropy(dist1, dist2):
Returns:
~chainer.Variable: Output variable representing cross entropy
:math:`H(p,q)`.
:math:`H(p,q)`.
"""
return dist1.entropy() + kl_divergence(dist1, dist2)
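For reference, the one-liner above is the standard identity relating cross entropy, entropy and KL divergence (with :math:`p` for `dist1` and :math:`q` for `dist2`):

.. math::
    H(p, q) = H(p) + D_{KL}(p \| q)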
1 change: 1 addition & 0 deletions chainer/distributions/__init__.py
@@ -1,3 +1,4 @@
"""Collection of distribution implementations."""

from chainer.distributions.laplace import Laplace # NOQA
from chainer.distributions.normal import Normal # NOQA
149 changes: 149 additions & 0 deletions chainer/distributions/laplace.py
@@ -0,0 +1,149 @@
import chainer
from chainer.backends import cuda
from chainer import distribution
from chainer.functions.array import broadcast
from chainer.functions.math import exponential
from chainer import utils
import math
import numpy


class LaplaceCDF(chainer.function_node.FunctionNode):

    def forward(self, inputs):
        x, = inputs
        xp = cuda.get_array_module(x)
        y = 0.5 - 0.5 * xp.sign(x) * xp.expm1(-abs(x))
        self.retain_outputs((0,))
        return utils.force_array(y, x.dtype),

    def backward(self, target_input_indexes, grad_outputs):
        gy, = grad_outputs
        y, = self.get_retained_outputs()
        return (0.5 - abs(y - 0.5)) * gy,


class LaplaceICDF(chainer.function_node.FunctionNode):

    def forward(self, inputs):
        self.retain_inputs((0,))
        x, = inputs
        xp = cuda.get_array_module(x)
        x = 1 - 2 * x
        y = xp.sign(x) * xp.log1p(-abs(x))
        return utils.force_array(y, x.dtype),

    def backward(self, target_input_indexes, grad_outputs):
        gy, = grad_outputs
        x, = self.get_retained_inputs()
        return gy / (0.5 - abs(x - 0.5)),


def _laplace_cdf(x):
    y, = LaplaceCDF().apply((x,))
    return y


def _laplace_icdf(x):
    y, = LaplaceICDF().apply((x,))
    return y
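As a sanity check on the forward computation above: for the standard Laplace distribution (location 0, scale 1) the piecewise CDF collapses exactly to the `expm1` form used in `LaplaceCDF.forward`,

.. math::
    F(x) =
    \begin{cases}
        \tfrac{1}{2} e^{x} & x < 0 \\
        1 - \tfrac{1}{2} e^{-x} & x \ge 0
    \end{cases}
    = \tfrac{1}{2} - \tfrac{1}{2}\,\mathrm{sign}(x)\,\operatorname{expm1}(-|x|).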


class Laplace(distribution.Distribution):

    """Laplace Distribution.

    The probability density function of the distribution is expressed as

    .. math::
        p(x;\\mu,b) = \\frac{1}{2b}
            \\exp\\left(-\\frac{|x-\\mu|}{b}\\right)

    Args:
        loc(:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Parameter of distribution representing the \
        location :math:`\\mu`.
        scale(:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Parameter of distribution representing the \
        scale :math:`b`.
    """

    def __init__(self, loc, scale):
        super(Laplace, self).__init__()
        self.loc = chainer.as_variable(loc)
        self.scale = chainer.as_variable(scale)

    @property
    def batch_shape(self):
        return self.loc.shape

    def cdf(self, x):
        bl = broadcast.broadcast_to(self.loc, x.shape)
        bs = broadcast.broadcast_to(self.scale, x.shape)
        return _laplace_cdf((x - bl) / bs)

    @property
    def entropy(self):
        return 1. + exponential.log(2 * self.scale)

    @property
    def event_shape(self):
        return ()

    def icdf(self, x):
        return self.loc + self.scale * _laplace_icdf(x)

    @property
    def _is_gpu(self):
        return isinstance(self.loc.data, cuda.ndarray)

    def log_prob(self, x):
        bl = broadcast.broadcast_to(self.loc, x.shape)
        bs = broadcast.broadcast_to(self.scale, x.shape)
        return - exponential.log(2 * bs) - abs(x - bl) / bs

    @property
    def mean(self):
        return self.loc

    @property
    def mode(self):
        return self.loc

    def prob(self, x):
        bl = broadcast.broadcast_to(self.loc, x.shape)
        bs = broadcast.broadcast_to(self.scale, x.shape)
        return 0.5 / bs * exponential.exp(- abs(x - bl) / bs)

    def sample_n(self, n):
        if self._is_gpu:
            eps = cuda.cupy.random.laplace(
                size=(n,) + self.loc.shape).astype(numpy.float32)
        else:
            eps = numpy.random.laplace(
                size=(n,) + self.loc.shape).astype(numpy.float32)

        noise = broadcast.broadcast_to(self.scale, eps.shape) * eps
        noise += broadcast.broadcast_to(self.loc, eps.shape)

        return noise

    @property
    def stddev(self):
        return math.sqrt(2) * self.scale

    @property
    def support(self):
        return 'real'

    @property
    def variance(self):
        return 2 * self.scale ** 2


@distribution.register_kl(Laplace, Laplace)
def _kl_laplace_laplace(dist1, dist2):
    diff = abs(dist1.loc - dist2.loc)
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + diff / dist2.scale \
        + dist1.scale / dist2.scale * exponential.exp(- diff / dist1.scale) - 1
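A small end-to-end sketch of the new distribution. The `chainer.kl_divergence` entry point is assumed to be re-exported at the package top level, matching the `kl_divergence` function documented in `chainer/distribution.py` above.

    import numpy as np
    import chainer
    from chainer import distributions as D

    p = D.Laplace(loc=np.zeros(3, dtype=np.float32),
                  scale=np.ones(3, dtype=np.float32))
    q = D.Laplace(loc=np.full(3, 1.0, dtype=np.float32),
                  scale=np.full(3, 2.0, dtype=np.float32))

    samples = p.sample_n(5)           # shape (5, 3), drawn via numpy.random.laplace
    logp = p.log_prob(samples)        # element-wise log density as a Variable
    kl = chainer.kl_divergence(p, q)  # uses the _kl_laplace_laplace registration above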
