Merge branch 'master' into fix-sce-test
toslunar committed Jun 29, 2018
2 parents 653cada + 000181f commit 7c5e89f
Showing 256 changed files with 13,602 additions and 2,051 deletions.
9 changes: 6 additions & 3 deletions .travis.yml
@@ -59,12 +59,15 @@ script:
- flake8
- autopep8 -r . --diff | tee check_autopep8
- test ! -s check_autopep8
- cd tests
- CHAINER_TEST_GPU_LIMIT=0 pytest -m "not slow and not cudnn and not ideep" chainer_tests
- pushd tests
- pytest -m "not slow and not gpu and not cudnn and not ideep" chainer_tests
- popd
- if [[ $TRAVIS_OS_NAME == "linux" ]]; then
cd ..;
READTHEDOCS=True python setup.py develop;
fi
- pushd docs
- make html
- popd

sudo: false

3 changes: 1 addition & 2 deletions appveyor.yml
@@ -66,6 +66,5 @@ test_script:
# Run the project tests
- "%CMD_IN_ENV% pip install -U -e .[appveyor]"
# Avoid interruption confirmation of cmd.exe
- "echo SET CHAINER_TEST_GPU_LIMIT=0 > tmp.bat"
- "echo python -m pytest --timeout=60 -m \"not cudnn and not ideep and not slow\" tests >> tmp.bat"
- "echo python -m pytest --timeout=60 -m \"not gpu and not cudnn and not ideep and not slow\" tests > tmp.bat"
- "call tmp.bat < nul"
2 changes: 2 additions & 0 deletions chainer/__init__.py
@@ -147,6 +147,8 @@ def is_arrays_compatible(arrays):
global_config.use_ideep = os.environ.get('CHAINER_USE_IDEEP', 'never')
global_config.lazy_grad_sum = bool(int(
os.environ.get('CHAINER_LAZY_GRAD_SUM', '0')))
global_config.cudnn_fast_batch_normalization = bool(int(
os.environ.get('CHAINER_CUDNN_FAST_BATCH_NORMALIZATION', '0')))

_chainer_dtype = os.environ.get('CHAINER_DTYPE', 'float32')
if _chainer_dtype not in ('float16', 'float32', 'float64'):
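For reference, a minimal sketch of how the `cudnn_fast_batch_normalization` entry added above can be toggled at run time. It relies only on Chainer's standard config mechanism; whether a given cuDNN build actually provides the faster batch-normalization path is not assumed here.

    import chainer

    # CHAINER_CUDNN_FAST_BATCH_NORMALIZATION sets the process-wide default
    # (read once at import time); using_config overrides it for a block.
    with chainer.using_config('cudnn_fast_batch_normalization', True):
        pass  # forward/backward passes that may call cuDNN batch normalization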
2 changes: 1 addition & 1 deletion chainer/_version.py
@@ -1 +1 @@
__version__ = '5.0.0b1'
__version__ = '5.0.0b2'
66 changes: 66 additions & 0 deletions chainer/backends/cuda.py
@@ -28,8 +28,12 @@
operations.
"""

import binascii
import functools
import itertools
import os
import threading
import time
import warnings

import numpy
@@ -424,6 +428,35 @@ def copy(array, out=None, out_device=None, stream=None):
return out


def copyto(dst, src):
    """Copies the elements of an ndarray to those of another one.

    This function can copy the CPU/GPU arrays to the destination arrays on
    another device.

    Args:
        dst (numpy.ndarray or cupy.ndarray): Destination array.
        src (numpy.ndarray or cupy.ndarray): Source array.

    """
    if isinstance(dst, numpy.ndarray):
        numpy.copyto(dst, to_cpu(src))
    elif isinstance(dst, ndarray):
        if isinstance(src, numpy.ndarray):
            if dst.flags.c_contiguous or dst.flags.f_contiguous:
                dst.set(src)
            else:
                cupy.copyto(dst, to_gpu(src, device=dst.device))
        elif isinstance(src, ndarray):
            cupy.copyto(dst, src)
        else:
            raise TypeError('cannot copy from non-array object of type {}'
                            .format(type(src)))
    else:
        raise TypeError('cannot copy to non-array object of type {}'.format(
            type(dst)))
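A minimal usage sketch of the `copyto` helper defined above. It assumes a working CuPy installation with at least one GPU; without CuPy only the NumPy-to-NumPy branch is reachable.

    import numpy
    from chainer.backends import cuda

    src = numpy.arange(6, dtype=numpy.float32).reshape(2, 3)
    dst = cuda.cupy.empty((2, 3), dtype=numpy.float32)  # array on the current GPU

    cuda.copyto(dst, src)       # host -> device copy (uses dst.set for contiguous dst)
    cuda.copyto(src, 2 * dst)   # device -> host copy back into the NumPy array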


# ------------------------------------------------------------------------------
# Function result memoization
# ------------------------------------------------------------------------------
@@ -645,3 +678,36 @@ def should_use_cudnn_tensor_core(dtype):
if use_tensor_core is None:
use_tensor_core = cudnn.is_tensor_core_available(dtype)
return use_tensor_core


# ------------------------------------------------------------------------------
# cupy.cudnn utility
# ------------------------------------------------------------------------------

def get_cudnn_dropout_states():
    if not cudnn_enabled:
        raise RuntimeError('cuDNN is not enabled.')

    thread_id = threading.current_thread().ident
    return get_cudnn_dropout_states_core(thread_id)


_dropout_states_count = itertools.count()


@memoize(for_each_device=True)
def get_cudnn_dropout_states_core(thread_id):
    states_id = next(_dropout_states_count)
    seed = os.getenv('CHAINER_SEED')
    if seed is None:
        try:
            seed_str = binascii.hexlify(os.urandom(8))
            seed = numpy.uint64(int(seed_str, 16))
        except NotImplementedError:
            seed = numpy.uint64(time.clock() * 1000000)
    else:
        seed = numpy.uint64(seed)

    seed += numpy.uint64(states_id)
    handle = cudnn.get_handle()
    return cudnn.DropoutStates(handle, seed)
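Usage note (a sketch only; it needs a CUDA build of Chainer with cuDNN available): the dropout state is memoized per device and per calling thread, and since the seed is read from `CHAINER_SEED` when the state is first created, the variable has to be set beforehand to get deterministic behavior.

    import os
    os.environ['CHAINER_SEED'] = '0'  # must be set before the states are created

    from chainer.backends import cuda

    # Returns (and caches) a cudnn.DropoutStates object for the current device
    # and thread; raises RuntimeError when cuDNN is not enabled.
    states = cuda.get_cudnn_dropout_states()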
15 changes: 7 additions & 8 deletions chainer/computational_graph.py
@@ -61,16 +61,16 @@ class ComputationalGraph(object):
Args:
nodes (list): List of nodes. Each node is either
:class:`VariableNode` object or :class:`Function` object.
:class:`VariableNode` object or :class:`FunctionNode` object.
edges (list): List of edges. Each edge consists of pair of nodes.
variable_style (dict): Dot node style for variable.
function_style (dict): Dot node style for function.
rankdir (str): Direction of the graph that must be
TB (top to bottom), BT (bottom to top), LR (left to right)
or RL (right to left).
remove_variable (bool): If ``True``, :class:`~chainer.Variable`\\ s are
remove_variable (bool): If ``True``, :class:`VariableNode`\\ s are
removed from the resulting computational graph. Only
:class:`~chainer.Function`\\ s are shown in the output.
:class:`FunctionNode`\\ s are shown in the output.
show_name (bool): If ``True``, the ``name`` attribute of each node is
added to the label of the node. Default is ``True``.
@@ -162,8 +162,7 @@ def dump(self, format='dot'):
"""
if format == 'dot':
return self._to_dot()
else:
NotImplementedError('Currently, only dot format is supported.')
raise NotImplementedError('Currently, only dot format is supported.')


def _skip_variable(nodes, edges):
@@ -200,7 +199,7 @@ def build_computational_graph(
outputs(list): nodes from which the graph is constructed.
Each element of outputs must be either :class:`~chainer.Variable`
object, :class:`~chainer.variable.VariableNode` object, or
:class:`~chainer.Function` object.
:class:`~chainer.FunctionNode` object.
remove_split(bool): It must be ``True``. This argument is left for
backward compatibility.
variable_style(dict): Dot node style for variable.
@@ -209,9 +208,9 @@
rankdir (str): Direction of the graph that must be
TB (top to bottom), BT (bottom to top), LR (left to right)
or RL (right to left).
remove_variable (bool): If ``True``, :class:`~chainer.Variable`\\ s are
remove_variable (bool): If ``True``, :class:`VariableNode`\\ s are
removed from the resulting computational graph. Only
:class:`~chainer.Function`\\ s are shown in the output.
:class:`FunctionNode`\\ s are shown in the output.
show_name (bool): If ``True``, the ``name`` attribute of each node is
added to the label of the node. Default is ``True``.
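A short sketch of how the documented arguments are typically used (the variable names here are illustrative only):

    import numpy as np
    import chainer
    import chainer.functions as F
    from chainer import computational_graph as cg

    x = chainer.Variable(np.random.rand(3, 4).astype(np.float32))
    y = F.relu(x) + 1

    # Build the graph from the output variable; remove_variable=True keeps
    # only the FunctionNode vertices in the rendered graph.
    g = cg.build_computational_graph([y], remove_variable=True)
    dot_text = g.dump()  # 'dot' is the only supported format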
26 changes: 15 additions & 11 deletions chainer/distribution.py
@@ -8,11 +8,13 @@ class Distribution(object):
`Distribution` is a base class for dealing with probability distributions.
This class provides the following capabilities.
1. Sampling random points.
2. Evaluating a probability-related function at a given realization value.
(e.g., probability density function, probability mass function)
3. Obtaining properties of distributions.
(e.g., mean, variance)
2. Evaluating a probability-related function at a given realization \
value. (e.g., probability density function, probability mass function)
3. Obtaining properties of distributions. (e.g., mean, variance)
Note that every method and property that computes them from
`chainer.Variable` can basically be differentiated.
@@ -50,13 +52,13 @@ class Distribution(object):
>>> cov = np.random.normal(size=shape + (d, d)).astype(np.float32)
>>> cov = np.matmul(cov, np.rollaxis(cov, -1, -2))
>>> l = np.linalg.cholesky(cov)
>>> dist = D.MultivariateNormal(loc, l)
>>> dist.event_shape
>>> dist = D.MultivariateNormal(loc, l) # doctest: +SKIP
>>> dist.event_shape # doctest: +SKIP
(2,)
>>> dist.batch_shape
>>> dist.batch_shape # doctest: +SKIP
(4, 3)
>>> sample = dist.sample(sample_shape=(6, 5))
>>> sample.shape
>>> sample = dist.sample(sample_shape=(6, 5)) # doctest: +SKIP
>>> sample.shape # doctest: +SKIP
(6, 5, 4, 3, 2)
Every probability-related function takes realization value whose shape is
@@ -345,6 +347,8 @@ def register_kl(Dist1, Dist2):
calculate a KL divergence value between an instance of `Dist1` and
an instance of `Dist2` is registered.
.. code-block:: python
from chainer import distributions
@distributions.register_kl(Dist1, Dist2)
def _kl_dist1_dist2(dist1, dist2):
@@ -379,7 +383,7 @@ def kl_divergence(dist1, dist2):
Returns:
~chainer.Variable: Output variable representing kl divergence
:math:`D_{KL}(p||q)`.
:math:`D_{KL}(p||q)`.
Using `register_kl`, we can define behavior of `kl_divergence` for any two
distributions.
@@ -415,7 +419,7 @@ def cross_entropy(dist1, dist2):
Returns:
~chainer.Variable: Output variable representing cross entropy
:math:`H(p,q)`.
:math:`H(p,q)`.
"""
return dist1.entropy() + kl_divergence(dist1, dist2)
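For reference, the one-liner above is the standard identity relating cross entropy, entropy and KL divergence (with :math:`p` for `dist1` and :math:`q` for `dist2`):

.. math::
    H(p, q) = H(p) + D_{KL}(p \| q)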
1 change: 1 addition & 0 deletions chainer/distributions/__init__.py
@@ -1,3 +1,4 @@
"""Collection of distribution implementations."""

from chainer.distributions.laplace import Laplace # NOQA
from chainer.distributions.normal import Normal # NOQA
149 changes: 149 additions & 0 deletions chainer/distributions/laplace.py
@@ -0,0 +1,149 @@
import chainer
from chainer.backends import cuda
from chainer import distribution
from chainer.functions.array import broadcast
from chainer.functions.math import exponential
from chainer import utils
import math
import numpy


class LaplaceCDF(chainer.function_node.FunctionNode):

    def forward(self, inputs):
        x, = inputs
        xp = cuda.get_array_module(x)
        y = 0.5 - 0.5 * xp.sign(x) * xp.expm1(-abs(x))
        self.retain_outputs((0,))
        return utils.force_array(y, x.dtype),

    def backward(self, target_input_indexes, grad_outputs):
        gy, = grad_outputs
        y, = self.get_retained_outputs()
        return (0.5 - abs(y - 0.5)) * gy,


class LaplaceICDF(chainer.function_node.FunctionNode):

    def forward(self, inputs):
        self.retain_inputs((0,))
        x, = inputs
        xp = cuda.get_array_module(x)
        x = 1 - 2 * x
        y = xp.sign(x) * xp.log1p(-abs(x))
        return utils.force_array(y, x.dtype),

    def backward(self, target_input_indexes, grad_outputs):
        gy, = grad_outputs
        x, = self.get_retained_inputs()
        return gy / (0.5 - abs(x - 0.5)),


def _laplace_cdf(x):
    y, = LaplaceCDF().apply((x,))
    return y


def _laplace_icdf(x):
    y, = LaplaceICDF().apply((x,))
    return y
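As a sanity check on the forward computation above: for the standard Laplace distribution (location 0, scale 1) the piecewise CDF collapses exactly to the `expm1` form used in `LaplaceCDF.forward`,

.. math::
    F(x) =
    \begin{cases}
        \tfrac{1}{2} e^{x} & x < 0 \\
        1 - \tfrac{1}{2} e^{-x} & x \ge 0
    \end{cases}
    = \tfrac{1}{2} - \tfrac{1}{2}\,\mathrm{sign}(x)\,\operatorname{expm1}(-|x|).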


class Laplace(distribution.Distribution):

    """Laplace Distribution.

    The probability density function of the distribution is expressed as

    .. math::
        p(x;\\mu,b) = \\frac{1}{2b}
            \\exp\\left(-\\frac{|x-\\mu|}{b}\\right)

    Args:
        loc(:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Parameter of distribution representing the \
        location :math:`\\mu`.
        scale(:class:`~chainer.Variable` or :class:`numpy.ndarray` or \
        :class:`cupy.ndarray`): Parameter of distribution representing the \
        scale :math:`b`.
    """

    def __init__(self, loc, scale):
        super(Laplace, self).__init__()
        self.loc = chainer.as_variable(loc)
        self.scale = chainer.as_variable(scale)

    @property
    def batch_shape(self):
        return self.loc.shape

    def cdf(self, x):
        bl = broadcast.broadcast_to(self.loc, x.shape)
        bs = broadcast.broadcast_to(self.scale, x.shape)
        return _laplace_cdf((x - bl) / bs)

    @property
    def entropy(self):
        return 1. + exponential.log(2 * self.scale)

    @property
    def event_shape(self):
        return ()

    def icdf(self, x):
        return self.loc + self.scale * _laplace_icdf(x)

    @property
    def _is_gpu(self):
        return isinstance(self.loc.data, cuda.ndarray)

    def log_prob(self, x):
        bl = broadcast.broadcast_to(self.loc, x.shape)
        bs = broadcast.broadcast_to(self.scale, x.shape)
        return - exponential.log(2 * bs) - abs(x - bl) / bs

    @property
    def mean(self):
        return self.loc

    @property
    def mode(self):
        return self.loc

    def prob(self, x):
        bl = broadcast.broadcast_to(self.loc, x.shape)
        bs = broadcast.broadcast_to(self.scale, x.shape)
        return 0.5 / bs * exponential.exp(- abs(x - bl) / bs)

    def sample_n(self, n):
        if self._is_gpu:
            eps = cuda.cupy.random.laplace(
                size=(n,) + self.loc.shape).astype(numpy.float32)
        else:
            eps = numpy.random.laplace(
                size=(n,) + self.loc.shape).astype(numpy.float32)

        noise = broadcast.broadcast_to(self.scale, eps.shape) * eps
        noise += broadcast.broadcast_to(self.loc, eps.shape)

        return noise

    @property
    def stddev(self):
        return math.sqrt(2) * self.scale

    @property
    def support(self):
        return 'real'

    @property
    def variance(self):
        return 2 * self.scale ** 2


@distribution.register_kl(Laplace, Laplace)
def _kl_laplace_laplace(dist1, dist2):
    diff = abs(dist1.loc - dist2.loc)
    return exponential.log(dist2.scale) - exponential.log(dist1.scale) \
        + diff / dist2.scale \
        + dist1.scale / dist2.scale * exponential.exp(- diff / dist1.scale) - 1
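A small end-to-end sketch of the new distribution. The `chainer.kl_divergence` entry point is assumed to be re-exported at the package top level, matching the `kl_divergence` function documented in `chainer/distribution.py` above.

    import numpy as np
    import chainer
    from chainer import distributions as D

    p = D.Laplace(loc=np.zeros(3, dtype=np.float32),
                  scale=np.ones(3, dtype=np.float32))
    q = D.Laplace(loc=np.full(3, 1.0, dtype=np.float32),
                  scale=np.full(3, 2.0, dtype=np.float32))

    samples = p.sample_n(5)           # shape (5, 3), drawn via numpy.random.laplace
    logp = p.log_prob(samples)        # element-wise log density as a Variable
    kl = chainer.kl_divergence(p, q)  # uses the _kl_laplace_laplace registration above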
