
Commit

Merge branch 'master' of https://github.com/chainer/chainer into concise-install

# Conflicts:
#	docker/python2/Dockerfile
#	docker/python3/Dockerfile
vilyair committed Feb 21, 2018
2 parents 349a283 + 2175a6f commit 9942510
Showing 64 changed files with 977 additions and 1,164 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
<div align="center"><img src="docs/image/chainer_red_h.png" width="400"/></div>

# Chainer: a deep learning framework
# Chainer: A deep learning framework

[![pypi](https://img.shields.io/pypi/v/chainer.svg)](https://pypi.python.org/pypi/chainer)
[![GitHub license](https://img.shields.io/github/license/chainer/chainer.svg)](https://github.com/chainer/chainer)
2 changes: 1 addition & 1 deletion chainer/_version.py
@@ -1 +1 @@
__version__ = '4.0.0b3'
__version__ = '4.0.0b4'
2 changes: 2 additions & 0 deletions chainer/backends/cuda.py
@@ -9,6 +9,7 @@
imported name original name
============================ =================================
``chainer.cuda.cupy`` :mod:`cupy`
``chainer.cuda.cupyx`` :mod:`cupyx`
``chainer.cuda.ndarray`` :class:`cupy.ndarray`
``chainer.cuda.cupy.cuda`` :mod:`cupy.cuda`
``chainer.cuda.Device`` :class:`cupy.cuda.Device`
@@ -40,6 +41,7 @@
import cupy
from cupy import cuda # NOQA
from cupy.cuda import cublas # NOQA
import cupyx # NOQA

from cupy import ndarray # NOQA

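The re-exported cupyx module is what the one-line changes later in this commit switch to. As a minimal sketch (not part of the diff; the helper name scatter_add_inplace is illustrative), the intended backend dispatch looks like this, using only the existing cuda.get_array_module helper and the new cuda.cupyx alias shown above:

import numpy

from chainer.backends import cuda


def scatter_add_inplace(y, slices, b):
    # Pick the duplicate-safe in-place accumulation for the backend that
    # owns ``y``: numpy.add.at on CPU, cupyx.scatter_add on GPU.
    xp = cuda.get_array_module(y)
    if xp is numpy:
        numpy.add.at(y, slices, b)
    else:
        cuda.cupyx.scatter_add(y, slices, b)
    return y
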
2 changes: 1 addition & 1 deletion chainer/functions/array/resize_images.py
@@ -116,7 +116,7 @@ def forward(self, inputs):
if xp is numpy:
scatter_add = numpy.add.at
else:
scatter_add = xp.scatter_add
scatter_add = cuda.cupyx.scatter_add

gx = xp.zeros(self.input_shape, dtype=gy.dtype)
gy = gy.reshape(B, C, -1)
4 changes: 2 additions & 2 deletions chainer/functions/array/scatter_add.py
@@ -44,7 +44,7 @@ def forward(self, xs):
if xp is numpy:
numpy.add.at(y, self.slices, b),
else:
xp.scatter_add(y, self.slices, b),
cuda.cupyx.scatter_add(y, self.slices, b),
return y,

def backward(self, indexes, grad_outputs):
@@ -94,7 +94,7 @@ def scatter_add(a, slices, b):
.. seealso::
:func:`numpy.add.at` and
:func:`cupy.scatter_add`.
:func:`cupyx.scatter_add`.
"""
y, = ScatterAdd(slices).apply((a, b))
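For context, a small CPU-only usage sketch of the public chainer.functions.scatter_add wrapper whose docstring is updated above (values and index array are made up for illustration); repeated indices accumulate instead of overwriting each other:

import numpy as np

import chainer.functions as F

a = np.zeros((4,), dtype=np.float32)
b = np.ones((3,), dtype=np.float32)
# Index 1 appears twice, so both contributions are added: result [0, 2, 0, 1].
y = F.scatter_add(a, np.array([1, 1, 3]), b)
print(y.data)
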
2 changes: 1 addition & 1 deletion chainer/functions/array/spatial_transformer_sampler.py
@@ -233,7 +233,7 @@ def _backward(self, inputs, grad_outputs):
if xp is numpy:
scatter_add = numpy.add.at
else:
scatter_add = xp.scatter_add
scatter_add = cuda.cupyx.scatter_add
gx = xp.zeros_like(x_pad)
gy = gy.reshape(B, C, -1)
for b in range(B):
93 changes: 22 additions & 71 deletions chainer/functions/connection/n_step_gru.py
@@ -1,20 +1,16 @@
import itertools

import numpy
import six

import chainer
from chainer.backends import cuda
from chainer.functions.activation import sigmoid
from chainer.functions.activation import tanh
from chainer.functions.array import concat
from chainer.functions.array import reshape
from chainer.functions.array import split_axis
from chainer.functions.array import stack
from chainer.functions.connection import linear
from chainer.functions.connection import n_step_rnn
from chainer.functions.connection.n_step_rnn import get_random_state
from chainer.functions.noise import dropout
from chainer.utils import argument


@@ -317,70 +313,25 @@ def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
return hy, ys

else:
direction = 2 if use_bi_direction else 1
hx = split_axis.split_axis(hx, n_layers * direction, axis=0,
force_tuple=True)
hx = [reshape.reshape(h, h.shape[1:]) for h in hx]

xws = [concat.concat([w[0], w[1], w[2]], axis=0) for w in ws]
hws = [concat.concat([w[3], w[4], w[5]], axis=0) for w in ws]
xbs = [concat.concat([b[0], b[1], b[2]], axis=0) for b in bs]
hbs = [concat.concat([b[3], b[4], b[5]], axis=0) for b in bs]

xs_next = xs
hy = []
for layer in six.moves.range(n_layers):

def _one_directional_loop(di):
# di=0, forward GRU
# di=1, backward GRU
xs_list = xs_next if di == 0 else reversed(xs_next)
layer_idx = direction * layer + di
h = hx[layer_idx]
h_list = []
for x in xs_list:
batch = x.shape[0]
if h.shape[0] > batch:
h, h_rest = split_axis.split_axis(h, [batch], axis=0)
else:
h_rest = None

if layer > 0:
x = dropout.dropout(x, ratio=dropout_ratio)

gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

r = sigmoid.sigmoid(W_r_x + U_r_h)
z = sigmoid.sigmoid(W_z_x + U_z_h)
h_bar = tanh.tanh(W_x + r * U_x)
h_bar = (1 - z) * h_bar + z * h
if h_rest is not None:
h = concat.concat([h_bar, h_rest], axis=0)
else:
h = h_bar
h_list.append(h_bar)
return h, h_list

# Forward GRU
h, h_forward = _one_directional_loop(di=0)
hy.append(h)

if use_bi_direction:
# Backward GRU
h, h_backward = _one_directional_loop(di=1)
h_backward.reverse()
# Concat
xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
six.moves.zip(h_forward, h_backward)]
hy.append(h)
else:
# Uni-directional GRU
xs_next = h_forward

ys = xs_next
hy = stack.stack(hy)
return hy, tuple(ys)
hy, _, ys = n_step_rnn.n_step_rnn_impl(
_gru, n_layers, dropout_ratio, hx, None, ws, bs, xs,
use_bi_direction)
return hy, ys


def _gru(x, h, c, w, b):
xw = concat.concat([w[0], w[1], w[2]], axis=0)
hw = concat.concat([w[3], w[4], w[5]], axis=0)
xb = concat.concat([b[0], b[1], b[2]], axis=0)
hb = concat.concat([b[3], b[4], b[5]], axis=0)

gru_x = linear.linear(x, xw, xb)
gru_h = linear.linear(h, hw, hb)

W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

r = sigmoid.sigmoid(W_r_x + U_r_h)
z = sigmoid.sigmoid(W_z_x + U_z_h)
h_bar = tanh.tanh(W_x + r * U_x)
return (1 - z) * h_bar + z * h, None
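
For readers who prefer equations to the packed-weight code, the new _gru cell above computes the standard GRU update. Below is a plain-NumPy restatement for reference only (not part of the diff), with the packed w/b arrays unbundled into per-gate weights and biases folded away:

import numpy as np


def _sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))


def gru_step(x, h, W_r, W_z, W, U_r, U_z, U):
    # Biases omitted for brevity; _gru folds them into the two linear calls.
    r = _sigmoid(x.dot(W_r.T) + h.dot(U_r.T))      # reset gate
    z = _sigmoid(x.dot(W_z.T) + h.dot(U_z.T))      # update gate
    h_bar = np.tanh(x.dot(W.T) + r * h.dot(U.T))   # candidate hidden state
    return (1.0 - z) * h_bar + z * h               # new hidden state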
94 changes: 13 additions & 81 deletions chainer/functions/connection/n_step_lstm.py
@@ -1,19 +1,15 @@
import itertools

import numpy
import six

import chainer
from chainer.backends import cuda
from chainer.functions.activation import lstm
from chainer.functions.array import concat
from chainer.functions.array import reshape
from chainer.functions.array import split_axis
from chainer.functions.array import stack
from chainer.functions.connection import linear
from chainer.functions.connection import n_step_rnn
from chainer.functions.connection.n_step_rnn import get_random_state
from chainer.functions.noise import dropout
from chainer.utils import argument


@@ -449,80 +445,16 @@ def n_step_lstm_base(
return hy, cy, ys

else:
direction = 2 if use_bi_direction else 1
split_size = n_layers * direction
hx = split_axis.split_axis(hx, split_size, axis=0, force_tuple=True)
hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
cx = split_axis.split_axis(cx, split_size, axis=0, force_tuple=True)
cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

xs_next = xs
hy = []
cy = []
for layer in six.moves.range(n_layers):

def _one_directional_loop(di):
# di=0, forward LSTM
# di=1, backward LSTM
h_list = []
c_list = []
layer_idx = direction * layer + di
h = hx[layer_idx]
c = cx[layer_idx]
if di == 0:
xs_list = xs_next
else:
xs_list = reversed(xs_next)
for x in xs_list:
batch = x.shape[0]
if h.shape[0] > batch:
h, h_rest = split_axis.split_axis(h, [batch], axis=0)
c, c_rest = split_axis.split_axis(c, [batch], axis=0)
else:
h_rest = None
c_rest = None

if layer != 0:
x = dropout.dropout(x, ratio=dropout_ratio)
lstm_in = linear.linear(x, xws[layer_idx],
xbs[layer_idx]) + \
linear.linear(h, hws[layer_idx], hbs[layer_idx])

c_bar, h_bar = lstm.lstm(c, lstm_in)
if h_rest is not None:
h = concat.concat([h_bar, h_rest], axis=0)
c = concat.concat([c_bar, c_rest], axis=0)
else:
h = h_bar
c = c_bar
h_list.append(h_bar)
c_list.append(c_bar)
return h, c, h_list, c_list

h, c, h_forward, c_forward = _one_directional_loop(di=0)
hy.append(h)
cy.append(c)

if use_bi_direction:
# BiLSTM
h, c, h_backward, c_backward = _one_directional_loop(di=1)
hy.append(h)
cy.append(c)

h_backward.reverse()
# concat
xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
zip(h_forward, h_backward)]
else:
# Uni-directional RNN
xs_next = h_forward

ys = xs_next
hy = stack.stack(hy)
cy = stack.stack(cy)
return hy, cy, tuple(ys)
return n_step_rnn.n_step_rnn_impl(
_lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
use_bi_direction)


def _lstm(x, h, c, w, b):
xw = _stack_weight([w[2], w[0], w[1], w[3]])
hw = _stack_weight([w[6], w[4], w[5], w[7]])
xb = _stack_weight([b[2], b[0], b[1], b[3]])
hb = _stack_weight([b[6], b[4], b[5], b[7]])
lstm_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
c_bar, h_bar = lstm.lstm(c, lstm_in)
return h_bar, c_bar
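
Similarly, lstm.lstm(c, lstm_in) applies the standard LSTM gate equations to the fused pre-activation produced by the two linear calls in _lstm. A plain-NumPy sketch of that single step, assuming the gate slices a, i, f, o have already been separated out of lstm_in:

import numpy as np


def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def lstm_step(c_prev, a, i, f, o):
    # a: cell input; i/f/o: input, forget and output gate pre-activations.
    c = np.tanh(a) * _sigmoid(i) + _sigmoid(f) * c_prev  # new cell state
    h = _sigmoid(o) * np.tanh(c)                          # new hidden state
    return h, c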
