
Commit

Merge branch 'master' of https://github.com/chainer/chainer into concise-install

# Conflicts:
#	docker/python2/Dockerfile
#	docker/python3/Dockerfile
vilyair committed Feb 21, 2018
2 parents 349a283 + 2175a6f commit 9942510
Showing 64 changed files with 977 additions and 1,164 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
<div align="center"><img src="docs/image/chainer_red_h.png" width="400"/></div>

# Chainer: a deep learning framework
# Chainer: A deep learning framework

[![pypi](https://img.shields.io/pypi/v/chainer.svg)](https://pypi.python.org/pypi/chainer)
[![GitHub license](https://img.shields.io/github/license/chainer/chainer.svg)](https://github.com/chainer/chainer)
2 changes: 1 addition & 1 deletion chainer/_version.py
@@ -1 +1 @@
__version__ = '4.0.0b3'
__version__ = '4.0.0b4'
2 changes: 2 additions & 0 deletions chainer/backends/cuda.py
@@ -9,6 +9,7 @@
imported name original name
============================ =================================
``chainer.cuda.cupy`` :mod:`cupy`
``chainer.cuda.cupyx`` :mod:`cupyx`
``chainer.cuda.ndarray`` :class:`cupy.ndarray`
``chainer.cuda.cupy.cuda`` :mod:`cupy.cuda`
``chainer.cuda.Device`` :class:`cupy.cuda.Device`
@@ -40,6 +41,7 @@
import cupy
from cupy import cuda # NOQA
from cupy.cuda import cublas # NOQA
import cupyx # NOQA

from cupy import ndarray # NOQA

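The re-exported cupyx module is what the one-line changes later in this commit switch to. As a minimal sketch (not part of the diff; the helper name scatter_add_inplace is illustrative), the intended backend dispatch looks like this, using only the existing cuda.get_array_module helper and the new cuda.cupyx alias shown above:

import numpy

from chainer.backends import cuda


def scatter_add_inplace(y, slices, b):
    # Pick the duplicate-safe in-place accumulation for the backend that
    # owns ``y``: numpy.add.at on CPU, cupyx.scatter_add on GPU.
    xp = cuda.get_array_module(y)
    if xp is numpy:
        numpy.add.at(y, slices, b)
    else:
        cuda.cupyx.scatter_add(y, slices, b)
    return y
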
2 changes: 1 addition & 1 deletion chainer/functions/array/resize_images.py
@@ -116,7 +116,7 @@ def forward(self, inputs):
if xp is numpy:
scatter_add = numpy.add.at
else:
scatter_add = xp.scatter_add
scatter_add = cuda.cupyx.scatter_add

gx = xp.zeros(self.input_shape, dtype=gy.dtype)
gy = gy.reshape(B, C, -1)
4 changes: 2 additions & 2 deletions chainer/functions/array/scatter_add.py
@@ -44,7 +44,7 @@ def forward(self, xs):
if xp is numpy:
numpy.add.at(y, self.slices, b),
else:
xp.scatter_add(y, self.slices, b),
cuda.cupyx.scatter_add(y, self.slices, b),
return y,

def backward(self, indexes, grad_outputs):
@@ -94,7 +94,7 @@ def scatter_add(a, slices, b):
.. seealso::
:func:`numpy.add.at` and
:func:`cupy.scatter_add`.
:func:`cupyx.scatter_add`.
"""
y, = ScatterAdd(slices).apply((a, b))
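For context, a small CPU-only usage sketch of the public chainer.functions.scatter_add wrapper whose docstring is updated above (values and index array are made up for illustration); repeated indices accumulate instead of overwriting each other:

import numpy as np

import chainer.functions as F

a = np.zeros((4,), dtype=np.float32)
b = np.ones((3,), dtype=np.float32)
# Index 1 appears twice, so both contributions are added: result [0, 2, 0, 1].
y = F.scatter_add(a, np.array([1, 1, 3]), b)
print(y.data)
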
2 changes: 1 addition & 1 deletion chainer/functions/array/spatial_transformer_sampler.py
@@ -233,7 +233,7 @@ def _backward(self, inputs, grad_outputs):
if xp is numpy:
scatter_add = numpy.add.at
else:
scatter_add = xp.scatter_add
scatter_add = cuda.cupyx.scatter_add
gx = xp.zeros_like(x_pad)
gy = gy.reshape(B, C, -1)
for b in range(B):
93 changes: 22 additions & 71 deletions chainer/functions/connection/n_step_gru.py
@@ -1,20 +1,16 @@
import itertools

import numpy
import six

import chainer
from chainer.backends import cuda
from chainer.functions.activation import sigmoid
from chainer.functions.activation import tanh
from chainer.functions.array import concat
from chainer.functions.array import reshape
from chainer.functions.array import split_axis
from chainer.functions.array import stack
from chainer.functions.connection import linear
from chainer.functions.connection import n_step_rnn
from chainer.functions.connection.n_step_rnn import get_random_state
from chainer.functions.noise import dropout
from chainer.utils import argument


@@ -317,70 +313,25 @@ def n_step_gru_base(n_layers, dropout_ratio, hx, ws, bs, xs,
return hy, ys

else:
direction = 2 if use_bi_direction else 1
hx = split_axis.split_axis(hx, n_layers * direction, axis=0,
force_tuple=True)
hx = [reshape.reshape(h, h.shape[1:]) for h in hx]

xws = [concat.concat([w[0], w[1], w[2]], axis=0) for w in ws]
hws = [concat.concat([w[3], w[4], w[5]], axis=0) for w in ws]
xbs = [concat.concat([b[0], b[1], b[2]], axis=0) for b in bs]
hbs = [concat.concat([b[3], b[4], b[5]], axis=0) for b in bs]

xs_next = xs
hy = []
for layer in six.moves.range(n_layers):

def _one_directional_loop(di):
# di=0, forward GRU
# di=1, backward GRU
xs_list = xs_next if di == 0 else reversed(xs_next)
layer_idx = direction * layer + di
h = hx[layer_idx]
h_list = []
for x in xs_list:
batch = x.shape[0]
if h.shape[0] > batch:
h, h_rest = split_axis.split_axis(h, [batch], axis=0)
else:
h_rest = None

if layer > 0:
x = dropout.dropout(x, ratio=dropout_ratio)

gru_x = linear.linear(x, xws[layer_idx], xbs[layer_idx])
gru_h = linear.linear(h, hws[layer_idx], hbs[layer_idx])

W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

r = sigmoid.sigmoid(W_r_x + U_r_h)
z = sigmoid.sigmoid(W_z_x + U_z_h)
h_bar = tanh.tanh(W_x + r * U_x)
h_bar = (1 - z) * h_bar + z * h
if h_rest is not None:
h = concat.concat([h_bar, h_rest], axis=0)
else:
h = h_bar
h_list.append(h_bar)
return h, h_list

# Forward GRU
h, h_forward = _one_directional_loop(di=0)
hy.append(h)

if use_bi_direction:
# Backward GRU
h, h_backward = _one_directional_loop(di=1)
h_backward.reverse()
# Concat
xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
six.moves.zip(h_forward, h_backward)]
hy.append(h)
else:
# Uni-directional GRU
xs_next = h_forward

ys = xs_next
hy = stack.stack(hy)
return hy, tuple(ys)
hy, _, ys = n_step_rnn.n_step_rnn_impl(
_gru, n_layers, dropout_ratio, hx, None, ws, bs, xs,
use_bi_direction)
return hy, ys


def _gru(x, h, c, w, b):
xw = concat.concat([w[0], w[1], w[2]], axis=0)
hw = concat.concat([w[3], w[4], w[5]], axis=0)
xb = concat.concat([b[0], b[1], b[2]], axis=0)
hb = concat.concat([b[3], b[4], b[5]], axis=0)

gru_x = linear.linear(x, xw, xb)
gru_h = linear.linear(h, hw, hb)

W_r_x, W_z_x, W_x = split_axis.split_axis(gru_x, 3, axis=1)
U_r_h, U_z_h, U_x = split_axis.split_axis(gru_h, 3, axis=1)

r = sigmoid.sigmoid(W_r_x + U_r_h)
z = sigmoid.sigmoid(W_z_x + U_z_h)
h_bar = tanh.tanh(W_x + r * U_x)
return (1 - z) * h_bar + z * h, None
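
For readers who prefer equations to the packed-weight code, the new _gru cell above computes the standard GRU update. Below is a plain-NumPy restatement for reference only (not part of the diff), with the packed w/b arrays unbundled into per-gate weights and biases folded away:

import numpy as np


def _sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))


def gru_step(x, h, W_r, W_z, W, U_r, U_z, U):
    # Biases omitted for brevity; _gru folds them into the two linear calls.
    r = _sigmoid(x.dot(W_r.T) + h.dot(U_r.T))      # reset gate
    z = _sigmoid(x.dot(W_z.T) + h.dot(U_z.T))      # update gate
    h_bar = np.tanh(x.dot(W.T) + r * h.dot(U.T))   # candidate hidden state
    return (1.0 - z) * h_bar + z * h               # new hidden state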
94 changes: 13 additions & 81 deletions chainer/functions/connection/n_step_lstm.py
@@ -1,19 +1,15 @@
import itertools

import numpy
import six

import chainer
from chainer.backends import cuda
from chainer.functions.activation import lstm
from chainer.functions.array import concat
from chainer.functions.array import reshape
from chainer.functions.array import split_axis
from chainer.functions.array import stack
from chainer.functions.connection import linear
from chainer.functions.connection import n_step_rnn
from chainer.functions.connection.n_step_rnn import get_random_state
from chainer.functions.noise import dropout
from chainer.utils import argument


@@ -449,80 +445,16 @@ def n_step_lstm_base(
return hy, cy, ys

else:
direction = 2 if use_bi_direction else 1
split_size = n_layers * direction
hx = split_axis.split_axis(hx, split_size, axis=0, force_tuple=True)
hx = [reshape.reshape(h, h.shape[1:]) for h in hx]
cx = split_axis.split_axis(cx, split_size, axis=0, force_tuple=True)
cx = [reshape.reshape(c, c.shape[1:]) for c in cx]

xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]

xs_next = xs
hy = []
cy = []
for layer in six.moves.range(n_layers):

def _one_directional_loop(di):
# di=0, forward LSTM
# di=1, backward LSTM
h_list = []
c_list = []
layer_idx = direction * layer + di
h = hx[layer_idx]
c = cx[layer_idx]
if di == 0:
xs_list = xs_next
else:
xs_list = reversed(xs_next)
for x in xs_list:
batch = x.shape[0]
if h.shape[0] > batch:
h, h_rest = split_axis.split_axis(h, [batch], axis=0)
c, c_rest = split_axis.split_axis(c, [batch], axis=0)
else:
h_rest = None
c_rest = None

if layer != 0:
x = dropout.dropout(x, ratio=dropout_ratio)
lstm_in = linear.linear(x, xws[layer_idx],
xbs[layer_idx]) + \
linear.linear(h, hws[layer_idx], hbs[layer_idx])

c_bar, h_bar = lstm.lstm(c, lstm_in)
if h_rest is not None:
h = concat.concat([h_bar, h_rest], axis=0)
c = concat.concat([c_bar, c_rest], axis=0)
else:
h = h_bar
c = c_bar
h_list.append(h_bar)
c_list.append(c_bar)
return h, c, h_list, c_list

h, c, h_forward, c_forward = _one_directional_loop(di=0)
hy.append(h)
cy.append(c)

if use_bi_direction:
# BiLSTM
h, c, h_backward, c_backward = _one_directional_loop(di=1)
hy.append(h)
cy.append(c)

h_backward.reverse()
# concat
xs_next = [concat.concat([hfi, hbi], axis=1) for (hfi, hbi) in
zip(h_forward, h_backward)]
else:
# Uni-directional RNN
xs_next = h_forward

ys = xs_next
hy = stack.stack(hy)
cy = stack.stack(cy)
return hy, cy, tuple(ys)
return n_step_rnn.n_step_rnn_impl(
_lstm, n_layers, dropout_ratio, hx, cx, ws, bs, xs,
use_bi_direction)


def _lstm(x, h, c, w, b):
xw = _stack_weight([w[2], w[0], w[1], w[3]])
hw = _stack_weight([w[6], w[4], w[5], w[7]])
xb = _stack_weight([b[2], b[0], b[1], b[3]])
hb = _stack_weight([b[6], b[4], b[5], b[7]])
lstm_in = linear.linear(x, xw, xb) + linear.linear(h, hw, hb)
c_bar, h_bar = lstm.lstm(c, lstm_in)
return h_bar, c_bar
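
Similarly, lstm.lstm(c, lstm_in) applies the standard LSTM gate equations to the fused pre-activation produced by the two linear calls in _lstm. A plain-NumPy sketch of that single step, assuming the gate slices a, i, f, o have already been separated out of lstm_in:

import numpy as np


def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def lstm_step(c_prev, a, i, f, o):
    # a: cell input; i/f/o: input, forget and output gate pre-activations.
    c = np.tanh(a) * _sigmoid(i) + _sigmoid(f) * c_prev  # new cell state
    h = _sigmoid(o) * np.tanh(c)                          # new hidden state
    return h, c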
