From c0d97447d81221f031e81515600663dd6eb89579 Mon Sep 17 00:00:00 2001 From: karita Date: Tue, 19 Dec 2017 01:21:57 +0900 Subject: [PATCH 01/17] add travis test --- .travis.yml | 41 ++++++++++++++++++++++++++++++ test/test_e2e_model.py | 55 ++++++++++++++++++++++++++++++++++++++++ test/test_io_voxforge.py | 54 +++++++++++++++++++++++++-------------- test/test_loss.py | 37 +++++++++++++++++++++++++++ 4 files changed, 168 insertions(+), 19 deletions(-) create mode 100644 .travis.yml create mode 100644 test/test_e2e_model.py create mode 100644 test/test_loss.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000000..10847df475f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,41 @@ +language: python + +cache: + - pip + - ccache + +matrix: + include: + - os: linux + python: "2.7" + - os: linux + python: "3.6" + + +install: + - pip install -U pip wheel + - python setup.py sdist + - pip install dist/*.tar.gz + - pip install pytest hacking mock + - pip install autopep8 + - pip install -r tools/requirements.txt + - if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then pip install pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp27-cp27mu-linux_x86_64.whl; fi + - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pip install pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp27mu-linux_x86_64.whl; fi + - cd tools && make warp-ctc && cd - + + +script: + # - flake8 + # - autopep8 -r . --global-config .pep8 --diff | tee check_autopep8 + # - test ! -s check_autopep8 + - cd tests + - pytest test + +sudo: false + +addons: + apt: + packages: + - cmake + - python-dev + - python3-dev diff --git a/test/test_e2e_model.py b/test/test_e2e_model.py new file mode 100644 index 00000000000..1041eefde4d --- /dev/null +++ b/test/test_e2e_model.py @@ -0,0 +1,55 @@ +# coding: utf-8 + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + + +import sys +import argparse +sys.path.append("./src/nets") + + +import numpy + +import e2e_asr_attctc_th +import e2e_asr_attctc + +args = argparse.Namespace( + elayers = 4, + subsample = "1_2_2_1_1", + etype = "vggblstmp", + eunits = 100, + eprojs = 100, + dlayers=1, + dunits=300, + atype="location", + aconv_chans=10, + aconv_filts=100, + mtlalpha=0.5, + adim=320, + dropout_rate=0.0, + beam_size=3, + penalty=0.5, + maxlenratio=1.0, + minlenratio=0.0, + verbose = True, + char_list = [u"あ", u"い", u"う", u"え", u"お"], + outdir = None +) + + + +def test_model_trainable_and_decodable(): + for m in [e2e_asr_attctc, e2e_asr_attctc_th]: + model = m.Loss(m.E2E(40, 5, args), 0.5) + out_data = "1 2 3 4" + data = [ + ("aaa", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid=out_data)), + ("bbb", dict(feat=numpy.random.randn(200, 40).astype(numpy.float32), tokenid=out_data)) + ] + attn_loss = model(data) + attn_loss.backward() # trainable + + in_data = data[0][1]["feat"] + y = model.predictor.recognize(in_data, args, args.char_list) # decodable + diff --git a/test/test_io_voxforge.py b/test/test_io_voxforge.py index 52c8b164d2e..cae00b35c6e 100644 --- a/test/test_io_voxforge.py +++ b/test/test_io_voxforge.py @@ -1,22 +1,38 @@ # coding: utf-8 +import sys +sys.path.append("./src/utils") + +import os import numpy -import kaldi_io -import kaldi_io_py -import lazy_io - -train_scp = "scp:egs/voxforge/asr1/data/tr_it/feats.scp" - -r1 = kaldi_io_py.read_mat_scp(train_scp) -r2 = kaldi_io.RandomAccessBaseFloatMatrixReader(train_scp) -r3 = lazy_io.read_dict_scp(train_scp) - -for 
k, v1 in r1: - k = str(k) - print(k) - v2 = r2[k] - v3 = r3[k] - assert v1.shape == v2.shape - assert v1.shape == v3.shape - numpy.testing.assert_allclose(v1, v2, atol=1e-5) - numpy.testing.assert_allclose(v1, v3, atol=0) + + +# TODO: use much smaller corpus like AN4 and download if it does not exists +def test_voxforge_feats(): + import kaldi_io_py + import lazy_io + try: + import kaldi_io + except: + print("skip test_voxforge_feats because kaldi_io (kaldi-python) is not installed") + return + + + train_scp = "scp:egs/voxforge/asr1/data/tr_it/feats.scp" + if not os.path.exists(train_scp): + print("skip test_voxforge_feats because voxforge scp has not been created") + return + + r1 = kaldi_io_py.read_mat_scp(train_scp) + r2 = kaldi_io.RandomAccessBaseFloatMatrixReader(train_scp) + r3 = lazy_io.read_dict_scp(train_scp) + + for k, v1 in r1: + k = str(k) + print(k) + v2 = r2[k] + v3 = r3[k] + assert v1.shape == v2.shape + assert v1.shape == v3.shape + numpy.testing.assert_allclose(v1, v2, atol=1e-5) + numpy.testing.assert_allclose(v1, v3, atol=0) diff --git a/test/test_loss.py b/test/test_loss.py new file mode 100644 index 00000000000..f65f7137518 --- /dev/null +++ b/test/test_loss.py @@ -0,0 +1,37 @@ +# coding: utf-8 + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + + +import sys +sys.path.append("./src/nets") + +import numpy +import chainer +import chainer.functions as F +import torch +from warpctc_pytorch import CTCLoss +from e2e_asr_attctc_th import pad_list + + +def test_loss(): + n_out = 7 + n_batch = 3 + input_length = numpy.array([11, 17, 15], dtype=numpy.int32) + label_length = numpy.array([4, 2, 3], dtype=numpy.int32) + np_pred = [numpy.random.rand(il, n_out).astype(numpy.float32) for il in input_length] + np_target = [numpy.random.randint(0, n_out, size=ol, dtype=numpy.int32) for ol in label_length] + + # NOTE: np_pred[i] seems to be transposed and used axis=-1 in e2e_asr_attctc.py + ch_pred = F.separate(F.pad_sequence(np_pred), axis=-2) + ch_target = F.pad_sequence(np_target, padding=-1) + ch_loss = F.connectionist_temporal_classification(ch_pred, ch_target, 0, input_length, label_length).data + + th_pred = pad_list([torch.autograd.Variable(torch.from_numpy(x)) for x in np_pred]).transpose(0, 1) + th_target = torch.autograd.Variable(torch.from_numpy(numpy.concatenate(np_target))) + th_ilen = torch.autograd.Variable(torch.from_numpy(input_length)) + th_olen = torch.autograd.Variable(torch.from_numpy(label_length)) + # NOTE: warpctc_pytorch.CTCLoss does not normalize itself by batch-size while chainer's default setting does + th_loss = (CTCLoss()(th_pred, th_target, th_ilen, th_olen) / n_batch).data.numpy()[0] + numpy.testing.assert_allclose(th_loss, ch_loss, 0.05) From 23f5696fe79bbe5f0d9db752b7a2c37434d5a2ae Mon Sep 17 00:00:00 2001 From: karita Date: Tue, 19 Dec 2017 01:24:41 +0900 Subject: [PATCH 02/17] fix .travis.yml --- .travis.yml | 15 ++++++----- src/nets/e2e_asr_attctc.py | 47 ++++++++++++++++++++++++++++++++ src/nets/e2e_asr_attctc_th.py | 50 ++--------------------------------- test/test_e2e_model.py | 16 ++++++----- test/test_io_voxforge.py | 10 +++---- test/test_loss.py | 19 ++++++++----- tools/Makefile | 14 ++++++++++ 7 files changed, 98 insertions(+), 73 deletions(-) diff --git a/.travis.yml b/.travis.yml index 10847df475f..8c693adb209 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,21 +14,22 @@ matrix: install: - pip install -U pip wheel - - python setup.py sdist - - pip install dist/*.tar.gz 
- pip install pytest hacking mock - pip install autopep8 - - pip install -r tools/requirements.txt - - if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then pip install pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp27-cp27mu-linux_x86_64.whl; fi - - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pip install pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp27mu-linux_x86_64.whl; fi - - cd tools && make warp-ctc && cd - + # unable to install pytorch as https://github.com/pytorch/pytorch/issues/4178 + # - if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp27-cp27mu-linux_x86_64.whl; fi + # - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pip install install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl; fi + # - cd tools && make warp-ctc && cd - + - grep -v cupy tools/requirements.txt | pip install -r /dev/stdin + - cd tools && git clone https://github.com/vesis84/kaldi-io-for-python.git && cd - script: + # TODO test coding style? # - flake8 # - autopep8 -r . --global-config .pep8 --diff | tee check_autopep8 # - test ! -s check_autopep8 - - cd tests + - export PYTHONPATH=`pwd`/src/nets:`pwd`/src/utils - pytest test sudo: false diff --git a/src/nets/e2e_asr_attctc.py b/src/nets/e2e_asr_attctc.py index c7715ee232b..542b0a73d6a 100644 --- a/src/nets/e2e_asr_attctc.py +++ b/src/nets/e2e_asr_attctc.py @@ -793,3 +793,50 @@ def __call__(self, xs, ilens): xs = [xs[i, :ilens[i], :] for i in range(len(ilens))] return xs, ilens + + +if __name__ == '__main__': + import numpy + # from typing import NamedTuple + from argparse import Namespace + args = Namespace( + elayers = 4, + subsample = "1_2_2_1_1", + etype = "vggblstmp", + eunits = 100, + eprojs = 100, + dlayers=1, + dunits=300, + # attention related + atype="location", + aconv_chans=10, + aconv_filts=100, + mtlalpha=0.5, + # defaults + adim=320, + dropout_rate=0.0, + beam_size=3, + penalty=0.5, + + maxlenratio=1.0, + minlenratio=0.0, + + verbose = True, + char_list = ["a", "b", "c", "d", "e"], + outdir = None + ) + + model = Loss(E2E(40, 5, args), 0.5) + out_data = "1 2 3 4" + data = [ + ("aaa", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid=out_data)), + ("bbb", dict(feat=numpy.random.randn(200, 40).astype(numpy.float32), tokenid=out_data)) + ] + attn_loss = model(data) + print(attn_loss) + attn_loss.backward() + + in_data = data[0][1]["feat"] + y = model.predictor.recognize(in_data, args, args.char_list) + print(y) + print("OK") diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index 76897ea2074..46b4832dacb 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -84,7 +84,7 @@ def report(self, loss_ctc, loss_att, acc, mtl_loss): # TODO merge Loss and E2E: there is no need to make these separately class Loss(torch.nn.Module): - def __init__(self, predictor, mtlalpha=0.0): + def __init__(self, predictor, mtlalpha): super(Loss, self).__init__() self.mtlalpha = mtlalpha self.loss = None @@ -104,7 +104,7 @@ def forward(self, x): self.loss = alpha * loss_ctc + (1 - alpha) * loss_att if self.loss.data[0] < CTC_LOSS_THRESHOLD and not math.isnan(self.loss.data[0]): - self.reporter.report(loss_ctc, loss_att.data[0], acc, self.loss.data[0]) + self.reporter.report(loss_ctc.data[0], loss_att.data[0], acc, self.loss.data[0]) else: logging.warning('loss (=%f) is not correct', self.loss.data) @@ -847,49 +847,3 @@ def forward(self, xs, ilens): xs = [xs[i, 
:ilens[i]] for i in range(len(ilens))] xs = pad_list(xs, 0.0) return xs, ilens - - -if __name__ == '__main__': - import numpy - from typing import NamedTuple - class args(NamedTuple): - elayers = 4 - subsample = "1_2_2_1_1" - etype = "vggblstmp" - eunits = 100 - eprojs = 100 - dlayers=1 - dunits=300 - # attention related - atype="location" - aconv_chans=10 - aconv_filts=100 - mtlalpha=0.5 - # defaults - adim=320 - dropout_rate=0.0 - beam_size=3 - penalty=0.5 - - maxlenratio=1.0 - minlenratio=0.0 - - verbose = True - char_list = ["a", "b", "c", "d", "e"] - outdir = None - - model = Loss(E2E(40, 5, args)) - model.cuda() - out_data = "1 2 3 4" - data = [ - ("aaa", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid=out_data)), - ("bbb", dict(feat=numpy.random.randn(200, 40).astype(numpy.float32), tokenid=out_data)) - ] - attn_loss = model(data) - print(attn_loss.data[0]) - attn_loss.backward() - - in_data = data[0][1]["feat"] - y = model.predictor.recognize(in_data, args, args.char_list) - print(y) - print("OK") diff --git a/test/test_e2e_model.py b/test/test_e2e_model.py index 1041eefde4d..2ced58c04b4 100644 --- a/test/test_e2e_model.py +++ b/test/test_e2e_model.py @@ -4,15 +4,12 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -import sys +import importlib import argparse -sys.path.append("./src/nets") - +import pytest import numpy -import e2e_asr_attctc_th -import e2e_asr_attctc args = argparse.Namespace( elayers = 4, @@ -40,7 +37,14 @@ def test_model_trainable_and_decodable(): - for m in [e2e_asr_attctc, e2e_asr_attctc_th]: + for m_str in ["e2e_asr_attctc", "e2e_asr_attctc_th"]: + try: + import torch + except: + if m_str[-3:] == "_th": + pytest.skip("pytorch is not installed") + + m = importlib.import_module(m_str) model = m.Loss(m.E2E(40, 5, args), 0.5) out_data = "1 2 3 4" data = [ diff --git a/test/test_io_voxforge.py b/test/test_io_voxforge.py index cae00b35c6e..44d12d5481b 100644 --- a/test/test_io_voxforge.py +++ b/test/test_io_voxforge.py @@ -1,8 +1,8 @@ # coding: utf-8 +import os import sys -sys.path.append("./src/utils") -import os +import pytest import numpy @@ -13,14 +13,12 @@ def test_voxforge_feats(): try: import kaldi_io except: - print("skip test_voxforge_feats because kaldi_io (kaldi-python) is not installed") - return + pytest.skip("kaldi_io (kaldi-python) is not installed") train_scp = "scp:egs/voxforge/asr1/data/tr_it/feats.scp" if not os.path.exists(train_scp): - print("skip test_voxforge_feats because voxforge scp has not been created") - return + pytest.skip("voxforge scp has not been created") r1 = kaldi_io_py.read_mat_scp(train_scp) r2 = kaldi_io.RandomAccessBaseFloatMatrixReader(train_scp) diff --git a/test/test_loss.py b/test/test_loss.py index f65f7137518..a4fea589515 100644 --- a/test/test_loss.py +++ b/test/test_loss.py @@ -4,18 +4,25 @@ # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -import sys -sys.path.append("./src/nets") - +import pytest import numpy import chainer import chainer.functions as F -import torch -from warpctc_pytorch import CTCLoss -from e2e_asr_attctc_th import pad_list def test_loss(): + try: + import torch + except: + pytest.skip("pytorch is not installed") + try: + from warpctc_pytorch import CTCLoss + except: + pytest.skip("warpctc_pytorch is not installed") + + from e2e_asr_attctc_th import pad_list + + n_out = 7 n_batch = 3 input_length = numpy.array([11, 17, 15], dtype=numpy.int32) diff --git a/tools/Makefile b/tools/Makefile index 28ad00fba2f..4842c2b38e5 100644 --- a/tools/Makefile +++ 
b/tools/Makefile @@ -1,3 +1,6 @@ +.PHONY: all clean + + all: kaldi venv venv/bin/activate nkf kaldi-io-for-python.git kaldi-io-for-python.git: @@ -27,3 +30,14 @@ nkf: clean: rm -fr kaldi_github kaldi kaldi_python venv nkf kaldi-io-for-python ../src/utils/kaldi_io_py.py find -iname "*.pyc" -delete + + +# optional deps for pytorch backends +pytorch: + pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp27-cp27mu-linux_x86_64.whl + +warp-ctc: + git clone https://github.com/SeanNaren/warp-ctc.git + cd warp-ctc && mkdir build && cd build && cmake .. && make -j4 + pip install cffi + cd warp-ctc/pytorch_binding && python setup.py install # maybe need to: apt-get install python-dev From a7a43f370b55858284ca66e3ba1ef10d32fe1097 Mon Sep 17 00:00:00 2001 From: karita Date: Tue, 19 Dec 2017 14:17:50 +0900 Subject: [PATCH 03/17] make pytorch initialization consistent to chainer and test partialy --- src/nets/e2e_asr_attctc_th.py | 27 ++++++++++++++- test/test_initialization.py | 62 +++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 test/test_initialization.py diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index 46b4832dacb..b8e65904895 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -40,6 +40,29 @@ def _ilens_to_index(ilens): return x[1:] +def lecun_normal_init_parameters(module): + for p in module.parameters(): + data = p.data + if data.dim() == 1: + # bias + data.zero_() + elif data.dim() == 2: + # linear weight + n = data.size(1) + stdv = 1. / math.sqrt(n) + data.normal_(0, stdv) + elif data.dim() == 4: + # conv weight + n = data.size(1) + for k in data.size()[2:]: + n *= k + stdv = 1. / math.sqrt(n) + data.normal_(0, stdv) + else: + raise NotImplementedError + + + # get output dim for latter BLSTM def _get_vgg2l_odim(idim, in_channel=3, out_channel=128): idim = idim / in_channel @@ -161,6 +184,8 @@ def __init__(self, idim, odim, args): # decoder self.dec = Decoder(args.eprojs, odim, args.dlayers, args.dunits, self.sos, self.eos, self.att, self.verbose, self.char_list) + # maybe consistent to chainer + lecun_normal_init_parameters(self) # x[i]: ('utt_id', {'ilen':'xxx',...}}) def forward(self, data): @@ -272,7 +297,7 @@ def forward(self, hpad, ilens, ys): # expected shape of seqLength x batchSize x alphabet_size y_hat = y_hat.transpose(0, 1) - self.loss = to_cuda(self, self.loss_fn(y_hat, y_true, ilens, olens)) + self.loss = to_cuda(self, self.loss_fn(y_hat, y_true, ilens, olens)) / len(ys) logging.info('ctc loss:' + str(self.loss.data[0])) return self.loss diff --git a/test/test_initialization.py b/test/test_initialization.py new file mode 100644 index 00000000000..79df38932f3 --- /dev/null +++ b/test/test_initialization.py @@ -0,0 +1,62 @@ +# coding: utf-8 + +# Copyright 2017 Johns Hopkins University (Shinji Watanabe) +# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + + +import importlib +import argparse + +import pytest +import numpy + + +args = argparse.Namespace( + elayers = 4, + subsample = "1_2_2_1_1", + etype = "vggblstmp", + eunits = 100, + eprojs = 100, + dlayers=1, + dunits=300, + atype="location", + aconv_chans=10, + aconv_filts=100, + mtlalpha=0.5, + adim=320, + dropout_rate=0.0, + beam_size=3, + penalty=0.5, + maxlenratio=1.0, + minlenratio=0.0, + verbose = True, + char_list = [u"あ", u"い", u"う", u"え", u"お"], + outdir = None +) + + + +def test_lecun_init_torch(): + try: + import torch + except ImportError: + pytest.skip("pytorch is not installed") 
+ + import e2e_asr_attctc_th as m + model = m.Loss(m.E2E(40, 5, args), 0.5) + b = model.predictor.ctc.ctc_lo.bias.data.numpy() + assert numpy.all(b == 0.0) + w = model.predictor.ctc.ctc_lo.weight.data.numpy() + numpy.testing.assert_allclose(w.mean(), 0.0, 1e-2, 1e-2) + numpy.testing.assert_allclose(w.var(), 1.0 / w.shape[1], 1e-2, 1e-2) + + +def test_lecun_init_chainer(): + import e2e_asr_attctc as m + model = m.Loss(m.E2E(40, 5, args), 0.5) + b = model.predictor.ctc.ctc_lo.b.data + assert numpy.all(b == 0.0) + w = model.predictor.ctc.ctc_lo.W.data + numpy.testing.assert_allclose(w.mean(), 0.0, 1e-2, 1e-2) + numpy.testing.assert_allclose(w.var(), 1.0 / w.shape[1], 1e-2, 1e-2) + From 2462ce3a827e88cc0ce8a897f4c2fcc900ca597a Mon Sep 17 00:00:00 2001 From: karita Date: Tue, 19 Dec 2017 14:24:15 +0900 Subject: [PATCH 04/17] fix encoding utf-8 --- src/nets/e2e_asr_attctc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nets/e2e_asr_attctc.py b/src/nets/e2e_asr_attctc.py index 542b0a73d6a..1eb70c14dff 100644 --- a/src/nets/e2e_asr_attctc.py +++ b/src/nets/e2e_asr_attctc.py @@ -466,8 +466,8 @@ def __call__(self, hs, ys): idx_true = y_true_[y_true_ != -1] seq_hat = [self.char_list[int(idx)] for idx in idx_hat] seq_true = [self.char_list[int(idx)] for idx in idx_true] - seq_hat = "".join(seq_hat).encode('utf-8').replace('', ' ') - seq_true = "".join(seq_true).encode('utf-8').replace('', ' ') + seq_hat = "".join(seq_hat).replace('', ' ') + seq_true = "".join(seq_true).replace('', ' ') logging.info("groundtruth[%d]: " + seq_true, i) logging.info("prediction [%d]: " + seq_hat, i) From d3f6bb102c02c85602bdd0c47d9f853ed4f9f293 Mon Sep 17 00:00:00 2001 From: karita Date: Wed, 20 Dec 2017 00:40:16 +0900 Subject: [PATCH 05/17] more compatible chainer/pytorch --- src/bin/asr_train_th.py | 2 +- src/nets/e2e_asr_attctc_th.py | 34 +++++++++++++++++++++++- src/utils/concatjson.py | 2 +- src/utils/filt.py | 2 +- src/utils/json2trn.py | 2 +- src/utils/mergejson.py | 2 +- src/utils/scp2json.py | 2 +- src/utils/text2token.py | 2 +- test/test_initialization.py | 34 ++++++++++++++++++++++-- test/test_loss.py | 49 ++++++++++++++++++++++++++++++++++- 10 files changed, 120 insertions(+), 11 deletions(-) diff --git a/src/bin/asr_train_th.py b/src/bin/asr_train_th.py index ee71322d3d9..2fd3b670c74 100755 --- a/src/bin/asr_train_th.py +++ b/src/bin/asr_train_th.py @@ -460,7 +460,7 @@ def main(): # Setup an optimizer if args.opt == 'adadelta': - optimizer = torch.optim.Adadelta(model.parameters(), eps=args.eps) + optimizer = torch.optim.Adadelta(model.parameters(), rho=0.95, eps=args.eps) elif args.opt == 'adam': optimizer = torch.optim.Adam(model.parameters()) diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index b8e65904895..5ba495a7382 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -144,6 +144,12 @@ def pad_list(xs, pad_value=float("nan")): return pad +def set_forget_bias_to_one(bias): + n = bias.size(0) + start, end = n//4, n//2 + bias.data[start:end].fill_(1.) + + class E2E(torch.nn.Module): def __init__(self, idim, odim, args): super(E2E, self).__init__() @@ -184,9 +190,35 @@ def __init__(self, idim, odim, args): # decoder self.dec = Decoder(args.eprojs, odim, args.dlayers, args.dunits, self.sos, self.eos, self.att, self.verbose, self.char_list) - # maybe consistent to chainer + + # weight initialization + self.init_like_chainer() + # additional forget-bias init in encoder ? 
+ # for m in self.modules(): + # if isinstance(m, torch.nn.LSTM): + # for name, p in m.named_parameters(): + # if "bias_ih" in name: + # set_forget_bias_to_one(p) + + def init_like_chainer(self): + """ + chainer basically uses LeCun way: W ~ Normal(0, fan_in ** -0.5), b = 0 + pytorch basically uses W, b ~ Uniform(-fan_in**-0.5, fan_in**-0.5) + + however, there are two exceptions as far as I know. + - EmbedID.W ~ Normal(0, 1) + - LSTM.upward.b[forget_gate_range] = 1 (but not used in NStepLSTM) + """ lecun_normal_init_parameters(self) + # exceptions + # embed weight ~ Normal(0, 1) + self.dec.embed.weight.data.normal_(0, 1) + # forget-bias = 1.0 + # https://discuss.pytorch.org/t/set-forget-gate-bias-of-lstm/1745 + set_forget_bias_to_one(self.dec.decoder.bias_ih) + + # x[i]: ('utt_id', {'ilen':'xxx',...}}) def forward(self, data): ''' diff --git a/src/utils/concatjson.py b/src/utils/concatjson.py index f187195bc43..08e70d35b91 100755 --- a/src/utils/concatjson.py +++ b/src/utils/concatjson.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # encoding: utf-8 # Copyright 2017 Johns Hopkins University (Shinji Watanabe) diff --git a/src/utils/filt.py b/src/utils/filt.py index 7bb9e3535be..1a3fb46398f 100755 --- a/src/utils/filt.py +++ b/src/utils/filt.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # Apache 2.0 diff --git a/src/utils/json2trn.py b/src/utils/json2trn.py index 6b274c76cce..c0cea44a3ce 100755 --- a/src/utils/json2trn.py +++ b/src/utils/json2trn.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # encoding: utf-8 # Copyright 2017 Johns Hopkins University (Shinji Watanabe) diff --git a/src/utils/mergejson.py b/src/utils/mergejson.py index 9894b7438db..ef8500e37bb 100755 --- a/src/utils/mergejson.py +++ b/src/utils/mergejson.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # encoding: utf-8 # Copyright 2017 Johns Hopkins University (Shinji Watanabe) diff --git a/src/utils/scp2json.py b/src/utils/scp2json.py index 00b5eee0399..ad05534c62c 100755 --- a/src/utils/scp2json.py +++ b/src/utils/scp2json.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # encoding: utf-8 # Copyright 2017 Johns Hopkins University (Shinji Watanabe) diff --git a/src/utils/text2token.py b/src/utils/text2token.py index d72993db0af..a8dd1670db4 100755 --- a/src/utils/text2token.py +++ b/src/utils/text2token.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 # Copyright 2017 Johns Hopkins University (Shinji Watanabe) # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) diff --git a/test/test_initialization.py b/test/test_initialization.py index 79df38932f3..4618f5fdc87 100644 --- a/test/test_initialization.py +++ b/test/test_initialization.py @@ -15,8 +15,8 @@ elayers = 4, subsample = "1_2_2_1_1", etype = "vggblstmp", - eunits = 100, - eprojs = 100, + eunits = 320, + eprojs = 320, dlayers=1, dunits=300, atype="location", @@ -50,6 +50,22 @@ def test_lecun_init_torch(): numpy.testing.assert_allclose(w.mean(), 0.0, 1e-2, 1e-2) numpy.testing.assert_allclose(w.var(), 1.0 / w.shape[1], 1e-2, 1e-2) + for name, p in model.named_parameters(): + print(name) + data = p.data.numpy() + if "embed" in name: + numpy.testing.assert_allclose(data.mean(), 0.0, 5e-2, 5e-2) + numpy.testing.assert_allclose(data.var(), 1.0, 5e-2, 5e-2) + elif "predictor.dec.decoder.bias_ih" in name: + assert data.sum() == data.size // 4 + elif data.ndim == 1: + assert numpy.all(data == 0.0) + else: + numpy.testing.assert_allclose(data.mean(), 0.0, 5e-2, 
5e-2) + numpy.testing.assert_allclose(data.var(), 1.0 / numpy.prod(data.shape[1:]), 5e-2, 5e-2) + + + def test_lecun_init_chainer(): import e2e_asr_attctc as m @@ -59,4 +75,18 @@ def test_lecun_init_chainer(): w = model.predictor.ctc.ctc_lo.W.data numpy.testing.assert_allclose(w.mean(), 0.0, 1e-2, 1e-2) numpy.testing.assert_allclose(w.var(), 1.0 / w.shape[1], 1e-2, 1e-2) + + for name, p in model.namedparams(): + print(name) + data = p.data + if "decoder/upward/b" in name: + assert data.sum() == data.size // 4 + elif "embed" in name: + numpy.testing.assert_allclose(data.mean(), 0.0, 5e-2, 5e-2) + numpy.testing.assert_allclose(data.var(), 1.0, 5e-2, 5e-2) + elif data.ndim == 1: + assert numpy.all(data == 0.0) + else: + numpy.testing.assert_allclose(data.mean(), 0.0, 5e-2, 5e-2) + numpy.testing.assert_allclose(data.var(), 1.0 / numpy.prod(data.shape[1:]), 5e-2, 5e-2) diff --git a/test/test_loss.py b/test/test_loss.py index a4fea589515..3a396a7ee11 100644 --- a/test/test_loss.py +++ b/test/test_loss.py @@ -10,7 +10,7 @@ import chainer.functions as F -def test_loss(): +def test_ctc_loss(): try: import torch except: @@ -42,3 +42,50 @@ def test_loss(): # NOTE: warpctc_pytorch.CTCLoss does not normalize itself by batch-size while chainer's default setting does th_loss = (CTCLoss()(th_pred, th_target, th_ilen, th_olen) / n_batch).data.numpy()[0] numpy.testing.assert_allclose(th_loss, ch_loss, 0.05) + + + +def test_attn_loss(): + try: + import torch + except: + pytest.skip("pytorch is not installed") + from e2e_asr_attctc_th import pad_list + + n_out = 7 + _sos = n_out - 1 + _eos = n_out - 1 + n_batch = 3 + label_length = numpy.array([4, 2, 3], dtype=numpy.int32) + np_pred = numpy.random.rand(n_batch, max(label_length) + 1, n_out).astype(numpy.float32) + # NOTE: 0 is only used for CTC, never appeared in attn target + np_target = [numpy.random.randint(1, n_out-1, size=ol, dtype=numpy.int32) for ol in label_length] + + eos = numpy.array([_eos], 'i') + sos = numpy.array([_sos], 'i') + ys_in = [F.concat([sos, y], axis=0) for y in np_target] + ys_out = [F.concat([y, eos], axis=0) for y in np_target] + + # padding for ys with -1 + # pys: utt x olen + pad_ys_in = F.pad_sequence(ys_in, padding=_eos) + pad_ys_out = F.pad_sequence(ys_out, padding=-1) # NOTE: -1 is default ignore index for chainer + + y_all = F.reshape(np_pred, (n_batch * (max(label_length) + 1), n_out)) + ch_loss = F.softmax_cross_entropy(y_all, F.concat(pad_ys_out, axis=0)) + + # NOTE: this index 0 is only for CTC not attn. 
so it can be ignored + # unfortunately, torch cross_entropy does not accept out-of-bound ids + th_ignore = 0 + th_pred = torch.autograd.Variable(torch.from_numpy(y_all.data)) + th_target = pad_list([torch.autograd.Variable(torch.from_numpy(t.data)).long() + for t in ys_out], th_ignore) + th_olen = torch.autograd.Variable(torch.from_numpy(label_length)) + th_loss = torch.nn.functional.cross_entropy(th_pred, th_target.view(-1), + ignore_index=th_ignore, size_average=True) + print(ch_loss) + print(th_loss) + + # NOTE: warpctc_pytorch.CTCLoss does not normalize itself by batch-size while chainer's default setting does + + numpy.testing.assert_allclose(th_loss.data[0], ch_loss.data, 0.05) From fe60ae8fb0469761b8f8e3fe1a791e2655c57b24 Mon Sep 17 00:00:00 2001 From: karita Date: Wed, 20 Dec 2017 00:49:48 +0900 Subject: [PATCH 06/17] remove unused main --- src/nets/e2e_asr_attctc.py | 47 -------------------------------------- 1 file changed, 47 deletions(-) diff --git a/src/nets/e2e_asr_attctc.py b/src/nets/e2e_asr_attctc.py index 1eb70c14dff..943fdaa14d3 100644 --- a/src/nets/e2e_asr_attctc.py +++ b/src/nets/e2e_asr_attctc.py @@ -793,50 +793,3 @@ def __call__(self, xs, ilens): xs = [xs[i, :ilens[i], :] for i in range(len(ilens))] return xs, ilens - - -if __name__ == '__main__': - import numpy - # from typing import NamedTuple - from argparse import Namespace - args = Namespace( - elayers = 4, - subsample = "1_2_2_1_1", - etype = "vggblstmp", - eunits = 100, - eprojs = 100, - dlayers=1, - dunits=300, - # attention related - atype="location", - aconv_chans=10, - aconv_filts=100, - mtlalpha=0.5, - # defaults - adim=320, - dropout_rate=0.0, - beam_size=3, - penalty=0.5, - - maxlenratio=1.0, - minlenratio=0.0, - - verbose = True, - char_list = ["a", "b", "c", "d", "e"], - outdir = None - ) - - model = Loss(E2E(40, 5, args), 0.5) - out_data = "1 2 3 4" - data = [ - ("aaa", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid=out_data)), - ("bbb", dict(feat=numpy.random.randn(200, 40).astype(numpy.float32), tokenid=out_data)) - ] - attn_loss = model(data) - print(attn_loss) - attn_loss.backward() - - in_data = data[0][1]["feat"] - y = model.predictor.recognize(in_data, args, args.char_list) - print(y) - print("OK") From f08c91162a06243263b85a182c8518d3f8ef4dca Mon Sep 17 00:00:00 2001 From: karita Date: Wed, 20 Dec 2017 02:04:42 +0900 Subject: [PATCH 07/17] make accuracy consistent --- src/nets/e2e_asr_attctc_th.py | 13 ++++++----- test/test_loss.py | 42 +++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index 5ba495a7382..33ff3f0079b 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -474,6 +474,13 @@ def forward(self, enc_hs_pad, enc_hs_len, dec_z, att_prev, scaling=2.0): return c, w +def th_accuracy(y_all, pad_target, ignore_label): + acc = 0 + pad_pred = y_all.data.view(pad_target.size(0), pad_target.size(1), y_all.size(1)).max(2)[1] + mask = pad_target.data != ignore_label + return torch.sum(pad_pred.masked_select(mask) == pad_target.data.masked_select(mask)) / torch.sum(mask) + + # ------------- Decoder Network ---------------------------------------------------------------------------------------- class Decoder(torch.nn.Module): def __init__(self, eprojs, odim, dlayers, dunits, sos, eos, att, verbose=0, char_list=None): @@ -551,11 +558,7 @@ def forward(self, hpad, hlen, ys): # -1: eos, which is removed in the loss computation self.loss *= 
(np.mean([len(x) for x in ys_in]) - 1) # acc = F.accuracy(y_all, F.concat(pad_ys_out, axis=0), ignore_label=-1) - acc = 0 - pred_pad = y_all.data.view(len(ys), olength, y_all.size(1)).max(2)[1] - for i in range(len(ys)): - acc += torch.sum(pred_pad[i, :ys[i].size(0)] == ys[i].data) - acc /= sum(map(len, ys)) + acc = th_accuracy(y_all, pad_ys_out, ignore_label=ignore_id) logging.info('att loss:' + str(self.loss.data)) # show predicted character sequence for debug diff --git a/test/test_loss.py b/test/test_loss.py index 3a396a7ee11..0c758d20e23 100644 --- a/test/test_loss.py +++ b/test/test_loss.py @@ -89,3 +89,45 @@ def test_attn_loss(): # NOTE: warpctc_pytorch.CTCLoss does not normalize itself by batch-size while chainer's default setting does numpy.testing.assert_allclose(th_loss.data[0], ch_loss.data, 0.05) + + + +def test_train_acc(): + try: + import torch + except: + pytest.skip("pytorch is not installed") + from e2e_asr_attctc_th import pad_list, th_accuracy + + n_out = 7 + _sos = n_out - 1 + _eos = n_out - 1 + n_batch = 3 + label_length = numpy.array([4, 2, 3], dtype=numpy.int32) + np_pred = numpy.random.rand(n_batch, max(label_length) + 1, n_out).astype(numpy.float32) + # NOTE: 0 is only used for CTC, never appeared in attn target + np_target = [numpy.random.randint(1, n_out-1, size=ol, dtype=numpy.int32) for ol in label_length] + + eos = numpy.array([_eos], 'i') + sos = numpy.array([_sos], 'i') + ys_in = [F.concat([sos, y], axis=0) for y in np_target] + ys_out = [F.concat([y, eos], axis=0) for y in np_target] + + # padding for ys with -1 + # pys: utt x olen + pad_ys_in = F.pad_sequence(ys_in, padding=_eos) + pad_ys_out = F.pad_sequence(ys_out, padding=-1) # NOTE: -1 is default ignore index for chainer + y_all = F.reshape(np_pred, (n_batch * (max(label_length) + 1), n_out)) + ch_acc = F.accuracy(y_all, F.concat(pad_ys_out, axis=0), ignore_label=-1) + + # NOTE: this index 0 is only for CTC not attn. 
so it can be ignored + # unfortunately, torch cross_entropy does not accept out-of-bound ids + th_ignore = 0 + th_pred = torch.autograd.Variable(torch.from_numpy(y_all.data)) + th_ys = [torch.autograd.Variable(torch.from_numpy(numpy.append(t, eos))).long() + for t in np_target] + th_target = pad_list(th_ys, th_ignore) + th_olen = torch.autograd.Variable(torch.from_numpy(label_length)) + th_acc = th_accuracy(th_pred, th_target, th_ignore) + + numpy.testing.assert_allclose(ch_acc.data, th_acc) From 4d48754f781641e2dd51bfa98b89d676836f2d06 Mon Sep 17 00:00:00 2001 From: karita Date: Wed, 20 Dec 2017 18:26:51 +0900 Subject: [PATCH 08/17] add more test --- src/bin/asr_train.py | 2 +- src/bin/asr_train_th.py | 19 +------- src/nets/e2e_asr_attctc.py | 11 +++-- src/nets/e2e_asr_attctc_th.py | 86 +++++++++++++++++++---------------- test/test_torch.py | 35 ++++++++++++++ 5 files changed, 91 insertions(+), 62 deletions(-) create mode 100644 test/test_torch.py diff --git a/src/bin/asr_train.py b/src/bin/asr_train.py index 6b0dd36d142..14688c4456f 100755 --- a/src/bin/asr_train.py +++ b/src/bin/asr_train.py @@ -281,7 +281,7 @@ def main(): # network archtecture # encoder parser.add_argument('--etype', default='blstmp', type=str, - choices=['blstmp', 'vggblstmp', 'vggblstm'], + choices=['blstm', 'blstmp', 'vggblstmp', 'vggblstm'], help='Type of encoder network architecture') parser.add_argument('--elayers', default=4, type=int, help='Number of encoder layers') diff --git a/src/bin/asr_train_th.py b/src/bin/asr_train_th.py index 2fd3b670c74..88d358eb8ec 100755 --- a/src/bin/asr_train_th.py +++ b/src/bin/asr_train_th.py @@ -3,23 +3,6 @@ # Copyright 2017 Johns Hopkins University (Shinji Watanabe) # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) -""" -options (batch_size is only changed because of my poor GPU at home): --gpu -1 --outdir exp/train_si284_vggblstmp_e4_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150/results --debugmode 1 --dict data/lang_1char/train_si284_units.txt --debugdir exp/train_si284_vggblstmp_e4_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150 --minibatches 0 --verbose 0 --train-feat scp:dump/train_si284/deltafalse/feats.scp --valid-feat scp:dump/test_dev93/deltafalse/feats.scp --train-label dump/train_si284/deltafalse/data.json --valid-label dump/test_dev93/deltafalse/data.json --etype blstmp --elayers 4 --eunits 320 --eprojs 320 --subsample 1_2_2_1_1 --dlayers 1 --dunits 300 --atype location --aconv-chans 10 --aconv-filts 100 --mtlalpha 0.5 --batch-size 5 --maxlen-in 800 --maxlen-out 150 --opt adadelta --epochs 15 --gpu 0 - - -chainer result -this epoch [#.................................................] 3.13% - 400 iter, 0 epoch / 15 epochs - 0.67657 iters/sec. Estimated time to finish: 3 days, 6:31:44.616061. - - -pytorch result -this epoch [#.................................................] 2.35% - 300 iter, 0 epoch / 15 epochs - 1.4973 iters/sec. Estimated time to finish: 1 day, 11:30:13.571661. 
- -""" - import os import copy @@ -305,7 +288,7 @@ def main(): # network archtecture # encoder parser.add_argument('--etype', default='blstmp', type=str, - choices=['blstmp', 'vggblstmp', 'vggblstm'], + choices=['blstm', 'blstmp', 'vggblstmp', 'vggblstm'], help='Type of encoder network architecture') parser.add_argument('--elayers', default=4, type=int, help='Number of encoder layers') diff --git a/src/nets/e2e_asr_attctc.py b/src/nets/e2e_asr_attctc.py index 943fdaa14d3..9a47df81bd8 100644 --- a/src/nets/e2e_asr_attctc.py +++ b/src/nets/e2e_asr_attctc.py @@ -271,7 +271,7 @@ def reset(self): self.enc_h = None self.pre_compute_enc_h = None - def __call__(self, enc_hs, dec_z, scaling=2.0): + def __call__(self, enc_hs, dec_z, att_prev, scaling=2.0): ''' :param enc_hs: @@ -631,7 +631,10 @@ class Encoder(chainer.Chain): def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout, in_channel=1): super(Encoder, self).__init__() with self.init_scope(): - if etype == 'blstmp': + if etype == 'blstm': + self.enc1 = BLSTM(idim, elayers, eunits, eprojs, dropout) + logging.info('BLSTM without projection for encoder') + elif etype == 'blstmp': self.enc1 = BLSTMP(idim, elayers, eunits, eprojs, subsample, dropout) logging.info('BLSTM with every-layer projection for encoder') elif etype == 'vggblstmp': @@ -656,7 +659,9 @@ def __call__(self, xs, ilens): :param ilens: :return: ''' - if self.etype == 'blstmp': + if self.etype == 'blstm': + xs, ilens = self.enc1(xs, ilens) + elif self.etype == 'blstmp': xs, ilens = self.enc1(xs, ilens) elif self.etype == 'vggblstmp': xs, ilens = self.enc1(xs, ilens) diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index 33ff3f0079b..2fd44057e66 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -335,15 +335,21 @@ def forward(self, hpad, ilens, ys): return self.loss +def mask_by_length(xs, length, fill=0): + assert xs.size(0) == len(length) + ret = Variable(xs.data.new(*xs.size()).fill_(fill)) + for i, l in enumerate(length): + ret[i, :l] = xs[i, :l] + return ret + + # ------------- Attention Network -------------------------------------------------------------------------------------- # dot product based attention class AttDot(torch.nn.Module): def __init__(self, eprojs, dunits, att_dim): - raise NotImplementedError super(AttDot, self).__init__() - with self.init_scope(): - self.mlp_enc = L.Linear(eprojs, att_dim) - self.mlp_dec = L.Linear(dunits, att_dim) + self.mlp_enc = torch.nn.Linear(eprojs, att_dim) + self.mlp_dec = torch.nn.Linear(dunits, att_dim) self.dunits = dunits self.eprojs = eprojs @@ -361,7 +367,7 @@ def reset(self): self.enc_h = None self.pre_compute_enc_h = None - def forward(self, enc_hs, dec_z, scaling=2.0): + def forward(self, enc_hs_pad, enc_hs_len, dec_z, att_prev, scaling=2.0): ''' :param enc_hs: @@ -369,27 +375,28 @@ def forward(self, enc_hs, dec_z, scaling=2.0): :param scaling: :return: ''' - batch = len(enc_hs) + batch = enc_hs_pad.size(0) # pre-compute all h outside the decoder loop if self.pre_compute_enc_h is None: - self.enc_h = F.pad_sequence(enc_hs) # utt x frame x hdim + self.enc_h = mask_by_length(enc_hs_pad, enc_hs_len) # utt x frame x hdim self.h_length = self.enc_h.shape[1] # utt x frame x att_dim - self.pre_compute_enc_h = F.tanh(linear_tensor(self.mlp_enc, self.enc_h)) + self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h) if dec_z is None: - dec_z = chainer.Variable(self.xp.zeros((batch, self.dunits), dtype=np.float32)) + dec_z = 
Variable(enc_hs_pad.data.new(batch, self.dunits).zero_()) else: - dec_z = F.reshape(dec_z, (batch, self.dunits)) + dec_z = dec_z.view(batch, self.dunits) + + e = torch.sum(self.pre_compute_enc_h * + torch.tanh(self.mlp_dec(dec_z)).view(batch, 1, self.att_dim), + dim=2) # utt x frame + w = torch.nn.functional.softmax(scaling * e, dim=1) - # for all t - e = F.sum(self.pre_compute_enc_h * F.tile(F.reshape(F.tanh(self.mlp_dec(dec_z)), (batch, 1, self.att_dim)), - (1, self.h_length, 1)), axis=2) # utt x frame - w = F.softmax(scaling * e) # weighted sum over flames # utt x hdim - c = F.sum(self.enc_h * F.tile(F.reshape(w, (batch, self.h_length, 1)), (1, 1, self.eprojs)), axis=1) - + # NOTE use bmm instead of sum(*) + c = torch.sum(self.enc_h * w.view(batch, self.h_length, 1), dim=1) return c, w @@ -489,6 +496,7 @@ def __init__(self, eprojs, odim, dlayers, dunits, sos, eos, att, verbose=0, char self.embed = torch.nn.Embedding(odim, dunits) # TODO use multiple layers with dlayers option self.decoder = torch.nn.LSTMCell(dunits + eprojs, dunits) # 310s per 100 ite -> 240s from NStepLSTM + self.ignore_id = 0 # NOTE: 0 for CTC? self.output = torch.nn.Linear(dunits, odim) self.loss = None @@ -520,8 +528,7 @@ def forward(self, hpad, hlen, ys): # pys: utt x olen pad_ys_in = pad_list(ys_in, self.eos) - ignore_id = 0 # NOTE: 0 for CTC? - pad_ys_out = pad_list(ys_out, ignore_id) + pad_ys_out = pad_list(ys_out, self.ignore_id) # get dim, length info batch = pad_ys_out.shape[0] @@ -548,17 +555,17 @@ def forward(self, hpad, hlen, ys): z_all.append(z) att_weight_all.append(att_w.data) # for debugging - z_all = torch.stack(z_all, dim=1).view(batch * olength, self.dunits) # NOTE: maybe cat? + z_all = torch.stack(z_all, dim=1).view(batch * olength, self.dunits) # compute loss y_all = self.output(z_all) # NOTE: use size_average=True? self.loss = torch.nn.functional.cross_entropy(y_all, pad_ys_out.view(-1), - ignore_index=ignore_id, size_average=True) + ignore_index=self.ignore_id, size_average=True) # NOTE: is this length-scaling required? 
# -1: eos, which is removed in the loss computation self.loss *= (np.mean([len(x) for x in ys_in]) - 1) # acc = F.accuracy(y_all, F.concat(pad_ys_out, axis=0), ignore_label=-1) - acc = th_accuracy(y_all, pad_ys_out, ignore_label=ignore_id) + acc = th_accuracy(y_all, pad_ys_out, ignore_label=self.ignore_id) logging.info('att loss:' + str(self.loss.data)) # show predicted character sequence for debug @@ -745,7 +752,10 @@ class Encoder(torch.nn.Module): def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout, in_channel=1): super(Encoder, self).__init__() - if etype == 'blstmp': + if etype == 'blstm': + self.enc1 = BLSTM(idim, elayers, eunits, eprojs, dropout) + logging.info('BLSTM without projection for encoder') + elif etype == 'blstmp': self.enc1 = BLSTMP(idim, elayers, eunits, eprojs, subsample, dropout) logging.info('BLSTM with every-layer projection for encoder') elif etype == 'vggblstmp': @@ -774,7 +784,9 @@ def forward(self, xs, ilens): :param ilens: :return: ''' - if self.etype == 'blstmp': + if self.etype == 'blstm': + xs, ilens = self.enc1(xs, ilens) + elif self.etype == 'blstmp': xs, ilens = self.enc1(xs, ilens) elif self.etype == 'vggblstmp': xs, ilens = self.enc1(xs, ilens) @@ -832,13 +844,12 @@ def forward(self, xpad, ilens): class BLSTM(torch.nn.Module): def __init__(self, idim, elayers, cdim, hdim, dropout): - raise NotImplementedError super(BLSTM, self).__init__() - with self.init_scope(): - self.nblstm = L.NStepBiLSTM(elayers, idim, cdim, dropout) - self.l_last = L.Linear(cdim * 2, hdim) + self.nblstm = torch.nn.LSTM(idim, cdim, elayers, batch_first=True, + dropout=dropout, bidirectional=True) + self.l_last = torch.nn.Linear(cdim * 2, hdim) - def forward(self, xs, ilens): + def forward(self, xpad, ilens): ''' :param xs: @@ -846,19 +857,14 @@ def forward(self, xs, ilens): :return: ''' logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens)) - hy, cy, ys = self.nblstm(None, None, xs) - ys = self.l_last(F.vstack(ys)) # (sum _utt frame_utt) x dim - xs = F.split_axis(ys, _ilens_to_index(ilens), axis=0) + xpack = pack_padded_sequence(xpad, ilens, batch_first=True) + ys, (hy, cy) = self.nblstm(xpack) del hy, cy - - # final tanh operation - xs = F.split_axis(F.tanh(F.vstack(xs)), _ilens_to_index(ilens), axis=0) - - # 1 utterance case, it becomes an array, so need to make a utt tuple - if not isinstance(xs, tuple): - xs = [xs] - - return xs, ilens # x: utt list of frame x dim + # ys: utt list of frame x cdim x 2 (2: means bidirectional) + ypad, ilens = pad_packed_sequence(ys, batch_first=True) + projected = torch.tanh(self.l_last(ypad.contiguous().view(-1, ypad.size(2)))) # (sum _utt frame_utt) x dim + xpad = projected.view(ypad.size(0), ypad.size(1), -1) + return xpad, ilens # x: utt list of frame x dim class VGG2L(torch.nn.Module): diff --git a/test/test_torch.py b/test/test_torch.py new file mode 100644 index 00000000000..1f41e62aa63 --- /dev/null +++ b/test/test_torch.py @@ -0,0 +1,35 @@ +import pytest + +try: + import torch + from torch.autograd import Variable +except: + pytest.skip("pytorch is not installed") + +from e2e_asr_attctc_th import pad_list, mask_by_length + + +def test_pad_list(): + xs = [[1, 2, 3], + [1, 2], + [1, 2, 3, 4]] + xs = list(map(lambda x: Variable(torch.LongTensor(x)), xs)) + xpad = pad_list(xs, -1) + + es = [[1, 2, 3, -1], + [1, 2, -1, -1], + [1, 2, 3, 4]] + assert xpad.data.tolist() == es + + +def test_mask_by_length(): + xs = [[1, 2, 3, -1], + [1, 2, -1, -1], + [1, 2, 3, 4]] + xs = Variable(torch.LongTensor(xs)) + 
xlen = [3, 2, 4] + ys = mask_by_length(xs, xlen, fill=0) + es = [[1, 2, 3, 0], + [1, 2, 0, 0], + [1, 2, 3, 4]] + assert ys.data.tolist() == es From 62a1cb0b3862d58ab0f5d29bf2eed71b71f93aeb Mon Sep 17 00:00:00 2001 From: karita Date: Thu, 21 Dec 2017 10:12:18 +0900 Subject: [PATCH 09/17] add forgotten tanh in encoder, chainer-like ctc and test them --- src/nets/e2e_asr_attctc_th.py | 42 +++++++++++++++----- test/test_e2e_model.py | 72 ++++++++++++++++++++++++++++++++++- test/test_initialization.py | 4 +- 3 files changed, 105 insertions(+), 13 deletions(-) diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index 2fd44057e66..b9412756962 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -295,14 +295,40 @@ def recognize(self, x, recog_args, char_list): # ------------- CTC Network -------------------------------------------------------------------------------------------- + +from warpctc_pytorch import CTCLoss, _CTC + +class _ChainerLikeCTC(_CTC): + def forward(self, acts, labels, act_lens, label_lens): + return super(_ChainerLikeCTC, self).forward(acts, labels, act_lens, label_lens) / acts.size(1) + + def backward(self, grad_output): + return self.grads / self.grads.size(1), None, None, None + + +def chainer_like_ctc_loss(acts, labels, act_lens, label_lens): + """ + acts: Tensor of (seqLength x batch x outputDim) containing output from network + labels: 1 dimensional Tensor containing all the targets of the batch in one sequence + act_lens: Tensor of size (batch) containing size of each output sequence from the network + act_lens: Tensor of (batch) containing label length of each example + """ + assert len(labels.size()) == 1 # labels must be 1 dimensional + from torch.nn.modules.loss import _assert_no_grad + _assert_no_grad(labels) + _assert_no_grad(act_lens) + _assert_no_grad(label_lens) + return _ChainerLikeCTC()(acts, labels, act_lens, label_lens) + + + class CTC(torch.nn.Module): def __init__(self, odim, eprojs, dropout_rate): super(CTC, self).__init__() self.dropout_rate = dropout_rate self.loss = None self.ctc_lo = torch.nn.Linear(eprojs, odim) - from warpctc_pytorch import CTCLoss - self.loss_fn = CTCLoss() + self.loss_fn = chainer_like_ctc_loss # CTCLoss() def forward(self, hpad, ilens, ys): ''' @@ -328,8 +354,7 @@ def forward(self, hpad, ilens, ys): # get ctc loss # expected shape of seqLength x batchSize x alphabet_size y_hat = y_hat.transpose(0, 1) - - self.loss = to_cuda(self, self.loss_fn(y_hat, y_true, ilens, olens)) / len(ys) + self.loss = to_cuda(self, self.loss_fn(y_hat, y_true, ilens, olens)) logging.info('ctc loss:' + str(self.loss.data[0])) return self.loss @@ -341,7 +366,7 @@ def mask_by_length(xs, length, fill=0): for i, l in enumerate(length): ret[i, :l] = xs[i, :l] return ret - + # ------------- Attention Network -------------------------------------------------------------------------------------- # dot product based attention @@ -378,7 +403,7 @@ def forward(self, enc_hs_pad, enc_hs_len, dec_z, att_prev, scaling=2.0): batch = enc_hs_pad.size(0) # pre-compute all h outside the decoder loop if self.pre_compute_enc_h is None: - self.enc_h = mask_by_length(enc_hs_pad, enc_hs_len) # utt x frame x hdim + self.enc_h = enc_hs_pad # utt x frame x hdim self.h_length = self.enc_h.shape[1] # utt x frame x att_dim self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h) @@ -517,6 +542,7 @@ def forward(self, hpad, hlen, ys): :param ys: :return: ''' + hpad = mask_by_length(hpad, hlen, 0) self.loss = None # prepare input 
and output word sequences with sos/eos IDs eos = Variable(ys[0].data.new([self.eos])) @@ -558,10 +584,8 @@ def forward(self, hpad, hlen, ys): z_all = torch.stack(z_all, dim=1).view(batch * olength, self.dunits) # compute loss y_all = self.output(z_all) - # NOTE: use size_average=True? self.loss = torch.nn.functional.cross_entropy(y_all, pad_ys_out.view(-1), ignore_index=self.ignore_id, size_average=True) - # NOTE: is this length-scaling required? # -1: eos, which is removed in the loss computation self.loss *= (np.mean([len(x) for x in ys_in]) - 1) # acc = F.accuracy(y_all, F.concat(pad_ys_out, axis=0), ignore_label=-1) @@ -836,7 +860,7 @@ def forward(self, xpad, ilens): ypad = ypad[:, ::sub] ilens = [(i + 1) // sub for i in ilens] projected = getattr(self, 'bt' + str(layer))(ypad.contiguous().view(-1, ypad.size(2))) # (sum _utt frame_utt) x dim - xpad = projected.view(ypad.size(0), ypad.size(1), -1) + xpad = torch.tanh(projected.view(ypad.size(0), ypad.size(1), -1)) del hy, cy return xpad, ilens # x: utt list of frame x dim diff --git a/test/test_e2e_model.py b/test/test_e2e_model.py index 2ced58c04b4..de0af671e0d 100644 --- a/test/test_e2e_model.py +++ b/test/test_e2e_model.py @@ -9,6 +9,7 @@ import pytest import numpy +import chainer args = argparse.Namespace( @@ -29,7 +30,7 @@ penalty=0.5, maxlenratio=1.0, minlenratio=0.0, - verbose = True, + verbose = 2, char_list = [u"あ", u"い", u"う", u"え", u"お"], outdir = None ) @@ -57,3 +58,72 @@ def test_model_trainable_and_decodable(): in_data = data[0][1]["feat"] y = model.predictor.recognize(in_data, args, args.char_list) # decodable + + +def init_torch_weight_const(m, val): + for p in m.parameters(): + if p.dim() > 1: + p.data.fill_(val) + + +def init_chainer_weight_const(m, val): + for p in m.params(): + if p.data.ndim > 1: + p.data[:] = val + + +class Model(chainer.Chain): + def __init__(self, n_in, n_out): + super(Model, self).__init__() + with self.init_scope(): + self.a = chainer.links.Linear(n_in, n_out) + + def __call__(self, x): + return self.a(x) + +# def test_encoder_mask_equal(): +if __name__ == "__main__": + import logging + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s') + try: + import torch + except: + pytest.skip("pytorch is not installed") + import e2e_asr_attctc as ch + import e2e_asr_attctc_th as th + ch_model = ch.E2E(40, 5, args) + ch_model.cleargrads() + th_model = th.E2E(40, 5, args) + + const = 1e-4 + init_torch_weight_const(th_model, const) + init_chainer_weight_const(ch_model, const) + + out_data = "1 2 3 4" + data = [ + ("aaa", dict(feat=numpy.random.randn(200, 40).astype(numpy.float32), tokenid=out_data)), + ("bbb", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid=out_data)), + ("cc", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid=out_data)) + ] + + ch_ctc, ch_att, ch_acc = ch_model(data) + th_ctc, th_att, th_acc = th_model(data) + + # test masking + ch_ench = ch_model.att.pre_compute_enc_h.data + th_ench = th_model.att.pre_compute_enc_h.data.numpy() + numpy.testing.assert_equal(ch_ench == 0.0, th_ench == 0.0) + + # test loss with constant weights (1.0) and bias (0.0) except for foget-bias (1.0) + numpy.testing.assert_allclose(ch_ctc.data, th_ctc.data.numpy()) + numpy.testing.assert_allclose(ch_att.data, th_att.data.numpy()) + + # test grads + ch_ctc.backward() + th_ctc.backward() + numpy.testing.assert_allclose(ch_model.ctc.ctc_lo.W.grad, + th_model.ctc.ctc_lo.weight.grad.data.numpy(), 1e-7, 1e-8) + + 
numpy.testing.assert_allclose(ch_model.ctc.ctc_lo.W.grad, + th_model.ctc.ctc_lo.weight.grad.data.numpy(), 1e-7, 1e-8) + diff --git a/test/test_initialization.py b/test/test_initialization.py index 4618f5fdc87..a94c2acf457 100644 --- a/test/test_initialization.py +++ b/test/test_initialization.py @@ -64,8 +64,6 @@ def test_lecun_init_torch(): numpy.testing.assert_allclose(data.mean(), 0.0, 5e-2, 5e-2) numpy.testing.assert_allclose(data.var(), 1.0 / numpy.prod(data.shape[1:]), 5e-2, 5e-2) - - def test_lecun_init_chainer(): import e2e_asr_attctc as m @@ -89,4 +87,4 @@ def test_lecun_init_chainer(): else: numpy.testing.assert_allclose(data.mean(), 0.0, 5e-2, 5e-2) numpy.testing.assert_allclose(data.var(), 1.0 / numpy.prod(data.shape[1:]), 5e-2, 5e-2) - + From 517c95e8da893d8b6fec10307d5af45fdbbfcfeb Mon Sep 17 00:00:00 2001 From: ShigekiKarita Date: Thu, 21 Dec 2017 15:43:00 +0900 Subject: [PATCH 10/17] add more tests on optim / grad --- .gitignore | 3 ++ .travis.yml | 2 +- pytest.ini | 5 ++++ test/test_e2e_model.py | 16 +++++++---- test/test_optimizer.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 pytest.ini create mode 100644 test/test_optimizer.py diff --git a/.gitignore b/.gitignore index 2fc17e926f1..333251ac0eb 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,6 @@ egs/*/*/exp egs/*/*/fbank egs/*/*/stft *DS_Store + + +src/utils/kaldi_io_py.py \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 8c693adb209..5ff2c1e236b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ matrix: install: - pip install -U pip wheel - - pip install pytest hacking mock + - pip install pytest pytest-pythonpath hacking mock - pip install autopep8 # unable to install pytorch as https://github.com/pytorch/pytorch/issues/4178 # - if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp27-cp27mu-linux_x86_64.whl; fi diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000000..50e4f411920 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,5 @@ +[pytest] +addopts = --verbose +testpaths = test +python_paths = src/nets src/utils src/bin + diff --git a/test/test_e2e_model.py b/test/test_e2e_model.py index de0af671e0d..b6f51c18e84 100644 --- a/test/test_e2e_model.py +++ b/test/test_e2e_model.py @@ -12,7 +12,8 @@ import chainer -args = argparse.Namespace( +def make_arg(etype): + return argparse.Namespace( elayers = 4, subsample = "1_2_2_1_1", etype = "vggblstmp", @@ -33,11 +34,12 @@ verbose = 2, char_list = [u"あ", u"い", u"う", u"え", u"お"], outdir = None -) + ) - -def test_model_trainable_and_decodable(): +@pytest.mark.parametrize("etype", ["blstmp", "vggblstmp"]) +def test_model_trainable_and_decodable(etype): + args = make_arg(etype) for m_str in ["e2e_asr_attctc", "e2e_asr_attctc_th"]: try: import torch @@ -81,8 +83,10 @@ def __init__(self, n_in, n_out): def __call__(self, x): return self.a(x) -# def test_encoder_mask_equal(): -if __name__ == "__main__": + +@pytest.mark.parametrize("etype", ["blstmp", "vggblstmp"]) +def test_loss_and_ctc_grad(etype): + args = make_arg(etype) import logging logging.basicConfig(level=logging.DEBUG, format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s') try: diff --git a/test/test_optimizer.py b/test/test_optimizer.py new file mode 100644 index 00000000000..5a97020125e --- /dev/null +++ b/test/test_optimizer.py @@ -0,0 +1,62 @@ +import pytest +import chainer +import numpy +try: + import torch +except 
ImportError: + pytest.skip("torch not installed") + + +class ChModel(chainer.Chain): + def __init__(self): + super(ChModel, self).__init__() + with self.init_scope(): + self.a = chainer.links.Linear(3, 1) + + def __call__(self, x): + return chainer.functions.sum(self.a(x)) + + +class ThModel(torch.nn.Module): + def __init__(self): + super(ThModel, self).__init__() + self.a = torch.nn.Linear(3, 1) + + def forward(self, x): + return self.a(x).sum() + + +@pytest.mark.parametrize("ch_opt_t,th_opt_t", [ + (chainer.optimizers.SGD, lambda ps: torch.optim.SGD(ps, lr=0.01)), + (chainer.optimizers.Adam, torch.optim.Adam), + (chainer.optimizers.AdaDelta, lambda ps: torch.optim.Adadelta(ps, rho=0.95)) +]) +def test_optimizer(ch_opt_t, th_opt_t): + # model construction + ch_model = ChModel() + th_model = ThModel() + + # copy params + th_model.a.weight.data = torch.from_numpy(numpy.copy(ch_model.a.W.data)) + th_model.a.bias.data = torch.from_numpy(numpy.copy(ch_model.a.b.data)) + + # optimizer setup + ch_opt = ch_opt_t() + ch_opt.setup(ch_model) + th_opt = th_opt_t(th_model.parameters()) + + # forward + ch_model.cleargrads() + data = numpy.random.randn(2, 3).astype(numpy.float32) + v = chainer.Variable(data) + ch_loss = ch_model(data) + th_loss = th_model(torch.autograd.Variable(torch.from_numpy(data))) + numpy.testing.assert_allclose(ch_loss.data, th_loss.data.numpy()) + + chainer.functions.sum(ch_loss).backward() + th_loss.backward() + ch_opt.update() + th_opt.step() + numpy.testing.assert_allclose(ch_model.a.W.data, th_model.a.weight.data.numpy()) + numpy.testing.assert_allclose(ch_model.a.b.data, th_model.a.bias.data.numpy()) + From 6b0b4a81779efe59352b3b4d3fb1fd6e390b1e4e Mon Sep 17 00:00:00 2001 From: ShigekiKarita Date: Thu, 21 Dec 2017 15:55:33 +0900 Subject: [PATCH 11/17] add more tests on grad --- test/test_e2e_model.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/test/test_e2e_model.py b/test/test_e2e_model.py index b6f51c18e84..58628926ec6 100644 --- a/test/test_e2e_model.py +++ b/test/test_e2e_model.py @@ -122,12 +122,26 @@ def test_loss_and_ctc_grad(etype): numpy.testing.assert_allclose(ch_ctc.data, th_ctc.data.numpy()) numpy.testing.assert_allclose(ch_att.data, th_att.data.numpy()) - # test grads + # test ctc grads ch_ctc.backward() th_ctc.backward() numpy.testing.assert_allclose(ch_model.ctc.ctc_lo.W.grad, th_model.ctc.ctc_lo.weight.grad.data.numpy(), 1e-7, 1e-8) + numpy.testing.assert_allclose(ch_model.ctc.ctc_lo.b.grad, + th_model.ctc.ctc_lo.bias.grad.data.numpy(), 1e-5, 1e-6) + - numpy.testing.assert_allclose(ch_model.ctc.ctc_lo.W.grad, - th_model.ctc.ctc_lo.weight.grad.data.numpy(), 1e-7, 1e-8) + # test cross-entropy grads + ch_model.cleargrads() + th_model.zero_grad() + ch_ctc, ch_att, ch_acc = ch_model(data) + th_ctc, th_att, th_acc = th_model(data) + ch_att.backward() + th_att.backward() + numpy.testing.assert_allclose(ch_model.dec.output.W.grad, + th_model.dec.output.weight.grad.data.numpy(), 1e-7, 1e-8) + numpy.testing.assert_allclose(ch_model.dec.output.b.grad, + th_model.dec.output.bias.grad.data.numpy(), 1e-5, 1e-6) + + From 68f2c88283e7639fd4033f8e4a1f421190ba8248 Mon Sep 17 00:00:00 2001 From: ShigekiKarita Date: Thu, 21 Dec 2017 17:40:37 +0900 Subject: [PATCH 12/17] workaround for zero-length --- src/nets/e2e_asr_attctc_th.py | 8 +++--- test/test_e2e_model.py | 46 +++++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index 
b9412756962..bbaae9ed9ba 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -227,11 +227,13 @@ def forward(self, data): :return: ''' # utt list of frame x dim - xs = [i[1]['feat'] for i in data] - sorted_index = sorted(range(len(xs)), key=lambda i: -len(xs[i])) + xs = [d[1]['feat'] for d in data] + tids = [d[1]['tokenid'].split() for d in data] + filtered_index = filter(lambda i: len(tids[i]) > 0, range(len(xs))) + sorted_index = sorted(filtered_index, key=lambda i: -len(xs[i])) xs = [xs[i] for i in sorted_index] # utt list of olen - ys = [np.fromiter(map(int, data[i][1]['tokenid'].split()), dtype=np.int64) for i in sorted_index] + ys = [np.fromiter(map(int, tids[i]), dtype=np.int64) for i in sorted_index] ys = [to_cuda(self, Variable(torch.from_numpy(y))) for y in ys] # subsample frame diff --git a/test/test_e2e_model.py b/test/test_e2e_model.py index 58628926ec6..14271eac335 100644 --- a/test/test_e2e_model.py +++ b/test/test_e2e_model.py @@ -74,16 +74,6 @@ def init_chainer_weight_const(m, val): p.data[:] = val -class Model(chainer.Chain): - def __init__(self, n_in, n_out): - super(Model, self).__init__() - with self.init_scope(): - self.a = chainer.links.Linear(n_in, n_out) - - def __call__(self, x): - return self.a(x) - - @pytest.mark.parametrize("etype", ["blstmp", "vggblstmp"]) def test_loss_and_ctc_grad(etype): args = make_arg(etype) @@ -145,3 +135,39 @@ def test_loss_and_ctc_grad(etype): th_model.dec.output.bias.grad.data.numpy(), 1e-5, 1e-6) + + +@pytest.mark.parametrize("etype", ["blstmp", "vggblstmp"]) +def test_zero_length_target(etype): + args = make_arg(etype) + import logging + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s') + try: + import torch + except: + pytest.skip("pytorch is not installed") + import e2e_asr_attctc as ch + import e2e_asr_attctc_th as th + ch_model = ch.E2E(40, 5, args) + ch_model.cleargrads() + th_model = th.E2E(40, 5, args) + + out_data = "" + data = [ + ("aaa", dict(feat=numpy.random.randn(200, 40).astype(numpy.float32), tokenid="1")), + ("bbb", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid="")), + ("cc", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid="1 2")) + ] + + ch_ctc, ch_att, ch_acc = ch_model(data) + th_ctc, th_att, th_acc = th_model(data) + + # NOTE: We ignore all zero length case because chainer also fails. Have a nice data-prep! 
+ # out_data = "" + # data = [ + # ("aaa", dict(feat=numpy.random.randn(200, 40).astype(numpy.float32), tokenid="")), + # ("bbb", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid="")), + # ("cc", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), tokenid="")) + # ] + # ch_ctc, ch_att, ch_acc = ch_model(data) + # th_ctc, th_att, th_acc = th_model(data) From d2fdf0a5ffa6ca1e7ed1cbba9f66435fa1530ee8 Mon Sep 17 00:00:00 2001 From: ShigekiKarita Date: Thu, 21 Dec 2017 19:08:06 +0900 Subject: [PATCH 13/17] fix pytest to import torch --- test/test_optimizer.py | 6 ++---- test/test_torch.py | 9 ++------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/test/test_optimizer.py b/test/test_optimizer.py index 5a97020125e..f79df743961 100644 --- a/test/test_optimizer.py +++ b/test/test_optimizer.py @@ -1,10 +1,8 @@ import pytest import chainer import numpy -try: - import torch -except ImportError: - pytest.skip("torch not installed") +pytest.importorskip('torch') +import torch class ChModel(chainer.Chain): diff --git a/test/test_torch.py b/test/test_torch.py index 1f41e62aa63..60c51e70655 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -1,11 +1,6 @@ import pytest - -try: - import torch - from torch.autograd import Variable -except: - pytest.skip("pytorch is not installed") - +pytest.importorskip('torch') +import torch from e2e_asr_attctc_th import pad_list, mask_by_length From 11c14d4fd84df9fc0d786f39234b9bd6c154b417 Mon Sep 17 00:00:00 2001 From: ShigekiKarita Date: Thu, 21 Dec 2017 19:13:12 +0900 Subject: [PATCH 14/17] fix .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5ff2c1e236b..164f6f13266 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,7 @@ install: # - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pip install install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl; fi # - cd tools && make warp-ctc && cd - - grep -v cupy tools/requirements.txt | pip install -r /dev/stdin - - cd tools && git clone https://github.com/vesis84/kaldi-io-for-python.git && cd - + - cd tools && make kaldi-io-for-python.git && cd - script: From 696a3cce8ae745e0a32b5418299ce42b43d5c4c3 Mon Sep 17 00:00:00 2001 From: ShigekiKarita Date: Fri, 22 Dec 2017 11:18:16 +0900 Subject: [PATCH 15/17] reflect reviews from kan-bayashi --- src/nets/e2e_asr_attctc_th.py | 13 +++++++------ test/test_torch.py | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index bbaae9ed9ba..36ea2142271 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -408,7 +408,7 @@ def forward(self, enc_hs_pad, enc_hs_len, dec_z, att_prev, scaling=2.0): self.enc_h = enc_hs_pad # utt x frame x hdim self.h_length = self.enc_h.shape[1] # utt x frame x att_dim - self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h) + self.pre_compute_enc_h = torch.tanh(linear_tensor(self.mlp_enc, self.enc_h)) if dec_z is None: dec_z = Variable(enc_hs_pad.data.new(batch, self.dunits).zero_()) @@ -469,7 +469,7 @@ def forward(self, enc_hs_pad, enc_hs_len, dec_z, att_prev, scaling=2.0): self.enc_h = enc_hs_pad # utt x frame x hdim self.h_length = self.enc_h.shape[1] # utt x frame x att_dim - self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h) + self.pre_compute_enc_h = torch.tanh(linear_tensor(self.mlp_enc, self.enc_h)) if dec_z is None: dec_z = Variable(enc_hs_pad.data.new(batch, 
self.dunits).zero_()) @@ -509,10 +509,11 @@ def forward(self, enc_hs_pad, enc_hs_len, dec_z, att_prev, scaling=2.0): def th_accuracy(y_all, pad_target, ignore_label): - acc = 0 pad_pred = y_all.data.view(pad_target.size(0), pad_target.size(1), y_all.size(1)).max(2)[1] mask = pad_target.data != ignore_label - return torch.sum(pad_pred.masked_select(mask) == pad_target.data.masked_select(mask)) / torch.sum(mask) + numerator = torch.sum(pad_pred.masked_select(mask) == pad_target.data.masked_select(mask)) + denominator = torch.sum(mask) + return float(numerator) / float(denominator) # ------------- Decoder Network ---------------------------------------------------------------------------------------- @@ -601,8 +602,8 @@ def forward(self, hpad, hlen, ys): for (i, y_hat_), y_true_ in zip(enumerate(y_hat.data.cpu().numpy()), y_true.data.cpu().numpy()): if i == MAX_DECODER_OUTPUT: break - idx_hat = np.argmax(y_hat_[y_true_ != -1], axis=1) - idx_true = y_true_[y_true_ != -1] + idx_hat = np.argmax(y_hat_[y_true_ != self.ignore_id], axis=1) + idx_true = y_true_[y_true_ != self.ignore_id] seq_hat = [self.char_list[int(idx)] for idx in idx_hat] seq_true = [self.char_list[int(idx)] for idx in idx_true] seq_hat = "".join(seq_hat) diff --git a/test/test_torch.py b/test/test_torch.py index 60c51e70655..98a2f37e7e1 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -1,6 +1,7 @@ import pytest pytest.importorskip('torch') import torch +from torch.autograd import Variable from e2e_asr_attctc_th import pad_list, mask_by_length From b4129da00f288c571e5a937e334a0a6549f4c08c Mon Sep 17 00:00:00 2001 From: ShigekiKarita Date: Fri, 22 Dec 2017 13:40:22 +0900 Subject: [PATCH 16/17] revert attloc tanh --- src/nets/e2e_asr_attctc_th.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nets/e2e_asr_attctc_th.py b/src/nets/e2e_asr_attctc_th.py index 36ea2142271..c00136663db 100644 --- a/src/nets/e2e_asr_attctc_th.py +++ b/src/nets/e2e_asr_attctc_th.py @@ -469,7 +469,7 @@ def forward(self, enc_hs_pad, enc_hs_len, dec_z, att_prev, scaling=2.0): self.enc_h = enc_hs_pad # utt x frame x hdim self.h_length = self.enc_h.shape[1] # utt x frame x att_dim - self.pre_compute_enc_h = torch.tanh(linear_tensor(self.mlp_enc, self.enc_h)) + self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h) if dec_z is None: dec_z = Variable(enc_hs_pad.data.new(batch, self.dunits).zero_()) From e3a19ab57ce5bfeb90da413f36965f6fc540f6e1 Mon Sep 17 00:00:00 2001 From: ShigekiKarita Date: Fri, 22 Dec 2017 15:03:41 +0900 Subject: [PATCH 17/17] update Makefile --- tools/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/Makefile b/tools/Makefile index c518e234737..ee81ac49cf6 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,7 +1,7 @@ .PHONY: all clean -all: kaldi venv venv/bin/activate nkf kaldi-io-for-python.git +all: kaldi venv venv/bin/activate nkf kaldi-io-for-python.git pytorch warp-ctc kaldi-io-for-python.git: git clone https://github.com/vesis84/kaldi-io-for-python.git
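
For reference, the "workaround for zero-length" change in PATCH 12 is easier to follow outside the hunk: utterances whose target token string is empty are dropped from the minibatch before the batch is sorted by descending input length (as padded/packed RNN inputs require), and token-id strings are converted to int64 arrays only for the surviving utterances. The following is a minimal, framework-free sketch of that logic; the helper name filter_and_sort_batch is illustrative and not part of the repository.

    # Sketch of the zero-length-target filtering introduced in PATCH 12 (illustrative names).
    import numpy as np

    def filter_and_sort_batch(data):
        """Drop utterances with empty targets, then sort by descending input length."""
        xs = [d[1]['feat'] for d in data]               # utt list of frame x dim
        tids = [d[1]['tokenid'].split() for d in data]  # utt list of token-id strings
        # keep only utterances that still have at least one target token
        filtered_index = [i for i in range(len(xs)) if len(tids[i]) > 0]
        # longest input first, as required for packed/padded RNN batches
        sorted_index = sorted(filtered_index, key=lambda i: -len(xs[i]))
        xs = [xs[i] for i in sorted_index]
        ys = [np.fromiter(map(int, tids[i]), dtype=np.int64) for i in sorted_index]
        return xs, ys

    # toy batch mirroring test_zero_length_target: "bbb" has an empty target and is dropped
    data = [
        ("aaa", dict(feat=np.random.randn(200, 40).astype(np.float32), tokenid="1")),
        ("bbb", dict(feat=np.random.randn(100, 40).astype(np.float32), tokenid="")),
        ("cc", dict(feat=np.random.randn(100, 40).astype(np.float32), tokenid="1 2")),
    ]
    xs, ys = filter_and_sort_batch(data)
    assert len(xs) == 2 and [len(y) for y in ys] == [1, 2]

As the NOTE in test_zero_length_target indicates, a batch whose targets are all empty is still left to data preparation; only partially empty batches are handled by this filtering.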