Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test and Travis-CI #20

Merged
merged 18 commits into from
Dec 22, 2017
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ egs/*/*/exp
egs/*/*/fbank
egs/*/*/stft
*DS_Store


src/utils/kaldi_io_py.py
42 changes: 42 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Travis CI configuration: run the pytest suite on Linux under Python 2.7 and 3.6.
language: python

# Cache pip downloads and ccache output between builds to speed up CI runs.
cache:
- pip
- ccache

# Build matrix: one Linux job per supported Python version.
matrix:
include:
- os: linux
python: "2.7"
- os: linux
python: "3.6"


install:
- pip install -U pip wheel
# Test/runtime tooling: pytest plus pythonpath plugin, hacking (flake8 rules), mock backport.
- pip install pytest pytest-pythonpath hacking mock
- pip install autopep8
# unable to install pytorch as https://github.com/pytorch/pytorch/issues/4178
# - if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp27-cp27mu-linux_x86_64.whl; fi
# - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl; fi
# - cd tools && make warp-ctc && cd -
# Install requirements except cupy (no GPU on CI workers).
- grep -v cupy tools/requirements.txt | pip install -r /dev/stdin
# Build the kaldi-io-for-python helper used by the test suite.
- cd tools && make kaldi-io-for-python.git && cd -


script:
# TODO test coding style?
# - flake8
# - autopep8 -r . --global-config .pep8 --diff | tee check_autopep8
# - test ! -s check_autopep8
# Make the project's source trees importable, then run the tests in test/.
- export PYTHONPATH=`pwd`/src/nets:`pwd`/src/utils
- pytest test

# Use container-based infrastructure (no sudo) so the apt addon below is required for packages.
sudo: false

addons:
apt:
packages:
- cmake
- python-dev
- python3-dev
5 changes: 5 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# pytest configuration for the project test suite.
[pytest]
# Always run with verbose per-test output.
addopts = --verbose
# Collect tests only from the top-level test/ directory.
testpaths = test
# Directories prepended to sys.path by the pytest-pythonpath plugin so
# project modules import without installation.
python_paths = src/nets src/utils src/bin

2 changes: 1 addition & 1 deletion src/bin/asr_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def main():
# network archtecture
# encoder
parser.add_argument('--etype', default='blstmp', type=str,
choices=['blstmp', 'vggblstmp', 'vggblstm'],
choices=['blstm', 'blstmp', 'vggblstmp', 'vggblstm'],
help='Type of encoder network architecture')
parser.add_argument('--elayers', default=4, type=int,
help='Number of encoder layers')
Expand Down
21 changes: 2 additions & 19 deletions src/bin/asr_train_th.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,6 @@
# Copyright 2017 Johns Hopkins University (Shinji Watanabe)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)

"""
options (batch_size is only changed because of my poor GPU at home): --gpu -1 --outdir exp/train_si284_vggblstmp_e4_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150/results --debugmode 1 --dict data/lang_1char/train_si284_units.txt --debugdir exp/train_si284_vggblstmp_e4_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150 --minibatches 0 --verbose 0 --train-feat scp:dump/train_si284/deltafalse/feats.scp --valid-feat scp:dump/test_dev93/deltafalse/feats.scp --train-label dump/train_si284/deltafalse/data.json --valid-label dump/test_dev93/deltafalse/data.json --etype blstmp --elayers 4 --eunits 320 --eprojs 320 --subsample 1_2_2_1_1 --dlayers 1 --dunits 300 --atype location --aconv-chans 10 --aconv-filts 100 --mtlalpha 0.5 --batch-size 5 --maxlen-in 800 --maxlen-out 150 --opt adadelta --epochs 15 --gpu 0


chainer result
this epoch [#.................................................] 3.13%
400 iter, 0 epoch / 15 epochs
0.67657 iters/sec. Estimated time to finish: 3 days, 6:31:44.616061.


pytorch result
this epoch [#.................................................] 2.35%
300 iter, 0 epoch / 15 epochs
1.4973 iters/sec. Estimated time to finish: 1 day, 11:30:13.571661.

"""


import os
import copy
Expand Down Expand Up @@ -305,7 +288,7 @@ def main():
# network archtecture
# encoder
parser.add_argument('--etype', default='blstmp', type=str,
choices=['blstmp', 'vggblstmp', 'vggblstm'],
choices=['blstm', 'blstmp', 'vggblstmp', 'vggblstm'],
help='Type of encoder network architecture')
parser.add_argument('--elayers', default=4, type=int,
help='Number of encoder layers')
Expand Down Expand Up @@ -460,7 +443,7 @@ def main():

# Setup an optimizer
if args.opt == 'adadelta':
optimizer = torch.optim.Adadelta(model.parameters(), eps=args.eps)
optimizer = torch.optim.Adadelta(model.parameters(), rho=0.95, eps=args.eps)
elif args.opt == 'adam':
optimizer = torch.optim.Adam(model.parameters())

Expand Down
15 changes: 10 additions & 5 deletions src/nets/e2e_asr_attctc.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def reset(self):
self.enc_h = None
self.pre_compute_enc_h = None

def __call__(self, enc_hs, dec_z, scaling=2.0):
def __call__(self, enc_hs, dec_z, att_prev, scaling=2.0):
'''

:param enc_hs:
Expand Down Expand Up @@ -466,8 +466,8 @@ def __call__(self, hs, ys):
idx_true = y_true_[y_true_ != -1]
seq_hat = [self.char_list[int(idx)] for idx in idx_hat]
seq_true = [self.char_list[int(idx)] for idx in idx_true]
seq_hat = "".join(seq_hat).encode('utf-8').replace('<space>', ' ')
seq_true = "".join(seq_true).encode('utf-8').replace('<space>', ' ')
seq_hat = "".join(seq_hat).replace('<space>', ' ')
seq_true = "".join(seq_true).replace('<space>', ' ')
logging.info("groundtruth[%d]: " + seq_true, i)
logging.info("prediction [%d]: " + seq_hat, i)

Expand Down Expand Up @@ -631,7 +631,10 @@ class Encoder(chainer.Chain):
def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout, in_channel=1):
super(Encoder, self).__init__()
with self.init_scope():
if etype == 'blstmp':
if etype == 'blstm':
self.enc1 = BLSTM(idim, elayers, eunits, eprojs, dropout)
logging.info('BLSTM without projection for encoder')
elif etype == 'blstmp':
self.enc1 = BLSTMP(idim, elayers, eunits, eprojs, subsample, dropout)
logging.info('BLSTM with every-layer projection for encoder')
elif etype == 'vggblstmp':
Expand All @@ -656,7 +659,9 @@ def __call__(self, xs, ilens):
:param ilens:
:return:
'''
if self.etype == 'blstmp':
if self.etype == 'blstm':
xs, ilens = self.enc1(xs, ilens)
elif self.etype == 'blstmp':
xs, ilens = self.enc1(xs, ilens)
elif self.etype == 'vggblstmp':
xs, ilens = self.enc1(xs, ilens)
Expand Down
Loading