Fix changes of file locations of subword-nmt #1219

Closed
docs/getting_started.rst (2 changes: 1 addition & 1 deletion)

@@ -11,7 +11,7 @@ This model uses a `Byte Pair Encoding (BPE)
 vocabulary <https://arxiv.org/abs/1508.07909>`__, so we'll have to apply
 the encoding to the source text before it can be translated. This can be
 done with the
-`apply\_bpe.py <https://github.com/rsennrich/subword-nmt/blob/master/apply_bpe.py>`__
+`apply\_bpe.py <https://github.com/rsennrich/subword-nmt/blob/master/subword_nmt/apply_bpe.py>`__
 script using the ``wmt14.en-fr.fconv-cuda/bpecodes`` file. ``@@`` is
 used as a continuation marker and the original text can be easily
 recovered with e.g. ``sed s/@@ //g`` or by passing the ``--remove-bpe``
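For illustration only (not part of this diff): a minimal sketch of the workflow the updated link points to, assuming a local clone of subword-nmt and a downloaded wmt14.en-fr.fconv-cuda model directory; the output file name is a placeholder.

```bash
# Apply BPE with the relocated script, then recover the original text by
# stripping the @@ continuation markers.
echo "Why is it rare to discover new marine mammal species ?" \
    | python subword-nmt/subword_nmt/apply_bpe.py -c wmt14.en-fr.fconv-cuda/bpecodes \
    > source.bpe.en
sed 's/@@ //g' source.bpe.en
```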
examples/translation/prepare-iwslt14.sh (2 changes: 1 addition & 1 deletion)

@@ -12,7 +12,7 @@ SCRIPTS=mosesdecoder/scripts
 TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl
 LC=$SCRIPTS/tokenizer/lowercase.perl
 CLEAN=$SCRIPTS/training/clean-corpus-n.perl
-BPEROOT=subword-nmt
+BPEROOT=subword-nmt/subword_nmt
 BPE_TOKENS=10000

 URL="https://wit3.fbk.eu/archive/2014-01/texts/de/en/de-en.tgz"
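A sketch of how $BPEROOT is typically used further down in this script (those lines are collapsed in the diff, so the file names below are placeholders); the two WMT prepare scripts that follow use the same pattern.

```bash
# Learn a BPE code and apply it to each split; $BPEROOT now points at the
# package directory inside the subword-nmt clone, where learn_bpe.py and
# apply_bpe.py live after the upstream reorganization.
BPE_CODE=code   # placeholder output path
python $BPEROOT/learn_bpe.py -s $BPE_TOKENS < train.de-en > $BPE_CODE
for f in train.de train.en valid.de valid.en; do
    python $BPEROOT/apply_bpe.py -c $BPE_CODE < "$f" > "$f.bpe"
done
```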
examples/translation/prepare-wmt14en2de.sh (2 changes: 1 addition & 1 deletion)

@@ -12,7 +12,7 @@ TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl
 CLEAN=$SCRIPTS/training/clean-corpus-n.perl
 NORM_PUNC=$SCRIPTS/tokenizer/normalize-punctuation.perl
 REM_NON_PRINT_CHAR=$SCRIPTS/tokenizer/remove-non-printing-char.perl
-BPEROOT=subword-nmt
+BPEROOT=subword-nmt/subword_nmt
 BPE_TOKENS=40000

 URLS=(
examples/translation/prepare-wmt14en2fr.sh (2 changes: 1 addition & 1 deletion)

@@ -12,7 +12,7 @@ TOKENIZER=$SCRIPTS/tokenizer/tokenizer.perl
 CLEAN=$SCRIPTS/training/clean-corpus-n.perl
 NORM_PUNC=$SCRIPTS/tokenizer/normalize-punctuation.perl
 REM_NON_PRINT_CHAR=$SCRIPTS/tokenizer/remove-non-printing-char.perl
-BPEROOT=subword-nmt
+BPEROOT=subword-nmt/subword_nmt
 BPE_TOKENS=40000

 URLS=(
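For context (not shown in this diff): these prepare scripts obtain subword-nmt by cloning its repository, and upstream moved the scripts into a subword_nmt/ package directory, which is what the new BPEROOT reflects. A sketch of the assumed setup:

```bash
# Clone the BPE tooling; after the upstream reorganization, apply_bpe.py and
# learn_bpe.py live under subword_nmt/ inside the clone.
git clone https://github.com/rsennrich/subword-nmt.git
ls subword-nmt/subword_nmt/apply_bpe.py subword-nmt/subword_nmt/learn_bpe.py
```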
examples/translation_moe/README.md (1 change: 0 additions & 1 deletion)

@@ -52,7 +52,6 @@ wget dl.fbaipublicfiles.com/fairseq/data/wmt14-en-de.extra_refs.tok
 
 Next apply BPE on the fly and run generation for each expert:
 ```bash
-BPEROOT=examples/translation/subword-nmt/
 BPE_CODE=examples/translation/wmt17_en_de/code
 for EXPERT in $(seq 0 2); do \
 cat wmt14-en-de.extra_refs.tok \
fairseq/data/encoders/subword_nmt_bpe.py (2 changes: 1 addition & 1 deletion)

@@ -24,7 +24,7 @@ def __init__(self, args):
             raise ValueError('--bpe-codes is required for --bpe=subword_nmt')
         codes = file_utils.cached_path(args.bpe_codes)
         try:
-            from subword_nmt import apply_bpe
+            from subword_nmt.subword_nmt import apply_bpe
             bpe_parser = apply_bpe.create_parser()
             bpe_args = bpe_parser.parse_args([
                 '--codes', codes,
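For illustration (not fairseq's exact code): which import path works depends on how subword-nmt is available, so a standalone sketch might fall back between the two layouts; the codes path below is a placeholder.

```python
# Illustrative sketch: a pip-installed subword-nmt exposes subword_nmt.apply_bpe,
# while a repository clone added to sys.path exposes subword_nmt.subword_nmt.apply_bpe.
import codecs

try:
    from subword_nmt.subword_nmt import apply_bpe  # repository clone on sys.path
except ImportError:
    from subword_nmt import apply_bpe  # pip-installed package

# Build a BPE encoder from a codes file (placeholder path) and segment a line.
with codecs.open("wmt14.en-fr.fconv-cuda/bpecodes", encoding="utf-8") as codes:
    bpe = apply_bpe.BPE(codes)

print(bpe.process_line("Hello world"))  # segmentation depends on the learned codes
```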