diff --git a/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh b/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
index c541a2f660b..3576aa54953 100755
--- a/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
+++ b/egs/material/s5/local/rnnlm/run_tdnn_lstm.sh
@@ -15,7 +15,6 @@
 
 # Begin configuration section.
 
-dir=exp/rnnlm_lstm_1a
 embedding_dim=512
 lstm_rpd=128
 lstm_nrpd=128
@@ -24,20 +23,23 @@ train_stage=-10
 
 # variables for lattice rescoring
 run_rescore=true
-ac_model_dir=exp/chain/tdnn_lstm1a_sp_ld5
+language=swahili
+ac_model_dir=exp/$language/chain/tdnn_lstm1a_sp_ld5
 decode_dir_suffix=rnnlm_1a
 ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order
               # if it's set, it merges histories in the lattice if they share
               # the same ngram history and this prevents the lattice from
               # exploding exponentially
 pruned_rescore=true
 
 . ./cmd.sh
 . ./utils/parse_options.sh
 
-text=data/train/text
-lexicon=data/local/dict_nosp/lexiconp.txt
-text_dir=data/rnnlm/text_nosp_1e
+dir=exp/$language/rnnlm_lstm_1a
+
+text=data/$language/train/text
+lexicon=data/$language/local/dict_nosp/lexiconp.txt
+text_dir=data/$language/rnnlm/text_nosp_1e
 mkdir -p $dir/config
 set -e
 
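With this change the swahili and tagalog recipes share the s5 script above, selected by the new --language option; the s5_tagalog copy below is deleted. A minimal usage sketch (hypothetical invocations, assuming run.sh has already built data/$language/train and the acoustic model under exp/$language/chain/tdnn_lstm1a_sp_ld5):

  local/rnnlm/run_tdnn_lstm.sh                       # defaults to --language swahili
  local/rnnlm/run_tdnn_lstm.sh --language tagalog    # same script on the tagalog data
  local/rnnlm/run_tdnn_lstm.sh --language swahili --ngram-order 3   # merge rescoring histories at 3-gram order

utils/parse_options.sh maps each --option to the shell variable of the same name, so any variable in the configuration section can be overridden this way.
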
diff --git a/egs/material/s5_tagalog/local/rnnlm/run_tdnn_lstm.sh b/egs/material/s5_tagalog/local/rnnlm/run_tdnn_lstm.sh
deleted file mode 100755
index 877569c3501..00000000000
--- a/egs/material/s5_tagalog/local/rnnlm/run_tdnn_lstm.sh
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/bin/bash
-
-# Copyright 2012  Johns Hopkins University (author: Daniel Povey)
-#           2017  Hainan Xu
-
-# This script trains LMs on the swbd LM-training data.
-
-# rnnlm/train_rnnlm.sh: best iteration (out of 10) was 3, linking it to final iteration.
-# rnnlm/train_rnnlm.sh: train/dev perplexity was 65.0 / 106.6.
-# Train objf: -5.17 -4.66 -4.37 -4.17 -3.99 -3.82 -3.66 -3.52 -3.39 -3.27
-# Dev objf:   -9.55 -5.02 -4.73 -4.67 -4.69 -4.77 -4.90 -5.02 -5.16 -5.29
-
-# %WER 46.51 [ 29942 / 64382, 3413 ins, 9336 del, 17193 sub ] exp/chain/tdnn_lstm1a_sp_ld5/decode_dev/wer_9_0.5
-# %WER 44.67 [ 28762 / 64382, 3276 ins, 9266 del, 16220 sub ] exp/chain/tdnn_lstm1a_sp_ld5/decode_dev_rnnlm_1a/wer_9_0.5
-
-# Begin configuration section.
-
-dir=exp/rnnlm_lstm_1a
-embedding_dim=256
-lstm_rpd=64
-lstm_nrpd=64
-stage=-10
-train_stage=-10
-
-# variables for lattice rescoring
-run_rescore=true
-ac_model_dir=exp/chain/tdnn_lstm1a_sp_ld5
-decode_dir_suffix=rnnlm_1a
-ngram_order=4 # approximate the lattice-rescoring by limiting the max-ngram-order
-              # if it's set, it merges histories in the lattice if they share
-              # the same ngram history and this prevents the lattice from
-              # exploding exponentially
-pruned_rescore=true
-
-. ./cmd.sh
-. ./utils/parse_options.sh
-
-text=data/train/text
-lexicon=data/local/dict_nosp/lexiconp.txt
-text_dir=data/rnnlm/text_nosp_1e
-mkdir -p $dir/config
-set -e
-
-for f in $text $lexicon; do
-  [ ! -f $f ] && \
-    echo "$0: expected file $f to exist; search for local/wsj_extend_dict.sh in run.sh" && exit 1
-done
-
-if [ $stage -le 0 ]; then
-  mkdir -p $text_dir
-  echo -n >$text_dir/dev.txt
-  # hold out one in every 50 lines as dev data.
-  cat $text | cut -d ' ' -f2- | awk -v text_dir=$text_dir '{if(NR%50 == 0) { print >text_dir"/dev.txt"; } else {print;}}' >$text_dir/train.txt
-fi
-
-if [ $stage -le 1 ]; then
-  cp data/lang/words.txt $dir/config/
-  n=`cat $dir/config/words.txt | wc -l`
-  echo "<brk> $n" >> $dir/config/words.txt
-
-  # words that are not present in words.txt but are in the training or dev data, will be
-  # mapped to <unk> during training.
-  echo "<unk>" >$dir/config/oov.txt
-
-  cat > $dir/config/data_weights.txt <<EOF
-train   1   1.0
-EOF
-
-  rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \
-                             --unk-word="<unk>" \
-                             --data-weights-file=$dir/config/data_weights.txt \
-                             $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt
-
-  # choose features
-  rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \
-                           --use-constant-feature=true \
-                           --special-words='<s>,</s>,<brk>,<unk>,<noise>,<spnoise>,<sil>' \
-                           $dir/config/words.txt > $dir/config/features.txt
-
-  cat >$dir/config/xconfig <<EOF