Skip to content

Commit

Permalink
Merge pull request #469 from Xiaofei-Wang/xiaofei_merge
Browse files Browse the repository at this point in the history
AMI recipe improvement 2
  • Loading branch information
sw005320 committed Nov 20, 2018
2 parents 12477de + d9d2668 commit 5ad45a3
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 13 deletions.
22 changes: 11 additions & 11 deletions egs/ami/asr1/RESULTS
@@ -1,12 +1,12 @@
# initial results (WER
$ grep -e Avg -e SPKR -m 2 exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_*bs64*/result.wrd.txt
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_dev_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word65000/result.wrd.txt:| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_dev_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word65000/result.wrd.txt:| Sum/Avg | 13059 94914 | 69.7 23.3 7.0 5.3 35.7 69.3 |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_eval_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word65000/result.wrd.txt:| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_eval_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word65000/result.wrd.txt:| Sum/Avg | 12612 89635 | 66.9 26.7 6.4 5.5 38.5 66.4 |
# initial results (WER)
$ grep -e Avg -e SPKR -m 2 exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_*bs64_word20000/result.wrd.txt
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_dev_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word20000/result.wrd.txt:| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_dev_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word20000/result.wrd.txt:| Sum/Avg | 13059 94914 | 69.7 23.3 7.0 5.3 35.7 69.3 |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_eval_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word20000/result.wrd.txt:| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_eval_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word20000/result.wrd.txt:| Sum/Avg | 12612 89635 | 66.9 26.7 6.4 5.5 38.5 66.4 |
# initial results (CER)
$ grep -e Avg -e SPKR -m 2 exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_*bs64*/result.txt
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_dev_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word65000/result.txt:| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_dev_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word65000/result.txt:| Sum/Avg | 13059 452218 | 82.8 8.4 8.9 5.4 22.6 69.3 |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_eval_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word65000/result.txt:| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_eval_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word65000/result.txt:| Sum/Avg | 12612 431997 | 81.6 9.9 8.5 5.6 24.0 66.4 |
$ grep -e Avg -e SPKR -m 2 exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_*bs64_word20000/result.txt
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_dev_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word20000/result.txt:| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_dev_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word20000/result.txt:| Sum/Avg | 13059 452218 | 82.8 8.4 8.9 5.4 22.6 69.3 |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_eval_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word20000/result.txt:| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
exp/ihm_train_pytorch_blstmp_e8_subsample1_2_2_1_1_unit320_proj320_d1_unit300_location_aconvc10_aconvf100_mtlalpha0.5_adadelta_bs30_mli800_mlo150_lsmunigram0.05/decode_ihm_eval_beam20_emodel.acc.best_p0.2_len0.0-0.0_ctcw0.3_rnnlm0.5_1layer_unit1000_sgd_bs64_word20000/result.txt:| Sum/Avg | 12612 431997 | 81.6 9.9 8.5 5.6 24.0 66.4 |
50 changes: 50 additions & 0 deletions egs/ami/asr1/conf/ami_beamformit.cfg
@@ -0,0 +1,50 @@
#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/)

# scrolling size to compute the delays
scroll_size = 250

# cross correlation computation window size
window_size = 500

# maximum number of cross-correlation peaks taken into account
nbest_amount = 4

# flag whether to apply an automatic noise thresholding
do_noise_threshold = 1

#Percentage of frames with lower xcorr taken as noisy
noise_percent = 10

######## acoustic modelling parameters

#transition probabilities weight for multichannel decoding
trans_weight_multi = 25
trans_weight_nbest = 25

###

# flag whether to print the features after setting them, or not
print_features = 1

# flag whether to use the bad frames in the sum process
do_avoid_bad_frames = 1

#flag to use the best channel (SNR) as a reference
#defined from command line
do_compute_reference = 1

# flag whether to use a UEM file or not (otherwise the whole file is processed)
do_use_uem_file = 0

# flag whether to use an adaptive weights scheme or fixed weights
do_adapt_weights = 1

# flag whether to output the sph files or just run the system to create the auxiliary files
do_write_sph_files = 1

####directories where to store/retrieve info####
#channels_file = ./cfg-files/channels

#show needs to be passed as argument normally, here a default one is given just in case
#show_id = Ttmp

36 changes: 36 additions & 0 deletions egs/ami/asr1/local/beamformit.sh
@@ -0,0 +1,36 @@
#!/bin/bash

# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)

# Beamform one parallel job's share of AMI meetings with BeamformIt.
#
# Usage: beamformit.sh <nj> <job> <numch> <meetings> <sdir> <odir>
#   nj       - total number of parallel jobs
#   job      - this job's 1-based index
#   numch    - number of input channels (selects $wdir/channels_<numch>)
#   meetings - file listing meeting IDs, one per line
#   sdir     - directory holding the source wav files
#   odir     - directory to write beamformed output into

. ./path.sh

set -e
set -u

if [ $# -ne 6 ]; then
  echo "Usage: $0 <nj> <job> <numch> <meetings> <sdir> <odir>" >&2
  exit 1
fi

nj=$1
job=$2
numch=$3
meetings=$4
sdir=$5
odir=$6
wdir=data/local/beamforming

# Carve out this job's slice of the meetings list (job indices are 0-based
# for split_scp.pl, hence job-1).
utils/split_scp.pl -j "$nj" $((job-1)) "$meetings" "$meetings.$job"

while IFS= read -r line; do

  mkdir -p "$odir/$line"
  BeamformIt -s "$line" -c "$wdir/channels_$numch" \
    --config_file "$(pwd)/conf/ami_beamformit.cfg" \
    --source_dir "$sdir" \
    --result_dir "$odir/$line"
  # Rename BeamformIt outputs to carry the MDM<numch> suffix that the
  # downstream data-preparation scripts expect.
  mv "$odir/$line/${line}.del"  "$odir/$line/${line}_MDM$numch.del"
  mv "$odir/$line/${line}.del2" "$odir/$line/${line}_MDM$numch.del2"
  mv "$odir/$line/${line}.info" "$odir/$line/${line}_MDM$numch.info"
  mv "$odir/$line/${line}.weat" "$odir/$line/${line}_MDM$numch.weat"
  mv "$odir/$line/${line}.wav"  "$odir/$line/${line}_MDM$numch.wav"
  #mv "$odir/$line/${line}.ovl" "$odir/$line/${line}_MDM$numch.ovl" # Was not created!

done < "$meetings.$job"

4 changes: 2 additions & 2 deletions egs/ami/asr1/run.sh
Expand Up @@ -56,7 +56,7 @@ epochs=15

# rnnlm related
use_wordlm=true # false means to train/use a character LM
lm_vocabsize=65000 # effective only for word LMs
lm_vocabsize=20000 # effective only for word LMs
lm_layers=1 # 2 for character LMs
lm_units=1000 # 650 for character LMs
lm_opt=sgd # adam for character LMs
Expand Down Expand Up @@ -295,7 +295,7 @@ else
fi
mkdir -p ${expdir}

if [ ${stage} -le -4 ]; then
if [ ${stage} -le 4 ]; then
echo "stage 4: Network Training"

${cuda_cmd} --gpu ${ngpu} ${expdir}/train.log \
Expand Down

0 comments on commit 5ad45a3

Please sign in to comment.