Skip to content

Commit

Permalink
Add SLUE-VoxPopuli results for WavLM with mBART-50
Browse files Browse the repository at this point in the history
  • Loading branch information
akreal committed Nov 29, 2022
1 parent ca2193d commit e75d8dc
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 24 deletions.
9 changes: 9 additions & 0 deletions egs2/slue-voxpopuli/asr1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,12 @@
|dataset|Snt|Macro F1(%)|Micro F1 (%)|Macro Label F1(%)|Micro Label F1 (%)| WER|
|---|---|---|---|---|---|---|
|decode_asr_asr_model_valid.acc.ave/devel|1742|61.0|74.5|81.6|88.0|9.3|

## Using WavLM pretrained speech Encoder and mBART-50 Large pretrained text Encoder-Decoder

- Recipe shell script: [local/run_hf.sh](local/run_hf.sh)
- Model link: [https://zenodo.org/record/7377091#.Y4YGKNLMJp8](https://zenodo.org/record/7377091#.Y4YGKNLMJp8)

|dataset|Snt|Macro F1(%)|Micro F1 (%)|Macro Label F1(%)|Micro Label F1 (%)| WER|
|---|---|---|---|---|---|---|
|decode_asr_hf_asr_model_valid.acc.ave/devel|1742|60.35|74.57|82.93|88.06|11.3|
3 changes: 3 additions & 0 deletions egs2/slue-voxpopuli/asr1/conf/decode_asr_hf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Inference settings for the Hugging Face decoder recipe; local/run_hf.sh passes
# this file to asr.sh as --inference_config.
beam_size: 5
ctc_weight: 0.0  # same value as model_conf.ctc_weight in the matching training config
hugging_face_decoder: True
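79 changes: 79 additions & 0 deletions egs2/slue-voxpopuli/asr1/conf/tuning/train_asr_branchformer_wavlm_mbart.yaml
Original file line number Diff line number Diff line change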
@@ -0,0 +1,79 @@
# Training configuration for the WavLM + Branchformer + mBART-50 recipe
# (referenced by local/run_hf.sh as asr_config).
# NOTE(review): the block nesting below was reconstructed from a flattened copy
# that had lost its indentation -- confirm against the file in the repository.

# Speech encoder.
encoder: branchformer
encoder_conf:
    output_size: 1024
    use_attn: true
    attention_heads: 8
    attention_layer_type: rel_selfattn
    pos_enc_layer_type: rel_pos
    rel_pos_type: latest
    use_cgmlp: true
    cgmlp_linear_units: 4096
    cgmlp_conv_kernel: 31
    use_linear_after_conv: false
    gate_activation: identity
    merge_method: concat
    cgmlp_weight: 0.5               # used only if merge_method is "fixed_ave"
    attn_branch_drop_rate: 0.0      # used only if merge_method is "learned_ave"
    num_blocks: 18
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.1
    input_layer: conv2d
    stochastic_depth_rate: 0.0

# Post-encoder and decoder both load the same Hugging Face checkpoint.
postencoder: hugging_face_transformers
postencoder_conf:
    model_name_or_path: "akreal/mbart-large-50-finetuned-slue"
    length_adaptor_n_layers: 1
    lang_token_id: 250004

decoder: hugging_face_transformers
decoder_conf:
    model_name_or_path: "akreal/mbart-large-50-finetuned-slue"

# Optimization.
use_amp: true
optim: adam
batch_type: length
batch_bins: 300000
accum_grad: 4
optim_conf:
    lr: 0.00005
    weight_decay: 0.000001
scheduler: warmuplr     # pytorch v1.1.0+ required
scheduler_conf:
    warmup_steps: 40000
max_epoch: 100

# Parameters whose names start with these prefixes are listed as frozen.
freeze_param: [
    "frontend.upstream"
]

# Speech frontend.
# NOTE(review): download_dir and multilayer_feature are placed as siblings of the
# inner frontend_conf (the usual s3prl frontend layout) -- confirm.
frontend: s3prl
frontend_conf:
    frontend_conf:
        upstream: wavlm_large  # Note: If the upstream is changed, please change the input_size in the preencoder.
    download_dir: ./hub
    multilayer_feature: True

preencoder: linear
preencoder_conf:
    input_size: 1024  # Note: If the upstream is changed, please change this value accordingly.
    output_size: 80

model_conf:
    ctc_weight: 0.0
    lsm_weight: 0.1
    length_normalized_loss: false
    extract_feats_in_collect_stats: false   # Note: "False" means during collect stats (stage 10), generating dummy stats files rather than extract_feats by forward frontend.
    # mBART dictionary customizations
    ignore_id: 1
    sym_blank: "<pad>"
    sym_sos: "<s>"
    sym_eos: "</s>"
    lang_token_id: 250004

# Checkpoint selection: a single criterion given as a (split, metric, mode) triple.
best_model_criterion:
-   - valid
    - acc
    - max
keep_nbest_models: 10
2 changes: 1 addition & 1 deletion egs2/slue-voxpopuli/asr1/local/data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
mkdir -p data/{train,devel,test}
python3 local/data_prep_original_slue_format.py ${VOXPOPULI}
for x in test devel train; do
for f in text wav.scp utt2spk transcript; do
for f in text wav.scp utt2spk ; do
sort data/${x}/${f} -o data/${x}/${f}
done
utils/utt2spk_to_spk2utt.pl data/${x}/utt2spk > "data/${x}/spk2utt"
Expand Down
30 changes: 30 additions & 0 deletions egs2/slue-voxpopuli/asr1/local/run_hf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Recipe entry point: runs ./asr.sh with the WavLM + mBART-50 training config
# and the Hugging Face decoding config defined below.
#
# Strict mode: exit on
# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline'.
# (Command tracing, -x, is NOT enabled by this script.)
set -e
set -u
set -o pipefail

# Data splits handed to asr.sh.
train_set="train"
valid_set="devel"
test_sets="test devel"

# Training and decoding configurations for this recipe.
asr_config=conf/tuning/train_asr_branchformer_wavlm_mbart.yaml
inference_config=conf/decode_asr_hf.yaml

# The trailing "$@" appends any caller-supplied arguments after the fixed options.
# --local_score_opts carries "--score_folder score_wer"; presumably asr.sh forwards
# it to local/score.sh, whose own default is score_ter -- verify in asr.sh.
./asr.sh \
--lang en \
--ngpu 1 \
--use_lm false \
--token_type hugging_face \
--hugging_face_model_name_or_path facebook/mbart-large-50-many-to-many-mmt \
--local_score_opts "--score_folder score_wer" \
--max_wav_duration 30 \
--speed_perturb_factors "0.9 1.0 1.1" \
--feats_normalize utterance_mvn \
--asr_config "${asr_config}" \
--inference_config "${inference_config}" \
--train_set "${train_set}" \
--valid_set "${valid_set}" \
--lm_train_text "data/${train_set}/text" \
--test_sets "${test_sets}" "$@"
24 changes: 15 additions & 9 deletions egs2/slue-voxpopuli/asr1/local/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,26 +169,32 @@ def get_classification_result(hyp_file, ref_file, hyp_asr_file, ref_asr_file):
default="decode_asr_asr_model_valid.acc.ave/test/",
help="Directory inside exp_root containing inference on test set",
)
parser.add_argument(
"--score_folder",
default="score_ter",
help="Directory inside inference folder containing hypothesis and reference files",
)
args = parser.parse_args()

exp_root = args.exp_root
valid_inference_folder = args.valid_folder
test_inference_folder = args.test_folder
score_folder = args.score_folder

# Read original tokenized text
valid_hyp_file = open(
os.path.join(exp_root, valid_inference_folder + "score_ter/hyp.trn")
os.path.join(exp_root, valid_inference_folder, score_folder, "hyp.trn")
)
valid_ref_file = open(
os.path.join(exp_root, valid_inference_folder + "score_ter/ref.trn")
os.path.join(exp_root, valid_inference_folder, score_folder, "ref.trn")
)

# Write detokenized text
valid_hyp_asr_file = open(
os.path.join(exp_root, valid_inference_folder + "score_ter/hyp_asr.trn"), "w"
os.path.join(exp_root, valid_inference_folder, score_folder, "hyp_asr.trn"), "w"
)
valid_ref_asr_file = open(
os.path.join(exp_root, valid_inference_folder + "score_ter/ref_asr.trn"), "w"
os.path.join(exp_root, valid_inference_folder, score_folder, "ref_asr.trn"), "w"
)

result, label_result = get_classification_result(
Expand All @@ -202,21 +208,21 @@ def get_classification_result(hyp_file, ref_file, hyp_asr_file, ref_asr_file):
print()


if os.path.isdir(test_inference_folder):
if os.path.isdir(os.path.join(exp_root, test_inference_folder)):
# Read files
test_hyp_file = open(
os.path.join(exp_root, test_inference_folder + "score_ter/hyp.trn")
os.path.join(exp_root, test_inference_folder, score_folder, "hyp.trn")
)
test_ref_file = open(
os.path.join(exp_root, test_inference_folder + "score_ter/ref.trn")
os.path.join(exp_root, test_inference_folder, score_folder, "ref.trn")
)

# Write files
test_hyp_asr_file = open(
os.path.join(exp_root, test_inference_folder + "score_ter/hyp_asr.trn"), "w"
os.path.join(exp_root, test_inference_folder, score_folder, "hyp_asr.trn"), "w"
)
test_ref_asr_file = open(
os.path.join(exp_root, test_inference_folder + "score_ter/ref_asr.trn"), "w"
os.path.join(exp_root, test_inference_folder, score_folder, "ref_asr.trn"), "w"
)

result, label_result = get_classification_result(
Expand Down
32 changes: 18 additions & 14 deletions egs2/slue-voxpopuli/asr1/local/score.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# stage=0
# data=data/eval2000
# #end configuration section.
score_folder=score_ter

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;
Expand All @@ -22,27 +23,30 @@ asr_expdir=$1
if [ $# -gt 1 ]; then
valid_inference_folder=$2
test_inference_folder=$3
python local/score.py --exp_root ${asr_expdir} --valid_folder ${valid_inference_folder} --test_folder ${test_inference_folder}
else
valid_inference_folder=decode_asr_asr_model_valid.acc.ave/devel
test_inference_folder=decode_asr_asr_model_valid.acc.ave/test
python local/score.py --exp_root ${asr_expdir}
valid_inference_folder=$(ls -t ${asr_expdir}/*/devel*/score_wer/hyp.trn | head -n 1 | sed 's!//!/!g' | cut -d/ -f3,4)/
test_inference_folder=$(ls -t ${asr_expdir}/*/test*/score_wer/hyp.trn | head -n 1 | sed 's!//!/!g' | cut -d/ -f3,4)/
fi

python local/score.py --exp_root ${asr_expdir} \
--valid_folder ${valid_inference_folder} \
--test_folder ${test_inference_folder} \
--score_folder ${score_folder}

sclite \
-r "${asr_expdir}/${valid_inference_folder}/score_ter/ref_asr.trn" trn \
-h "${asr_expdir}/${valid_inference_folder}/score_ter/hyp_asr.trn" trn \
-i rm -o all stdout > "${asr_expdir}/${valid_inference_folder}/score_ter/result_asr.txt"
echo "Write ASR result in ${asr_expdir}/${valid_inference_folder}/score_ter/result_asr.txt"
grep -e Avg -e SPKR -m 2 "${asr_expdir}/${valid_inference_folder}/score_ter/result_asr.txt"
-r "${asr_expdir}/${valid_inference_folder}/${score_folder}/ref_asr.trn" trn \
-h "${asr_expdir}/${valid_inference_folder}/${score_folder}/hyp_asr.trn" trn \
-i rm -o all stdout > "${asr_expdir}/${valid_inference_folder}/${score_folder}/result_asr.txt"
echo "Write ASR result in ${asr_expdir}/${valid_inference_folder}/${score_folder}/result_asr.txt"
grep -e Avg -e SPKR -m 2 "${asr_expdir}/${valid_inference_folder}/${score_folder}/result_asr.txt"

if [ -d "${test_inference_folder}" ]; then
sclite \
-r "${asr_expdir}/${test_inference_folder}/score_ter/ref_asr.trn" trn \
-h "${asr_expdir}/${test_inference_folder}/score_ter/hyp_asr.trn" trn \
-i rm -o all stdout > "${asr_expdir}/${test_inference_folder}/score_ter/result_asr.txt"
echo "Write ASR result in ${asr_expdir}/${test_inference_folder}/score_ter/result_asr.txt"
grep -e Avg -e SPKR -m 2 "${asr_expdir}/${test_inference_folder}/score_ter/result_asr.txt"
-r "${asr_expdir}/${test_inference_folder}/${score_folder}/ref_asr.trn" trn \
-h "${asr_expdir}/${test_inference_folder}/${score_folder}/hyp_asr.trn" trn \
-i rm -o all stdout > "${asr_expdir}/${test_inference_folder}/${score_folder}/result_asr.txt"
echo "Write ASR result in ${asr_expdir}/${test_inference_folder}/${score_folder}/result_asr.txt"
grep -e Avg -e SPKR -m 2 "${asr_expdir}/${test_inference_folder}/${score_folder}/result_asr.txt"
else
echo "[Warning] Skip ASR result on test set as it does not exist."
fi
Expand Down

0 comments on commit e75d8dc

Please sign in to comment.