Skip to content

Commit

Permalink
Merge branch 'espnet:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
realzza committed Nov 22, 2022
2 parents 562f7dd + ca2193d commit fbfe277
Show file tree
Hide file tree
Showing 25 changed files with 872 additions and 27 deletions.
18 changes: 18 additions & 0 deletions egs2/TEMPLATE/asr1/asr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ sos_eos="<sos/eos>" # sos and eos symbole
bpe_input_sentence_size=100000000 # Size of input sentence for BPE.
bpe_nlsyms= # non-linguistic symbols list, separated by a comma or a file containing 1 symbol per line, for BPE
bpe_char_cover=1.0 # character coverage when modeling BPE
hugging_face_model_name_or_path="" # Hugging Face model or path for hugging_face tokenizer

# Ngram model related
use_ngram=false
Expand Down Expand Up @@ -306,6 +307,7 @@ bpeprefix="${bpedir}"/bpe
bpemodel="${bpeprefix}".model
bpetoken_list="${bpedir}"/tokens.txt
chartoken_list="${token_listdir}"/char/tokens.txt
# Token list location for the hugging_face token type.
# Every "/" in the model name or path is replaced by "-" (global replacement,
# not only the first occurrence), so the identifier always contributes exactly
# one path component below ${token_listdir}.
hugging_face_token_list="${token_listdir}/hugging_face_${hugging_face_model_name_or_path//\//-}/tokens.txt"
# NOTE: keep for future development.
# shellcheck disable=SC2034
wordtoken_list="${token_listdir}"/word/tokens.txt
Expand All @@ -318,6 +320,9 @@ elif [ "${token_type}" = char ]; then
elif [ "${token_type}" = word ]; then
token_list="${wordtoken_list}"
bpemodel=none
elif [ "${token_type}" = hugging_face ]; then
token_list="${hugging_face_token_list}"
bpemodel=${hugging_face_model_name_or_path}
else
log "Error: not supported --token_type '${token_type}'"
exit 2
Expand Down Expand Up @@ -349,6 +354,9 @@ if [ -z "${asr_tag}" ]; then
if [ "${token_type}" = bpe ]; then
asr_tag+="${nbpe}"
fi
# Append the Hugging Face model identifier to the tag.
# All "/" characters are replaced by "-" (global replacement, not only the
# first occurrence), so the tag stays free of "/" even when a multi-component
# local path is given instead of an "org/name" hub identifier.
if [ "${token_type}" = hugging_face ]; then
    asr_tag+="_${hugging_face_model_name_or_path//\//-}"
fi
# Add overwritten arg's info
if [ -n "${asr_args}" ]; then
asr_tag+="$(echo "${asr_args}" | sed -e "s/--/\_/g" -e "s/[ |=/]//g")"
Expand Down Expand Up @@ -387,6 +395,9 @@ if [ -z "${asr_stats_dir}" ]; then
if [ "${token_type}" = bpe ]; then
asr_stats_dir+="${nbpe}"
fi
# Append the Hugging Face model identifier to the stats directory name.
# All "/" characters are replaced by "-" (global replacement, not only the
# first occurrence), so a multi-component local path does not add further
# "/" characters to the directory name.
if [ "${token_type}" = hugging_face ]; then
    asr_stats_dir+="_${hugging_face_model_name_or_path//\//-}"
fi
if [ -n "${speed_perturb_factors}" ]; then
asr_stats_dir+="_sp"
fi
Expand Down Expand Up @@ -690,7 +701,14 @@ if ! "${skip_data_prep}"; then
--add_symbol "${blank}:0" \
--add_symbol "${oov}:1" \
--add_symbol "${sos_eos}:-1"
elif [ "${token_type}" = hugging_face ]; then
log "Stage 5: Generate hugging_face token_list from ${hugging_face_model_name_or_path}"

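# The first symbol in token_list must be "<blank>" and the last must be "<sos/eos>":
# 0 is reserved for CTC-blank for ASR and is also used as the ignore-index in the other tasks.
# NOTE(review): unlike the branch above, this export passes no --add_symbol options;
# confirm that the exported vocabulary is meant to be used as-is.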
${python} -m espnet2.bin.hugging_face_export_vocabulary \
--model_name_or_path "${hugging_face_model_name_or_path}" \
--output "${token_list}"
else
log "Error: not supported --token_type '${token_type}'"
exit 2
Expand Down
22 changes: 22 additions & 0 deletions egs2/slurp_entity/asr1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,28 @@



# Using XLS-R pretrained speech Encoder and mBART-50 Large pretrained text Encoder-Decoder

- ASR config: [conf/tuning/train_asr_branchformer_xlsr_mbart.yaml](conf/tuning/train_asr_branchformer_xlsr_mbart.yaml)
- #Params: 1.21 B

## Environments
- date: `Wed Sep 7 01:16:08 CEST 2022`
- python version: `3.9.13 (main, Jun 9 2022, 00:00:00) [GCC 11.3.1 20220421 (Red Hat 11.3.1-2)]`
- espnet version: `espnet 202207`
- pytorch version: `pytorch 1.12.1+cu116`
- Git hash: `c9cb7c424c90e9d3a59ace324308793b91fedbe1`
- Commit date: `Tue Aug 23 16:22:24 2022 +0200`

## Intent Classification
- Valid Intent Classification Result: 0.8933256616800921
- Test Intent Classification Result: 0.8811744915124636

## Entity
|Slu f1|Precision|Recall|F-Measure|
|:---:|:---:|:---:|:---:|
|test|0.7949|0.7788|0.7868|

# Initial Result

## Environments
Expand Down
3 changes: 3 additions & 0 deletions egs2/slurp_entity/asr1/conf/decode_asr_hf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Inference settings for the Hugging Face decoder recipe.
# Beam width used during search.
beam_size: 5
# CTC contribution set to zero.
# NOTE(review): presumably this means decoding relies on the decoder scores only; confirm.
ctc_weight: 0.0
# Enables the Hugging Face decoder option at inference time.
# NOTE(review): exact effect is defined by the inference code, not shown here.
hugging_face_decoder: True
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Training configuration: s3prl XLS-R frontend -> linear preencoder ->
# Branchformer encoder -> Hugging Face postencoder and decoder.
# Each "*_conf" mapping holds the options of the component named just above it;
# the nesting below is required so that keys such as model_name_or_path and
# lang_token_id stay scoped to their own component instead of colliding at the
# top level.

# network architecture
# encoder related
encoder: branchformer
encoder_conf:
    output_size: 1024
    use_attn: true
    attention_heads: 8
    attention_layer_type: rel_selfattn
    pos_enc_layer_type: rel_pos
    rel_pos_type: latest
    use_cgmlp: true
    cgmlp_linear_units: 4096
    cgmlp_conv_kernel: 31
    use_linear_after_conv: false
    gate_activation: identity
    merge_method: concat
    cgmlp_weight: 0.5               # used only if merge_method is "fixed_ave"
    attn_branch_drop_rate: 0.0      # used only if merge_method is "learned_ave"
    num_blocks: 18
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.1
    input_layer: conv2d
    stochastic_depth_rate: 0.0

# The postencoder and the decoder below both name the same Hugging Face model.
postencoder: hugging_face_transformers
postencoder_conf:
    model_name_or_path: "akreal/mbart-large-50-finetuned-slurp"
    length_adaptor_n_layers: 1
    lang_token_id: 250004

decoder: hugging_face_transformers
decoder_conf:
    model_name_or_path: "akreal/mbart-large-50-finetuned-slurp"

# optimization and batching
use_amp: true
num_workers: 2
optim: adam
batch_type: length
batch_bins: 170000
accum_grad: 4
optim_conf:
    lr: 0.00005
    weight_decay: 0.000001
scheduler: warmuplr     # pytorch v1.1.0+ required
scheduler_conf:
    warmup_steps: 25000
max_epoch: 50

# Parameters under this prefix are listed as frozen.
freeze_param: [
    "frontend.upstream"
]

frontend: s3prl
frontend_conf:
    frontend_conf:
        upstream: xls_r_300m  # Note: If the upstream is changed, please change the input_size in the preencoder.
    download_dir: ./hub
    multilayer_feature: True

preencoder: linear
preencoder_conf:
    input_size: 1024  # Note: If the upstream is changed, please change this value accordingly.
    output_size: 80

model_conf:
    ctc_weight: 0.0
    lsm_weight: 0.1
    length_normalized_loss: false
    extract_feats_in_collect_stats: false   # Note: "False" means that during collect stats (stage 10) dummy stats files are generated rather than extracting features by forwarding the frontend.
    # mBART dictionary customizations
    ignore_id: 1
    sym_blank: "<pad>"
    sym_sos: "<s>"
    sym_eos: "</s>"
    lang_token_id: 250004

# Checkpoint selection: rank by validation accuracy (higher is better).
best_model_criterion:
-   - valid
    - acc
    - max
keep_nbest_models: 10
20 changes: 18 additions & 2 deletions egs2/slurp_entity/asr1/local/convert_to_entity_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,24 @@
import os
import sys

from espnet2.utils.types import str2bool

def generate_entity_file(line_arr, output_file="result_test.json"):

def generate_entity_file(line_arr, output_file="result_test.json", token_type_bpe=True):
fp = open(output_file, "w")
for line in line_arr:
scenario = line.strip().split("\t")[0].split("_")[0]
action = "_".join(line.strip().split("\t")[0].split()[0].split("_")[1:])
if not token_type_bpe:
line = line.replace(" ", "▁")
entity_names_arr = line.strip().split("▁SEP")[1:-1]
ent_final_arr = []
for entity in entity_names_arr:
if len(entity.split("▁FILL")) != 2:
continue
ent_type = entity.split("▁FILL")[0].strip()
if not token_type_bpe:
ent_type = ent_type.replace("▁", " ").strip()
ent_val = entity.split("▁FILL")[1].strip().replace(" ", "")
ent_val = ent_val.replace("▁", " ").strip().replace("'", "'")
dict1 = {}
Expand Down Expand Up @@ -45,6 +51,12 @@ def generate_entity_file(line_arr, output_file="result_test.json"):
default="decode_asr_asr_model_valid.acc.ave_10best/test/",
help="Directory inside exp_root containing inference on test set",
)
parser.add_argument(
"--token_type_bpe",
type=str2bool,
default=True,
help="Whether text is encoded in BPE units",
)

args = parser.parse_args()

Expand All @@ -54,4 +66,8 @@ def generate_entity_file(line_arr, output_file="result_test.json"):

gen_file = open(os.path.join(exp_root, test_inference_folder + "score_wer/hyp.trn"))
line_arr = [line for line in gen_file]
generate_entity_file(line_arr, output_file=os.path.join(exp_root, "result_test.json"))
generate_entity_file(
line_arr,
output_file=os.path.join(exp_root, "result_test.json"),
token_type_bpe=args.token_type_bpe,
)
8 changes: 6 additions & 2 deletions egs2/slurp_entity/asr1/local/data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ SECONDS=0

stage=1
stop_stage=100000
token_type_bpe=true
log "$0 $*"
. utils/parse_options.sh

Expand All @@ -33,7 +34,7 @@ fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [ ! -e "${SLURP}/LICENSE.txt" ]; then
echo "stage 1: Download data to ${SLURP}"
echo "stage 1: Download data to ${SLURP}"
else
log "stage 1: ${SLURP}/LICENSE.txt is already existing. Skip data downloading"
fi
Expand All @@ -56,7 +57,10 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
python local/prepare_entity_type.py
for x in test devel train; do
mv data/${x}/text data/${x}/text_old
mv data/${x}/text_new data/${x}/text
mv data/${x}/text_new data/${x}/text
if ! "${token_type_bpe}"; then
cp -a data_old/${x} data/${x}_char
fi
done
fi

Expand Down
18 changes: 14 additions & 4 deletions egs2/slurp_entity/asr1/local/score.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# cmd=run.pl
# stage=0
# data=data/eval2000
token_type_bpe=true
# #end configuration section.

[ -f ./path.sh ] && . ./path.sh
Expand All @@ -17,16 +18,25 @@ if [ $# -lt 1 ]; then
fi
. ./db.sh

# Abort early when SLURP is empty: db.sh (sourced just above) is expected to
# define it, and the final evaluation step reads the reference annotations
# from ${SLURP}/dataset/slurp/test.jsonl.
if [ -z "${SLURP}" ]; then
echo "Fill the value of 'SLURP' of db.sh"
exit 1
fi

asr_expdir=$1

if [ $# -gt 1 ]; then
valid_inference_folder=$2
test_inference_folder=$3
python local/score.py --exp_root ${asr_expdir} --valid_folder ${valid_inference_folder} --test_folder ${test_inference_folder}
python local/convert_to_entity_file.py --exp_root ${asr_expdir} --valid_folder ${valid_inference_folder} --test_folder ${test_inference_folder}
else
python local/score.py --exp_root ${asr_expdir}
python local/convert_to_entity_file.py --exp_root ${asr_expdir}
valid_inference_folder=$(ls ${asr_expdir}/*/devel*/score_wer/hyp.trn | head -n 1 | sed 's!//!/!g' | cut -d/ -f3,4)/
test_inference_folder=$(ls ${asr_expdir}/*/test*/score_wer/hyp.trn | head -n 1 | sed 's!//!/!g' | cut -d/ -f3,4)/
fi
python local/score.py --exp_root ${asr_expdir} --valid_folder ${valid_inference_folder} --test_folder ${test_inference_folder}
python local/convert_to_entity_file.py \
--exp_root ${asr_expdir} \
--valid_folder ${valid_inference_folder} \
--test_folder ${test_inference_folder} \
--token_type_bpe ${token_type_bpe}
python local/evaluation/evaluate.py -g ${SLURP}/dataset/slurp/test.jsonl -p ${asr_expdir}/result_test.json
exit 0
33 changes: 33 additions & 0 deletions egs2/slurp_entity/asr1/run-hugging-face.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Recipe entry point: runs ./asr.sh for the SLURP entity task with the
# hugging_face token type. Any extra command-line arguments are forwarded
# to ./asr.sh after the options assembled below.
#
# Strict mode: stop on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Data sets (the "_char" variants of the SLURP splits).
train_set="train_char"
valid_set="devel_char"
test_sets="test_char devel_char"

# Training and decoding configurations.
asr_config=conf/tuning/train_asr_branchformer_xlsr_mbart.yaml
inference_config=conf/decode_asr_hf.yaml

# Options handed to ./asr.sh, kept in an array so that values containing
# spaces remain single arguments.
asr_opts=(
    --lang en
    --ngpu 1
    --use_lm false
    --token_type hugging_face
    --hugging_face_model_name_or_path facebook/mbart-large-50-many-to-many-mmt
    --local_data_opts "--token_type_bpe false"
    --local_score_opts "--token_type_bpe false"
    --max_wav_duration 30
    --speed_perturb_factors "0.9 1.0 1.1"
    --feats_normalize utterance_mvn
    --asr_config "${asr_config}"
    --inference_config "${inference_config}"
    --inference_nj 1
    --gpu_inference true
    --train_set "${train_set}"
    --valid_set "${valid_set}"
    --lm_train_text "data/${train_set}/text"
    --test_sets "${test_sets}"
)

./asr.sh "${asr_opts[@]}" "$@"

0 comments on commit fbfe277

Please sign in to comment.