-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5584 from Masao-Someki/feature/multi_dataset_support
Support external dataset library for ESPnetEasy
- Loading branch information
Showing
35 changed files
with
1,476 additions
and
1,181 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,3 +46,4 @@ jobs: | |
run: | | ||
./ci/test_python_espnet1.sh | ||
./ci/test_python_espnet2.sh | ||
./ci/test_python_espnetez.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Integration-test driver for ESPnetEZ: sets up the Python environment and the | ||
# variables used by the rest of this script. | ||
# Strict mode: stop at the first failing command, unset variable or failed | ||
# pipeline stage, so that a broken test step fails the job instead of being | ||
# ignored and masked by the exit status of the final command. | ||
set -euo pipefail | ||
|
||
# Activate the project's Python environment. | ||
source tools/activate_python.sh | ||
# Append tools/s3prl to the module search path; `:-` keeps this safe when | ||
# PYTHONPATH is not set yet. | ||
PYTHONPATH="${PYTHONPATH:-}:$(pwd)/tools/s3prl" | ||
export PYTHONPATH | ||
# Coverage-wrapped interpreter command; meant to be expanded unquoted so that | ||
# it splits into separate words. | ||
python="python -m coverage run --append" | ||
# Remember the starting directory so the script can return to it later. | ||
cwd=$(pwd) | ||
|
||
gen_dummy_coverage(){ | ||
# Works around a problem seen when `coverage run` is executed in parallel: | ||
# create an empty script in the current directory and run it under coverage. | ||
# Call this only after changing into the recipe directory (cd ./egs2/foo/bar). | ||
touch empty.py | ||
${python} empty.py | ||
} | ||
|
||
#### Make sure chainer-independent #### | ||
# Uninstall chainer (-y: no confirmation prompt) so everything below runs | ||
# without it. | ||
python3 -m pip uninstall -y chainer | ||
|
||
# Download mini_an4 as test data and prepare flac data | ||
# Every relative path used below (data, dump, exp, conf) is resolved from this | ||
# recipe directory; give up if it cannot be entered. | ||
cd ./egs2/mini_an4/asr1 || exit | ||
# Run stage 1 on its own, then stages 2 to 4 with raw features. | ||
# What each stage does is defined in run.sh, which is not shown here. | ||
./run.sh --stage 1 --stop-stage 1 | ||
./run.sh --stage 2 --stop-stage 4 --feats-type "raw" | ||
|
||
# Now we have flac files under dump/org/train_*/data/format.*/ | ||
# and wav.scp files under dump/train_*/ | ||
# NOTE(review): the commands further down read dump/raw/train_nodev and | ||
# dump/raw/train_dev, so the paths in this comment may be missing the raw/ | ||
# component - confirm. | ||
|
||
# Remove any existing exp and data/spm directories before the ESPnetEZ runs. | ||
rm -rf exp data/spm | ||
# [ESPnet Easy] test asr recipe with coverage | ||
# The flags request SentencePiece model training, statistics collection and | ||
# training in one invocation, using the transformer debug config. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \ | ||
--task asr \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path conf/train_asr_transformer_debug.yaml \ | ||
--train_sentencepiece_model \ | ||
--run_collect_stats \ | ||
--run_train | ||
|
||
# finetuning | ||
# Same task, data, exp path and config as the run above, passed to the | ||
# fine-tuning test script with --run_finetune. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \ | ||
--task asr \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path conf/train_asr_transformer_debug.yaml \ | ||
--run_finetune | ||
|
||
# Remove generated files in order to reduce the disk usage | ||
rm -rf exp data/spm | ||
|
||
# [ESPnet Easy] test asr transducer recipe with coverage | ||
# Same invocation as the asr transformer test; only the config file differs | ||
# (train_asr_transducer_debug.yaml). | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \ | ||
--task asr \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path conf/train_asr_transducer_debug.yaml \ | ||
--train_sentencepiece_model \ | ||
--run_collect_stats \ | ||
--run_train | ||
|
||
# finetuning | ||
# Runs the fine-tuning test script against the same exp path and config. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \ | ||
--task asr \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path conf/train_asr_transducer_debug.yaml \ | ||
--run_finetune | ||
|
||
# Remove generated files in order to reduce the disk usage | ||
rm -rf exp data/spm | ||
|
||
# [ESPnet Easy] test lm recipe with coverage | ||
# The config is taken from the sibling lm1 recipe directory, while the data | ||
# and dump paths are still relative to the current directory. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \ | ||
--task lm \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path ../lm1/conf/train_transformer.yaml \ | ||
--train_sentencepiece_model \ | ||
--run_collect_stats \ | ||
--run_train | ||
|
||
# finetune | ||
# Runs the fine-tuning test script against the same exp path and config. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \ | ||
--task lm \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path ../lm1/conf/train_transformer.yaml \ | ||
--run_finetune | ||
|
||
# Remove generated files in order to reduce the disk usage | ||
rm -rf exp data/spm | ||
|
||
|
||
# [ESPnet Easy] test slu recipe with coverage | ||
# The config is read from ../s2t1/conf; the data and dump paths are still | ||
# relative to the current directory. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \ | ||
--task slu \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path ../s2t1/conf/train_slu_transformer.yaml \ | ||
--train_sentencepiece_model \ | ||
--run_collect_stats \ | ||
--run_train | ||
|
||
# finetune | ||
# Runs the fine-tuning test script against the same exp path and config. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \ | ||
--task slu \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path ../s2t1/conf/train_slu_transformer.yaml \ | ||
--run_finetune | ||
|
||
# Remove generated files in order to reduce the disk usage | ||
rm -rf exp data/spm | ||
|
||
|
||
# [ESPnet Easy] test tts recipe with coverage | ||
# The config is taken from the sibling tts1 recipe directory, while the data | ||
# and dump paths are still relative to the current directory. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez.py \ | ||
--task tts \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path ../tts1/conf/train_tacotron2_debug.yaml \ | ||
--train_sentencepiece_model \ | ||
--run_collect_stats \ | ||
--run_train | ||
|
||
# finetune | ||
# Runs the fine-tuning test script against the same exp path and config. | ||
python -m coverage run --append ../../../test/espnetez/test_integration_espnetez_ft.py \ | ||
--task tts \ | ||
--data_path data \ | ||
--train_dump_path dump/raw/train_nodev \ | ||
--valid_dump_path dump/raw/train_dev \ | ||
--exp_path ./exp \ | ||
--config_path ../tts1/conf/train_tacotron2_debug.yaml \ | ||
--run_finetune | ||
|
||
# Remove generated files in order to reduce the disk usage | ||
rm -rf exp data/spm | ||
|
||
# Go back to the directory the script was started from, because the paths | ||
# used below are relative to it. | ||
cd "${cwd}" || exit | ||
|
||
|
||
||
echo "=== report ===" | ||
# Merge the coverage data files written inside the recipe directories, then | ||
# print the text report and write the XML report. | ||
python -m coverage combine egs2/*/*/.coverage | ||
python -m coverage report | ||
python -m coverage xml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Style check and unit tests for ESPnetEZ, followed by a coverage report. | ||
# The two environment scripts are sourced before strict mode is switched on, | ||
# so they are not themselves subject to -e / -u. | ||
. tools/activate_python.sh | ||
. tools/extra_path.sh | ||
|
||
set -euo pipefail | ||
|
||
# Comma-separated list of paths that pycodestyle must skip. | ||
exclude="egs2/TEMPLATE/asr1/utils,egs2/TEMPLATE/asr1/steps,egs2/TEMPLATE/tts1/sid,doc,tools,test_utils/bats-core,test_utils/bats-support,test_utils/bats-assert" | ||
|
||
# flake8 | ||
# The flake8 step is currently disabled (the call below is commented out). | ||
# "$(dirname $0)"/test_flake8.sh | ||
# pycodestyle | ||
# NOTE(review): no input path is given here; pycodestyle presumably falls back | ||
# to the current directory because a project config file exists - confirm. | ||
pycodestyle --exclude "${exclude}" --show-source --show-pep8 | ||
|
||
# Append the warp-ctc build directory to the library search path for this | ||
# pytest invocation only. | ||
LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}:$(pwd)/tools/chainer_ctc/ext/warp-ctc/build" pytest -q test/espnetez/test_ez.py | ||
|
||
echo "=== report ===" | ||
# NOTE(review): pytest is not given any coverage option on its command line, | ||
# so coverage data is presumably enabled through the project's pytest | ||
# configuration - confirm. | ||
coverage report | ||
coverage xml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Single-option config: sets use_lora to true. | ||
use_lora: true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# This is a debug config for CI | ||
# Training is limited to one epoch of one iteration with a batch size of 2. | ||
batch_type: unsorted | ||
batch_size: 2 | ||
accum_grad: 1 | ||
max_epoch: 1 | ||
num_iters_per_epoch: 1 | ||
# No value is given for patience. | ||
patience: | ||
# The initialization method for model parameters | ||
init: xavier_uniform | ||
# A single criterion entry made of: valid, acc, max. | ||
best_model_criterion: | ||
- - valid | ||
- acc | ||
- max | ||
keep_nbest_models: 10 | ||
|
||
# Transformer encoder with very small sizes (2 units, 2 heads, 2 blocks). | ||
encoder: transformer | ||
encoder_conf: | ||
output_size: 2 | ||
attention_heads: 2 | ||
linear_units: 2 | ||
num_blocks: 2 | ||
dropout_rate: 0.1 | ||
positional_dropout_rate: 0.1 | ||
attention_dropout_rate: 0.0 | ||
input_layer: conv1d2 | ||
normalize_before: true | ||
|
||
# Transducer decoder: one LSTM layer with a hidden size of 4. | ||
decoder: transducer | ||
decoder_conf: | ||
rnn_type: lstm | ||
num_layers: 1 # Decoder Layers | ||
hidden_size: 4 # Decoder dim | ||
dropout: 0.1 | ||
dropout_embed: 0.2 | ||
|
||
|
||
||
# Joint network settings. | ||
joint_net_conf: | ||
joint_space_size: 4 | ||
|
||
model: espnet | ||
model_conf: | ||
ctc_weight: 0.3 | ||
lsm_weight: 0.1 | ||
length_normalized_loss: false | ||
|
||
# Adam with a learning rate of 0.005 and the warmuplr scheduler (4 warmup steps). | ||
optim: adam | ||
optim_conf: | ||
lr: 0.005 | ||
scheduler: warmuplr | ||
scheduler_conf: | ||
warmup_steps: 4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# This is a debug config for CI | ||
# Training is limited to one epoch of one iteration with a batch size of 2. | ||
batch_type: unsorted | ||
batch_size: 2 | ||
accum_grad: 1 | ||
max_epoch: 1 | ||
num_iters_per_epoch: 1 | ||
# No value is given for patience. | ||
patience: | ||
# The initialization method for model parameters | ||
init: xavier_uniform | ||
# A single criterion entry made of: valid, acc, max. | ||
best_model_criterion: | ||
- - valid | ||
- acc | ||
- max | ||
keep_nbest_models: 10 | ||
|
||
|
||
||
# Transformer encoder with very small sizes (2 units, 2 heads, 2 blocks). | ||
encoder: transformer | ||
encoder_conf: | ||
output_size: 2 | ||
attention_heads: 2 | ||
linear_units: 2 | ||
num_blocks: 2 | ||
dropout_rate: 0.1 | ||
positional_dropout_rate: 0.1 | ||
attention_dropout_rate: 0.0 | ||
input_layer: conv1d2 | ||
normalize_before: true | ||
|
||
# Transformer decoder, equally small (2 heads, 2 units, 2 blocks). | ||
decoder: transformer | ||
decoder_conf: | ||
attention_heads: 2 | ||
linear_units: 2 | ||
num_blocks: 2 | ||
dropout_rate: 0.1 | ||
positional_dropout_rate: 0.1 | ||
self_attention_dropout_rate: 0.0 | ||
src_attention_dropout_rate: 0.0 | ||
|
||
model: espnet | ||
model_conf: | ||
ctc_weight: 0.3 | ||
lsm_weight: 0.1 | ||
length_normalized_loss: false | ||
|
||
# Adam with a learning rate of 0.005 and the warmuplr scheduler (4 warmup steps). | ||
optim: adam | ||
optim_conf: | ||
lr: 0.005 | ||
scheduler: warmuplr | ||
scheduler_conf: | ||
warmup_steps: 4 |
Oops, something went wrong.