Skip to content

Commit

Permalink
Merge pull request #5370 from Emrys365/tse
Browse files Browse the repository at this point in the history
Adding general data augmentation methods for speech preprocessing
  • Loading branch information
sw005320 committed Aug 9, 2023
2 parents ac8b312 + 3a82677 commit 88050b2
Show file tree
Hide file tree
Showing 19 changed files with 1,023 additions and 141 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci_on_ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v1
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down Expand Up @@ -285,7 +285,7 @@ jobs:
python3 -m pip freeze
- name: Import all modules (Try2)
run: |
python3 ./ci/test_import_all.py
python3 -q -X faulthandler ./ci/test_import_all.py
check_kaldi_symlinks:
runs-on: ubuntu-latest
Expand Down
15 changes: 14 additions & 1 deletion ci/test_import_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import glob
import importlib
import sys
import traceback

try:
import k2
Expand All @@ -17,6 +18,7 @@
has_mir_eval = True


failed_imports = []
for dirname in ["espnet", "espnet2"]:
for f in glob.glob(f"{dirname}/**/*.py"):
module_name = f.replace("/", ".")[:-3]
Expand All @@ -38,4 +40,15 @@
else:
print(f"import {module_name}", file=sys.stderr)

importlib.import_module(module_name)
try:
importlib.import_module(module_name)
except Exception as e:
reason = traceback.format_exc()
failed_imports.append((module_name, reason))


if failed_imports:
print(f"Error: Failed to import {len(failed_imports)} modules")
for i, (name, reason) in enumerate(failed_imports, 1):
print(f"[{i}] {name}\n\t{reason}\n")
raise RuntimeError("See the errors above")
12 changes: 10 additions & 2 deletions ci/test_integration_espnet2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ echo "==== feats_type=raw, token_types=bpe, model_conf.extract_feats_in_collect_
--feats_normalize "utterance_mvn" --python "${python}" \
--asr-args "--model_conf extract_feats_in_collect_stats=false --num_workers 0"

echo "==== feats_type=raw, token_types=bpe, model_conf.extract_feats_in_collect_stats=False, normalize=utt_mvn, with data augmentation ==="
./run.sh --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false --feats-type "raw" --token-type "bpe" \
--asr_config "conf/train_asr_rnn_data_aug_debug.yaml" \
--feats_normalize "utterance_mvn" --python "${python}" \
--asr-args "--model_conf extract_feats_in_collect_stats=false --num_workers 0"

echo "==== use_streaming, feats_type=raw, token_types=bpe, model_conf.extract_feats_in_collect_stats=False, normalize=utt_mvn ==="
./run.sh --use_streaming true --ngpu 0 --stage 10 --stop-stage 13 --skip-upload false --feats-type "raw" --token-type "bpe" \
--feats_normalize "utterance_mvn" --python "${python}" \
Expand Down Expand Up @@ -171,8 +177,10 @@ if python -c 'import torch as t; from packaging.version import parse as L; asser
echo "==== feats_type=${t} with preprocessor ==="
./run.sh --ngpu 0 --stage 2 --stop-stage 10 --skip-upload false --feats-type "${t}" --ref-num 1 --python "${python}" \
--extra_wav_list "rirs.scp noises.scp" --enh_config ./conf/train_with_preprocessor_debug.yaml --enh-args "--num_workers 0"
./run.sh --ngpu 0 --stage 2 --stop-stage 10 --skip-upload false --feats-type "${t}" --ref-num 1 --python "${python}" \
--enh_config conf/train_with_dynamic_mixing_debug.yaml --ref-num 2 --enh-args "--num_workers 0"
./run.sh --ngpu 0 --stage 5 --stop-stage 10 --skip-upload false --feats-type "${t}" --ref-num 1 --python "${python}" \
--enh_config conf/train_with_data_aug_debug.yaml --enh-args "--num_workers 0"
./run.sh --ngpu 0 --stage 2 --stop-stage 10 --skip-upload false --feats-type "${t}" --ref-num 2 --python "${python}" \
--enh_config conf/train_with_dynamic_mixing_debug.yaml --enh-args "--num_workers 0"
done
rm data/**/utt2category 2>/dev/null || true
rm -r dump
Expand Down
45 changes: 45 additions & 0 deletions egs2/mini_an4/asr1/conf/train_asr_rnn_data_aug_debug.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# This is a debug config for CI: a tiny VGG-RNN ASR model that exercises
# the general data-augmentation options of the default preprocessor.
encoder: vgg_rnn
encoder_conf:
    num_layers: 1
    hidden_size: 2
    output_size: 2

decoder: rnn
decoder_conf:
    hidden_size: 2

scheduler: reducelronplateau
scheduler_conf:
    mode: min
    factor: 0.5
    patience: 1

use_preprocessor: true
preprocessor: default
preprocessor_conf:
    fs: 16000
    # Each entry is [probability, effect_name, effect_args].
    # No need to set the "sample_rate" argument for each effect here.
    # NOTE(review): the nested entry below appears to group several
    # sub-effects under a single probability — confirm semantics against
    # the espnet2 preprocessor documentation.
    data_aug_effects:
        - [0.1, "contrast", {"enhancement_amount": 75.0}]
        - [0.1, "highpass", {"cutoff_freq": 5000, "Q": 0.707}]
        - [0.1, "equalization", {"center_freq": 1000, "gain": 0, "Q": 0.707}]
        - - 0.1
          - - [0.3, "speed_perturb", {"factor": 0.9}]
            - [0.3, "speed_perturb", {"factor": 1.1}]
            - [0.3, "speed_perturb", {"factor": 1.3}]
    data_aug_num: [1, 4]
    data_aug_prob: 1.0

val_scheduler_criterion:
    - valid
    - loss
best_model_criterion:
    - - valid
      - acc
      - max
keep_nbest_models: 1
max_epoch: 1
num_iters_per_epoch: 1
batch_type: folded
batch_size: 2
57 changes: 57 additions & 0 deletions egs2/mini_an4/enh1/conf/train_with_data_aug_debug.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# This is a debug config for CI: a tiny STFT/BLSTM enhancement model that
# exercises the data-augmentation options of the "enh" preprocessor.
encoder: stft
encoder_conf:
    n_fft: 512
    hop_length: 128

decoder: stft
decoder_conf:
    n_fft: 512
    hop_length: 128

separator: rnn
separator_conf:
    rnn_type: blstm
    num_spk: 1
    nonlinear: relu
    layer: 1
    unit: 2
    dropout: 0.2

preprocessor: enh
preprocessor_conf:
    speech_volume_normalize: "0.5_1.0"
    rir_scp: dump/raw/train_nodev/rirs.scp
    rir_apply_prob: 1.0
    noise_scp: dump/raw/train_nodev/noises.scp
    noise_apply_prob: 1.0
    noise_db_range: "5_20"
    sample_rate: 16000
    force_single_channel: true
    categories:
        - 1ch_16k
        - 2ch_16k
    # Each entry is [probability, effect_name, effect_args].
    # No need to set the "sample_rate" argument for each effect here.
    data_aug_effects:
        - [0.1, "contrast", {"enhancement_amount": 75.0}]
        - [0.1, "highpass", {"cutoff_freq": 5000, "Q": 0.707}]
        - - 0.1
          - - [0.3, "clipping", {"min_quantile": 0.05, "max_quantile": 0.95}]
            - [0.3, "corrupt_phase", {"scale": 0.1, "n_fft": 0.032, "hop_length": 0.008}]
    data_aug_num: [1, 3]
    data_aug_prob: 1.0

criterions:
    # The first criterion
    - name: mse
      conf:
          compute_on_mask: false
      # The wrapper for the current criterion;
      # for the single-talker case we simply use the fixed_order wrapper.
      wrapper: fixed_order
      wrapper_conf:
          weight: 1.0

max_epoch: 1
num_iters_per_epoch: 1
batch_type: folded
batch_size: 2
2 changes: 1 addition & 1 deletion egs2/musdb18/enh1/conf/tuning/train_enh_conv_tasnet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ decoder_conf:
stride: 10
separator: tcn
separator_conf:
num_spk: 2
num_spk: 4
layer: 8
stack: 4
bottleneck_dim: 256
Expand Down
5 changes: 4 additions & 1 deletion egs2/musdb18/enh1/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ num_dev=5000
num_eval=3000
sample_rate=16k

# 0, 1, 2, 3 represent drums, bass, vocals, and others, respectively.
ref_num=4


train_set="train_${sample_rate}"
valid_set="dev_${sample_rate}"
Expand All @@ -21,7 +24,7 @@ test_sets="test_${sample_rate} "
--test_sets "${test_sets}" \
--fs "${sample_rate}" \
--audio_format wav \
--ref_num 4 \
--ref_num ${ref_num} \
--lang en \
--ngpu 1 \
--local_data_opts "--sample_rate ${sample_rate} --num_train ${num_train} --num_dev ${num_dev} --num_eval ${num_eval}" \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ unzip ${wdir}/spatialize_wsj0-mix.zip -d ${dir}
sed -i -e "s#data_in_root = './wsj0-mix/';#data_in_root = '${wsj0_2mix_wav}';#" \
-e "s#rir_root = './wsj0-mix/';#rir_root = '${wsj0_2mix_spatialized_wav}';#" \
-e "s#data_out_root = './wsj0-mix/';#data_out_root = '${wsj0_2mix_spatialized_wav}';#" \
-e "s#RIR-Generator-master/#RIR-Generator/" \
-e "s#RIR-Generator-master/#RIR-Generator/#" \
${dir}/spatialize_wsj0_mix.m

sed -i -e "s#MIN_OR_MAX=\"'min'\"#MIN_OR_MAX=\"'${min_or_max}'\"#" \
Expand Down

0 comments on commit 88050b2

Please sign in to comment.