Skip to content

Commit

Permalink
Merge branch 'master' into codec
Browse files Browse the repository at this point in the history
  • Loading branch information
ftshijt committed Jun 14, 2024
2 parents 1382977 + 19787b1 commit fab9aac
Show file tree
Hide file tree
Showing 44 changed files with 2,167 additions and 68 deletions.
2 changes: 2 additions & 0 deletions ci/test_integration_espnet2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ if python -c 'import torch as t; from packaging.version import parse as L; asser
for t in ${feats_types}; do
echo "==== feats_type=${t} without preprocessor ==="
./run.sh --ngpu 0 --stage 2 --stop-stage 10 --skip-packing false --feats-type "${t}" --ref-num 1 --python "${python}" --enh-args "--num_workers 0"
./run.sh --ngpu 0 --stage 6 --stop-stage 10 --skip-packing false --feats-type "${t}" --ref-num 1 --python "${python}" \
--enh_config conf/train_with_chunk_iterator_debug.yaml --enh-args "--num_workers 0"
done
# Remove generated files in order to reduce the disk usage
rm -rf exp dump data
Expand Down
1 change: 1 addition & 0 deletions egs2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ See: https://espnet.github.io/espnet/espnet2_tutorial.html#recipes-using-espnet2
| totonac | Highland Totonac corpus (endangered language in central Mexico) | ASR | TOS | http://www.openslr.org/107/ | |
| tsukuyomi | つくよみちゃんコーパス | TTS | JPN | https://tyc.rei-yumesaki.net/material/corpus | |
| universal_se_v1 | Combination of Multi-condition English Corpora (vctk_noisy, dns_ins20, chime4, reverb, whamr) | SE | ENG | | |
| urgent2024 | Multi-domain simulated speech enhancement data for the URGENT 2024 Challenge | SE | ENG | https://urgent-challenge.github.io/urgent2024/data/ | |
| vctk | English Multi-speaker Corpus for CSTR Voice Cloning Toolkit | ASR/TTS | ENG | http://www.udialogue.org/download/cstr-vctk-corpus.html | |
| vctk_reverb | Reverberant speech database (48kHz) | SE | ENG | https://datashare.ed.ac.uk/handle/10283/2826 | |
| vctk_noisyreverb | Noisy reverberant speech database (48kHz) | SE | ENG | https://datashare.ed.ac.uk/handle/10283/2826 | |
Expand Down
25 changes: 23 additions & 2 deletions egs2/TEMPLATE/enh1/enh.sh
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ if ! "${skip_train}"; then
_valid_data_param+="--valid_data_path_and_name_and_type ${_enh_valid_dir}/utt2category,category,text "
fi

# Add the fs information at the end of the data path list
# Add the sampling frequency information at the end of the data path list
if [ -e "${_enh_train_dir}/utt2fs" ] && [ -e "${_enh_valid_dir}/utt2fs" ]; then
log "[INFO] Adding the sampling frequency information (fs) for training"

Expand Down Expand Up @@ -837,6 +837,19 @@ if ! "${skip_eval}"; then
_data_param+="--data_path_and_name_and_type ${_data}/enroll_spk${spk}.scp,enroll_ref${spk},text "
done
fi
# Add the category information at the end of the data path list
if [ -e "${_data}/utt2category" ]; then
log "[INFO] Adding the category information for inference"
log "[WARNING] Please make sure the category information is explicitly processed by the preprocessor defined in '${enh_config}' so that it is converted to an integer"

_data_param+="--data_path_and_name_and_type ${_data}/utt2category,category,text "
fi
# Add the sampling frequency information at the end of the data path list
if [ -e "${_data}/utt2fs" ]; then
log "[INFO] Adding the sampling frequency information for inference"

_data_param+="--data_path_and_name_and_type ${_data}/utt2fs,fs,text_int "
fi
# 1. Split the key file
key_file=${_data}/${_scp}
split_scps=""
Expand Down Expand Up @@ -888,6 +901,14 @@ if ! "${skip_eval}"; then
log "Stage 8: Scoring"
_cmd=${decode_cmd}

if ${gpu_inference}; then
_cmd=${cuda_cmd}
_ngpu=1
else
_cmd=${decode_cmd}
_ngpu=0
fi

# score_obs=true: Scoring for observation signal
# score_obs=false: Scoring for enhanced signal
for score_obs in true false; do
Expand Down Expand Up @@ -944,7 +965,7 @@ if ! "${skip_eval}"; then
# 2. Submit scoring jobs
log "Scoring started... log: '${_logdir}/enh_scoring.*.log'"
# shellcheck disable=SC2086
${_cmd} JOB=1:"${_nj}" "${_logdir}"/enh_scoring.JOB.log \
${_cmd} --gpu "${_ngpu}" JOB=1:"${_nj}" "${_logdir}"/enh_scoring.JOB.log \
${python} -m espnet2.bin.enh_scoring \
--key_file "${_logdir}"/keys.JOB.scp \
--output_dir "${_logdir}"/output.JOB \
Expand Down
41 changes: 41 additions & 0 deletions egs2/mini_an4/enh1/conf/train_with_chunk_iterator_debug.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# This is a debug config for CI
# It trains a small single-speaker TCN model for one epoch using the chunk iterator.

# Convolutional encoder; the decoder below uses the same channel/kernel_size/stride values.
encoder: conv
encoder_conf:
channel: 32
kernel_size: 20
stride: 10
decoder: conv
decoder_conf:
channel: 32
kernel_size: 20
stride: 10
# TCN separator configured for a single speaker (num_spk: 1)
separator: tcn
separator_conf:
num_spk: 1
layer: 2
stack: 2
bottleneck_dim: 16
hidden_dim: 48
kernel: 3
causal: False
norm_type: "gLN"
nonlinear: relu

criterions:
# The first criterion
- name: mse_td
conf: {}
# The wrapper for the current criterion.
# For the single-talker case, we simply use the fixed_order wrapper.
wrapper: fixed_order
wrapper_conf:
weight: 1.0

# Training / iterator settings (a single epoch keeps the CI run short)
max_epoch: 1
batch_type: folded
batch_size: 2
iterator_type: chunk
chunk_length: 25 # 0.5s (25 samples at chunk_default_fs = 50)
chunk_default_fs: 50 # GCD among all possible sampling frequencies
chunk_max_abs_length: 100000 # max number of samples per chunk for all sampling frequencies (reduce this value if OOM occurs)
chunk_discard_short_samples: false
Loading

0 comments on commit fab9aac

Please sign in to comment.