Merge pull request #5710 from nateanl/master

TF-GridNet training recipe for DNS Interspeech 2020 dataset
espnet · Mar 25, 2024 · f084317 · f084317
2 parents dbd73dd + 4cfed17
commit f084317
Show file tree

Hide file tree

Showing 2 changed files with 93 additions and 0 deletions.
diff --git a/egs2/dns_ins20/enh1/README.md b/egs2/dns_ins20/enh1/README.md
@@ -68,3 +68,26 @@ Note: Here, the PESQ score is calculated based on https://github.com/vBaiCai/pyt
 | enhanced_cv_synthetic             | 0.97 | 24.52 | 24.52 | 24.43 |
 | enhanced_tt_synthetic_no_reverb   | 0.96 | 17.66 | 17.66 | 17.69 |
 | enhanced_tt_synthetic_with_reverb | 0.84 | 11.84 | 11.84 | 11.15 |
+
+
+<!-- Generated by ./scripts/utils/show_enh_score.sh -->
+# RESULTS
+## Environments
+- date: `Wed Mar  6 01:29:02 UTC 2024`
+- python version: `3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]`
+- espnet version: `espnet 202308`
+- pytorch version: `pytorch 2.1.0+cu118`
+- Git hash: `60ce18efa06ca5a5922534682f47e2107ef88b13`
+  - Commit date: `Wed Sep 6 10:17:57 2023 -0700`
+
+
+## enh_train_enh_tfgrid_raw
+
+- config: ./conf/tuning/train_enh_tfgridnet.yaml
+- model: https://huggingface.co/Zhaoheng/tfgridnet_dns_ins20_epoch33
+
+|dataset|PESQ_WB|STOI|SAR|SDR|SIR|SI_SNR|
+|---|---|---|---|---|---|---|
+|enhanced_cv_synthetic|3.61|99.06|26.04|26.04|0.00|26.44|
+|enhanced_tt_synthetic_no_reverb|3.32|97.88|20.18|20.18|0.00|20.17|
+|enhanced_tt_synthetic_with_reverb|2.79|91.75|15.54|15.54|0.00|15.06|
diff --git a/egs2/dns_ins20/enh1/conf/tuning/train_enh_tfgridnet.yaml b/egs2/dns_ins20/enh1/conf/tuning/train_enh_tfgridnet.yaml
@@ -0,0 +1,70 @@
+optim: adam  # optimizer name; its arguments are given in optim_conf below
+init: xavier_uniform  # parameter initialization scheme
+max_epoch: 35
+batch_type: folded
+batch_size: 2
+iterator_type: chunk  # paired with chunk_length on the next line
+chunk_length: 48000  # NOTE(review): presumably in samples (3 s if the audio is 16 kHz) -- confirm
+num_iters_per_epoch: 5000
+num_workers: 8
+optim_conf:
+    lr: 1.0e-03
+    eps: 1.0e-08
+    weight_decay: 0
+patience: 5  # NOTE(review): presumably early-stopping patience; separate from scheduler_conf.patience below -- confirm
+val_scheduler_criterion:  # (phase, metric) pair; "loss" matches scheduler_conf.mode: min
+- valid
+- loss
+best_model_criterion:  # list of (phase, metric, max|min) triples
+-   - valid
+    - si_snr
+    - max
+-   - valid
+    - loss
+    - min
+keep_nbest_models: 5
+scheduler: reducelronplateau  # its arguments are given in scheduler_conf below
+scheduler_conf:
+    mode: min  # lower is better, consistent with the valid/loss criterion above
+    factor: 0.7
+    patience: 1
+encoder: same  # NOTE(review): presumably a pass-through, since separator_conf carries its own n_fft/stride/window -- confirm
+decoder: same
+separator: tfgridnet  # its arguments are given in separator_conf below
+separator_conf:
+    n_srcs: 1  # one output source
+    n_fft: 512
+    stride: 256  # half of n_fft
+    window: hann
+    n_imics: 1  # NOTE(review): presumably the number of input microphones (single-channel) -- confirm
+    n_layers: 4
+    lstm_hidden_units: 128
+    attn_n_head: 4
+    attn_approx_qk_dim: 512
+    emb_dim: 32
+    emb_ks: 4
+    emb_hs: 4  # same value as emb_ks
+    activation: prelu
+    eps: 1.0e-05
+
+criterions:  # two entries, each with name / conf / wrapper / wrapper_conf
+  # The first criterion
+  - name: mr_l1_tfd
+    conf:
+      window_sz: [256, 512, 768, 1024]  # four window sizes
+      hop_sz: null  # left unset; NOTE(review): presumably derived from window_sz by the loss -- confirm
+      eps: 1.0e-8
+      time_domain_weight: 0.5
+      # normalize_variance: true
+    wrapper: fixed_order
+    wrapper_conf:
+      weight: 1.0  # the only criterion with a non-zero weight
+  # The second criterion
+  - name: si_snr
+    conf:
+      eps: 1.0e-7
+    wrapper: fixed_order
+    wrapper_conf:
+      weight: 0.0  # zero weight; NOTE(review): presumably kept so si_snr is reported for best_model_criterion -- confirm
+
+use_amp: false