Skip to content

Commit

Permalink
Merge branch 'master' into chime7task1
Browse files Browse the repository at this point in the history
  • Loading branch information
popcornell committed Feb 14, 2023
2 parents 00308e1 + 34d6117 commit 5179f7a
Show file tree
Hide file tree
Showing 4 changed files with 236 additions and 0 deletions.
72 changes: 72 additions & 0 deletions egs2/tedlium2/asr1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,78 @@
|decode_asr_asr_model_valid.acc.ave/test|1155|52113|95.0|2.6|2.5|0.9|5.9|64.2|


# E-Branchformer with Transducer, 12 layers
## Environments
- date: `Thu Feb 9 01:29:33 CST 2023`
- python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
- espnet version: `espnet 202301`
- pytorch version: `pytorch 1.13.1`
- Git hash: `478ba004e114e7862b05fb01112de7f7e1da3996`
- Commit date: `Tue Feb 7 00:50:49 2023 +0000`

## asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp
- ASR config: [conf/tuning/train_asr_transducer_e_branchformer_e12.yaml](conf/tuning/train_asr_transducer_e_branchformer_e12.yaml)
- Params: 26.26M
- Model link: [https://huggingface.co/pyf98/tedlium2_transducer_e_branchformer](https://huggingface.co/pyf98/tedlium2_transducer_e_branchformer)

### WER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transducer_asr_model_valid.loss.ave/dev|466|14671|93.4|4.3|2.3|1.0|7.6|71.7|
|decode_asr_transducer_asr_model_valid.loss.ave/test|1155|27500|93.6|4.0|2.4|1.0|7.4|63.5|

### CER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transducer_asr_model_valid.loss.ave/dev|466|78259|97.1|0.9|2.0|0.9|3.8|71.7|
|decode_asr_transducer_asr_model_valid.loss.ave/test|1155|145066|97.1|0.9|2.1|0.9|3.9|63.5|

### TER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transducer_asr_model_valid.loss.ave/dev|466|28296|94.7|3.1|2.3|0.8|6.2|71.7|
|decode_asr_transducer_asr_model_valid.loss.ave/test|1155|52113|95.1|2.6|2.2|0.9|5.8|63.5|


# Conformer with Transducer, 12 layers, 2048 linear units
## Environments
- date: `Wed Feb 8 22:07:40 CST 2023`
- python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
- espnet version: `espnet 202301`
- pytorch version: `pytorch 1.13.1`
- Git hash: `478ba004e114e7862b05fb01112de7f7e1da3996`
- Commit date: `Tue Feb 7 00:50:49 2023 +0000`

## asr_train_asr_transducer_conformer_e12_linear2048_raw_en_bpe500_sp
- ASR config: [conf/tuning/train_asr_transducer_conformer_e12_linear2048.yaml](conf/tuning/train_asr_transducer_conformer_e12_linear2048.yaml)
- Params: 34.62M
- Model link: [https://huggingface.co/pyf98/tedlium2_transducer_conformer_e12_linear2048](https://huggingface.co/pyf98/tedlium2_transducer_conformer_e12_linear2048)

### WER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transducer_asr_model_valid.loss.ave/dev|466|14671|93.3|4.5|2.3|1.1|7.8|71.2|
|decode_asr_transducer_asr_model_valid.loss.ave/test|1155|27500|93.2|4.2|2.6|1.0|7.8|65.6|

### CER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transducer_asr_model_valid.loss.ave/dev|466|78259|97.0|0.9|2.1|1.0|3.9|71.2|
|decode_asr_transducer_asr_model_valid.loss.ave/test|1155|145066|96.9|0.9|2.2|0.9|4.0|65.6|

### TER

|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transducer_asr_model_valid.loss.ave/dev|466|28296|94.6|3.0|2.4|0.9|6.3|71.2|
|decode_asr_transducer_asr_model_valid.loss.ave/test|1155|52113|94.8|2.7|2.5|0.9|6.0|65.6|



# E-Branchformer with CTC, 12 layers
## Environments
Expand Down
4 changes: 4 additions & 0 deletions egs2/tedlium2/asr1/conf/decode_asr_transducer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Decoding configuration for the transducer models
# (egs2/tedlium2/asr1/conf/decode_asr_transducer.yaml).
beam_size: 10
# Options below must stay nested under `transducer_conf`; if they are written
# at column 0, `transducer_conf` parses as null and the options are treated
# as unrelated top-level keys.
transducer_conf:
  search_type: default
  score_norm: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Conformer encoder + transducer decoder training configuration.
# Trained with NVIDIA A40 GPU (48GB) x 2

# Encoder: 12 blocks, 2048 linear units.
encoder: conformer
encoder_conf:
  output_size: 256
  attention_heads: 4
  linear_units: 2048
  num_blocks: 12
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: conv2d
  normalize_before: true
  macaron_style: true
  rel_pos_type: latest
  pos_enc_layer_type: rel_pos
  selfattention_layer_type: rel_selfattn
  activation_type: swish
  use_cnn_module: true
  cnn_module_kernel: 31

# Decoder: single-layer LSTM.
decoder: transducer
decoder_conf:
  rnn_type: lstm
  num_layers: 1
  hidden_size: 256
  dropout: 0.1
  dropout_embed: 0.2

joint_net_conf:
  joint_space_size: 320

model_conf:
  # NOTE(review): presumably the weight of an auxiliary CTC loss - confirm
  # against the model implementation.
  ctc_weight: 0.3
  report_cer: false
  report_wer: false

frontend_conf:
  n_fft: 512
  win_length: 400
  hop_length: 160

seed: 2022
use_amp: false
num_workers: 6
batch_type: numel
batch_bins: 10000000
accum_grad: 5
max_epoch: 50
init: none
# A list holding one [phase, metric, mode] triple. The inner list must stay
# nested: written flat it parses as [[valid], loss, min].
best_model_criterion:
  - - valid
    - loss
    - min
keep_nbest_models: 10

optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 0.000001
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 15000

specaug: specaug
specaug_conf:
  apply_time_warp: true
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  freq_mask_width_range:
    - 0
    - 27
  num_freq_mask: 2
  apply_time_mask: true
  time_mask_width_ratio_range:
    - 0.0
    - 0.05
  num_time_mask: 5
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# E-Branchformer encoder + transducer decoder training configuration.
# Trained with NVIDIA A40 GPU (48GB) x 2

# Encoder: 12 blocks.
encoder: e_branchformer
encoder_conf:
  output_size: 256
  attention_heads: 4
  attention_layer_type: rel_selfattn
  pos_enc_layer_type: rel_pos
  rel_pos_type: latest
  cgmlp_linear_units: 1024
  cgmlp_conv_kernel: 31
  use_linear_after_conv: false
  gate_activation: identity
  num_blocks: 12
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: conv2d
  layer_drop_rate: 0.0
  linear_units: 1024
  positionwise_layer_type: linear
  use_ffn: true
  macaron_ffn: true
  merge_conv_kernel: 31

# Decoder: single-layer LSTM.
decoder: transducer
decoder_conf:
  rnn_type: lstm
  num_layers: 1
  hidden_size: 256
  dropout: 0.1
  dropout_embed: 0.2

joint_net_conf:
  joint_space_size: 320

model_conf:
  # NOTE(review): presumably the weight of an auxiliary CTC loss - confirm
  # against the model implementation.
  ctc_weight: 0.3
  report_cer: false
  report_wer: false

frontend_conf:
  n_fft: 512
  win_length: 400
  hop_length: 160

seed: 2022
use_amp: false
num_workers: 6
batch_type: numel
batch_bins: 10000000
accum_grad: 5
max_epoch: 50
init: none
# A list holding one [phase, metric, mode] triple. The inner list must stay
# nested: written flat it parses as [[valid], loss, min].
best_model_criterion:
  - - valid
    - loss
    - min
keep_nbest_models: 10

optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 0.000001
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 15000

specaug: specaug
specaug_conf:
  apply_time_warp: true
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  freq_mask_width_range:
    - 0
    - 27
  num_freq_mask: 2
  apply_time_mask: true
  time_mask_width_ratio_range:
    - 0.0
    - 0.05
  num_time_mask: 5

0 comments on commit 5179f7a

Please sign in to comment.