This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[example]add bucketing and batchnorm scheme for speech_recognition example #6923

Closed · wants to merge 3 commits
13 changes: 13 additions & 0 deletions example/speech_recognition/README.md
@@ -123,3 +123,16 @@ The new file should implement two functions, prepare_data() and arch(), for buil

Run the following command after preparing the files.
<pre><code>python main.py --configfile custom.cfg --archfile arch_custom</code></pre>
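
For orientation, here is a minimal skeleton of such a file. The function names `prepare_data()` and `arch()` come from the instructions above; the signatures and the trivial placeholder network are illustrative assumptions, so mirror `arch_deepspeech.py` for the interface `main.py` actually expects.

```python
# arch_custom.py -- illustrative skeleton only; the exact signatures and
# return values are assumptions, see arch_deepspeech.py for the real interface.
import mxnet as mx


def prepare_data(args):
    """Derive any data-related settings your model needs from the parsed
    config (e.g. input feature dimension, sequence length)."""
    return args


def arch(args):
    """Build and return the symbolic network. A trivial placeholder graph
    is shown; replace it with your acoustic model."""
    data = mx.sym.Variable("data")
    net = mx.sym.FullyConnected(data=data, num_hidden=1024, name="fc1")
    return net
```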

***
## **Preparing the full LibriSpeech dataset**
You can prepare the full LibriSpeech dataset by following the instructions at https://github.com/baidu-research/ba-dls-deepspeech:
```bash
git clone https://github.com/baidu-research/ba-dls-deepspeech
cd ba-dls-deepspeech
# download and unpack the LibriSpeech archives
./download.sh
# convert the FLAC audio to WAV
./flac_to_wav.sh
# write one description file per split (one JSON object per line)
python create_desc_json.py /path/to/ba-dls-deepspeech/LibriSpeech/train-clean-100 train_corpus.json
python create_desc_json.py /path/to/ba-dls-deepspeech/LibriSpeech/dev-clean validation_corpus.json
python create_desc_json.py /path/to/ba-dls-deepspeech/LibriSpeech/test-clean test_corpus.json
```
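
Each line of a generated description file is a JSON object for one utterance (audio path, duration, transcript). A quick sanity check is sketched below; the field names `key`, `duration`, and `text` follow the ba-dls-deepspeech convention and are worth verifying against your generated files.

```python
import json

# Print the first few entries of a generated description file.
# Assumes line-delimited JSON with fields 'key', 'duration', 'text'
# (the ba-dls-deepspeech convention); adjust if your copy differs.
with open("train_corpus.json") as f:
    for i, line in enumerate(f):
        entry = json.loads(line)
        print(entry["key"], entry["duration"], entry["text"][:40])
        if i == 4:
            break
```
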
285 changes: 164 additions & 121 deletions example/speech_recognition/arch_deepspeech.py

Large diffs are not rendered by default.

37 changes: 27 additions & 10 deletions example/speech_recognition/deepspeech.cfg
@@ -4,12 +4,12 @@ mode = train
#ex: gpu0,gpu1,gpu2,gpu3
context = gpu0,gpu1,gpu2
# checkpoint prefix, check point will be saved under checkpoints folder with prefix
prefix = deep
prefix = deep_bucket
# when mode is load or predict, model will be loaded from the file name with model_file under checkpoints
model_file = deepspeechn_epoch1n_batch-0009
model_file = deep_bucket-0001
batch_size = 12
# log will be saved by the log_filename
log_filename = deep.log
log_filename = deep_bucket.log
# checkpoint set n to save checkpoints after n epoch
save_checkpoint_every_n_epoch = 1
save_checkpoint_every_n_batch = 1000
@@ -18,6 +18,7 @@ tensorboard_log_dir = tblog/deep
# if random_seed is -1 then it gets random seed from timestamp
mx_random_seed = -1
random_seed = -1
kvstore_option = device

[data]
train_json = ./train_corpus_all.json
@@ -50,22 +51,18 @@ rnn_type = bigru
#vanilla_lstm or fc_lstm (no effect when network_type is gru, bigru)
lstm_type = fc_lstm
is_batchnorm = True
is_bucketing = True
#[[0,2.3],[10,5.8],[10.8,25],[13.8,50],[15.1,75],[15.8,90],[29.7,100]]
buckets = [200, 300, 400, 500, 600, 700, 800, 900, 1599]

[train]
num_epoch = 70
learning_rate = 0.0003
# constant learning rate annealing by factor
learning_rate_annealing = 1.1
# supports only sgd and adam
optimizer = sgd
# for sgd
momentum = 0.9
# set to 0 to disable gradient clipping
clip_gradient = 0
initializer = Xavier
init_scale = 2
factor_type = in
weight_decay = 0.
# show progress every nth batch
show_every = 100
save_optimizer_states = True
@@ -78,3 +75,23 @@ enable_logging_validation_metric = True
[load]
load_optimizer_states = True
is_start_from_batch = True

[optimizer]
optimizer = sgd
# define parameters for optimizer
# optimizer_params_dictionary must use double quotes ("), not single quotes ('), around strings
# sgd/nag
optimizer_params_dictionary={"momentum":0.9}
# dcasgd
# optimizer_params_dictionary={"momentum":0.9, "lamda":1.0}
# adam
# optimizer_params_dictionary={"beta1":0.9,"beta2":0.999}
# adagrad
# optimizer_params_dictionary={"eps":1e-08}
# rmsprop
# optimizer_params_dictionary={"gamma1":0.9, "gamma2":0.9,"epsilon":1e-08}
# adadelta
# optimizer_params_dictionary={"rho":0.95, "epsilon":1e-08}
# set to 0 to disable gradient clipping
clip_gradient = 100
weight_decay = 0.
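
As an aside on the `buckets` list added to deepspeech.cfg above: bucketing groups utterances by length so the network is only unrolled to a handful of fixed sizes instead of the longest sequence in every batch. The sketch below illustrates the usual assignment rule (smallest bucket that fits, padding up to the bucket length); it is a generic illustration, not the example's actual bucketing code.

```python
import bisect

# Bucket sizes (in frames) taken from deepspeech.cfg above.
buckets = [200, 300, 400, 500, 600, 700, 800, 900, 1599]

def assign_bucket(num_frames, buckets):
    """Return the smallest bucket >= num_frames, or None if the utterance
    is longer than the largest bucket (such utterances would be skipped)."""
    idx = bisect.bisect_left(buckets, num_frames)
    return buckets[idx] if idx < len(buckets) else None

for frames in (150, 512, 1600):
    print(frames, "->", assign_bucket(frames, buckets))
# 150 -> 200, 512 -> 600, 1600 -> None
```
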
32 changes: 23 additions & 9 deletions example/speech_recognition/default.cfg
@@ -18,6 +18,7 @@ tensorboard_log_dir = tblog/libri_sample
# if random_seed is -1 then it gets random seed from timestamp
mx_random_seed = -1
random_seed = -1
kvstore_option = device

[data]
train_json = ./Libri_sample.json
@@ -50,24 +51,17 @@ rnn_type = bigru
#vanilla_lstm or fc_lstm (no effect when network_type is gru, bigru)
lstm_type = fc_lstm
is_batchnorm = True
is_bucketing = False
buckets = []

[train]
num_epoch = 70

learning_rate = 0.005
# constant learning rate annealing by factor
learning_rate_annealing = 1.1
# supports only sgd and adam
optimizer = adam
# for sgd
momentum = 0.9
# set to 0 to disable gradient clipping
clip_gradient = 0

initializer = Xavier
init_scale = 2
factor_type = in
weight_decay = 0.00001
# show progress every nth batch
show_every = 1
save_optimizer_states = True
@@ -80,3 +74,23 @@ enable_logging_validation_metric = True
[load]
load_optimizer_states = True
is_start_from_batch = False

[optimizer]
optimizer = adam
# define parameters for optimizer
# optimizer_params_dictionary must use double quotes ("), not single quotes ('), around strings
# sgd/nag
# optimizer_params_dictionary={"momentum":0.9}
# dcasgd
# optimizer_params_dictionary={"momentum":0.9, "lamda":1.0}
# adam
optimizer_params_dictionary={"beta1":0.9,"beta2":0.999}
# adagrad
# optimizer_params_dictionary={"eps":1e-08}
# rmsprop
# optimizer_params_dictionary={"gamma1":0.9, "gamma2":0.9,"epsilon":1e-08}
# adadelta
# optimizer_params_dictionary={"rho":0.95, "epsilon":1e-08}
# set to 0 to disable gradient clipping
clip_gradient = 0
weight_decay = 0.
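
The double-quote requirement in both `[optimizer]` sections is what you would expect if `optimizer_params_dictionary` is parsed as JSON rather than evaluated as Python. A minimal sketch of that reading is below; whether `main.py` forwards `clip_gradient` and `weight_decay` exactly this way is an assumption, not something shown in this diff.

```python
import configparser
import json

import mxnet as mx

config = configparser.ConfigParser()
config.read("deepspeech.cfg")

# json.loads rejects single-quoted strings, hence the double-quote rule.
opt_name = config.get("optimizer", "optimizer")
opt_params = json.loads(config.get("optimizer", "optimizer_params_dictionary"))

# Assumed wiring: fold the remaining [optimizer] keys into the same dict.
opt_params["clip_gradient"] = config.getfloat("optimizer", "clip_gradient")
opt_params["wd"] = config.getfloat("optimizer", "weight_decay")

optimizer = mx.optimizer.create(opt_name, **opt_params)
```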