Commit
set --distributed-port=-1 if ngpus=1; code adaptation/changes according to the commits on Oct 11, 2019
freewym committed Oct 13, 2019
1 parent 50403ac commit bbdeb28
Showing 4 changed files with 21 additions and 10 deletions.
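
Each run.sh hunk below makes the same one-line change: the fixed --distributed-port 100 is replaced by a command substitution that yields -1 when only one GPU is used. A minimal sketch of the idiom, assuming (per the commit title) that speech_train.py treats a --distributed-port of -1 as "skip the distributed rendezvous":

  #!/usr/bin/env bash
  # Sketch of the port-selection idiom this commit adds to each run.sh.
  # Assumption (per the commit title): --distributed-port=-1 disables
  # distributed setup, so single-GPU runs never bind a rendezvous port.
  ngpus=1
  port=$(if [ "$ngpus" -gt 1 ]; then echo 100; else echo -1; fi)
  echo "speech_train.py --distributed-world-size $ngpus --distributed-port $port"

The same selection could be written with bash arithmetic, port=$(( ngpus > 1 ? 100 : -1 )), but the if/else command substitution above is what the scripts use verbatim.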
6 changes: 3 additions & 3 deletions examples/asr_librispeech/run.sh
@@ -172,7 +172,7 @@ if [ ${stage} -le 5 ]; then
--log-interval 8000 --log-format simple \
--num-workers 0 --max-tokens 32000 --max-sentences 1024 --curriculum 1 \
--valid-subset $valid_subset --max-sentences-valid 1536 \
- --distributed-world-size $ngpus --distributed-port 100 \
+ --distributed-world-size $ngpus --distributed-port $(if [ $ngpus -gt 1 ]; then echo 100; else echo -1; fi) \
--max-epoch 30 --optimizer adam --lr 0.001 --clip-norm 1.0 \
--lr-scheduler reduce_lr_on_plateau --lr-shrink 0.5 \
--save-dir $lmdir --restore-file checkpoint_last.pt --save-interval-updates 8000 \
@@ -208,8 +208,8 @@ if [ ${stage} -le 7 ]; then
CUDA_VISIBLE_DEVICES=$free_gpu speech_train.py --seed 1 \
--log-interval 4000 --log-format simple --print-training-sample-interval 2000 \
--num-workers 0 --max-tokens 26000 --max-sentences 24 --curriculum 1 \
- --valid-subset $valid_subset --max-sentences-valid 48 \
- --distributed-world-size $ngpus --distributed-port 100 --ddp-backend no_c10d \
+ --valid-subset $valid_subset --max-sentences-valid 48 --ddp-backend no_c10d \
+ --distributed-world-size $ngpus --distributed-port $(if [ $ngpus -gt 1 ]; then echo 100; else echo -1; fi) \
--max-epoch 30 --optimizer adam --lr 0.001 --weight-decay 0.0 --clip-norm 2.0 \
--lr-scheduler reduce_lr_on_plateau_v2 --lr-shrink 0.5 --min-lr 1e-5 --start-reduce-lr-epoch 10 \
--save-dir $dir --restore-file checkpoint_last.pt --save-interval-updates 3000 \
6 changes: 3 additions & 3 deletions examples/asr_swbd/run.sh
@@ -211,7 +211,7 @@ if [ $stage -le 4 ]; then
--log-interval 500 --log-format simple \
--num-workers 0 --max-tokens 25600 --max-sentences 1024 \
--valid-subset $valid_subset --max-sentences-valid 1536 \
- --distributed-world-size $ngpus --distributed-rank 0 --distributed-port 100 \
+ --distributed-world-size $ngpus --distributed-port $(if [ $ngpus -gt 1 ]; then echo 100; else echo -1; fi) \
--max-epoch 25 --optimizer adam --lr 0.001 --clip-norm 1.0 \
--lr-scheduler reduce_lr_on_plateau --lr-shrink 0.5 \
--save-dir $lmdir --restore-file checkpoint_last.pt --save-interval-updates 500 \
@@ -249,8 +249,8 @@ if [ $stage -le 6 ]; then
CUDA_VISIBLE_DEVICES=$free_gpu speech_train.py --seed 1 \
--log-interval 1500 --log-format simple --print-training-sample-interval 2000 \
--num-workers 0 --max-tokens 26000 --max-sentences 48 --curriculum 2 \
- --valid-subset $valid_subset --max-sentences-valid 64 \
- --distributed-world-size $ngpus --distributed-rank 0 --distributed-port 100 --ddp-backend no_c10d \
+ --valid-subset $valid_subset --max-sentences-valid 64 --ddp-backend no_c10d \
+ --distributed-world-size $ngpus --distributed-port $(if [ $ngpus -gt 1 ]; then echo 100; else echo -1; fi) \
--max-epoch 35 --optimizer adam --lr 0.001 --weight-decay 0.0 --clip-norm 2.0 \
--lr-scheduler reduce_lr_on_plateau_v2 --lr-shrink 0.5 --min-lr 1e-5 --start-reduce-lr-epoch 10 \
--save-dir $dir --restore-file checkpoint_last.pt --save-interval-updates 1500 \
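Note that the two asr_swbd hunks above also drop the explicit --distributed-rank 0 that the other recipes never passed; the rank is presumably left to be inferred during distributed initialization.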
8 changes: 4 additions & 4 deletions examples/asr_wsj/run.sh
@@ -197,7 +197,7 @@ if [ ${stage} -le 4 ] && ! $use_wordlm; then
--log-interval 2000 --log-format simple \
--num-workers 0 --max-tokens 25600 --max-sentences 128 \
--valid-subset $valid_subset --max-sentences-valid 256 \
- --distributed-world-size $ngpus --distributed-port 100 \
+ --distributed-world-size $ngpus --distributed-port $(if [ $ngpus -gt 1 ]; then echo 100; else echo -1; fi) \
--max-epoch 25 --optimizer adam --lr 0.001 --weight-decay 5e-06 \
--lr-scheduler reduce_lr_on_plateau --lr-shrink 0.5 \
--save-dir $lmdir --restore-file checkpoint_last.pt --save-interval-updates 2000 \
@@ -227,7 +227,7 @@ if [ ${stage} -le 6 ] && $use_wordlm; then
--log-interval 2000 --log-format simple \
--num-workers 0 --max-tokens 6400 --max-sentences 256 \
--valid-subset $valid_subset --max-sentences-valid 512 \
- --distributed-world-size $ngpus --distributed-port 100 \
+ --distributed-world-size $ngpus --distributed-port $(if [ $ngpus -gt 1 ]; then echo 100; else echo -1; fi) \
--max-epoch 25 --optimizer adam --lr 0.001 --weight-decay 0.0 \
--lr-scheduler reduce_lr_on_plateau --lr-shrink 0.5 \
--save-dir $wordlmdir --restore-file checkpoint_last.pt --save-interval-updates 2000 \
@@ -267,8 +267,8 @@ if [ ${stage} -le 8 ]; then
CUDA_VISIBLE_DEVICES=$free_gpu speech_train.py --seed 1 \
--log-interval 400 --log-format simple --print-training-sample-interval 1000 \
--num-workers 0 --max-tokens 24000 --max-sentences 32 --curriculum 2 \
- --valid-subset $valid_subset --max-sentences-valid 64 \
- --distributed-world-size $ngpus --distributed-port 100 --ddp-backend no_c10d \
+ --valid-subset $valid_subset --max-sentences-valid 64 --ddp-backend no_c10d \
+ --distributed-world-size $ngpus --distributed-port $(if [ $ngpus -gt 1 ]; then echo 100; else echo -1; fi) \
--max-epoch 35 --optimizer adam --lr 0.001 --weight-decay 0.0 \
--lr-scheduler reduce_lr_on_plateau_v2 --lr-shrink 0.5 --min-lr 1e-5 --start-reduce-lr-epoch 11 \
--save-dir $dir --restore-file checkpoint_last.pt --save-interval-updates 400 \
11 changes: 11 additions & 0 deletions speech_train.py
@@ -21,10 +21,21 @@
from fairseq.meters import AverageMeter, StopwatchMeter
from fairseq.utils import import_user_module

+fb_pathmgr_registerd = False
+

def main(args, init_distributed=False):
    utils.import_user_module(args)

+    try:
+        from fairseq.fb_pathmgr import fb_pathmgr
+        global fb_pathmgr_registerd
+        if not fb_pathmgr_registerd:
+            fb_pathmgr.register()
+            fb_pathmgr_registerd = True
+    except (ModuleNotFoundError, ImportError):
+        pass
+
    assert args.max_tokens is not None or args.max_sentences is not None, \
        'Must specify batch size either with --max-tokens or --max-sentences'

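The speech_train.py hunk mirrors the upstream fairseq change referenced in the commit message ("the commits on Oct 11, 2019"): fb_pathmgr is a Facebook-internal path manager, so registration is attempted at most once, tracked by the module-level fb_pathmgr_registerd flag, and silently skipped in open-source checkouts where the module is absent.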
