Skip to content

Commit

Permalink
Merge pull request #194 from allenai/default-2x-batch-size
Browse files Browse the repository at this point in the history
Default to batch size of 2048
  • Loading branch information
epwalsh authored Jun 9, 2023
2 parents ab0b967 + 2611f9b commit fde42f9
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 10 deletions.
7 changes: 4 additions & 3 deletions configs/c4-large.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ model:
attention_dropout: 0.0
attention_layer_norm: true
multi_query_attention: true
block_type: sequential
layer_norm_type: default # if not compiling, use 'low_precision'
block_type: parallel
layer_norm_type: low_precision  # 'default' is fine when compiling; 'low_precision' needed otherwise
activation_type: swiglu
residual_dropout: 0.0
embedding_dropout: 0.0
Expand Down Expand Up @@ -69,7 +69,8 @@ save_num_unsharded_checkpoints_to_keep: -1

load_path: null

max_duration: 476837 # 2T tokens
# max_duration: 476837 # 2T tokens
max_duration: 47684 # 200B tokens
global_train_batch_size: 2048
device_train_microbatch_size: 4

Expand Down
7 changes: 4 additions & 3 deletions configs/c4-medium.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ compile: null # causes instability on AMD GPUs

optimizer:
name: lionw
learning_rate: 1.0e-5
learning_rate: 1.0e-4
weight_decay: 0.01
betas:
- 0.9
Expand Down Expand Up @@ -67,8 +67,9 @@ save_num_unsharded_checkpoints_to_keep: -1

load_path: null

max_duration: 953674 # 2T tokens
global_train_batch_size: 1024
# max_duration: 476837 # 2T tokens
max_duration: 47684 # 200B tokens
global_train_batch_size: 2048
device_train_microbatch_size: 2

precision: amp_bf16
Expand Down
8 changes: 4 additions & 4 deletions configs/c4-small.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ compile: null # causes instability on AMD GPUs

optimizer:
name: lionw
learning_rate: 1.0e-4
learning_rate: 2.0e-4
weight_decay: 0.01
betas:
- 0.9
Expand Down Expand Up @@ -69,9 +69,9 @@ save_num_unsharded_checkpoints_to_keep: -1

load_path: null

# max_duration: 953674 # 2T tokens
max_duration: 95367 # 200B tokens
global_train_batch_size: 1024
# max_duration: 476837 # 2T tokens
max_duration: 47684 # 200B tokens
global_train_batch_size: 2048
device_train_microbatch_size: 8

precision: amp_bf16
Expand Down

0 comments on commit fde42f9

Please sign in to comment.