Skip to content

Commit

Permalink
Remove deprecated arguments from new run_clm (huggingface#8197)
Browse files (browse the repository at this point in the history)
  • Loading branch information
sgugger authored and fabiocapsouza committed Nov 15, 2020
1 parent b8360f8 commit 615b6bb
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions examples/language-modeling/run_clm.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,14 @@ def tokenize_function(examples):
     )

     if data_args.block_size <= 0:
-        block_size = tokenizer.max_len
+        block_size = tokenizer.model_max_length
     else:
-        if data_args.block_size > tokenizer.max_len:
+        if data_args.block_size > tokenizer.model_max_length:
             logger.warn(
                 f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
-                f"({tokenizer.max_len}). Using block_size={tokenizer.max_len}."
+                f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
             )
-        block_size = min(data_args.block_size, tokenizer.max_len)
+        block_size = min(data_args.block_size, tokenizer.model_max_length)

     # Main data processing function that will concatenate all texts from our dataset and generate chunks of block_size.
     def group_texts(examples):
Expand Down

0 comments on commit 615b6bb

Please sign in to comment.