Add many missing spaces in adjacent strings (huggingface#26751)
Add missing spaces in adjacent strings
tomaarsen authored and blbadger committed Nov 8, 2023
1 parent 67611ac commit 502d548
Showing 154 changed files with 331 additions and 331 deletions.
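For context: Python joins adjacent string literals at compile time with no separator, so a help or error message split across literals needs an explicit trailing space, or the words run together. A minimal sketch of the bug class this commit fixes:

```python
# Adjacent string literals are concatenated at compile time with no separator.
before = (
    "Output directory already exists and is not empty."
    "Use --overwrite_output_dir to overcome."
)
after = (
    "Output directory already exists and is not empty. "  # trailing space added
    "Use --overwrite_output_dir to overcome."
)

print(before)  # ...is not empty.Use --overwrite_output_dir...   (words run together)
print(after)   # ...is not empty. Use --overwrite_output_dir...  (reads correctly)
```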
@@ -37,15 +37,15 @@ class ModelArguments:
     encoder_model_name_or_path: str = field(
         metadata={
             "help": (
-                "The encoder model checkpoint for weights initialization."
+                "The encoder model checkpoint for weights initialization. "
                 "Don't set if you want to train an encoder model from scratch."
             )
         },
     )
     decoder_model_name_or_path: str = field(
         metadata={
             "help": (
-                "The decoder model checkpoint for weights initialization."
+                "The decoder model checkpoint for weights initialization. "
                 "Don't set if you want to train a decoder model from scratch."
             )
         },
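These help strings live in dataclass field metadata; by the time any argument parser reads them they are already one concatenated string. A minimal sketch with plain dataclasses (the example scripts themselves hand these to transformers' HfArgumentParser, omitted here):

```python
from dataclasses import dataclass, field

@dataclass
class ModelArguments:
    encoder_model_name_or_path: str = field(
        metadata={
            "help": (
                "The encoder model checkpoint for weights initialization. "  # trailing space keeps the sentences apart
                "Don't set if you want to train an encoder model from scratch."
            )
        },
    )

# The "help" metadata is a single string by the time a parser displays it.
print(ModelArguments.__dataclass_fields__["encoder_model_name_or_path"].metadata["help"])
```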
8 changes: 4 additions & 4 deletions examples/flax/image-captioning/run_image_captioning_flax.py
@@ -203,7 +203,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -256,7 +256,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
                 "This argument is also used to override the `max_length` param of `model.generate`, which is used "
                 "during evaluation."
             )
@@ -423,7 +423,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )

@@ -685,7 +685,7 @@ def preprocess_fn(examples, max_target_length, check_image=True):
     eval_batch_size = int(training_args.per_device_eval_batch_size) * jax.device_count()
     if training_args.block_size % train_batch_size > 0 or training_args.block_size % eval_batch_size > 0:
         raise ValueError(
-            "`training_args.block_size` needs to be a multiple of the global train/eval batch size."
+            "`training_args.block_size` needs to be a multiple of the global train/eval batch size. "
             f"Got {training_args.block_size}, {train_batch_size} and {eval_batch_size} respectively instead."
         )
4 changes: 2 additions & 2 deletions examples/flax/language-modeling/run_bart_dlm_flax.py
@@ -487,7 +487,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )

@@ -606,7 +606,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
8 changes: 4 additions & 4 deletions examples/flax/language-modeling/run_clm_flax.py
@@ -190,7 +190,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -368,7 +368,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )

@@ -524,7 +524,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )

@@ -586,7 +586,7 @@ def tokenize_function(examples):
     else:
         if data_args.block_size > tokenizer.model_max_length:
             logger.warning(
-                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
+                f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
                 f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
             )
         block_size = min(data_args.block_size, tokenizer.model_max_length)
6 changes: 3 additions & 3 deletions examples/flax/language-modeling/run_mlm_flax.py
@@ -195,7 +195,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -411,7 +411,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )

@@ -556,7 +556,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
4 changes: 2 additions & 2 deletions examples/flax/language-modeling/run_t5_mlm_flax.py
@@ -528,7 +528,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )

@@ -647,7 +647,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
        )
4 changes: 2 additions & 2 deletions examples/flax/question-answering/run_qa.py
@@ -176,7 +176,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -582,7 +582,7 @@ def main():

     if data_args.max_seq_length > tokenizer.model_max_length:
         logger.warning(
-            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+            f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
             f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
         )
     max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -415,7 +415,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use `--overwrite_output_dir` to overcome."
         )
8 changes: 4 additions & 4 deletions examples/flax/summarization/run_summarization_flax.py
@@ -209,7 +209,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -268,7 +268,7 @@ class DataTrainingArguments:
         metadata={
             "help": (
                 "The maximum total sequence length for validation target text after tokenization. Sequences longer "
-                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+                "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
                 "This argument is also used to override the `max_length` param of `model.generate`, which is used "
                 "during evaluation."
             )
@@ -451,7 +451,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )

@@ -558,7 +558,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
2 changes: 1 addition & 1 deletion examples/flax/text-classification/run_flax_glue.py
@@ -122,7 +122,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
2 changes: 1 addition & 1 deletion examples/flax/token-classification/run_flax_ner.py
@@ -170,7 +170,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
4 changes: 2 additions & 2 deletions examples/flax/vision/run_image_classification.py
@@ -180,7 +180,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -291,7 +291,7 @@ def main():
         and not training_args.overwrite_output_dir
     ):
         raise ValueError(
-            f"Output directory ({training_args.output_dir}) already exists and is not empty."
+            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
             "Use --overwrite_output_dir to overcome."
         )
4 changes: 2 additions & 2 deletions examples/legacy/multiple_choice/utils_multiple_choice.py
@@ -379,7 +379,7 @@ def get_test_examples(self, data_dir):
         """See base class."""
         logger.info("LOOKING AT {} dev".format(data_dir))
         raise ValueError(
-            "For swag testing, the input file does not contain a label column. It can not be tested in current code"
+            "For swag testing, the input file does not contain a label column. It can not be tested in current code "
             "setting!"
         )
         return self._create_examples(self._read_csv(os.path.join(data_dir, "test.csv")), "test")
@@ -541,7 +541,7 @@ def convert_examples_to_features(
         if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
             logger.info(
                 "Attention! you are cropping tokens (swag task is ok). "
-                "If you are training ARC and RACE and you are poping question + options,"
+                "If you are training ARC and RACE and you are poping question + options, "
                 "you need to try to use a bigger max seq length!"
             )
2 changes: 1 addition & 1 deletion examples/legacy/pytorch-lightning/lightning_base.py
@@ -313,7 +313,7 @@ def add_generic_args(parser, root_dir) -> None:
         type=str,
         default="O2",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
     )
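The same concatenation happens inside argparse help text, as in the hunk above; a small runnable sketch of where the joined string surfaces:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--fp16_opt_level",
    type=str,
    default="O2",
    help=(
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "  # trailing space
        "See details at https://nvidia.github.io/apex/amp.html"
    ),
)
parser.print_help()  # the two literals render as one correctly spaced sentence
```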
2 changes: 1 addition & 1 deletion examples/legacy/question-answering/run_squad.py
@@ -663,7 +663,7 @@ def main():
         type=str,
         default="O1",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
     )
4 changes: 2 additions & 2 deletions examples/legacy/run_language_modeling.py
@@ -149,7 +149,7 @@ class DataTrainingArguments:
         default=-1,
         metadata={
             "help": (
-                "Optional input sequence length after tokenization."
+                "Optional input sequence length after tokenization. "
                 "The training dataset will be truncated in block of this size for training."
                 "Default to the model max input length for single sentence inputs (take into account special tokens)."
             )
@@ -283,7 +283,7 @@ def main():

     if config.model_type in ["bert", "roberta", "distilbert", "camembert"] and not data_args.mlm:
         raise ValueError(
-            "BERT and RoBERTa-like models do not have LM heads but masked LM heads. They must be run using the"
+            "BERT and RoBERTa-like models do not have LM heads but masked LM heads. They must be run using the "
             "--mlm flag (masked language modeling)."
         )
2 changes: 1 addition & 1 deletion examples/legacy/run_swag.py
@@ -579,7 +579,7 @@ def main():
         type=str,
         default="O1",
         help=(
-            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
+            "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
             "See details at https://nvidia.github.io/apex/amp.html"
         ),
     )
@@ -172,7 +172,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -189,14 +189,14 @@ def __post_init__(self):
         if not self.freeze_feature_extractor and self.freeze_feature_encoder:
             warnings.warn(
                 "The argument `--freeze_feature_extractor` is deprecated and "
-                "will be removed in a future version. Use `--freeze_feature_encoder`"
+                "will be removed in a future version. Use `--freeze_feature_encoder` "
                 "instead. Setting `freeze_feature_encoder==True`.",
                 FutureWarning,
             )
         if self.freeze_feature_extractor and not self.freeze_feature_encoder:
             raise ValueError(
                 "The argument `--freeze_feature_extractor` is deprecated and "
-                "should not be used in combination with `--freeze_feature_encoder`."
+                "should not be used in combination with `--freeze_feature_encoder`. "
                 "Only make use of `--freeze_feature_encoder`."
             )
4 changes: 2 additions & 2 deletions examples/pytorch/contrastive-image-text/run_clip.py
@@ -107,7 +107,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -358,7 +358,7 @@ def main():
         )
     else:
         raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+            "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
             "You can do it from another script, save it, and load it from here, using --tokenizer_name."
         )
@@ -163,7 +163,7 @@ class ModelArguments:
         metadata={
             "help": (
                 "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-                "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+                "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
                 "execute code present on the Hub on your local machine."
             )
         },
@@ -152,7 +152,7 @@ def parse_args():
         default=False,
         help=(
             "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
-            "should only be set to `True` for repositories you trust and in which you have read the code, as it will"
+            "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
             "execute code present on the Hub on your local machine."
         ),
     )
@@ -179,7 +179,7 @@ def parse_args():
         default="all",
         help=(
             'The integration to report the results and logs to. Supported platforms are `"tensorboard"`,'
-            ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations.'
+            ' `"wandb"`, `"comet_ml"` and `"clearml"`. Use `"all"` (default) to report to all integrations. '
             "Only applicable when `--with_tracking` is passed."
         ),
     )
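To hunt for remaining cases of this pattern, a heuristic sketch (my own, not from the repository) built on the standard-library tokenize module; it flags adjacent string literals where the first ends without whitespace. Linter rules for implicit string concatenation (e.g. the ISC checks in ruff/flake8 plugins) cover similar ground:

```python
import io
import tokenize

# Non-logical newlines and comments may sit between two adjacent literals.
SKIP = {tokenize.NL, tokenize.COMMENT}

def find_suspect_concats(source: str):
    """Yield (line_number, literal) for implicit concatenations lacking a separating space.

    Heuristic: inspects raw token text, so escape sequences are not treated
    as whitespace, and f-strings tokenize differently on Python 3.12+.
    """
    toks = [
        tok for tok in tokenize.generate_tokens(io.StringIO(source).readline)
        if tok.type not in SKIP
    ]
    for prev, curr in zip(toks, toks[1:]):
        if prev.type == tokenize.STRING and curr.type == tokenize.STRING:
            body = prev.string.rstrip("\"'")  # drop the closing quote(s)
            if body and not body[-1].isspace():
                yield prev.start[0], prev.string

sample = '''
msg = (
    "Output directory already exists and is not empty."
    "Use --overwrite_output_dir to overcome."
)
'''
for lineno, literal in find_suspect_concats(sample):
    print(f"line {lineno}: {literal} ends without a trailing space")
```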
(Diff truncated: the remaining changed files are not shown in this capture.)
