Skip to content

Commit

Permalink
Add new LFS prune API (#14294)
Browse files Browse the repository at this point in the history
  • Loading branch information
sgugger committed Nov 5, 2021
1 parent 4be78c2 commit 08a5f57
Show file tree
Hide file tree
Showing 11 changed files with 51 additions and 23 deletions.
6 changes: 4 additions & 2 deletions examples/pytorch/language-modeling/run_clm_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,9 @@ def group_texts(examples):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

if args.output_dir is not None:
accelerator.wait_for_everyone()
Expand All @@ -516,7 +518,7 @@ def group_texts(examples):
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
6 changes: 4 additions & 2 deletions examples/pytorch/language-modeling/run_mlm_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,9 @@ def group_texts(examples):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

if args.output_dir is not None:
accelerator.wait_for_everyone()
Expand All @@ -557,7 +559,7 @@ def group_texts(examples):
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
6 changes: 4 additions & 2 deletions examples/pytorch/multiple-choice/run_swag_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,9 @@ def preprocess_function(examples):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

if args.output_dir is not None:
accelerator.wait_for_everyone()
Expand All @@ -514,7 +516,7 @@ def preprocess_function(examples):
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,9 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

# initialize all lists to collect the batches
all_start_top_log_probs = []
Expand Down Expand Up @@ -853,7 +855,7 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
6 changes: 4 additions & 2 deletions examples/pytorch/question-answering/run_qa_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,9 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

# Evaluation
logger.info("***** Running Evaluation *****")
Expand Down Expand Up @@ -816,7 +818,7 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,11 @@ def prepare_dataset(batch):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)

if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process:
repo.push_to_hub(commit_message=f"Training in progress step {completed_steps}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress step {completed_steps}",
blocking=False,
auto_lfs_prune=True,
)

# if completed steps > `args.max_train_steps` stop
if completed_steps >= args.max_train_steps:
Expand Down Expand Up @@ -714,7 +718,7 @@ def prepare_dataset(batch):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,9 @@ def postprocess_text(preds, labels):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

if args.output_dir is not None:
accelerator.wait_for_everyone()
Expand All @@ -610,7 +612,7 @@ def postprocess_text(preds, labels):
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
6 changes: 4 additions & 2 deletions examples/pytorch/text-classification/run_glue_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,9 @@ def preprocess_function(examples):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

if args.output_dir is not None:
accelerator.wait_for_everyone()
Expand All @@ -462,7 +464,7 @@ def preprocess_function(examples):
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)

if args.task_name == "mnli":
# Final evaluation on mismatched validation set
Expand Down
6 changes: 4 additions & 2 deletions examples/pytorch/token-classification/run_ner_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,9 @@ def compute_metrics():
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

if args.output_dir is not None:
accelerator.wait_for_everyone()
Expand All @@ -599,7 +601,7 @@ def compute_metrics():
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
6 changes: 4 additions & 2 deletions examples/pytorch/translation/run_translation_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,9 @@ def postprocess_text(preds, labels):
unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save)
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
repo.push_to_hub(commit_message=f"Training in progress epoch {epoch}", blocking=False)
repo.push_to_hub(
commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True
)

if args.output_dir is not None:
accelerator.wait_for_everyone()
Expand All @@ -589,7 +591,7 @@ def postprocess_text(preds, labels):
if accelerator.is_main_process:
tokenizer.save_pretrained(args.output_dir)
if args.push_to_hub:
repo.push_to_hub(commit_message="End of training")
repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True)


if __name__ == "__main__":
Expand Down
12 changes: 9 additions & 3 deletions src/transformers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2644,7 +2644,9 @@ def _push_from_checkpoint(self, checkpoint_folder):
commit_message = f"Training in progress, step {self.state.global_step}"
else:
commit_message = f"Training in progress, epoch {int(self.state.epoch)}"
_, self.push_in_progress = self.repo.push_to_hub(commit_message=commit_message, blocking=False)
_, self.push_in_progress = self.repo.push_to_hub(
commit_message=commit_message, blocking=False, auto_lfs_prune=True
)
finally:
if self.args.hub_strategy == HubStrategy.CHECKPOINT:
# Move back the checkpoint to its place
Expand Down Expand Up @@ -2680,12 +2682,16 @@ def push_to_hub(self, commit_message: Optional[str] = "End of training", blockin
if not self.is_world_process_zero():
return

git_head_commit_url = self.repo.push_to_hub(commit_message=commit_message, blocking=blocking)
git_head_commit_url = self.repo.push_to_hub(
commit_message=commit_message, blocking=blocking, auto_lfs_prune=True
)
# push separately the model card to be independent from the rest of the model
if self.args.should_save:
self.create_model_card(model_name=model_name, **kwargs)
try:
self.repo.push_to_hub(commit_message="update model card README.md", blocking=blocking)
self.repo.push_to_hub(
commit_message="update model card README.md", blocking=blocking, auto_lfs_prune=True
)
except EnvironmentError as exc:
logger.error(f"Error pushing update to the model card. Please read logs and retry.\n${exc}")

Expand Down

0 comments on commit 08a5f57

Please sign in to comment.