diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index 652bb3be45474..640919f7bff87 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -36,14 +36,14 @@ import nltk # Here to have a nice missing dependency error message early on import numpy as np import optax +from PIL import Image from datasets import Dataset, load_dataset from filelock import FileLock from flax import jax_utils, traverse_util from flax.jax_utils import unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key -from huggingface_hub import Repository, create_repo -from PIL import Image +from huggingface_hub import HfApi from tqdm import tqdm import transformers @@ -455,9 +455,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ @@ -1052,7 +1051,7 @@ def generate_step(params, batch): if not os.path.isdir(os.path.join(training_args.output_dir)): os.makedirs(os.path.join(training_args.output_dir), exist_ok=True) - def save_ckpt(ckpt_dir: str, commit_msg: str = ""): + def save_ckpt(ckpt_dir: str, commit_msg: str): """save checkpoints and push to Hugging Face Hub if specified""" # save checkpoint after each epoch and push checkpoint to the hub @@ -1061,7 +1060,13 @@ def save_ckpt(ckpt_dir: str, commit_msg: str = ""): model.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir), params=params) tokenizer.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir)) if training_args.push_to_hub: - repo.push_to_hub(commit_message=commit_msg, blocking=False) + api.upload_folder( + commit_message=commit_msg, + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) def evaluation_loop( rng: jax.random.PRNGKey, diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py index f5369299a6d4c..e5cbe5cd0fdba 100644 --- a/examples/flax/language-modeling/run_bart_dlm_flax.py +++ b/examples/flax/language-modeling/run_bart_dlm_flax.py @@ -44,7 +44,7 @@ from flax.jax_utils import pad_shard_unpad from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm from transformers import ( @@ -517,9 +517,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, 
token=training_args.hub_token).repo_id # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ @@ -949,7 +948,13 @@ def eval_step(params, batch): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) + api.upload_folder( + commit_message=f"Saving weights and logs of step {cur_step}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) # Eval after training if training_args.do_eval: diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py index 5ef17be35322c..48bad1a04c6d1 100755 --- a/examples/flax/language-modeling/run_clm_flax.py +++ b/examples/flax/language-modeling/run_clm_flax.py @@ -44,7 +44,7 @@ from flax.jax_utils import pad_shard_unpad, unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm import transformers @@ -403,9 +403,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ @@ -847,8 +846,13 @@ def eval_step(params, batch): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) - + api.upload_folder( + commit_message=f"Saving weights and logs of step {cur_step}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) # Eval after training if training_args.do_eval: eval_metrics = [] diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index 86415578138ae..ccd0f2bf20d97 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -45,7 +45,7 @@ from flax.jax_utils import pad_shard_unpad from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm from transformers import ( @@ -441,9 +441,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, 
token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ @@ -890,8 +889,13 @@ def eval_step(params, batch): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) - + api.upload_folder( + commit_message=f"Saving weights and logs of step {cur_step}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) # Eval after training if training_args.do_eval: num_eval_samples = len(tokenized_datasets["validation"]) diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index fa6a5742236ca..06384deac457f 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -44,7 +44,7 @@ from flax.jax_utils import pad_shard_unpad from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm from transformers import ( @@ -558,9 +558,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ @@ -977,8 +976,13 @@ def eval_step(params, batch): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) - + api.upload_folder( + commit_message=f"Saving weights and logs of step {cur_step}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) # Eval after training if training_args.do_eval: num_eval_samples = len(tokenized_datasets["validation"]) diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index 0a9e98a52dd5b..3b9c352399573 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -42,7 +42,7 @@ from flax.jax_utils import pad_shard_unpad, replicate, unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm from utils_qa import postprocess_qa_predictions @@ -493,9 +493,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve 
repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # region Load Data # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) @@ -1051,7 +1050,13 @@ def eval_step(state, batch): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) + api.upload_folder( + commit_message=f"Saving weights and logs of step {cur_step}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}" # endregion diff --git a/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py b/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py index 7df04fe6c4943..f6037c3321575 100644 --- a/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py +++ b/examples/flax/speech-recognition/run_flax_speech_recognition_seq2seq.py @@ -39,7 +39,7 @@ from flax.jax_utils import pad_shard_unpad, unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm import tqdm @@ -427,8 +427,9 @@ def main(): ) else: repo_name = training_args.hub_model_id - create_repo(repo_name, exist_ok=True, token=training_args.hub_token) - repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token) + # Create repo and retrieve repo_id + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # 3. 
Load dataset raw_datasets = DatasetDict() @@ -852,7 +853,13 @@ def generate_step(params, batch): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False) + api.upload_folder( + commit_message=f"Saving weights and logs of epoch {epoch}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) if __name__ == "__main__": diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index 1a72cc65225c5..391cd8cba77f6 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -44,7 +44,7 @@ from flax.jax_utils import pad_shard_unpad, unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm import transformers @@ -483,9 +483,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ @@ -976,7 +975,13 @@ def generate_step(params, batch): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False) + api.upload_folder( + commit_message=f"Saving weights and logs of epoch {epoch}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) # ======================== Prediction loop ============================== if training_args.do_predict: diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index 0167bfdd26f9b..d60a87251800e 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -37,7 +37,7 @@ from flax.jax_utils import pad_shard_unpad, replicate, unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm import transformers @@ -373,9 +373,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # Get the datasets: you can either provide 
your own CSV/JSON training and evaluation files (see below) # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). @@ -677,7 +676,13 @@ def eval_step(state, batch): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) + api.upload_folder( + commit_message=f"Saving weights and logs of epoch {epoch}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}" # save the eval metrics in json diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index b73ec4810308a..f021402f09d63 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -39,7 +39,7 @@ from flax.jax_utils import pad_shard_unpad, replicate, unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm import transformers @@ -429,9 +429,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) # or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/ @@ -798,7 +797,13 @@ def compute_metrics(): model.save_pretrained(training_args.output_dir, params=params) tokenizer.save_pretrained(training_args.output_dir) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False) + api.upload_folder( + commit_message=f"Saving weights and logs of step {cur_step}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) epochs.desc = f"Epoch ... 
{epoch + 1}/{num_epochs}" # Eval after training diff --git a/examples/flax/vision/run_image_classification.py b/examples/flax/vision/run_image_classification.py index 364ac7dd2d093..d8011867957cd 100644 --- a/examples/flax/vision/run_image_classification.py +++ b/examples/flax/vision/run_image_classification.py @@ -42,7 +42,7 @@ from flax.jax_utils import pad_shard_unpad, unreplicate from flax.training import train_state from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from tqdm import tqdm import transformers @@ -324,9 +324,8 @@ def main(): if repo_name is None: repo_name = Path(training_args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id - # Clone repo locally - repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id # Initialize datasets and pre-processing transforms # We use torchvision here for faster pre-processing @@ -595,7 +594,13 @@ def eval_step(params, batch): params = jax.device_get(jax.tree_util.tree_map(lambda x: x[0], state.params)) model.save_pretrained(training_args.output_dir, params=params) if training_args.push_to_hub: - repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False) + api.upload_folder( + commit_message=f"Saving weights and logs of epoch {epoch}", + folder_path=training_args.output_dir, + repo_id=repo_id, + repo_type="model", + token=training_args.hub_token, + ) if __name__ == "__main__": diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index 9dd9acace50a7..124cc9215e94c 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -27,7 +27,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from torchvision.transforms import ( CenterCrop, @@ -264,15 +264,9 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -561,10 +555,12 @@ def collate_fn(examples): ) if accelerator.is_main_process: image_processor.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress {completed_steps} steps", - blocking=False, - auto_lfs_prune=True, + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + 
repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if completed_steps >= args.max_train_steps: @@ -603,8 +599,12 @@ def collate_fn(examples): ) if accelerator.is_main_process: image_processor.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.checkpointing_steps == "epoch": @@ -625,8 +625,13 @@ def collate_fn(examples): if accelerator.is_main_process: image_processor.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) - + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) all_results = {f"eval_{k}": v for k, v in eval_metric.items()} with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: json.dump(all_results, f) diff --git a/examples/pytorch/image-pretraining/run_mim_no_trainer.py b/examples/pytorch/image-pretraining/run_mim_no_trainer.py index 1bca532f930c8..ecc1b1196f15c 100644 --- a/examples/pytorch/image-pretraining/run_mim_no_trainer.py +++ b/examples/pytorch/image-pretraining/run_mim_no_trainer.py @@ -26,7 +26,7 @@ from accelerate import Accelerator, DistributedType from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from torchvision.transforms import Compose, Lambda, Normalize, RandomHorizontalFlip, RandomResizedCrop, ToTensor from tqdm.auto import tqdm @@ -437,15 +437,9 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -781,8 +775,12 @@ def preprocess_images(examples): ) if accelerator.is_main_process: image_processor.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.checkpointing_steps == "epoch": @@ -803,7 +801,13 @@ def preprocess_images(examples): if accelerator.is_main_process: image_processor.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) if __name__ == "__main__": diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py 
b/examples/pytorch/language-modeling/run_clm_no_trainer.py index e227997bc5739..2a2b91afa3719 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -37,7 +37,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -304,15 +304,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -682,8 +675,12 @@ def group_texts(examples): ) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.checkpointing_steps == "epoch": @@ -704,8 +701,13 @@ def group_texts(examples): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) - + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: json.dump({"perplexity": perplexity}, f) diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index e002c999c883d..0eb690a98501d 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -37,7 +37,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -311,15 +311,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -720,8 +713,12 @@ def 
group_texts(examples): ) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.checkpointing_steps == "epoch": @@ -742,8 +739,13 @@ def group_texts(examples): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) - + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: json.dump({"perplexity": perplexity}, f) diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index b7c3cb58bb646..da7d99526b5a9 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -36,7 +36,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -328,15 +328,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index 8cf216e5f4d0b..bd425a93bbc9e 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -34,7 +34,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm from utils_qa import postprocess_qa_predictions_with_beam_search @@ -333,15 +333,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, 
token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -873,8 +866,12 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): ) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) # initialize all lists to collect the batches @@ -1020,7 +1017,13 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) logger.info(json.dumps(eval_metric, indent=4)) save_prefixed_metrics(eval_metric, args.output_dir) diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index 07a51cb53b1ef..0c591acdda3c3 100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -34,7 +34,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm from utils_qa import postprocess_qa_predictions @@ -381,15 +381,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -912,8 +905,12 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): ) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) # Evaluation @@ -1013,8 +1010,13 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) - + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) logger.info(json.dumps(eval_metric, indent=4)) save_prefixed_metrics(eval_metric, args.output_dir) diff --git 
a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py index 44faf4fc4c5e3..358d3bd208d5b 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py @@ -29,7 +29,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo, hf_hub_download +from huggingface_hub import HfApi, hf_hub_download from PIL import Image from torch.utils.data import DataLoader from torchvision import transforms @@ -365,15 +365,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -632,10 +625,12 @@ def preprocess_val(example_batch): ) if accelerator.is_main_process: image_processor.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress {completed_steps} steps", - blocking=False, - auto_lfs_prune=True, + api.upload_folder( + commit_message=f"Training in progress {completed_steps} steps", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if completed_steps >= args.max_train_steps: @@ -687,8 +682,12 @@ def preprocess_val(example_batch): ) if accelerator.is_main_process: image_processor.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.checkpointing_steps == "epoch": @@ -709,7 +708,13 @@ def preprocess_val(example_batch): if accelerator.is_main_process: image_processor.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) all_results = { f"eval_{k}": v.tolist() if isinstance(v, np.ndarray) else v for k, v in eval_metrics.items() diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py index 6bde6d2b7d0f1..5b77348ad7278 100755 --- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py +++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py @@ -27,7 +27,7 @@ from accelerate import Accelerator from accelerate.logging import get_logger from datasets import DatasetDict, concatenate_datasets, load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from 
torch.utils.data.dataloader import DataLoader from tqdm.auto import tqdm @@ -423,9 +423,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -719,10 +718,12 @@ def prepare_dataset(batch): ) if (args.push_to_hub and epoch < args.num_train_epochs - 1) and accelerator.is_main_process: - repo.push_to_hub( - commit_message=f"Training in progress step {completed_steps}", - blocking=False, - auto_lfs_prune=True, + api.upload_folder( + commit_message=f"Training in progress step {completed_steps}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) # if completed steps > `args.max_train_steps` stop @@ -772,7 +773,13 @@ def prepare_dataset(batch): ) if accelerator.is_main_process: if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) if __name__ == "__main__": diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index f2c6c456a92d7..f859f1cfed5c5 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -36,7 +36,7 @@ from accelerate.utils import set_seed from datasets import load_dataset from filelock import FileLock -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -375,9 +375,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: @@ -755,8 +754,12 @@ def postprocess_text(preds, labels): ) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.checkpointing_steps == "epoch": @@ -774,7 +777,13 @@ def postprocess_text(preds, labels): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) all_results = {f"eval_{k}": v for k, v in result.items()} with open(os.path.join(args.output_dir, "all_results.json"), "w") as 
f: diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index 084e6753b27b8..c6a6cd7516e0f 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -28,7 +28,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import HfApi, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -46,6 +46,7 @@ from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version + # Will error if the minimal version of Transformers is not installed. Remove at your own risks. check_min_version("4.39.0.dev0") @@ -255,7 +256,7 @@ def main(): repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id api = HfApi() - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -608,7 +609,6 @@ def preprocess_function(examples): folder_path=args.output_dir, repo_id=repo_id, repo_type="model", - ignore_patterns=["step_*", "epoch_*"], token=args.hub_token, ) @@ -635,7 +635,6 @@ def preprocess_function(examples): folder_path=args.output_dir, repo_id=repo_id, repo_type="model", - ignore_patterns=["step_*", "epoch_*"], token=args.hub_token, ) diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index c6058b0fed3ff..297ffde3bc469 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -34,7 +34,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import ClassLabel, load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -310,15 +310,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -776,8 +769,12 @@ def compute_metrics(): ) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.checkpointing_steps == "epoch": @@ -798,7 +795,13 @@ def compute_metrics(): if accelerator.is_main_process: 
tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) all_results = {f"eval_{k}": v for k, v in eval_metric.items()} if args.with_tracking: diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 6e803c0a19c7c..f732fb6428924 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -34,7 +34,7 @@ from accelerate.logging import get_logger from accelerate.utils import set_seed from datasets import load_dataset -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -355,9 +355,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: @@ -743,8 +742,12 @@ def postprocess_text(preds, labels): ) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.checkpointing_steps == "epoch": @@ -765,7 +768,13 @@ def postprocess_text(preds, labels): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) with open(os.path.join(args.output_dir, "all_results.json"), "w") as f: json.dump({"eval_bleu": eval_metric["score"]}, f) diff --git a/examples/research_projects/codeparrot/scripts/codeparrot_training.py b/examples/research_projects/codeparrot/scripts/codeparrot_training.py index 16f6077f2415c..b5be3cbfc0bc2 100644 --- a/examples/research_projects/codeparrot/scripts/codeparrot_training.py +++ b/examples/research_projects/codeparrot/scripts/codeparrot_training.py @@ -10,7 +10,7 @@ from accelerate.utils import ProjectConfiguration from arguments import TrainingArguments from datasets import load_dataset -from huggingface_hub import Repository +from huggingface_hub import HfApi from torch.optim import AdamW from torch.utils.data import IterableDataset from torch.utils.data.dataloader import DataLoader @@ -204,17 +204,14 @@ def evaluate(args): samples_per_step = accelerator.state.num_processes * args.train_batch_size set_seed(args.seed) -# Clone model repository -if accelerator.is_main_process: - hf_repo = Repository(args.save_dir, clone_from=args.model_ckpt) - # Logging logger, run_name = setup_logging(args) logger.info(accelerator.state) # Checkout new branch on repo if 
accelerator.is_main_process: - hf_repo.git_checkout(run_name, create_branch_ok=True) + api = HfApi() + api.create_branch(branch=run_name, repo_id=args.model_ckpt, exist_ok=True) # Load model and tokenizer model = AutoModelForCausalLM.from_pretrained(args.save_dir) @@ -310,7 +307,7 @@ def get_lr(): save_dir = os.path.join(args.save_dir, f"step_{step}") accelerator.save_state(save_dir) if accelerator.is_main_process: - hf_repo.push_to_hub(commit_message=f"step {step}") + api.upload_folder(commit_message=f"step {step}", repo_id=args.model_ckpt, folder_path=args.save_dir, revision=run_name) model.train() if completed_steps >= args.max_train_steps: break @@ -325,4 +322,4 @@ def get_lr(): save_dir = os.path.join(args.save_dir, f"step_{step}") accelerator.save_state(save_dir) if accelerator.is_main_process: - hf_repo.push_to_hub(commit_message="final model") + api.upload_folder(commit_message="final model", repo_id=args.model_ckpt, folder_path=args.save_dir, revision=run_name) diff --git a/examples/research_projects/luke/run_luke_ner_no_trainer.py b/examples/research_projects/luke/run_luke_ner_no_trainer.py index cac487b059d71..982658aacdc0f 100644 --- a/examples/research_projects/luke/run_luke_ner_no_trainer.py +++ b/examples/research_projects/luke/run_luke_ner_no_trainer.py @@ -29,7 +29,7 @@ import torch from accelerate import Accelerator, DistributedDataParallelKwargs from datasets import ClassLabel, load_dataset, load_metric -from huggingface_hub import Repository, create_repo +from huggingface_hub import HfApi from luke_utils import DataCollatorForLukeTokenClassification, is_punctuation, padding_tensor from torch.utils.data import DataLoader from tqdm.auto import tqdm @@ -262,9 +262,8 @@ def main(): if repo_name is None: repo_name = Path(args.output_dir).absolute().name # Create repo and retrieve repo_id - repo_id = create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id - # Clone repo locally - repo = Repository(args.output_dir, clone_from=repo_id, token=args.hub_token) + api = HfApi() + repo_id = api.create_repo(repo_name, exist_ok=True, token=args.hub_token).repo_id elif args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) accelerator.wait_for_everyone() @@ -697,8 +696,12 @@ def compute_metrics(): unwrapped_model.save_pretrained(args.output_dir, save_function=accelerator.save) if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False, auto_lfs_prune=True + api.upload_folder( + commit_message=f"Training in progress epoch {epoch}", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, ) if args.output_dir is not None: @@ -708,7 +711,13 @@ def compute_metrics(): if accelerator.is_main_process: tokenizer.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", auto_lfs_prune=True) + api.upload_folder( + commit_message="End of training", + folder_path=args.output_dir, + repo_id=repo_id, + repo_type="model", + token=args.hub_token, + ) if __name__ == "__main__":
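
For reference, below is a minimal sketch of the pattern these diffs converge on (a hypothetical standalone snippet; repo_name, hub_token, and output_dir stand in for each script's own arguments). create_repo(..., exist_ok=True) is idempotent, and upload_folder pushes the folder contents to the Hub as a single HTTP commit, which is why the local clone, the .gitignore bookkeeping, and the auto_lfs_prune handling can all be dropped:

    from huggingface_hub import HfApi

    api = HfApi()
    # Create the repo once (a no-op if it already exists) and keep its fully qualified id.
    repo_id = api.create_repo(repo_name, exist_ok=True, token=hub_token).repo_id

    # After save_pretrained(...) has written a checkpoint into output_dir,
    # upload the whole folder to the Hub as one commit.
    api.upload_folder(
        commit_message="Training in progress",
        folder_path=output_dir,
        repo_id=repo_id,
        repo_type="model",
        token=hub_token,
    )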