Update legacy Repository usage in various example files #29085
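This PR replaces the deprecated `Repository` class from `huggingface_hub` with the HTTP-based `HfApi` client across the Flax example scripts. `Repository` worked by git-cloning the target repo into the output directory and pushing commits from that clone; `HfApi.create_repo` plus `HfApi.upload_folder` achieve the same result over HTTP, with no local clone. A minimal sketch of the migration pattern used throughout the diff below (`output_dir` and `hub_token` are illustrative placeholders for the scripts' `training_args` values):

from pathlib import Path

from huggingface_hub import HfApi

output_dir = "./outputs"  # placeholder for training_args.output_dir
hub_token = None  # placeholder for training_args.hub_token; None falls back to the cached login

# Old pattern (deprecated):
#   repo_id = create_repo(repo_name, exist_ok=True, token=hub_token).repo_id
#   repo = Repository(output_dir, clone_from=repo_id, token=hub_token)
#   repo.push_to_hub(commit_message=..., blocking=False)

# New pattern: create the repo, then upload the output folder directly.
repo_name = Path(output_dir).absolute().name
api = HfApi()
repo_id = api.create_repo(repo_name, exist_ok=True, token=hub_token).repo_id

api.upload_folder(
    commit_message="Saving weights and logs",
    folder_path=output_dir,
    repo_id=repo_id,
    repo_type="model",
    token=hub_token,
)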

15 changes: 10 additions & 5 deletions examples/flax/image-captioning/run_image_captioning_flax.py
@@ -42,7 +42,7 @@
 from flax.jax_utils import unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from PIL import Image
 from tqdm import tqdm

@@ -455,9 +455,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -1061,7 +1060,13 @@ def save_ckpt(ckpt_dir: str, commit_msg: str = ""):
             model.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir), params=params)
             tokenizer.save_pretrained(os.path.join(training_args.output_dir, ckpt_dir))
             if training_args.push_to_hub:
-                repo.push_to_hub(commit_message=commit_msg, blocking=False)
+                api.upload_folder(
+                    commit_message=commit_msg,
+                    folder_path=training_args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=training_args.hub_token,
+                )

     def evaluation_loop(
         rng: jax.random.PRNGKey,
15 changes: 10 additions & 5 deletions examples/flax/language-modeling/run_bart_dlm_flax.py
@@ -44,7 +44,7 @@
 from flax.jax_utils import pad_shard_unpad
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from tqdm import tqdm

 from transformers import (
@@ -517,9 +517,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -949,7 +948,13 @@ def eval_step(params, batch):
                     model.save_pretrained(training_args.output_dir, params=params)
                     tokenizer.save_pretrained(training_args.output_dir)
                     if training_args.push_to_hub:
-                        repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
+                        api.upload_folder(
+                            commit_message=f"Saving weights and logs of step {cur_step}",
+                            folder_path=training_args.output_dir,
+                            repo_id=repo_id,
+                            repo_type="model",
+                            token=training_args.hub_token,
+                        )

     # Eval after training
     if training_args.do_eval:
16 changes: 10 additions & 6 deletions examples/flax/language-modeling/run_clm_flax.py
@@ -44,7 +44,7 @@
 from flax.jax_utils import pad_shard_unpad, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from tqdm import tqdm

 import transformers
@@ -403,9 +403,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -847,8 +846,13 @@ def eval_step(params, batch):
                     model.save_pretrained(training_args.output_dir, params=params)
                     tokenizer.save_pretrained(training_args.output_dir)
                     if training_args.push_to_hub:
-                        repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
-
+                        api.upload_folder(
+                            commit_message=f"Saving weights and logs of step {cur_step}",
+                            folder_path=training_args.output_dir,
+                            repo_id=repo_id,
+                            repo_type="model",
+                            token=training_args.hub_token,
+                        )
     # Eval after training
     if training_args.do_eval:
         eval_metrics = []
16 changes: 10 additions & 6 deletions examples/flax/language-modeling/run_mlm_flax.py
@@ -45,7 +45,7 @@
 from flax.jax_utils import pad_shard_unpad
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from tqdm import tqdm

 from transformers import (
@@ -441,9 +441,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -890,8 +889,13 @@ def eval_step(params, batch):
                     model.save_pretrained(training_args.output_dir, params=params)
                     tokenizer.save_pretrained(training_args.output_dir)
                     if training_args.push_to_hub:
-                        repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
-
+                        api.upload_folder(
+                            commit_message=f"Saving weights and logs of step {cur_step}",
+                            folder_path=training_args.output_dir,
+                            repo_id=repo_id,
+                            repo_type="model",
+                            token=training_args.hub_token,
+                        )
     # Eval after training
     if training_args.do_eval:
         num_eval_samples = len(tokenized_datasets["validation"])
16 changes: 10 additions & 6 deletions examples/flax/language-modeling/run_t5_mlm_flax.py
@@ -44,7 +44,7 @@
 from flax.jax_utils import pad_shard_unpad
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from tqdm import tqdm

 from transformers import (
@@ -558,9 +558,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -977,8 +976,13 @@ def eval_step(params, batch):
                     model.save_pretrained(training_args.output_dir, params=params)
                     tokenizer.save_pretrained(training_args.output_dir)
                     if training_args.push_to_hub:
-                        repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
-
+                        api.upload_folder(
+                            commit_message=f"Saving weights and logs of step {cur_step}",
+                            folder_path=training_args.output_dir,
+                            repo_id=repo_id,
+                            repo_type="model",
+                            token=training_args.hub_token,
+                        )
     # Eval after training
     if training_args.do_eval:
         num_eval_samples = len(tokenized_datasets["validation"])
15 changes: 10 additions & 5 deletions examples/flax/question-answering/run_qa.py
@@ -42,7 +42,7 @@
 from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from tqdm import tqdm
 from utils_qa import postprocess_qa_predictions

@@ -493,9 +493,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # region Load Data
     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
@@ -1051,7 +1050,13 @@ def eval_step(state, batch):
                     model.save_pretrained(training_args.output_dir, params=params)
                     tokenizer.save_pretrained(training_args.output_dir)
                     if training_args.push_to_hub:
-                        repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
+                        api.upload_folder(
+                            commit_message=f"Saving weights and logs of step {cur_step}",
+                            folder_path=training_args.output_dir,
+                            repo_id=repo_id,
+                            repo_type="model",
+                            token=training_args.hub_token,
+                        )
         epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"
     # endregion

@@ -39,7 +39,7 @@
 from flax.jax_utils import pad_shard_unpad, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from torch.utils.data import DataLoader
 from tqdm import tqdm

@@ -427,8 +427,9 @@ def main():
             )
         else:
             repo_name = training_args.hub_model_id
-        create_repo(repo_name, exist_ok=True, token=training_args.hub_token)
-        repo = Repository(training_args.output_dir, clone_from=repo_name, token=training_args.hub_token)
+        # Create repo and retrieve repo_id
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # 3. Load dataset
     raw_datasets = DatasetDict()
@@ -852,7 +853,13 @@ def generate_step(params, batch):
             model.save_pretrained(training_args.output_dir, params=params)
             tokenizer.save_pretrained(training_args.output_dir)
             if training_args.push_to_hub:
-                repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False)
+                api.upload_folder(
+                    commit_message=f"Saving weights and logs of epoch {epoch}",
+                    folder_path=training_args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=training_args.hub_token,
+                )


 if __name__ == "__main__":
15 changes: 10 additions & 5 deletions examples/flax/summarization/run_summarization_flax.py
@@ -44,7 +44,7 @@
 from flax.jax_utils import pad_shard_unpad, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard, shard_prng_key
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from tqdm import tqdm

 import transformers
@@ -483,9 +483,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
     # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/
@@ -976,7 +975,13 @@ def generate_step(params, batch):
             model.save_pretrained(training_args.output_dir, params=params)
             tokenizer.save_pretrained(training_args.output_dir)
             if training_args.push_to_hub:
-                repo.push_to_hub(commit_message=f"Saving weights and logs of epoch {epoch}", blocking=False)
+                api.upload_folder(
+                    commit_message=f"Saving weights and logs of epoch {epoch}",
+                    folder_path=training_args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=training_args.hub_token,
+                )

     # ======================== Prediction loop ==============================
     if training_args.do_predict:
15 changes: 10 additions & 5 deletions examples/flax/text-classification/run_flax_glue.py
@@ -37,7 +37,7 @@
 from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from tqdm import tqdm

 import transformers
@@ -373,9 +373,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
     # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
@@ -677,7 +676,13 @@ def eval_step(state, batch):
             model.save_pretrained(training_args.output_dir, params=params)
             tokenizer.save_pretrained(training_args.output_dir)
             if training_args.push_to_hub:
-                repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
+                api.upload_folder(
+                    commit_message=f"Saving weights and logs of epoch {epoch}",
+                    folder_path=training_args.output_dir,
+                    repo_id=repo_id,
+                    repo_type="model",
+                    token=training_args.hub_token,
+                )
         epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"

     # save the eval metrics in json
15 changes: 10 additions & 5 deletions examples/flax/token-classification/run_flax_ner.py
@@ -39,7 +39,7 @@
 from flax.jax_utils import pad_shard_unpad, replicate, unreplicate
 from flax.training import train_state
 from flax.training.common_utils import get_metrics, onehot, shard
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import HfApi
 from tqdm import tqdm

 import transformers
@@ -429,9 +429,8 @@ def main():
         if repo_name is None:
             repo_name = Path(training_args.output_dir).absolute().name
         # Create repo and retrieve repo_id
-        repo_id = create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id
-        # Clone repo locally
-        repo = Repository(training_args.output_dir, clone_from=repo_id, token=training_args.hub_token)
+        api = HfApi()
+        repo_id = api.create_repo(repo_name, exist_ok=True, token=training_args.hub_token).repo_id

     # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below)
     # or just provide the name of one of the public datasets for token classification task available on the hub at https://huggingface.co/datasets/
@@ -798,7 +797,13 @@ def compute_metrics():
                     model.save_pretrained(training_args.output_dir, params=params)
                     tokenizer.save_pretrained(training_args.output_dir)
                     if training_args.push_to_hub:
-                        repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
+                        api.upload_folder(
+                            commit_message=f"Saving weights and logs of step {cur_step}",
+                            folder_path=training_args.output_dir,
+                            repo_id=repo_id,
+                            repo_type="model",
+                            token=training_args.hub_token,
+                        )
         epochs.desc = f"Epoch ... {epoch + 1}/{num_epochs}"

     # Eval after training
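Note on blocking behavior: the removed `repo.push_to_hub(..., blocking=False)` calls returned immediately while the push ran in the background, whereas `api.upload_folder(...)` as written blocks until the upload completes. If non-blocking uploads matter inside a training loop, recent `huggingface_hub` releases accept `run_as_future=True` on `HfApi` upload methods. A hedged sketch, assuming an installed version with that parameter (`output_dir`, `repo_id`, and `hub_token` as in the sketch at the top):

from huggingface_hub import HfApi

api = HfApi()
# run_as_future=True returns a concurrent.futures.Future instead of blocking;
# availability depends on the installed huggingface_hub version.
future = api.upload_folder(
    commit_message="Saving weights and logs",
    folder_path=output_dir,
    repo_id=repo_id,
    repo_type="model",
    token=hub_token,
    run_as_future=True,
)
future.result()  # wait for the upload only when completion matters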