From 1293777f6ea343ba5d981341aa6f49a85af917b0 Mon Sep 17 00:00:00 2001
From: testbot
Date: Fri, 31 Mar 2023 17:44:14 +0200
Subject: [PATCH] use upload folder in training scripts

---
 examples/controlnet/train_controlnet.py       | 40 +++++-----------
 examples/controlnet/train_controlnet_flax.py  | 46 +++++++-----------
 examples/dreambooth/train_dreambooth.py       | 40 +++++-----------
 examples/dreambooth/train_dreambooth_lora.py  | 47 +++++++------------
 .../train_instruct_pix2pix.py                 | 40 +++++-----------
 .../colossalai/train_dreambooth_colossalai.py | 40 +++++-----------
 .../train_dreambooth_inpaint.py               | 40 +++++-----------
 .../train_dreambooth_inpaint_lora.py          | 40 +++++-----------
 .../textual_inversion_bf16.py                 | 40 +++++-----------
 .../lora/train_text_to_image_lora.py          | 46 +++++++-----------
 .../textual_inversion.py                      | 40 +++++-----------
 .../textual_inversion_flax.py                 | 40 +++++-----------
 .../train_multi_subject_dreambooth.py         | 40 +++++-----------
 .../text_to_image/train_text_to_image.py      | 40 +++++-----------
 .../textual_inversion/textual_inversion.py    | 40 +++++-----------
 examples/text_to_image/train_text_to_image.py | 40 +++++-----------
 .../text_to_image/train_text_to_image_flax.py | 40 +++++-----------
 .../text_to_image/train_text_to_image_lora.py | 45 ++++++------------
 .../textual_inversion/textual_inversion.py    | 40 +++++-----------
 .../textual_inversion_flax.py                 | 40 +++++-----------
 20 files changed, 271 insertions(+), 553 deletions(-)

diff --git a/examples/controlnet/train_controlnet.py b/examples/controlnet/train_controlnet.py
index 6c14e8ca10db..231b7fe8c5f2 100644
--- a/examples/controlnet/train_controlnet.py
+++ b/examples/controlnet/train_controlnet.py
@@ -19,7 +19,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import accelerate
 import numpy as np
@@ -31,7 +30,7 @@
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
 from datasets import load_dataset
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from packaging import version
 from PIL import Image
 from torchvision import transforms
@@ -661,16 +660,6 @@ def collate_fn(examples):
     }
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -704,22 +693,14 @@ def main(args):
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load the tokenizer
     if args.tokenizer_name:
         tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, revision=args.revision, use_fast=False)
@@ -1053,7 +1034,12 @@ def load_model_hook(models, input_dir):
         controlnet.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/controlnet/train_controlnet_flax.py b/examples/controlnet/train_controlnet_flax.py
index f409a539667c..f157f95eb972 100644
--- a/examples/controlnet/train_controlnet_flax.py
+++ b/examples/controlnet/train_controlnet_flax.py
@@ -19,7 +19,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import jax
 import jax.numpy as jnp
@@ -33,7 +32,7 @@
 from flax.core.frozen_dict import unfreeze
 from flax.training import train_state
 from flax.training.common_utils import shard
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from PIL import Image
 from torch.utils.data import IterableDataset
 from torchvision import transforms
@@ -148,7 +147,7 @@ def log_validation(controlnet, controlnet_params, tokenizer, args, rng, weight_d
     return image_logs
 
 
-def save_model_card(repo_name, image_logs=None, base_model=str, repo_folder=None):
+def save_model_card(repo_id: str, image_logs=None, base_model=str, repo_folder=None):
     img_str = ""
     for i, log in enumerate(image_logs):
         images = log["images"]
@@ -174,7 +173,7 @@ def save_model_card(repo_name, image_logs=None, base_model=str, repo_folder=None
 ---
     """
     model_card = f"""
-# controlnet- {repo_name}
+# controlnet- {repo_id}
 
 These are controlnet weights trained on {base_model} with new type of conditioning. You can find some example images in the following. \n
 {img_str}
@@ -612,16 +611,6 @@ def collate_fn(examples):
     return batch
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def get_params_to_save(params):
     return jax.device_get(jax.tree_util.tree_map(lambda x: x[0], params))
 
@@ -656,22 +645,14 @@ def main():
 
     # Handle the repository creation
     if jax.process_index() == 0:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo_url = create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_url, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load the tokenizer and add the placeholder token as a additional special token
     if args.tokenizer_name:
         tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name)
@@ -1020,12 +1001,17 @@ def cumul_grad_step(grad_idx, loss_grad_rng):
 
     if args.push_to_hub:
         save_model_card(
-            repo_name,
+            repo_id,
             image_logs=image_logs,
             base_model=args.pretrained_model_name_or_path,
             repo_folder=args.output_dir,
         )
-        repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+        upload_folder(
+            repo_id=repo_id,
+            folder_path=args.output_dir,
+            commit_message="End of training",
+            ignore_patterns=["step_*", "epoch_*"],
+        )
 
 
 if __name__ == "__main__":
diff --git a/examples/dreambooth/train_dreambooth.py b/examples/dreambooth/train_dreambooth.py
index 3d2e694a1015..7c02d154a068 100644
--- a/examples/dreambooth/train_dreambooth.py
+++ b/examples/dreambooth/train_dreambooth.py
@@ -21,7 +21,6 @@
 import os
 import warnings
 from pathlib import Path
-from typing import Optional
 
 import accelerate
 import numpy as np
@@ -32,7 +31,7 @@
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from packaging import version
 from PIL import Image
 from torch.utils.data import Dataset
@@ -575,16 +574,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -677,22 +666,14 @@ def main(args):
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load the tokenizer
     if args.tokenizer_name:
         tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, revision=args.revision, use_fast=False)
@@ -1043,7 +1024,12 @@ def load_model_hook(models, input_dir):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py
index daef268ff8f3..cef19e4a5425 100644
--- a/examples/dreambooth/train_dreambooth_lora.py
+++ b/examples/dreambooth/train_dreambooth_lora.py
@@ -20,7 +20,6 @@
 import os
 import warnings
 from pathlib import Path
-from typing import Optional
 
 import numpy as np
 import torch
@@ -30,7 +29,7 @@
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from packaging import version
 from PIL import Image
 from torch.utils.data import Dataset
@@ -59,7 +58,7 @@
 logger = get_logger(__name__)
 
 
-def save_model_card(repo_name, images=None, base_model=str, prompt=str, repo_folder=None):
+def save_model_card(repo_id: str, images=None, base_model=str, prompt=str, repo_folder=None):
     img_str = ""
     for i, image in enumerate(images):
         image.save(os.path.join(repo_folder, f"image_{i}.png"))
@@ -80,7 +79,7 @@ def save_model_card(repo_name, images=None, base_model=str, prompt=str, repo_fol
 ---
     """
     model_card = f"""
-# LoRA DreamBooth - {repo_name}
+# LoRA DreamBooth - {repo_id}
 
 These are LoRA adaption weights for {base_model}. The weights were trained on {prompt} using [DreamBooth](https://dreambooth.github.io/). You can find some example images in the following. \n
 {img_str}
@@ -528,16 +527,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -625,23 +614,14 @@ def main(args):
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load the tokenizer
     if args.tokenizer_name:
         tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, revision=args.revision, use_fast=False)
@@ -1027,13 +1007,18 @@ def main(args):
 
     if args.push_to_hub:
         save_model_card(
-            repo_name,
+            repo_id,
             images=images,
             base_model=args.pretrained_model_name_or_path,
             prompt=args.instance_prompt,
             repo_folder=args.output_dir,
         )
-        repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+        upload_folder(
+            repo_id=repo_id,
+            folder_path=args.output_dir,
+            commit_message="End of training",
+            ignore_patterns=["step_*", "epoch_*"],
+        )
 
     accelerator.end_training()
 
diff --git a/examples/instruct_pix2pix/train_instruct_pix2pix.py b/examples/instruct_pix2pix/train_instruct_pix2pix.py
index 6e51e86a9f16..a119e12f73d1 100644
--- a/examples/instruct_pix2pix/train_instruct_pix2pix.py
+++ b/examples/instruct_pix2pix/train_instruct_pix2pix.py
@@ -21,7 +21,6 @@
 import math
 import os
 from pathlib import Path
-from typing import Optional
 
 import accelerate
 import datasets
@@ -37,7 +36,7 @@
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
 from datasets import load_dataset
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from packaging import version
 from torchvision import transforms
 from tqdm.auto import tqdm
@@ -363,16 +362,6 @@ def parse_args():
     return args
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def convert_to_np(image, resolution):
     image = image.convert("RGB").resize((resolution, resolution))
     return np.array(image).transpose(2, 0, 1)
@@ -436,22 +425,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load scheduler, tokenizer and models.
     noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
     tokenizer = CLIPTokenizer.from_pretrained(
@@ -968,7 +949,12 @@ def collate_fn(examples):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
         if args.validation_prompt is not None:
             edited_images = []
diff --git a/examples/research_projects/colossalai/train_dreambooth_colossalai.py b/examples/research_projects/colossalai/train_dreambooth_colossalai.py
index 6136f7233900..3d4466bf94b7 100644
--- a/examples/research_projects/colossalai/train_dreambooth_colossalai.py
+++ b/examples/research_projects/colossalai/train_dreambooth_colossalai.py
@@ -3,7 +3,6 @@
 import math
 import os
 from pathlib import Path
-from typing import Optional
 
 import colossalai
 import torch
@@ -16,7 +15,7 @@
 from colossalai.nn.parallel.utils import get_static_torch_model
 from colossalai.utils import get_current_device
 from colossalai.utils.model.colo_init_context import ColoInitContext
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -344,16 +343,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 # Gemini + ZeRO DDP
 def gemini_zero_dpp(model: torch.nn.Module, placememt_policy: str = "auto"):
     from colossalai.nn.parallel import GeminiDDP
@@ -413,22 +402,14 @@ def main(args):
 
     # Handle the repository creation
     if local_rank == 0:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load the tokenizer
     if args.tokenizer_name:
logger.info(f"Loading tokenizer from {args.tokenizer_name}", ranks=[0]) @@ -679,7 +660,12 @@ def collate_fn(examples): logger.info(f"Saving model checkpoint to {args.output_dir}", ranks=[0]) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True) + upload_folder( + repo_id=repo_id, + folder_path=args.output_dir, + commit_message="End of training", + ignore_patterns=["step_*", "epoch_*"], + ) if __name__ == "__main__": diff --git a/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint.py b/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint.py index 247361d21299..c9b9211415b8 100644 --- a/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint.py +++ b/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint.py @@ -5,7 +5,6 @@ import os import random from pathlib import Path -from typing import Optional import numpy as np import torch @@ -14,7 +13,7 @@ from accelerate import Accelerator from accelerate.logging import get_logger from accelerate.utils import ProjectConfiguration, set_seed -from huggingface_hub import HfFolder, Repository, create_repo, whoami +from huggingface_hub import create_repo, upload_folder from PIL import Image, ImageDraw from torch.utils.data import Dataset from torchvision import transforms @@ -402,16 +401,6 @@ def __getitem__(self, index): return example -def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None): - if token is None: - token = HfFolder.get_token() - if organization is None: - username = whoami(token)["name"] - return f"{username}/{model_id}" - else: - return f"{organization}/{model_id}" - - def main(): args = parse_args() logging_dir = Path(args.output_dir, args.logging_dir) @@ -485,22 +474,14 @@ def main(): # Handle the repository creation if accelerator.is_main_process: - if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - create_repo(repo_name, exist_ok=True, token=args.hub_token) - repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") - elif args.output_dir is not None: + if args.output_dir is not None: os.makedirs(args.output_dir, exist_ok=True) + if args.push_to_hub: + repo_id = create_repo( + repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token + ).repo_id + # Load the tokenizer if args.tokenizer_name: tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name) @@ -816,7 +797,12 @@ def collate_fn(examples): pipeline.save_pretrained(args.output_dir) if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True) + upload_folder( + repo_id=repo_id, + folder_path=args.output_dir, + commit_message="End of training", + ignore_patterns=["step_*", "epoch_*"], + ) accelerator.end_training() diff --git a/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint_lora.py b/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint_lora.py index e415e6965317..0522488f2882 100644 --- a/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint_lora.py +++ 
+++ b/examples/research_projects/dreambooth_inpaint/train_dreambooth_inpaint_lora.py
@@ -4,7 +4,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import numpy as np
 import torch
@@ -13,7 +12,7 @@
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from PIL import Image, ImageDraw
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -401,16 +400,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main():
     args = parse_args()
     logging_dir = Path(args.output_dir, args.logging_dir)
@@ -484,22 +473,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load the tokenizer
     if args.tokenizer_name:
         tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name)
@@ -835,7 +816,12 @@ def collate_fn(examples):
         unet.save_attn_procs(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/research_projects/intel_opts/textual_inversion/textual_inversion_bf16.py b/examples/research_projects/intel_opts/textual_inversion/textual_inversion_bf16.py
index f4d77c383e91..1580cb392e8d 100644
--- a/examples/research_projects/intel_opts/textual_inversion/textual_inversion_bf16.py
+++ b/examples/research_projects/intel_opts/textual_inversion/textual_inversion_bf16.py
@@ -4,7 +4,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import intel_extension_for_pytorch as ipex
 import numpy as np
@@ -15,7 +14,7 @@
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import set_seed
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 
 # TODO: remove and import from diffusers.utils when the new version of diffusers is released
 from packaging import version
@@ -356,16 +355,6 @@ def __getitem__(self, i):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def freeze_params(params):
     for param in params:
         param.requires_grad = False
@@ -388,22 +377,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load the tokenizer and add the placeholder token as a additional special token
     if args.tokenizer_name:
         tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name)
@@ -640,7 +621,12 @@ def main():
         save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/research_projects/lora/train_text_to_image_lora.py b/examples/research_projects/lora/train_text_to_image_lora.py
index fe031df147a4..9db2024bde1e 100644
--- a/examples/research_projects/lora/train_text_to_image_lora.py
+++ b/examples/research_projects/lora/train_text_to_image_lora.py
@@ -22,7 +22,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import numpy as np
@@ -34,7 +33,7 @@
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
 from datasets import load_dataset
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from packaging import version
 from torchvision import transforms
 from tqdm.auto import tqdm
@@ -55,7 +54,7 @@
 logger = get_logger(__name__, log_level="INFO")
 
 
-def save_model_card(repo_name, images=None, base_model=str, dataset_name=str, repo_folder=None):
+def save_model_card(repo_id: str, images=None, base_model=str, dataset_name=str, repo_folder=None):
     img_str = ""
     for i, image in enumerate(images):
         image.save(os.path.join(repo_folder, f"image_{i}.png"))
@@ -75,7 +74,7 @@ def save_model_card(repo_name, images=None, base_model=str, dataset_name=str, re
 ---
     """
     model_card = f"""
-# LoRA text2image fine-tuning - {repo_name}
+# LoRA text2image fine-tuning - {repo_id}
 These are LoRA adaption weights for {base_model}. The weights were fine-tuned on the {dataset_name} dataset. You can find some example images in the following. \n
 {img_str}
 """
@@ -386,16 +385,6 @@ def parse_args():
     return args
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 DATASET_NAME_MAPPING = {
     "lambdalabs/pokemon-blip-captions": ("image", "text"),
 }
@@ -441,22 +430,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo_name = create_repo(repo_name, exist_ok=True)
-            repo = Repository(args.output_dir, clone_from=repo_name)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load scheduler, tokenizer and models.
     noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
     tokenizer = CLIPTokenizer.from_pretrained(
@@ -945,13 +926,18 @@ def collate_fn(examples):
 
     if args.push_to_hub:
         save_model_card(
-            repo_name,
+            repo_id,
             images=images,
             base_model=args.pretrained_model_name_or_path,
             dataset_name=args.dataset_name,
             repo_folder=args.output_dir,
         )
-        repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+        upload_folder(
+            repo_id=repo_id,
+            folder_path=args.output_dir,
+            commit_message="End of training",
+            ignore_patterns=["step_*", "epoch_*"],
+        )
 
     # Final inference
     # Load previous pipeline
diff --git a/examples/research_projects/mulit_token_textual_inversion/textual_inversion.py b/examples/research_projects/mulit_token_textual_inversion/textual_inversion.py
index 05f714715fc9..622c51d2e52e 100644
--- a/examples/research_projects/mulit_token_textual_inversion/textual_inversion.py
+++ b/examples/research_projects/mulit_token_textual_inversion/textual_inversion.py
@@ -19,7 +19,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import numpy as np
 import PIL
@@ -30,7 +29,7 @@
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from multi_token_clip import MultiTokenCLIPTokenizer
 
 # TODO: remove and import from diffusers.utils when the new version of diffusers is released
@@ -547,16 +546,6 @@ def __getitem__(self, i):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main():
     args = parse_args()
     logging_dir = os.path.join(args.output_dir, args.logging_dir)
@@ -596,22 +585,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load tokenizer
     if args.tokenizer_name:
         tokenizer = MultiTokenCLIPTokenizer.from_pretrained(args.tokenizer_name)
@@ -932,7 +913,12 @@ def main():
         save_progress(tokenizer, text_encoder, accelerator, save_path)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/research_projects/mulit_token_textual_inversion/textual_inversion_flax.py b/examples/research_projects/mulit_token_textual_inversion/textual_inversion_flax.py
index 9474e3281256..ecc89f98298e 100644
--- a/examples/research_projects/mulit_token_textual_inversion/textual_inversion_flax.py
+++ b/examples/research_projects/mulit_token_textual_inversion/textual_inversion_flax.py
@@ -4,7 +4,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import jax
 import jax.numpy as jnp
@@ -17,7 +16,7 @@
 from flax import jax_utils
 from flax.training import train_state
 from flax.training.common_utils import shard
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 
 # TODO: remove and import from diffusers.utils when the new version of diffusers is released
 from packaging import version
@@ -326,16 +325,6 @@ def __getitem__(self, i):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def resize_token_embeddings(model, new_num_tokens, initializer_token_id, placeholder_token_id, rng):
     if model.config.vocab_size == new_num_tokens or new_num_tokens is None:
         return
@@ -367,22 +356,14 @@ def main():
         set_seed(args.seed)
 
     if jax.process_index() == 0:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Make one log on every process with the configuration for debugging.
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
@@ -661,7 +642,12 @@ def compute_loss(params):
         jnp.save(os.path.join(args.output_dir, "learned_embeds.npy"), learned_embeds_dict)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
 
 if __name__ == "__main__":
diff --git a/examples/research_projects/multi_subject_dreambooth/train_multi_subject_dreambooth.py b/examples/research_projects/multi_subject_dreambooth/train_multi_subject_dreambooth.py
index 2ea6217e576f..a1016b50e7b2 100644
--- a/examples/research_projects/multi_subject_dreambooth/train_multi_subject_dreambooth.py
+++ b/examples/research_projects/multi_subject_dreambooth/train_multi_subject_dreambooth.py
@@ -6,7 +6,6 @@
 import os
 import warnings
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import torch
@@ -16,7 +15,7 @@
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from PIL import Image
 from torch.utils.data import Dataset
 from torchvision import transforms
@@ -463,16 +462,6 @@ def __getitem__(self, index):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main(args):
     logging_dir = Path(args.output_dir, args.logging_dir)
 
@@ -584,22 +573,14 @@ def main(args):
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load the tokenizer
     if args.tokenizer_name:
         tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_name, revision=args.revision, use_fast=False)
@@ -886,7 +867,12 @@ def main(args):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/research_projects/onnxruntime/text_to_image/train_text_to_image.py b/examples/research_projects/onnxruntime/text_to_image/train_text_to_image.py
index 637b35b3f695..aba9020f58b6 100644
--- a/examples/research_projects/onnxruntime/text_to_image/train_text_to_image.py
+++ b/examples/research_projects/onnxruntime/text_to_image/train_text_to_image.py
@@ -19,7 +19,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import numpy as np
@@ -31,7 +30,7 @@
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
 from datasets import load_dataset
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from onnxruntime.training.ortmodule import ORTModule
 from torchvision import transforms
 from tqdm.auto import tqdm
@@ -313,16 +312,6 @@ def parse_args():
     return args
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 dataset_name_mapping = {
     "lambdalabs/pokemon-blip-captions": ("image", "text"),
 }
@@ -364,22 +353,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load scheduler, tokenizer and models.
     noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
     tokenizer = CLIPTokenizer.from_pretrained(
@@ -732,7 +713,12 @@ def collate_fn(examples):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/research_projects/onnxruntime/textual_inversion/textual_inversion.py b/examples/research_projects/onnxruntime/textual_inversion/textual_inversion.py
index 8d2c4c3c0bd4..a3d24066ad7a 100644
--- a/examples/research_projects/onnxruntime/textual_inversion/textual_inversion.py
+++ b/examples/research_projects/onnxruntime/textual_inversion/textual_inversion.py
@@ -19,7 +19,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import numpy as np
@@ -31,7 +30,7 @@
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from onnxruntime.training.ortmodule import ORTModule
 
 # TODO: remove and import from diffusers.utils when the new version of diffusers is released
@@ -463,16 +462,6 @@ def __getitem__(self, i):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main():
     args = parse_args()
     logging_dir = os.path.join(args.output_dir, args.logging_dir)
@@ -514,22 +503,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load tokenizer
     if args.tokenizer_name:
         tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name)
@@ -851,7 +832,12 @@ def main():
         save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/text_to_image/train_text_to_image.py b/examples/text_to_image/train_text_to_image.py
index 6139a0e6514d..bf2d1e81912e 100644
--- a/examples/text_to_image/train_text_to_image.py
+++ b/examples/text_to_image/train_text_to_image.py
@@ -19,7 +19,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import accelerate
 import datasets
@@ -32,7 +31,7 @@
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
 from datasets import load_dataset
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from packaging import version
 from torchvision import transforms
 from tqdm.auto import tqdm
@@ -315,16 +314,6 @@ def parse_args():
     return args
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 dataset_name_mapping = {
     "lambdalabs/pokemon-blip-captions": ("image", "text"),
 }
@@ -376,22 +365,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load scheduler, tokenizer and models.
     noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
     tokenizer = CLIPTokenizer.from_pretrained(
@@ -786,7 +767,12 @@ def collate_fn(examples):
         pipeline.save_pretrained(args.output_dir)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/text_to_image/train_text_to_image_flax.py b/examples/text_to_image/train_text_to_image_flax.py
index f09fa2249a97..cbd236c5ea15 100644
--- a/examples/text_to_image/train_text_to_image_flax.py
+++ b/examples/text_to_image/train_text_to_image_flax.py
@@ -4,7 +4,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import jax
 import jax.numpy as jnp
@@ -17,7 +16,7 @@
 from flax import jax_utils
 from flax.training import train_state
 from flax.training.common_utils import shard
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from torchvision import transforms
 from tqdm.auto import tqdm
 from transformers import CLIPImageProcessor, CLIPTokenizer, FlaxCLIPTextModel, set_seed
@@ -222,16 +221,6 @@ def parse_args():
     return args
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 dataset_name_mapping = {
     "lambdalabs/pokemon-blip-captions": ("image", "text"),
 }
@@ -261,22 +250,14 @@ def main():
 
     # Handle the repository creation
     if jax.process_index() == 0:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Get the datasets: you can either provide your own training and evaluation files (see below)
     # or specify a Dataset from the hub (the dataset will be downloaded automatically from the datasets Hub).
 
@@ -581,7 +562,12 @@ def compute_loss(params):
         )
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
 
 if __name__ == "__main__":
diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py
index 3b54cc286663..c85b339d5b7a 100644
--- a/examples/text_to_image/train_text_to_image_lora.py
+++ b/examples/text_to_image/train_text_to_image_lora.py
@@ -20,7 +20,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import datasets
 import numpy as np
@@ -32,7 +31,7 @@
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
 from datasets import load_dataset
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 from packaging import version
 from torchvision import transforms
 from tqdm.auto import tqdm
@@ -53,7 +52,7 @@
 logger = get_logger(__name__, log_level="INFO")
 
 
-def save_model_card(repo_name, images=None, base_model=str, dataset_name=str, repo_folder=None):
+def save_model_card(repo_id: str, images=None, base_model=str, dataset_name=str, repo_folder=None):
     img_str = ""
     for i, image in enumerate(images):
         image.save(os.path.join(repo_folder, f"image_{i}.png"))
@@ -73,7 +72,7 @@ def save_model_card(repo_name, images=None, base_model=str, dataset_name=str, re
 ---
     """
     model_card = f"""
-# LoRA text2image fine-tuning - {repo_name}
+# LoRA text2image fine-tuning - {repo_id}
 These are LoRA adaption weights for {base_model}. The weights were fine-tuned on the {dataset_name} dataset. You can find some example images in the following. \n
 {img_str}
 """
@@ -347,16 +346,6 @@ def parse_args():
     return args
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 DATASET_NAME_MAPPING = {
     "lambdalabs/pokemon-blip-captions": ("image", "text"),
 }
@@ -402,22 +391,13 @@ def main():
 
     # Handle the repository creation
    if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            repo_name = create_repo(repo_name, exist_ok=True)
-            repo = Repository(args.output_dir, clone_from=repo_name)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
 
     # Load scheduler, tokenizer and models.
     noise_scheduler = DDPMScheduler.from_pretrained(args.pretrained_model_name_or_path, subfolder="scheduler")
     tokenizer = CLIPTokenizer.from_pretrained(
@@ -830,13 +810,18 @@ def collate_fn(examples):
 
     if args.push_to_hub:
         save_model_card(
-            repo_name,
+            repo_id,
             images=images,
             base_model=args.pretrained_model_name_or_path,
             dataset_name=args.dataset_name,
             repo_folder=args.output_dir,
         )
-        repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+        upload_folder(
+            repo_id=repo_id,
+            folder_path=args.output_dir,
+            commit_message="End of training",
+            ignore_patterns=["step_*", "epoch_*"],
+        )
 
     # Final inference
     # Load previous pipeline
diff --git a/examples/textual_inversion/textual_inversion.py b/examples/textual_inversion/textual_inversion.py
index 92f3d27d4905..42ea9c946c47 100644
--- a/examples/textual_inversion/textual_inversion.py
+++ b/examples/textual_inversion/textual_inversion.py
@@ -20,7 +20,6 @@
 import random
 import warnings
 from pathlib import Path
-from typing import Optional
 
 import numpy as np
 import PIL
@@ -31,7 +30,7 @@
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 from accelerate.utils import ProjectConfiguration, set_seed
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 
 # TODO: remove and import from diffusers.utils when the new version of diffusers is released
 from packaging import version
@@ -519,16 +518,6 @@ def __getitem__(self, i):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def main():
     args = parse_args()
     logging_dir = os.path.join(args.output_dir, args.logging_dir)
@@ -567,22 +556,14 @@ def main():
 
     # Handle the repository creation
     if accelerator.is_main_process:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Load tokenizer
     if args.tokenizer_name:
         tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name)
@@ -880,7 +861,12 @@ def main():
         save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
     accelerator.end_training()
 
diff --git a/examples/textual_inversion/textual_inversion_flax.py b/examples/textual_inversion/textual_inversion_flax.py
index 74cfb281621a..988b67866fe9 100644
--- a/examples/textual_inversion/textual_inversion_flax.py
+++ b/examples/textual_inversion/textual_inversion_flax.py
@@ -4,7 +4,6 @@
 import os
 import random
 from pathlib import Path
-from typing import Optional
 
 import jax
 import jax.numpy as jnp
@@ -17,7 +16,7 @@
 from flax import jax_utils
 from flax.training import train_state
 from flax.training.common_utils import shard
-from huggingface_hub import HfFolder, Repository, create_repo, whoami
+from huggingface_hub import create_repo, upload_folder
 
 # TODO: remove and import from diffusers.utils when the new version of diffusers is released
 from packaging import version
@@ -339,16 +338,6 @@ def __getitem__(self, i):
         return example
 
 
-def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None):
-    if token is None:
-        token = HfFolder.get_token()
-    if organization is None:
-        username = whoami(token)["name"]
-        return f"{username}/{model_id}"
-    else:
-        return f"{organization}/{model_id}"
-
-
 def resize_token_embeddings(model, new_num_tokens, initializer_token_id, placeholder_token_id, rng):
     if model.config.vocab_size == new_num_tokens or new_num_tokens is None:
         return
@@ -380,22 +369,14 @@ def main():
         set_seed(args.seed)
 
     if jax.process_index() == 0:
-        if args.push_to_hub:
-            if args.hub_model_id is None:
-                repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token)
-            else:
-                repo_name = args.hub_model_id
-            create_repo(repo_name, exist_ok=True, token=args.hub_token)
-            repo = Repository(args.output_dir, clone_from=repo_name, token=args.hub_token)
-
-            with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore:
-                if "step_*" not in gitignore:
-                    gitignore.write("step_*\n")
-                if "epoch_*" not in gitignore:
-                    gitignore.write("epoch_*\n")
-        elif args.output_dir is not None:
+        if args.output_dir is not None:
             os.makedirs(args.output_dir, exist_ok=True)
 
+        if args.push_to_hub:
+            repo_id = create_repo(
+                repo_id=args.hub_model_id or Path(args.output_dir).name, exist_ok=True, token=args.hub_token
+            ).repo_id
+
     # Make one log on every process with the configuration for debugging.
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
@@ -688,7 +669,12 @@ def compute_loss(params):
         jnp.save(os.path.join(args.output_dir, "learned_embeds.npy"), learned_embeds_dict)
 
         if args.push_to_hub:
-            repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=args.output_dir,
+                commit_message="End of training",
+                ignore_patterns=["step_*", "epoch_*"],
+            )
 
 
 if __name__ == "__main__":
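
Every file in the patch applies the same migration: the local get_full_repo_name helper and the git-backed Repository workflow (clone, hand-written .gitignore, push_to_hub) are replaced by one create_repo(...).repo_id call before training and one HTTP-based upload_folder call after it. For reference, a minimal standalone sketch of that shared pattern; the output directory name is an illustrative placeholder, and the token is taken from the local Hugging Face login rather than passed explicitly:

    from pathlib import Path

    from huggingface_hub import create_repo, upload_folder

    output_dir = "my-training-output"  # placeholder for args.output_dir
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # create_repo(..., exist_ok=True) is idempotent and returns a RepoUrl whose
    # .repo_id attribute is the fully qualified "<namespace>/<name>", which is
    # why the HfFolder/whoami-based get_full_repo_name helper can be removed.
    repo_id = create_repo(repo_id=Path(output_dir).name, exist_ok=True).repo_id

    # A single HTTP commit replaces Repository's git clone and push; intermediate
    # checkpoints are excluded with glob patterns instead of a .gitignore file.
    upload_folder(
        repo_id=repo_id,
        folder_path=output_dir,
        commit_message="End of training",
        ignore_patterns=["step_*", "epoch_*"],
    )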