From 3947c12418ae429838d9e8957ace06b1bc0824b2 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 16 Jan 2023 12:24:21 +0100 Subject: [PATCH 1/9] Use "hub" directory for cache instead of "diffusers" --- src/diffusers/utils/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/utils/constants.py b/src/diffusers/utils/constants.py index 35efff392cbd..e1020d7a5faa 100644 --- a/src/diffusers/utils/constants.py +++ b/src/diffusers/utils/constants.py @@ -17,7 +17,7 @@ hf_cache_home = os.path.expanduser( os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) ) -default_cache_path = os.path.join(hf_cache_home, "diffusers") +default_cache_path = os.path.join(hf_cache_home, "hub") CONFIG_NAME = "config.json" From c3f1e9fa445f6d354a56245d6dd39b48fa5e00cb Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 16 Jan 2023 13:32:06 +0100 Subject: [PATCH 2/9] Import cache locations from huggingface_hub I verified that the constants are available in huggingface_hub version 0.10.0, which is the minimum we require. Co-authored-by: Lucain Pouget --- src/diffusers/utils/constants.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/diffusers/utils/constants.py b/src/diffusers/utils/constants.py index e1020d7a5faa..5a218e897532 100644 --- a/src/diffusers/utils/constants.py +++ b/src/diffusers/utils/constants.py @@ -13,11 +13,9 @@ # limitations under the License. 
import os +from huggingface_hub.constants import hf_cache_home, HUGGINGFACE_HUB_CACHE -hf_cache_home = os.path.expanduser( - os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) -) -default_cache_path = os.path.join(hf_cache_home, "hub") +default_cache_path = HUGGINGFACE_HUB_CACHE CONFIG_NAME = "config.json" From e643da4e47503ede0ad8a66cf66812201df857c8 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 16 Jan 2023 13:40:19 +0100 Subject: [PATCH 3/9] make style --- src/diffusers/utils/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/diffusers/utils/constants.py b/src/diffusers/utils/constants.py index 5a218e897532..c92d5e67b352 100644 --- a/src/diffusers/utils/constants.py +++ b/src/diffusers/utils/constants.py @@ -13,7 +13,8 @@ # limitations under the License. import os -from huggingface_hub.constants import hf_cache_home, HUGGINGFACE_HUB_CACHE +from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE, hf_cache_home + default_cache_path = HUGGINGFACE_HUB_CACHE From 0e3de0ac833007834c535b21f3923a294075830f Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Sun, 26 Feb 2023 17:58:02 +0100 Subject: [PATCH 4/9] Move cached directories to new location. --- src/diffusers/utils/hub_utils.py | 68 +++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/src/diffusers/utils/hub_utils.py b/src/diffusers/utils/hub_utils.py index 7e6bd7870de7..337f6b96a0d3 100644 --- a/src/diffusers/utils/hub_utils.py +++ b/src/diffusers/utils/hub_utils.py @@ -15,7 +15,9 @@ import os +import shutil import sys +import traceback from pathlib import Path from typing import Dict, Optional, Union from uuid import uuid4 @@ -24,7 +26,7 @@ from huggingface_hub.utils import is_jinja_available from .. 
import __version__ -from .constants import HUGGINGFACE_CO_RESOLVE_ENDPOINT +from .constants import DIFFUSERS_CACHE, HUGGINGFACE_CO_RESOLVE_ENDPOINT from .import_utils import ( ENV_VARS_TRUE_VALUES, _flax_version, @@ -129,3 +131,67 @@ def create_model_card(args, model_name): card_path = os.path.join(args.output_dir, "README.md") model_card.save(card_path) + + +# Old default cache path, potentially to be migrated. +# This logic was more or less taken from `transformers`, with the following differences: +# - Diffusers doesn't use custom environment variables to specify the cache path. +# - There is no need to migrate the cache format, just move the files to the new location. +hf_cache_home = os.path.expanduser( + os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) +) +old_diffusers_cache = os.path.join(hf_cache_home, "diffusers") + + +def move_cache(old_cache_dir=None, new_cache_dir=None): + if new_cache_dir is None: + new_cache_dir = DIFFUSERS_CACHE + if old_cache_dir is None: + old_cache_dir = old_diffusers_cache + + for file in os.listdir(old_cache_dir): + # Move directories only + if os.path.isdir(os.path.join(old_cache_dir, file)): + # Skip directories that already exist in the new cache + if os.path.isdir(os.path.join(new_cache_dir, file)): + logger.warning( + f"Skipping migration of directory {file} because it already exists in the new cache location." + ) + shutil.move(os.path.join(old_cache_dir, file), os.path.join(new_cache_dir, file)) + + +cache_version_file = os.path.join(DIFFUSERS_CACHE, "version_diffusers_cache.txt") +if not os.path.isfile(cache_version_file): + cache_version = 0 +else: + with open(cache_version_file) as f: + cache_version = int(f.read()) + +if cache_version < 1: + old_cache_is_not_empty = os.path.isdir(old_diffusers_cache) and len(os.listdir(old_diffusers_cache)) > 0 + if old_cache_is_not_empty: + logger.warning( + "The cache for model files in Diffusers v0.14.0 has moved to a new location. 
Moving your " + "existing cached models. This is a one-time operation, you can interrupt it or run it " + "later by calling `diffusers.utils.hub_utils.move_cache()`." + ) + try: + move_cache() + except Exception as e: + trace = "\n".join(traceback.format_tb(e.__traceback__)) + logger.error( + f"There was a problem when trying to move your cache:\n\n{trace}\n{e.__class__.__name__}: {e}\n\nPlease " + "file an issue at https://github.com/huggingface/diffusers/issues/new/choose, copy paste this whole " + "message and we will do our best to help." + ) + +if cache_version < 1: + try: + os.makedirs(DIFFUSERS_CACHE, exist_ok=True) + with open(cache_version_file, "w") as f: + f.write("1") + except Exception: + logger.warning( + f"There was a problem when trying to write in your cache folder ({DIFFUSERS_CACHE}). Please, ensure " + "the directory exists and can be written to." + ) \ No newline at end of file From ed72dbfd1c5e5d779757a7b32a53d7aa58302551 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Sun, 26 Feb 2023 18:00:05 +0100 Subject: [PATCH 5/9] make style --- src/diffusers/utils/hub_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/diffusers/utils/hub_utils.py b/src/diffusers/utils/hub_utils.py index 337f6b96a0d3..85cdcc1213fa 100644 --- a/src/diffusers/utils/hub_utils.py +++ b/src/diffusers/utils/hub_utils.py @@ -148,7 +148,7 @@ def move_cache(old_cache_dir=None, new_cache_dir=None): new_cache_dir = DIFFUSERS_CACHE if old_cache_dir is None: old_cache_dir = old_diffusers_cache - + for file in os.listdir(old_cache_dir): # Move directories only if os.path.isdir(os.path.join(old_cache_dir, file)): @@ -194,4 +194,4 @@ def move_cache(old_cache_dir=None, new_cache_dir=None): logger.warning( f"There was a problem when trying to write in your cache folder ({DIFFUSERS_CACHE}). Please, ensure " "the directory exists and can be written to." 
- ) \ No newline at end of file + ) From 3b742cdc7bf1efdecff310acec1d4828eedf0aca Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 27 Feb 2023 17:49:30 +0100 Subject: [PATCH 6/9] Apply suggestions by @Wauplin Co-authored-by: Lucain --- src/diffusers/utils/hub_utils.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/diffusers/utils/hub_utils.py b/src/diffusers/utils/hub_utils.py index 85cdcc1213fa..9720209457ed 100644 --- a/src/diffusers/utils/hub_utils.py +++ b/src/diffusers/utils/hub_utils.py @@ -143,21 +143,25 @@ def create_model_card(args, model_name): old_diffusers_cache = os.path.join(hf_cache_home, "diffusers") -def move_cache(old_cache_dir=None, new_cache_dir=None): +def move_cache(old_cache_dir: Optional[str] = None, new_cache_dir: Optional[str] = None) -> None: if new_cache_dir is None: - new_cache_dir = DIFFUSERS_CACHE + new_cache_dir = DIFFUSERS_CACHE # or directly use HUGGINGFACE_HUB_CACHE to be more explicit ? if old_cache_dir is None: old_cache_dir = old_diffusers_cache - for file in os.listdir(old_cache_dir): - # Move directories only - if os.path.isdir(os.path.join(old_cache_dir, file)): - # Skip directories that already exist in the new cache - if os.path.isdir(os.path.join(new_cache_dir, file)): - logger.warning( - f"Skipping migration of directory {file} because it already exists in the new cache location." 
- ) - shutil.move(os.path.join(old_cache_dir, file), os.path.join(new_cache_dir, file)) + old_cache_dir = Path(old_cache_dir).expanduser() + new_cache_dir = Path(new_cache_dir).expanduser() + for old_blob_path in old_cache_dir.glob("**/blobs/*"): # move file blob by blob + if old_blob_path.isfile(): + new_blob_path = new_cache_dir / old_blob_path.relative_to(old_cache_dir) + new_blob_path.parent.mkdir(parents=True, exist_ok=True) + os.replace(old_blob_path, new_blob_path) + try: + os.symlink(new_blob_path, old_blob_path) + except OSError: + logger.warning(f"Could not create symlink between old cache and new cache. If you use an older version of diffusers again, models would have to be re-downloaded.") + + # TODO: At this point, old_cache_dir only contains broken symlinks and references => can be deleted cache_version_file = os.path.join(DIFFUSERS_CACHE, "version_diffusers_cache.txt") From b0f0239a381598a5339feb49b451e2bbc4c384f8 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 27 Feb 2023 18:16:03 +0100 Subject: [PATCH 7/9] Fix is_file --- src/diffusers/utils/hub_utils.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/diffusers/utils/hub_utils.py b/src/diffusers/utils/hub_utils.py index 9720209457ed..fad3abb546eb 100644 --- a/src/diffusers/utils/hub_utils.py +++ b/src/diffusers/utils/hub_utils.py @@ -145,23 +145,22 @@ def create_model_card(args, model_name): def move_cache(old_cache_dir: Optional[str] = None, new_cache_dir: Optional[str] = None) -> None: if new_cache_dir is None: - new_cache_dir = DIFFUSERS_CACHE # or directly use HUGGINGFACE_HUB_CACHE to be more explicit ? 
+ new_cache_dir = DIFFUSERS_CACHE if old_cache_dir is None: old_cache_dir = old_diffusers_cache old_cache_dir = Path(old_cache_dir).expanduser() new_cache_dir = Path(new_cache_dir).expanduser() for old_blob_path in old_cache_dir.glob("**/blobs/*"): # move file blob by blob - if old_blob_path.isfile(): + if old_blob_path.is_file(): new_blob_path = new_cache_dir / old_blob_path.relative_to(old_cache_dir) new_blob_path.parent.mkdir(parents=True, exist_ok=True) os.replace(old_blob_path, new_blob_path) try: os.symlink(new_blob_path, old_blob_path) except OSError: - logger.warning(f"Could not create symlink between old cache and new cache. If you use an older version of diffusers again, models would have to be re-downloaded.") - - # TODO: At this point, old_cache_dir only contains broken symlinks and references => can be deleted + logger.warning(f"Could not create symlink between old cache and new cache. If you use an older version of diffusers again, models would have to be re-downloaded.") + # At this point, old_cache_dir contains symlinks to the new cache (it can still be used). cache_version_file = os.path.join(DIFFUSERS_CACHE, "version_diffusers_cache.txt") From d8c61276344d710633f6b2febe2ff57442dee024 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 27 Feb 2023 18:30:51 +0100 Subject: [PATCH 8/9] Ignore symlinks. This is especially important because the user may want to invoke the process again later, for example if they are keeping multiple envs with different versions. 
--- src/diffusers/utils/hub_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/utils/hub_utils.py b/src/diffusers/utils/hub_utils.py index fad3abb546eb..9c2e628aac77 100644 --- a/src/diffusers/utils/hub_utils.py +++ b/src/diffusers/utils/hub_utils.py @@ -152,7 +152,7 @@ def move_cache(old_cache_dir: Optional[str] = None, new_cache_dir: Optional[str] old_cache_dir = Path(old_cache_dir).expanduser() new_cache_dir = Path(new_cache_dir).expanduser() for old_blob_path in old_cache_dir.glob("**/blobs/*"): # move file blob by blob - if old_blob_path.is_file(): + if old_blob_path.is_file() and not old_blob_path.is_symlink(): new_blob_path = new_cache_dir / old_blob_path.relative_to(old_cache_dir) new_blob_path.parent.mkdir(parents=True, exist_ok=True) os.replace(old_blob_path, new_blob_path) From 49dd37f0c92b9d14f925881d5df4329abe8cb13d Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 27 Feb 2023 20:10:35 +0100 Subject: [PATCH 9/9] Style --- src/diffusers/utils/hub_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/diffusers/utils/hub_utils.py b/src/diffusers/utils/hub_utils.py index 9c2e628aac77..ff715595a3b0 100644 --- a/src/diffusers/utils/hub_utils.py +++ b/src/diffusers/utils/hub_utils.py @@ -15,7 +15,6 @@ import os -import shutil import sys import traceback from pathlib import Path @@ -151,7 +150,7 @@ def move_cache(old_cache_dir: Optional[str] = None, new_cache_dir: Optional[str] old_cache_dir = Path(old_cache_dir).expanduser() new_cache_dir = Path(new_cache_dir).expanduser() - for old_blob_path in old_cache_dir.glob("**/blobs/*"): # move file blob by blob + for old_blob_path in old_cache_dir.glob("**/blobs/*"): # move file blob by blob if old_blob_path.is_file() and not old_blob_path.is_symlink(): new_blob_path = new_cache_dir / old_blob_path.relative_to(old_cache_dir) new_blob_path.parent.mkdir(parents=True, exist_ok=True) @@ -159,7 +158,9 @@ def move_cache(old_cache_dir: Optional[str] 
= None, new_cache_dir: Optional[str] try: os.symlink(new_blob_path, old_blob_path) except OSError: - logger.warning(f"Could not create symlink between old cache and new cache. If you use an older version of diffusers again, models would have to be re-downloaded.") + logger.warning( + "Could not create symlink between old cache and new cache. If you use an older version of diffusers again, files will be re-downloaded." + ) # At this point, old_cache_dir contains symlinks to the new cache (it can still be used).