From d2d8339e2e454a0cbcb1ca67325a4f3222ed4011 Mon Sep 17 00:00:00 2001
From: Aaraviitkgp
Date: Thu, 20 Nov 2025 22:24:23 +0530
Subject: [PATCH 1/3] issue_42197_resolve

---
 src/transformers/utils/hub.py |  69 ++++++++++++++
 tests/utils/test.py           | 173 ++++++++++++++++++++++++++++++++++
 2 files changed, 242 insertions(+)
 create mode 100644 tests/utils/test.py

diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 7103ba6b5035..c5fbf0736c30 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -413,6 +413,75 @@ def cached_files(
     if subfolder is None:
         subfolder = ""
+    if local_files_only or is_offline_mode():
+        cache_dirs_to_try= []
+
+        if cache_dir is not None:
+            cache_dirs_to_try.append(cache_dir)
+
+        for env_var in ["HF_HOME","TRANSFORMERS_CACHE", "HF_HUB_CACHE"]:
+            env_cache = os.environ.get(env_var)
+            if env_cache and env_cache not in cache_dirs_to_try:
+                cache_dirs_to_try.append(env_cache)
+
+        default_cache = default_cache_path
+        if default_cache not in cache_dirs_to_try:
+            cache_dirs_to_try.append(default_cache)
+
+        for potential_cache_dir in cache_dirs_to_try:
+            if not os.path.exists(potential_cache_dir):
+                continue
+
+            # Construct the cache path following HF Hub structure
+            repo_id_sanitized = path_or_repo_id.replace("/", "--")
+            model_cache_dir = os.path.join(potential_cache_dir, f"models--{repo_id_sanitized}")
+
+            if not os.path.exists(model_cache_dir):
+                continue
+
+            # Try to find the file in snapshots
+            refs_dir = os.path.join(model_cache_dir, "refs")
+            snapshots_dir = os.path.join(model_cache_dir, "snapshots")
+
+            if os.path.exists(refs_dir) and os.path.exists(snapshots_dir):
+                # Try to get commit hash from refs
+                ref_file = os.path.join(refs_dir, revision or "main")
+                if os.path.exists(ref_file):
+                    with open(ref_file, "r", encoding="utf-8") as f:
+                        commit_hash = f.read().strip()
+
+                    # Check if file exists in this snapshot
+                    found_files = []
+                    for fname in filenames:
+                        if subfolder:
+                            file_path = os.path.join(snapshots_dir, commit_hash, subfolder, fname)
+                        else:
+                            file_path = os.path.join(snapshots_dir, commit_hash, fname)
+
+                        if os.path.exists(file_path):
+                            found_files.append(file_path)
+
+                    # If we found all files, return them
+                    if len(found_files) == len(filenames):
+                        logger.info(f"Found all cached files in {snapshots_dir}/{commit_hash}")
+                        return found_files
+
+            # If ref doesn't exist, try to find any snapshot with the file
+            if os.path.exists(snapshots_dir):
+                for commit_dir in os.listdir(snapshots_dir):
+                    commit_path = os.path.join(snapshots_dir, commit_dir)
+                    if not os.path.isdir(commit_path):
+                        continue
+
+                    if subfolder:
+                        file_path = os.path.join(commit_path, subfolder, fname)
+                    else:
+                        file_path = os.path.join(commit_path, fname)
+
+                    if os.path.exists(file_path):
+                        logger.info(f"Found cached file at {file_path}")
+                        return file_path
+
     # Add folder to filenames
     full_filenames = [os.path.join(subfolder, file) for file in filenames]
 
diff --git a/tests/utils/test.py b/tests/utils/test.py
new file mode 100644
index 000000000000..7b83340a58ce
--- /dev/null
+++ b/tests/utils/test.py
@@ -0,0 +1,173 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Tests for offline mode functionality.
+Regression tests for issue #41311: https://github.com/huggingface/transformers/issues/41311
+"""
+
+import os
+import subprocess
+import sys
+import tempfile
+import unittest
+
+
+class TestOfflineMode(unittest.TestCase):
+    """
+    Test that models can be loaded offline after cache is warmed in a subprocess.
+    These are regression tests for issue #41311.
+    """
+
+    def test_subprocess_warm_cache_then_offline_load(self):
+        """
+        Test that warming cache in subprocess allows offline loading in parent process.
+        Regression test for: https://github.com/huggingface/transformers/issues/41311
+        """
+        model_name = "hf-internal-testing/tiny-random-bert"
+
+        with tempfile.TemporaryDirectory() as cache_dir:
+            env = os.environ.copy()
+            env["HF_HOME"] = cache_dir
+
+            # Step 1: Download model in subprocess
+            warm_script = f"""
+import os
+os.environ["HF_HOME"] = "{cache_dir}"
+
+from transformers import AutoConfig, AutoModel, AutoTokenizer
+
+config = AutoConfig.from_pretrained("{model_name}")
+model = AutoModel.from_pretrained("{model_name}")
+tokenizer = AutoTokenizer.from_pretrained("{model_name}")
+print("CACHE_WARMED")
+"""
+
+            result = subprocess.run(
+                [sys.executable, "-c", warm_script],
+                capture_output=True,
+                text=True,
+                env=env,
+                timeout=120,
+            )
+
+            self.assertEqual(result.returncode, 0, f"Cache warming failed: {result.stderr}")
+            self.assertIn("CACHE_WARMED", result.stdout)
+
+            # Step 2: Load offline with socket blocking (after imports)
+            offline_script = f"""
+import os
+os.environ["HF_HOME"] = "{cache_dir}"
+os.environ["HF_HUB_OFFLINE"] = "1"
+
+# Import transformers first
+from transformers import AutoConfig, AutoModel, AutoTokenizer
+
+# Then block sockets to ensure no network access
+import socket
+original_socket = socket.socket
+def guarded_socket(*args, **kwargs):
+    raise RuntimeError("Network access attempted in offline mode!")
+socket.socket = guarded_socket
+
+try:
+    config = AutoConfig.from_pretrained("{model_name}")
+    model = AutoModel.from_pretrained("{model_name}")
+    tokenizer = AutoTokenizer.from_pretrained("{model_name}")
+    print("OFFLINE_SUCCESS")
+except RuntimeError as e:
+    if "Network access" in str(e):
+        print(f"NETWORK_ATTEMPTED: {{e}}")
+        exit(1)
+    raise
+except Exception as e:
+    print(f"FAILED: {{e}}")
+    import traceback
+    traceback.print_exc()
+    exit(1)
+"""
+
+            result = subprocess.run(
+                [sys.executable, "-c", offline_script],
+                capture_output=True,
+                text=True,
+                env=env,
+                timeout=120,
+            )
+
+            if "NETWORK_ATTEMPTED" in result.stdout:
+                self.fail(f"Network access attempted despite warm cache: {result.stdout}")
+
+            self.assertIn("OFFLINE_SUCCESS", result.stdout,
+                f"Failed to load offline:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}")
+            self.assertEqual(result.returncode, 0)
+
+    def test_pipeline_offline_after_subprocess_warm(self):
+        """
+        Test pipeline API works offline after subprocess cache warming.
+        """
+        model_name = "hf-internal-testing/tiny-random-bert"
+
+        with tempfile.TemporaryDirectory() as cache_dir:
+            env = os.environ.copy()
+            env["HF_HOME"] = cache_dir
+
+            # Warm cache
+            warm_script = f"""
+import os
+os.environ["HF_HOME"] = "{cache_dir}"
+
+from transformers import pipeline
+
+pipe = pipeline("text-classification", model="{model_name}")
+print("WARMED")
+"""
+
+            result = subprocess.run([sys.executable, "-c", warm_script],
+                capture_output=True, text=True, env=env, timeout=120)
+            self.assertEqual(result.returncode, 0)
+
+            # Load offline
+            offline_script = f"""
+import os
+os.environ["HF_HOME"] = "{cache_dir}"
+os.environ["HF_HUB_OFFLINE"] = "1"
+
+from transformers import pipeline
+import socket
+
+# Block sockets after imports
+def no_socket(*args, **kwargs):
+    raise RuntimeError("Network blocked!")
+socket.socket = no_socket
+
+try:
+    pipe = pipeline("text-classification", model="{model_name}")
+    print("SUCCESS")
+except RuntimeError as e:
+    if "Network blocked" in str(e):
+        print(f"BLOCKED: {{e}}")
+        exit(1)
+    raise
+except Exception as e:
+    print(f"ERROR: {{e}}")
+    exit(1)
+"""
+
+            result = subprocess.run([sys.executable, "-c", offline_script],
+                capture_output=True, text=True, env=env, timeout=120)
+
+            self.assertNotIn("BLOCKED", result.stdout, "Network access attempted")
+            self.assertIn("SUCCESS", result.stdout)
+            self.assertEqual(result.returncode, 0)

From aa1d426f8c1d93aa821c46c42fd03f124be60aca Mon Sep 17 00:00:00 2001
From: Aaraviitkgp
Date: Thu, 20 Nov 2025 22:40:00 +0530
Subject: [PATCH 2/3] issue resolved

---
 src/transformers/utils/hub.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index c5fbf0736c30..897efe11d114 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -583,8 +583,6 @@ def cached_files(
                 "Check cache directory permissions. Common causes: 1) another user is downloading the same model (please wait); "
                 "2) a previous download was canceled and the lock file needs manual removal."
             ) from e
-        elif isinstance(e, ValueError):
-            raise OSError(f"{e}") from e
 
     # Now we try to recover if we can find all files correctly in the cache
     resolved_files = [
@@ -953,6 +951,10 @@ def push_to_hub(
         ```
         """
         ignore_metadata_errors = deprecated_kwargs.pop("ignore_metadata_errors", False)
+        save_jinja_files = deprecated_kwargs.pop(
+            "save_jinja_files", None
+        )  # TODO: This is only used for testing and should be removed once save_jinja_files becomes the default
+
         repo_path_or_name = deprecated_kwargs.pop("repo_path_or_name", None)
         if repo_path_or_name is not None:
             # Should use `repo_id` instead of `repo_path_or_name`. When using `repo_path_or_name`, we try to infer
@@ -998,11 +1000,15 @@ def push_to_hub(
             files_timestamps = self._get_files_timestamps(work_dir)
 
             # Save all files.
-            self.save_pretrained(
-                work_dir,
-                max_shard_size=max_shard_size,
-                safe_serialization=safe_serialization,
-            )
+            if save_jinja_files:
+                self.save_pretrained(
+                    work_dir,
+                    max_shard_size=max_shard_size,
+                    safe_serialization=safe_serialization,
+                    save_jinja_files=True,
+                )
+            else:
+                self.save_pretrained(work_dir, max_shard_size=max_shard_size, safe_serialization=safe_serialization)
 
             # Update model card if needed:
             model_card.save(os.path.join(work_dir, "README.md"))

From bc75bbc826b3db7bcc9121d88ba119a966954edc Mon Sep 17 00:00:00 2001
From: Aaraviitkgp
Date: Thu, 20 Nov 2025 22:52:19 +0530
Subject: [PATCH 3/3] Fix code formatting with ruff

---
 src/transformers/utils/hub.py | 10 +++++-----
 tests/utils/test.py           | 17 +++++++++++------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 897efe11d114..78c2f6bbb68c 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -414,15 +414,15 @@ def cached_files(
         subfolder = ""
     if local_files_only or is_offline_mode():
-        cache_dirs_to_try= []
+        cache_dirs_to_try = []
 
         if cache_dir is not None:
             cache_dirs_to_try.append(cache_dir)
 
-        for env_var in ["HF_HOME","TRANSFORMERS_CACHE", "HF_HUB_CACHE"]:
-            env_cache = os.environ.get(env_var)
-            if env_cache and env_cache not in cache_dirs_to_try:
-                cache_dirs_to_try.append(env_cache)
+        for env_var in ["HF_HOME", "TRANSFORMERS_CACHE", "HF_HUB_CACHE"]:
+            env_cache = os.environ.get(env_var)
+            if env_cache and env_cache not in cache_dirs_to_try:
+                cache_dirs_to_try.append(env_cache)
 
         default_cache = default_cache_path
         if default_cache not in cache_dirs_to_try:
             cache_dirs_to_try.append(default_cache)
diff --git a/tests/utils/test.py b/tests/utils/test.py
index 7b83340a58ce..ecb959899b94 100644
--- a/tests/utils/test.py
+++ b/tests/utils/test.py
@@ -109,8 +109,11 @@ def guarded_socket(*args, **kwargs):
             if "NETWORK_ATTEMPTED" in result.stdout:
                 self.fail(f"Network access attempted despite warm cache: {result.stdout}")
 
-            self.assertIn("OFFLINE_SUCCESS", result.stdout,
-                f"Failed to load offline:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}")
+            self.assertIn(
+                "OFFLINE_SUCCESS",
+                result.stdout,
+                f"Failed to load offline:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}",
+            )
             self.assertEqual(result.returncode, 0)
 
     def test_pipeline_offline_after_subprocess_warm(self):
@@ -134,8 +137,9 @@ def test_pipeline_offline_after_subprocess_warm(self):
 print("WARMED")
 """
 
-            result = subprocess.run([sys.executable, "-c", warm_script],
-                capture_output=True, text=True, env=env, timeout=120)
+            result = subprocess.run(
+                [sys.executable, "-c", warm_script], capture_output=True, text=True, env=env, timeout=120
+            )
             self.assertEqual(result.returncode, 0)
 
             # Load offline
@@ -165,8 +169,9 @@ def no_socket(*args, **kwargs):
     exit(1)
 """
 
-            result = subprocess.run([sys.executable, "-c", offline_script],
-                capture_output=True, text=True, env=env, timeout=120)
+            result = subprocess.run(
+                [sys.executable, "-c", offline_script], capture_output=True, text=True, env=env, timeout=120
+            )
 
             self.assertNotIn("BLOCKED", result.stdout, "Network access attempted")
             self.assertIn("SUCCESS", result.stdout)
             self.assertEqual(result.returncode, 0)