Don't download both safetensor and bin files. (vllm-project#2480)
NikolaBorisov authored and hongxiayang committed Jan 18, 2024
1 parent 3d96adf commit d246a38
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions vllm/model_executor/weight_utils.py
@@ -1,12 +1,13 @@
"""Utilities for downloading and initializing model weights."""
import filelock
import glob
import fnmatch
import json
import os
from collections import defaultdict
from typing import Any, Iterator, List, Optional, Tuple

from huggingface_hub import snapshot_download
from huggingface_hub import snapshot_download, HfFileSystem
import numpy as np
from safetensors.torch import load_file, save_file, safe_open
import torch
@@ -149,6 +150,20 @@ def prepare_hf_model_weights(
         allow_patterns += ["*.pt"]
 
     if not is_local:
+        # Before we download, we look at what is available:
+        fs = HfFileSystem()
+        file_list = fs.ls(model_name_or_path, detail=False, revision=revision)
+
+        # Depending on what is available, we download different things.
+        for pattern in allow_patterns:
+            matching = fnmatch.filter(file_list, pattern)
+            if len(matching) > 0:
+                allow_patterns = [pattern]
+                if pattern == "*.safetensors":
+                    use_safetensors = True
+                break
+
+        logger.info(f"Downloading model weights {allow_patterns}")
         # Use file lock to prevent multiple processes from
         # downloading the same model weights at the same time.
         with get_lock(model_name_or_path, cache_dir):
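
To make the new selection step easier to follow, here is a minimal standalone sketch of the same logic: list the repository up front with HfFileSystem, keep only the first entry of allow_patterns that matches any file, and remember whether that pattern is *.safetensors. The repository id "facebook/opt-125m" is only an illustrative placeholder and is not part of this change; the sketch needs network access to the Hugging Face Hub.

import fnmatch

from huggingface_hub import HfFileSystem

# Candidate weight formats, in the order the commit would try them.
allow_patterns = ["*.safetensors", "*.bin", "*.pt"]
use_safetensors = False

fs = HfFileSystem()
# detail=False returns plain paths such as "facebook/opt-125m/pytorch_model.bin".
file_list = fs.ls("facebook/opt-125m", detail=False)

for pattern in allow_patterns:
    if fnmatch.filter(file_list, pattern):
        # Narrow the download to the first format actually present in the repo,
        # so only one set of weight files is fetched later.
        allow_patterns = [pattern]
        use_safetensors = pattern == "*.safetensors"
        break

print(allow_patterns, use_safetensors)
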
@@ -163,8 +178,6 @@ def prepare_hf_model_weights(
     for pattern in allow_patterns:
         hf_weights_files += glob.glob(os.path.join(hf_folder, pattern))
         if len(hf_weights_files) > 0:
-            if pattern == "*.safetensors":
-                use_safetensors = True
             break
     if not use_safetensors:
         # Exclude files that are not needed for inference.
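The net effect, sketched roughly below: once allow_patterns has been narrowed to a single pattern, snapshot_download only fetches that one weight format, and the glob loop in the last hunk then finds exactly one kind of weight file. This is a simplified illustration, not the exact call in vllm/model_executor/weight_utils.py, which passes additional arguments (such as cache_dir and revision); the repository id and pattern below are placeholders.

import glob
import os

from huggingface_hub import snapshot_download

# Assume the selection step above picked "*.bin" for this example repository.
allow_patterns = ["*.bin"]

# Download only the files matching the selected pattern.
hf_folder = snapshot_download("facebook/opt-125m",
                              allow_patterns=allow_patterns)

# Collect the downloaded weight files, as the loop in the final hunk does.
hf_weights_files = []
for pattern in allow_patterns:
    hf_weights_files += glob.glob(os.path.join(hf_folder, pattern))
    if len(hf_weights_files) > 0:
        break

print(hf_weights_files)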
