36 changes: 7 additions & 29 deletions auto_round/export/export_to_gguf/export.py
@@ -28,6 +28,7 @@
     LazyImport,
     check_to_quantized,
     clear_memory,
+    download_hf_model,
     flatten_list,
     get_block_names,
     get_gguf_architecture,
@@ -73,7 +74,10 @@ def create_model_class(
     low_cpu_mem_usage=False,
     model_type=convert_hf_to_gguf.ModelType.TEXT,
 ):
-    tmp_work_dir = Path(os.path.join(output_dir, TMP_DIR_NAME))
+    tmp_work_dir = model.name_or_path
+    os.makedirs(output_dir, exist_ok=True)
+    if not os.path.isdir(tmp_work_dir):
+        tmp_work_dir = download_hf_model(tmp_work_dir)
     with torch.inference_mode():
         model_architecture = get_gguf_architecture(tmp_work_dir, model_type=model_type)
         try:
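In effect, create_model_class now resolves its working directory from the model itself instead of reading a temp copy under output_dir. A minimal sketch of the new resolution flow, assuming download_hf_model takes a repo id and returns a local snapshot path (only its name is visible in this diff; huggingface_hub's snapshot_download stands in for it here):

```python
import os

from huggingface_hub import snapshot_download


def download_hf_model(name_or_path: str) -> str:
    # Stand-in for auto_round's helper; this body is an assumption.
    return snapshot_download(name_or_path)


def resolve_model_dir(model, output_dir: str) -> str:
    tmp_work_dir = model.name_or_path        # local checkpoint dir or HF repo id
    os.makedirs(output_dir, exist_ok=True)   # make sure the export target exists
    if not os.path.isdir(tmp_work_dir):      # a repo id rather than a local dir
        tmp_work_dir = download_hf_model(tmp_work_dir)
    return tmp_work_dir
```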
@@ -95,7 +99,7 @@ def create_model_class(
     output_type = FTYPE_MAP.get(output_type.lower())
 
     model_instance = model_class(
-        dir_model=tmp_work_dir,
+        dir_model=Path(tmp_work_dir),
         ftype=output_type,
         fname_out=Path(output_dir),
         is_big_endian=False,
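A knock-on effect of the change above: tmp_work_dir can now be a plain str (model.name_or_path, or whatever download_hf_model returns) instead of a pathlib.Path, hence the explicit conversion at the call site. A tiny illustration with a hypothetical path:

```python
from pathlib import Path

tmp_work_dir = "/tmp/hf-snapshots/my-model"  # hypothetical str from the download helper
dir_model = Path(tmp_work_dir)               # normalize so the model class always gets a Path
```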
@@ -126,19 +130,10 @@ def pack_gguf_layer(
 ):
     """Export the model to gguf format."""
     global gguf_model_instance_global
-    tmp_work_dir = Path(os.path.join(output_dir, TMP_DIR_NAME))
-    if output_dir is not None and os.path.exists(output_dir) and not os.path.exists(tmp_work_dir):
+    if output_dir is not None and os.path.exists(output_dir):
         logger.warning_once(f"{output_dir} already exists, this may cause model conflict")
     tmp_work_dir = Path(os.path.join(output_dir, TMP_DIR_NAME))
     if "gguf_model_instance_global" not in globals():
-        config = model.config
-        config.save_pretrained(tmp_work_dir)
-        if tokenizer is not None and hasattr(tokenizer, "save_pretrained"):
-            tokenizer.save_pretrained(tmp_work_dir)
-        if processor is not None:
-            processor.save_pretrained(tmp_work_dir)
-        if image_processor is not None:
-            image_processor.save_pretrained(tmp_work_dir)
 
         gguf_model_instance_global = [
             create_model_class(
@@ -201,27 +196,11 @@ def pack_gguf_layer(
 @torch.inference_mode()
 def save_quantized_as_gguf(output_dir, backend="gguf:q4_0", layer_config=None, vlm=False, **kwargs):
     """Export the model to gguf format."""
-    tmp_work_dir = Path(os.path.join(output_dir, TMP_DIR_NAME))
-    if output_dir is not None and os.path.exists(output_dir) and not os.path.exists(tmp_work_dir):
-        logger.warning(f"{output_dir} already exists, this may cause model conflict")
-
     st = time.time()
     global gguf_model_instance_global
 
     model = kwargs["model"]
     if "gguf_model_instance_global" not in globals():
-        config = model.config
-        config.save_pretrained(tmp_work_dir)
-        tokenizer = kwargs.get("tokenizer", None)
-        if tokenizer is not None:
-            tokenizer.save_pretrained(tmp_work_dir)
-        processor = kwargs.get("processor", None)
-        if processor is not None:
-            processor.save_pretrained(tmp_work_dir)
-        image_processor = kwargs.get("image_processor", None)
-        if image_processor is not None:
-            image_processor.save_pretrained(tmp_work_dir)
-
         gguf_model_instance_global = [
             create_model_class(output_dir, model, layer_config, backend, model_type=convert_hf_to_gguf.ModelType.TEXT)
         ]
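Both functions keep the `if "gguf_model_instance_global" not in globals():` guard, a module-level memoization idiom: the heavyweight GGUF writer list is built once per process, reused across calls, and dropped with del when the export finishes. A standalone sketch of the idiom (names are illustrative, not from this codebase):

```python
def get_writer_once(factory):
    # Build the expensive object on first use only; later calls reuse it.
    if "_writer_cache" not in globals():
        global _writer_cache
        _writer_cache = [factory()]
    return _writer_cache[0]


# The first call constructs, the second call reuses:
w1 = get_writer_once(lambda: object())
w2 = get_writer_once(lambda: object())
assert w1 is w2
```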
@@ -237,6 +216,5 @@ def save_quantized_as_gguf(output_dir, backend="gguf:q4_0", layer_config=None, vlm=False, **kwargs):
     rt = time.time() - st
     logger.info(f"Model successfully exported to {gguf_model.fname_out}, running time={rt}")
     del gguf_model_instance_global
-    shutil.rmtree(tmp_work_dir, ignore_errors=True)
 
     return model
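A hypothetical end-to-end call, assuming the caller passes the quantized model through kwargs as the surviving kwargs["model"] read above implies (the model object comes from an earlier auto_round quantization step not shown in this diff):

```python
model = save_quantized_as_gguf(
    output_dir="./my-model-gguf",  # export target; a hypothetical path
    backend="gguf:q4_0",           # the default quantization backend
    model=quantized_model,         # required: read via kwargs["model"]
)
```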