From a78390da4c5f8b5f2f0c0e4c7781d0c0c1c2cdf3 Mon Sep 17 00:00:00 2001 From: JIElite Date: Wed, 17 Sep 2025 14:19:00 +0800 Subject: [PATCH 1/2] Support local GGUF in VLLM and use HF tokenizer #943 --- src/lighteval/models/vllm/vllm_model.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 31ec8b5a3..417c6d5d4 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -85,6 +85,8 @@ class VLLMModelConfig(ModelConfig): Attributes: model_name (str): HuggingFace Hub model ID or path to the model to load. + tokenizer (str): + HuggingFace Hub model ID or path to the tokenizer to load. revision (str): Git revision of the model. Defaults to "main". dtype (str): @@ -150,6 +152,7 @@ class VLLMModelConfig(ModelConfig): """ model_name: str + tokenizer: str | None = None revision: str = "main" # revision of the model dtype: str = "bfloat16" tensor_parallel_size: PositiveInt = 1 # how many GPUs to use for tensor parallelism @@ -289,7 +292,9 @@ def _create_auto_model(self, config: VLLMModelConfig) -> Optional[LLM]: def _create_auto_tokenizer(self, config: VLLMModelConfig): tokenizer = get_tokenizer( - config.model_name, + config.tokenizer + if config.tokenizer + else config.model_name, # use HF tokenizer for non-HF models, like GGUF model. 
tokenizer_mode="auto", trust_remote_code=config.trust_remote_code, revision=config.revision, From a00d8f95b7240d67f4f9bd300cc4df75ed6ec98f Mon Sep 17 00:00:00 2001 From: JIElite Date: Wed, 17 Sep 2025 19:18:15 +0800 Subject: [PATCH 2/2] Improve the readability of implementation --- src/lighteval/models/vllm/vllm_model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 417c6d5d4..68632410a 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -85,7 +85,7 @@ class VLLMModelConfig(ModelConfig): Attributes: model_name (str): HuggingFace Hub model ID or path to the model to load. - tokenizer (str): + tokenizer (str | None): HuggingFace Hub model ID or path to the tokenizer to load. revision (str): Git revision of the model. Defaults to "main". @@ -292,9 +292,7 @@ def _create_auto_model(self, config: VLLMModelConfig) -> Optional[LLM]: def _create_auto_tokenizer(self, config: VLLMModelConfig): tokenizer = get_tokenizer( - config.tokenizer - if config.tokenizer - else config.model_name, # use HF tokenizer for non-HF models, like GGUF model. + config.tokenizer or config.model_name, # use an HF tokenizer for non-HF models, such as GGUF models. tokenizer_mode="auto", trust_remote_code=config.trust_remote_code, revision=config.revision,