From a78390da4c5f8b5f2f0c0e4c7781d0c0c1c2cdf3 Mon Sep 17 00:00:00 2001
From: JIElite <ita3051@gmail.com>
Date: Wed, 17 Sep 2025 14:19:00 +0800
Subject: [PATCH 1/2] Support local GGUF in VLLM and use HF tokenizer #943

---
 src/lighteval/models/vllm/vllm_model.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py
index 31ec8b5a3..417c6d5d4 100644
--- a/src/lighteval/models/vllm/vllm_model.py
+++ b/src/lighteval/models/vllm/vllm_model.py
@@ -85,6 +85,8 @@ class VLLMModelConfig(ModelConfig):
     Attributes:
         model_name (str):
             HuggingFace Hub model ID or path to the model to load.
+        tokenizer (str):
+            HuggingFace Hub model ID or path to the tokenizer to load.
         revision (str):
             Git revision of the model. Defaults to "main".
         dtype (str):
@@ -150,6 +152,7 @@ class VLLMModelConfig(ModelConfig):
     """
 
     model_name: str
+    tokenizer: str | None = None
     revision: str = "main"  # revision of the model
     dtype: str = "bfloat16"
     tensor_parallel_size: PositiveInt = 1  # how many GPUs to use for tensor parallelism
@@ -289,7 +292,9 @@ def _create_auto_model(self, config: VLLMModelConfig) -> Optional[LLM]:
 
     def _create_auto_tokenizer(self, config: VLLMModelConfig):
         tokenizer = get_tokenizer(
-            config.model_name,
+            config.tokenizer
+            if config.tokenizer
+            else config.model_name,  # use HF tokenizer for non-HF models, like GGUF model.
             tokenizer_mode="auto",
             trust_remote_code=config.trust_remote_code,
             revision=config.revision,

From a00d8f95b7240d67f4f9bd300cc4df75ed6ec98f Mon Sep 17 00:00:00 2001
From: JIElite <ita3051@gmail.com>
Date: Wed, 17 Sep 2025 19:18:15 +0800
Subject: [PATCH 2/2] Improve the readability of implementation

---
 src/lighteval/models/vllm/vllm_model.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py
index 417c6d5d4..68632410a 100644
--- a/src/lighteval/models/vllm/vllm_model.py
+++ b/src/lighteval/models/vllm/vllm_model.py
@@ -85,7 +85,7 @@ class VLLMModelConfig(ModelConfig):
     Attributes:
         model_name (str):
             HuggingFace Hub model ID or path to the model to load.
-        tokenizer (str):
+        tokenizer (str | None):
             HuggingFace Hub model ID or path to the tokenizer to load.
         revision (str):
             Git revision of the model. Defaults to "main".
@@ -292,9 +292,7 @@ def _create_auto_model(self, config: VLLMModelConfig) -> Optional[LLM]:
 
     def _create_auto_tokenizer(self, config: VLLMModelConfig):
         tokenizer = get_tokenizer(
-            config.tokenizer
-            if config.tokenizer
-            else config.model_name,  # use HF tokenizer for non-HF models, like GGUF model.
+            config.tokenizer or config.model_name,  # use HF tokenizer for non-HF models, such as GGUF models.
             tokenizer_mode="auto",
             trust_remote_code=config.trust_remote_code,
             revision=config.revision,