From e481d7fffd48728c72dfea69de053b22b3eaedb2 Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 1 Jun 2026 06:14:48 -0700 Subject: [PATCH 1/3] feat: add Gemma 4 multimodal chat handler --- README.md | 1 + llama_cpp/llama_chat_format.py | 44 +++++++++++++++++++++++++++++++++ llama_cpp/server/model.py | 14 +++++++++++ tests/test_llama_chat_format.py | 31 +++++++++++++++++++++++ 4 files changed, 90 insertions(+) diff --git a/README.md b/README.md index 5de330af46..7db3e27448 100644 --- a/README.md +++ b/README.md @@ -510,6 +510,7 @@ Below are the supported multi-modal models and their respective chat handlers (P | [llama-3-vision-alpha](https://huggingface.co/abetlen/llama-3-vision-alpha-gguf) | `Llama3VisionAlphaChatHandler` | `llama-3-vision-alpha` | | [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6` | | [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` | +| [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) | `Gemma4ChatHandler` | `gemma4` | Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images. diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index b7f6916eac..44c6c1f76f 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -3265,6 +3265,50 @@ def from_pretrained( ) +class Gemma4ChatHandler(Llava15ChatHandler): + DEFAULT_SYSTEM_MESSAGE = None + + CHAT_FORMAT = ( + "{% if messages and messages[0]['role'] == 'system' %}" + "{% if messages[0]['content'] is string %}" + "{% set first_user_prefix = messages[0]['content'] + '\n\n' %}" + "{% else %}" + "{% set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' %}" + "{% endif %}" + "{% set loop_messages = messages[1:] %}" + "{% else %}" + "{% set first_user_prefix = '' %}" + "{% set loop_messages = messages %}" + "{% endif %}" + "{% for message in loop_messages %}" + "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}" + "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}" + "{% endif %}" + "{% set role = 'model' if message['role'] == 'assistant' else message['role'] %}" + "{{ '' + role + '\n' + (first_user_prefix if loop.first else '') }}" + "{% if message['content'] is string %}" + "{{ message['content'] | trim }}" + "{% elif message['content'] is iterable %}" + "{% for item in message['content'] %}" + "{% if item['type'] == 'image_url' and item['image_url'] is string %}" + "{{ '\n\n' + item['image_url'] + '\n\n' }}" + "{% elif item['type'] == 'image_url' and item['image_url'] is mapping %}" + "{{ '\n\n' + item['image_url']['url'] + '\n\n' }}" + "{% elif item['type'] == 'text' %}" + "{{ item['text'] | trim }}" + "{% endif %}" + "{% endfor %}" + "{% else %}" + "{{ raise_exception('Invalid content type') }}" + "{% endif %}" + "{{ '\n' }}" + "{% endfor %}" + "{% if add_generation_prompt %}" + "{{ 'model\n' }}" + "{% endif %}" + ) + + class ObsidianChatHandler(Llava15ChatHandler): # Prompt Format # The model followed ChatML format. However, with ### as the separator diff --git a/llama_cpp/server/model.py b/llama_cpp/server/model.py index 3922ce5df3..3222abd631 100644 --- a/llama_cpp/server/model.py +++ b/llama_cpp/server/model.py @@ -115,6 +115,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama: chat_handler = llama_cpp.llama_chat_format.Llava16ChatHandler( clip_model_path=settings.clip_model_path, verbose=settings.verbose ) + elif settings.chat_format == "gemma4": + assert settings.clip_model_path is not None, "clip model not found" + if settings.hf_model_repo_id is not None: + chat_handler = ( + llama_cpp.llama_chat_format.Gemma4ChatHandler.from_pretrained( + repo_id=settings.hf_model_repo_id, + filename=settings.clip_model_path, + verbose=settings.verbose, + ) + ) + else: + chat_handler = llama_cpp.llama_chat_format.Gemma4ChatHandler( + clip_model_path=settings.clip_model_path, verbose=settings.verbose + ) elif settings.chat_format == "moondream": assert settings.clip_model_path is not None, "clip model not found" if settings.hf_model_repo_id is not None: diff --git a/tests/test_llama_chat_format.py b/tests/test_llama_chat_format.py index 18c7279cf0..0469785ef0 100644 --- a/tests/test_llama_chat_format.py +++ b/tests/test_llama_chat_format.py @@ -1,6 +1,7 @@ import json import jinja2 +from jinja2.sandbox import ImmutableSandboxedEnvironment from llama_cpp import ( ChatCompletionRequestUserMessage, @@ -92,3 +93,33 @@ def test_hf_tokenizer_config_str_to_chat_formatter(): ) assert chat_formatter_respoonse.prompt == ("[INST] Hello, world! [/INST]") + + +def test_gemma4_multimodal_chat_handler_formats_image_url(): + template = ImmutableSandboxedEnvironment( + trim_blocks=True, + lstrip_blocks=True, + ).from_string(llama_chat_format.Gemma4ChatHandler.CHAT_FORMAT) + prompt = template.render( + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "Describe this image."}, + { + "type": "image_url", + "image_url": {"url": "https://example.com/cat.png"}, + }, + ], + } + ], + add_generation_prompt=True, + ) + + assert prompt == ( + "user\n" + "Describe this image.\n\n" + "https://example.com/cat.png\n\n" + "\n" + "model\n" + ) From 7561f8e1c4f23605a9b31d2c88e8afc5bc9ff02c Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 1 Jun 2026 06:16:01 -0700 Subject: [PATCH 2/3] docs: add Gemma 4 multimodal changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c80984ff68..2ecb2aa17a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- feat: add Gemma 4 multimodal chat support by @abetlen in #2241 - feat(ci): add CUDA 13.0 and 13.2 wheel builds by @abetlen in #2239 - feat(ci): add CUDA 11.8 wheel builds by @abetlen in #2238 - fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237 From a89b9f85db7fcaa88cc29a9ed91bf90bf6497938 Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 1 Jun 2026 06:49:55 -0700 Subject: [PATCH 3/3] test: remove gemma4 chat handler test --- tests/test_llama_chat_format.py | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/tests/test_llama_chat_format.py b/tests/test_llama_chat_format.py index 0469785ef0..18c7279cf0 100644 --- a/tests/test_llama_chat_format.py +++ b/tests/test_llama_chat_format.py @@ -1,7 +1,6 @@ import json import jinja2 -from jinja2.sandbox import ImmutableSandboxedEnvironment from llama_cpp import ( ChatCompletionRequestUserMessage, @@ -93,33 +92,3 @@ def test_hf_tokenizer_config_str_to_chat_formatter(): ) assert chat_formatter_respoonse.prompt == ("[INST] Hello, world! [/INST]") - - -def test_gemma4_multimodal_chat_handler_formats_image_url(): - template = ImmutableSandboxedEnvironment( - trim_blocks=True, - lstrip_blocks=True, - ).from_string(llama_chat_format.Gemma4ChatHandler.CHAT_FORMAT) - prompt = template.render( - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": "Describe this image."}, - { - "type": "image_url", - "image_url": {"url": "https://example.com/cat.png"}, - }, - ], - } - ], - add_generation_prompt=True, - ) - - assert prompt == ( - "user\n" - "Describe this image.\n\n" - "https://example.com/cat.png\n\n" - "\n" - "model\n" - )