From e481d7fffd48728c72dfea69de053b22b3eaedb2 Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 06:14:48 -0700
Subject: [PATCH 1/3] feat: add Gemma 4 multimodal chat handler

---
 README.md                       |  1 +
 llama_cpp/llama_chat_format.py  | 44 +++++++++++++++++++++++++++++++++
 llama_cpp/server/model.py       | 14 +++++++++++
 tests/test_llama_chat_format.py | 31 +++++++++++++++++++++++
 4 files changed, 90 insertions(+)

diff --git a/README.md b/README.md
index 5de330af46..7db3e27448 100644
--- a/README.md
+++ b/README.md
@@ -510,6 +510,7 @@ Below are the supported multi-modal models and their respective chat handlers (P
 | [llama-3-vision-alpha](https://huggingface.co/abetlen/llama-3-vision-alpha-gguf) | `Llama3VisionAlphaChatHandler` | `llama-3-vision-alpha` |
 | [minicpm-v-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) | `MiniCPMv26ChatHandler` | `minicpm-v-2.6` |
 | [qwen2.5-vl](https://huggingface.co/unsloth/Qwen2.5-VL-3B-Instruct-GGUF) | `Qwen25VLChatHandler` | `qwen2.5-vl` |
+| [gemma-4](https://huggingface.co/unsloth/gemma-4-E4B-it-GGUF) | `Gemma4ChatHandler` | `gemma4` |
 
 Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images.
 
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index b7f6916eac..44c6c1f76f 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -3265,6 +3265,50 @@ def from_pretrained(
         )
 
 
+class Gemma4ChatHandler(Llava15ChatHandler):  # Overrides only DEFAULT_SYSTEM_MESSAGE and CHAT_FORMAT of the base handler.
+    DEFAULT_SYSTEM_MESSAGE = None  # No default system message is declared for this handler.
+
+    CHAT_FORMAT = (  # Jinja template assembled from adjacent string literals.
+        "{% if messages and messages[0]['role'] == 'system' %}"  # A leading system message gets no turn of its own...
+        "{% if messages[0]['content'] is string %}"
+        "{% set first_user_prefix = messages[0]['content'] + '\n\n' %}"  # ...its text is kept as a prefix for the first turn in the loop.
+        "{% else %}"
+        "{% set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' %}"  # Non-string content: only the first item's 'text' is read.
+        "{% endif %}"
+        "{% set loop_messages = messages[1:] %}"  # Iterate over everything after the system message.
+        "{% else %}"
+        "{% set first_user_prefix = '' %}"  # No leading system message: nothing to prepend.
+        "{% set loop_messages = messages %}"
+        "{% endif %}"
+        "{% for message in loop_messages %}"
+        "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"  # Even positions must be 'user'; odd positions must not be.
+        "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"  # NOTE(review): raise_exception is not defined in this block; the renderer must supply it -- confirm.
+        "{% endif %}"
+        "{% set role = 'model' if message['role'] == 'assistant' else message['role'] %}"  # 'assistant' is rendered as 'model'; other roles pass through.
+        "{{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else '') }}"  # Turn header; the prefix is added on the first iteration only.
+        "{% if message['content'] is string %}"
+        "{{ message['content'] | trim }}"  # Plain-string content is emitted with surrounding whitespace trimmed.
+        "{% elif message['content'] is iterable %}"
+        "{% for item in message['content'] %}"  # Content given as a sequence of typed parts.
+        "{% if item['type'] == 'image_url' and item['image_url'] is string %}"  # image_url supplied directly as a string.
+        "{{ '\n\n' + item['image_url'] + '\n\n' }}"  # The URL is emitted with two newlines on each side.
+        "{% elif item['type'] == 'image_url' and item['image_url'] is mapping %}"  # image_url supplied as a mapping with a 'url' key.
+        "{{ '\n\n' + item['image_url']['url'] + '\n\n' }}"
+        "{% elif item['type'] == 'text' %}"
+        "{{ item['text'] | trim }}"  # Text parts are trimmed; parts of any other type produce no output.
+        "{% endif %}"
+        "{% endfor %}"
+        "{% else %}"
+        "{{ raise_exception('Invalid content type') }}"  # Content that is neither a string nor iterable is rejected.
+        "{% endif %}"
+        "{{ '<end_of_turn>\n' }}"  # Every turn is closed explicitly.
+        "{% endfor %}"
+        "{% if add_generation_prompt %}"
+        "{{ '<start_of_turn>model\n' }}"  # Opens a 'model' turn for the reply to be generated.
+        "{% endif %}"
+    )
+
+
 class ObsidianChatHandler(Llava15ChatHandler):
     # Prompt Format
     # The model followed ChatML format. However, with ### as the separator
diff --git a/llama_cpp/server/model.py b/llama_cpp/server/model.py
index 3922ce5df3..3222abd631 100644
--- a/llama_cpp/server/model.py
+++ b/llama_cpp/server/model.py
@@ -115,6 +115,20 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
                 chat_handler = llama_cpp.llama_chat_format.Llava16ChatHandler(
                     clip_model_path=settings.clip_model_path, verbose=settings.verbose
                 )
+        elif settings.chat_format == "gemma4":
+            assert settings.clip_model_path is not None, "clip model not found"
+            if settings.hf_model_repo_id is not None:
+                chat_handler = (
+                    llama_cpp.llama_chat_format.Gemma4ChatHandler.from_pretrained(
+                        repo_id=settings.hf_model_repo_id,
+                        filename=settings.clip_model_path,
+                        verbose=settings.verbose,
+                    )
+                )
+            else:
+                chat_handler = llama_cpp.llama_chat_format.Gemma4ChatHandler(
+                    clip_model_path=settings.clip_model_path, verbose=settings.verbose
+                )
         elif settings.chat_format == "moondream":
             assert settings.clip_model_path is not None, "clip model not found"
             if settings.hf_model_repo_id is not None:
diff --git a/tests/test_llama_chat_format.py b/tests/test_llama_chat_format.py
index 18c7279cf0..0469785ef0 100644
--- a/tests/test_llama_chat_format.py
+++ b/tests/test_llama_chat_format.py
@@ -1,6 +1,7 @@
 import json
 
 import jinja2
+from jinja2.sandbox import ImmutableSandboxedEnvironment
 
 from llama_cpp import (
     ChatCompletionRequestUserMessage,
@@ -92,3 +93,33 @@ def test_hf_tokenizer_config_str_to_chat_formatter():
     )
 
     assert chat_formatter_respoonse.prompt == ("<s>[INST] Hello, world! [/INST]</s>")
+
+
+def test_gemma4_multimodal_chat_handler_formats_image_url():
+    template = ImmutableSandboxedEnvironment(
+        trim_blocks=True,
+        lstrip_blocks=True,
+    ).from_string(llama_chat_format.Gemma4ChatHandler.CHAT_FORMAT)
+    prompt = template.render(
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Describe this image."},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "https://example.com/cat.png"},
+                    },
+                ],
+            }
+        ],
+        add_generation_prompt=True,
+    )
+
+    assert prompt == (
+        "<start_of_turn>user\n"
+        "Describe this image.\n\n"
+        "https://example.com/cat.png\n\n"
+        "<end_of_turn>\n"
+        "<start_of_turn>model\n"
+    )

From 7561f8e1c4f23605a9b31d2c88e8afc5bc9ff02c Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 06:16:01 -0700
Subject: [PATCH 2/3] docs: add Gemma 4 multimodal changelog entry

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c80984ff68..2ecb2aa17a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- feat: add Gemma 4 multimodal chat support by @abetlen in #2241
 - feat(ci): add CUDA 13.0 and 13.2 wheel builds by @abetlen in #2239
 - feat(ci): add CUDA 11.8 wheel builds by @abetlen in #2238
 - fix(ci): add Pascal compute capability targets to CUDA wheel builds by @abetlen in #2237

From a89b9f85db7fcaa88cc29a9ed91bf90bf6497938 Mon Sep 17 00:00:00 2001
From: abetlen <abetlen@gmail.com>
Date: Mon, 1 Jun 2026 06:49:55 -0700
Subject: [PATCH 3/3] test: remove gemma4 chat handler test

---
 tests/test_llama_chat_format.py | 31 -------------------------------
 1 file changed, 31 deletions(-)

diff --git a/tests/test_llama_chat_format.py b/tests/test_llama_chat_format.py
index 0469785ef0..18c7279cf0 100644
--- a/tests/test_llama_chat_format.py
+++ b/tests/test_llama_chat_format.py
@@ -1,7 +1,6 @@
 import json
 
 import jinja2
-from jinja2.sandbox import ImmutableSandboxedEnvironment
 
 from llama_cpp import (
     ChatCompletionRequestUserMessage,
@@ -93,33 +92,3 @@ def test_hf_tokenizer_config_str_to_chat_formatter():
     )
 
     assert chat_formatter_respoonse.prompt == ("<s>[INST] Hello, world! [/INST]</s>")
-
-
-def test_gemma4_multimodal_chat_handler_formats_image_url():
-    template = ImmutableSandboxedEnvironment(
-        trim_blocks=True,
-        lstrip_blocks=True,
-    ).from_string(llama_chat_format.Gemma4ChatHandler.CHAT_FORMAT)
-    prompt = template.render(
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": "Describe this image."},
-                    {
-                        "type": "image_url",
-                        "image_url": {"url": "https://example.com/cat.png"},
-                    },
-                ],
-            }
-        ],
-        add_generation_prompt=True,
-    )
-
-    assert prompt == (
-        "<start_of_turn>user\n"
-        "Describe this image.\n\n"
-        "https://example.com/cat.png\n\n"
-        "<end_of_turn>\n"
-        "<start_of_turn>model\n"
-    )