diff --git a/examples/tokenizer-playground/src/App.jsx b/examples/tokenizer-playground/src/App.jsx
index fd6deb077..c6546d20f 100644
--- a/examples/tokenizer-playground/src/App.jsx
+++ b/examples/tokenizer-playground/src/App.jsx
@@ -70,7 +70,8 @@ function App() {
-
+
+
diff --git a/examples/tokenizer-playground/src/components/Token.jsx b/examples/tokenizer-playground/src/components/Token.jsx
index b49fa7a07..579b000f9 100644
--- a/examples/tokenizer-playground/src/components/Token.jsx
+++ b/examples/tokenizer-playground/src/components/Token.jsx
@@ -14,7 +14,7 @@ export function Token({ text, position, margin }) {
+ className={`leading-5 ${COLOURS[position % COLOURS.length]}`}>
{text}
) :
)
diff --git a/package-lock.json b/package-lock.json
index c953c7232..49fb65a51 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,7 +9,7 @@
"version": "2.15.0",
"license": "Apache-2.0",
"dependencies": {
- "@huggingface/jinja": "^0.1.0",
+ "@huggingface/jinja": "^0.1.3",
"onnxruntime-web": "1.14.0",
"sharp": "^0.32.0"
},
@@ -745,9 +745,9 @@
}
},
"node_modules/@huggingface/jinja": {
- "version": "0.1.0",
- "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.1.0.tgz",
- "integrity": "sha512-NgZ0imvGPHblw+nFJN2eC+so0DmvLSEieldI7gjZZbBUDE80ypG1O+DibdeWne1vQuGBYV/pC3XL//SgxiXC7g==",
+ "version": "0.1.3",
+ "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.1.3.tgz",
+ "integrity": "sha512-9KsiorsdIK8+7VmlamAT7Uh90zxAhC/SeKaKc80v58JhtPYuwaJpmR/ST7XAUxrHAFqHTCoTH5aJnJDwSL6xIQ==",
"engines": {
"node": ">=18"
}
diff --git a/package.json b/package.json
index ff14f5c97..6fe63e41f 100644
--- a/package.json
+++ b/package.json
@@ -40,7 +40,7 @@
"dependencies": {
"onnxruntime-web": "1.14.0",
"sharp": "^0.32.0",
- "@huggingface/jinja": "^0.1.0"
+ "@huggingface/jinja": "^0.1.3"
},
"optionalDependencies": {
"onnxruntime-node": "1.14.0"
diff --git a/src/tokenizers.js b/src/tokenizers.js
index 563e39319..9692cf3b0 100644
--- a/src/tokenizers.js
+++ b/src/tokenizers.js
@@ -3204,6 +3204,10 @@ export class EsmTokenizer extends PreTrainedTokenizer { }
export class Qwen2Tokenizer extends PreTrainedTokenizer { }
+export class GemmaTokenizer extends PreTrainedTokenizer {
+ _default_chat_template = "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}"
+}
+
/**
* Helper function to build translation inputs for an `NllbTokenizer` or `M2M100Tokenizer`.
* @param {PreTrainedTokenizer} self The tokenizer instance.
@@ -4309,6 +4313,7 @@ export class AutoTokenizer {
NougatTokenizer,
VitsTokenizer,
Qwen2Tokenizer,
+ GemmaTokenizer,
// Base case:
PreTrainedTokenizer,
diff --git a/tests/generate_tests.py b/tests/generate_tests.py
index 81836b901..c449d34be 100644
--- a/tests/generate_tests.py
+++ b/tests/generate_tests.py
@@ -36,6 +36,9 @@
# Uses a pretokenizer regex which is not compatible with JavaScript.
'Qwen/Qwen1.5-0.5B-Chat',
],
+ 'gemma': [
+ 'Xenova/gemma-tokenizer',
+ ],
}
MODELS_TO_IGNORE = [