From f2ea9657627e7f8924ef12df31f07d4cb9dcbee5 Mon Sep 17 00:00:00 2001 From: Terence Pae Date: Sun, 31 Aug 2025 17:51:40 -0700 Subject: [PATCH 1/3] updated applyChatTemplate with lazy memoization --- Sources/Tokenizers/Tokenizer.swift | 14 ++++++++- Tests/TokenizersTests/ChatTemplateTests.swift | 29 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/Sources/Tokenizers/Tokenizer.swift b/Sources/Tokenizers/Tokenizer.swift index 1f6607c2..583c0928 100644 --- a/Sources/Tokenizers/Tokenizer.swift +++ b/Sources/Tokenizers/Tokenizer.swift @@ -284,6 +284,9 @@ public class PreTrainedTokenizer: Tokenizer { private let tokenizerConfig: Config private let cleanUpTokenizationSpaces: Bool + + // Cache for compiled Jinja templates keyed by their literal template string + private var compiledChatTemplateCache: [String: Template] = [:] public required init(tokenizerConfig: Config, tokenizerData: Config) throws { var addedTokens: [String: Int] = [:] @@ -332,6 +335,15 @@ public class PreTrainedTokenizer: Tokenizer { model = try TokenizerModel.from(tokenizerConfig: tokenizerConfig, tokenizerData: tokenizerData, addedTokens: addedTokens) } + private func compiledTemplate(for templateString: String) throws -> Template { + if let cached = compiledChatTemplateCache[templateString] { + return cached + } + let compiled = try Template(templateString) + compiledChatTemplateCache[templateString] = compiled + return compiled + } + func preTokenize(_ text: String, options: PreTokenizerOptions) -> [String] { guard let preTokenizer else { return [text] } return preTokenizer(text: text, options: options) @@ -530,7 +542,7 @@ public class PreTrainedTokenizer: Tokenizer { throw TokenizerError.missingChatTemplate } - let template = try Template(selectedChatTemplate) + let template = try compiledTemplate(for: selectedChatTemplate) var context: [String: Any] = [ "messages": messages, "add_generation_prompt": addGenerationPrompt, diff --git a/Tests/TokenizersTests/ChatTemplateTests.swift b/Tests/TokenizersTests/ChatTemplateTests.swift index 94403e90..95ae024c 100644 --- a/Tests/TokenizersTests/ChatTemplateTests.swift +++ b/Tests/TokenizersTests/ChatTemplateTests.swift @@ -6,6 +6,7 @@ // import Tokenizers +import Foundation import XCTest class ChatTemplateTests: XCTestCase { @@ -261,4 +262,32 @@ class ChatTemplateTests: XCTestCase { } } } + + /// Performance: cached vs uncached template application + func testApplyChatTemplatePerformanceCached() async throws { + let tokenizer = try await AutoTokenizer.from(pretrained: "microsoft/Phi-3-mini-128k-instruct") + + // Purposely reuse the same template literal to hit the memoized compiled template + let mistral7BDefaultTemplate = "{{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" + + // Prime cache once + _ = try tokenizer.applyChatTemplate(messages: messages, chatTemplate: mistral7BDefaultTemplate) + + measure(metrics: [XCTClockMetric()]) { + _ = try! tokenizer.applyChatTemplate(messages: messages, chatTemplate: mistral7BDefaultTemplate) + } + } + + /// Performance: simulate uncached runs by varying the template to bypass memoization + func testApplyChatTemplatePerformanceUncached() async throws { + let tokenizer = try await AutoTokenizer.from(pretrained: "microsoft/Phi-3-mini-128k-instruct") + + let baseTemplate = "{{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" + + measure(metrics: [XCTClockMetric()]) { + // Make the template string unique each iteration to force a fresh compilation + let uniqueTemplate = baseTemplate + "{# perf \(UUID().uuidString) #}" + _ = try! tokenizer.applyChatTemplate(messages: messages, chatTemplate: uniqueTemplate) + } + } } From 0f0a8518c6391950645eea7ebf6129ff4d4495d0 Mon Sep 17 00:00:00 2001 From: Terence Pae Date: Mon, 1 Sep 2025 06:09:24 -0700 Subject: [PATCH 2/3] formatted code --- Sources/Tokenizers/Tokenizer.swift | 4 ++-- Tests/TokenizersTests/ChatTemplateTests.swift | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Sources/Tokenizers/Tokenizer.swift b/Sources/Tokenizers/Tokenizer.swift index 583c0928..b410f3e5 100644 --- a/Sources/Tokenizers/Tokenizer.swift +++ b/Sources/Tokenizers/Tokenizer.swift @@ -284,8 +284,8 @@ public class PreTrainedTokenizer: Tokenizer { private let tokenizerConfig: Config private let cleanUpTokenizationSpaces: Bool - - // Cache for compiled Jinja templates keyed by their literal template string + + /// Cache for compiled Jinja templates keyed by their literal template string private var compiledChatTemplateCache: [String: Template] = [:] public required init(tokenizerConfig: Config, tokenizerData: Config) throws { diff --git a/Tests/TokenizersTests/ChatTemplateTests.swift b/Tests/TokenizersTests/ChatTemplateTests.swift index 95ae024c..ed7840e3 100644 --- a/Tests/TokenizersTests/ChatTemplateTests.swift +++ b/Tests/TokenizersTests/ChatTemplateTests.swift @@ -5,8 +5,8 @@ // Created by Anthony DePasquale on 2/10/24. // -import Tokenizers import Foundation +import Tokenizers import XCTest class ChatTemplateTests: XCTestCase { From ba1355b0c0b9956515b1a34cfd212269a33878a7 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 1 Sep 2025 23:23:06 +0200 Subject: [PATCH 3/3] Reused shared phi tokenizer --- Tests/TokenizersTests/ChatTemplateTests.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tests/TokenizersTests/ChatTemplateTests.swift b/Tests/TokenizersTests/ChatTemplateTests.swift index d1ee0d59..4887d457 100644 --- a/Tests/TokenizersTests/ChatTemplateTests.swift +++ b/Tests/TokenizersTests/ChatTemplateTests.swift @@ -281,7 +281,7 @@ class ChatTemplateTests: XCTestCase { /// Performance: cached vs uncached template application func testApplyChatTemplatePerformanceCached() async throws { - let tokenizer = try await AutoTokenizer.from(pretrained: "microsoft/Phi-3-mini-128k-instruct") + let tokenizer = try await Self.sharedPhiTokenizer() // Purposely reuse the same template literal to hit the memoized compiled template let mistral7BDefaultTemplate = "{{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" @@ -296,7 +296,7 @@ class ChatTemplateTests: XCTestCase { /// Performance: simulate uncached runs by varying the template to bypass memoization func testApplyChatTemplatePerformanceUncached() async throws { - let tokenizer = try await AutoTokenizer.from(pretrained: "microsoft/Phi-3-mini-128k-instruct") + let tokenizer = try await Self.sharedPhiTokenizer() let baseTemplate = "{{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"