From abfc2950213a3c219dfbe2442de72759869c1fae Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Wed, 3 Sep 2025 14:35:32 +0200 Subject: [PATCH 1/6] Explicit check for existence of chat_template `conversational` does not always imply chat_template. Example: https://huggingface.co/facebook/blenderbot-400M-distill/blob/main/README.md See https://github.com/huggingface/huggingface.js/issues/1722 --- packages/tasks/src/model-libraries-snippets.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts index 1651d9a224..e2abbd16fb 100644 --- a/packages/tasks/src/model-libraries-snippets.ts +++ b/packages/tasks/src/model-libraries-snippets.ts @@ -1498,7 +1498,7 @@ export const transformers = (model: ModelData): string[] => { `${processorVarName} = ${info.processor}.from_pretrained("${model.id}"` + remote_code_snippet + ")", `model = ${info.auto_model}.from_pretrained("${model.id}"` + remote_code_snippet + ")" ); - if (model.tags.includes("conversational")) { + if (model.tags.includes("conversational") && model.config?.tokenizer_config?.chat_template) { if (model.tags.includes("image-text-to-text")) { autoSnippet.push( "messages = [", From 94e7d48f638ce1c25c393e939c1523b2bdf73d7a Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 8 Sep 2025 20:15:32 +0200 Subject: [PATCH 2/6] Improve chat template existence check. Notably, check for jinja field. --- packages/tasks/src/model-data.ts | 4 +++- packages/tasks/src/model-libraries-snippets.ts | 7 ++++++- packages/tasks/src/tokenizer-data.ts | 7 +++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/packages/tasks/src/model-data.ts b/packages/tasks/src/model-data.ts index 625f319fce..13732dc9ff 100644 --- a/packages/tasks/src/model-data.ts +++ b/packages/tasks/src/model-data.ts @@ -1,6 +1,6 @@ import type { PipelineType } from "./pipelines.js"; import type { WidgetExample } from "./widget-example.js"; -import type { TokenizerConfig } from "./tokenizer-data.js"; +import type { ProcessorConfig, TokenizerConfig } from "./tokenizer-data.js"; /** * Public interface for model metadata @@ -44,6 +44,8 @@ export interface ModelData { quant_method?: string; }; tokenizer_config?: TokenizerConfig; + processor_config?: ProcessorConfig; + chat_template_jinja?: string; adapter_transformers?: { model_name?: string; model_class?: string; diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts index e2abbd16fb..6316ec5c53 100644 --- a/packages/tasks/src/model-libraries-snippets.ts +++ b/packages/tasks/src/model-libraries-snippets.ts @@ -1476,6 +1476,11 @@ export const terratorch = (model: ModelData): string[] => [ model = BACKBONE_REGISTRY.build("${model.id}")`, ]; +const has_chat_template = (model: ModelData): boolean => + model.config?.tokenizer_config?.chat_template !== undefined || + model.config?.processor_config?.chat_template !== undefined || + model.config?.chat_template_jinja !== undefined; + export const transformers = (model: ModelData): string[] => { const info = model.transformersInfo; if (!info) { @@ -1498,7 +1503,7 @@ export const transformers = (model: ModelData): string[] => { `${processorVarName} = ${info.processor}.from_pretrained("${model.id}"` + remote_code_snippet + ")", `model = ${info.auto_model}.from_pretrained("${model.id}"` + remote_code_snippet + ")" ); - if (model.tags.includes("conversational") && model.config?.tokenizer_config?.chat_template) { + if (model.tags.includes("conversational") && has_chat_template(model)) { if (model.tags.includes("image-text-to-text")) { autoSnippet.push( "messages = [", diff --git a/packages/tasks/src/tokenizer-data.ts b/packages/tasks/src/tokenizer-data.ts index 6be41e8f60..fb9f8cff71 100644 --- a/packages/tasks/src/tokenizer-data.ts +++ b/packages/tasks/src/tokenizer-data.ts @@ -30,3 +30,10 @@ export interface TokenizerConfig extends SpecialTokensMap { use_default_system_prompt?: boolean; chat_template?: string | Array<{ name: string; template: string }>; } + +/** + * Minimalistic interface for ProcessorConfig + */ +export interface ProcessorConfig { + chat_template?: string | Array<{ name: string; template: string }>; +} From 7aa1329ba4943c9dba42a39ca94f4e0bdb6f3747 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Mon, 8 Sep 2025 20:17:36 +0200 Subject: [PATCH 3/6] Rename --- packages/tasks/src/model-libraries-snippets.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts index 6316ec5c53..6966b29f58 100644 --- a/packages/tasks/src/model-libraries-snippets.ts +++ b/packages/tasks/src/model-libraries-snippets.ts @@ -1476,7 +1476,7 @@ export const terratorch = (model: ModelData): string[] => [ model = BACKBONE_REGISTRY.build("${model.id}")`, ]; -const has_chat_template = (model: ModelData): boolean => +const hasChatTemplate = (model: ModelData): boolean => model.config?.tokenizer_config?.chat_template !== undefined || model.config?.processor_config?.chat_template !== undefined || model.config?.chat_template_jinja !== undefined; @@ -1503,7 +1503,7 @@ export const transformers = (model: ModelData): string[] => { `${processorVarName} = ${info.processor}.from_pretrained("${model.id}"` + remote_code_snippet + ")", `model = ${info.auto_model}.from_pretrained("${model.id}"` + remote_code_snippet + ")" ); - if (model.tags.includes("conversational") && has_chat_template(model)) { + if (model.tags.includes("conversational") && hasChatTemplate(model)) { if (model.tags.includes("image-text-to-text")) { autoSnippet.push( "messages = [", From a210a39be28e977a4a12ed70d612bd4fcc1f2b70 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Tue, 9 Sep 2025 00:43:12 +0200 Subject: [PATCH 4/6] Apply suggestions from code review Co-authored-by: Julien Chaumond --- packages/tasks/src/model-data.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/tasks/src/model-data.ts b/packages/tasks/src/model-data.ts index 13732dc9ff..c8a0345bfd 100644 --- a/packages/tasks/src/model-data.ts +++ b/packages/tasks/src/model-data.ts @@ -44,7 +44,9 @@ export interface ModelData { quant_method?: string; }; tokenizer_config?: TokenizerConfig; - processor_config?: ProcessorConfig; + processor_config?: { + chat_template?: string; + }; chat_template_jinja?: string; adapter_transformers?: { model_name?: string; From 0bb7cd91dd49bd0245ce90bff69c9a94ab303a97 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Tue, 9 Sep 2025 00:43:36 +0200 Subject: [PATCH 5/6] Update packages/tasks/src/model-data.ts --- packages/tasks/src/model-data.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/src/model-data.ts b/packages/tasks/src/model-data.ts index c8a0345bfd..fd5b88f190 100644 --- a/packages/tasks/src/model-data.ts +++ b/packages/tasks/src/model-data.ts @@ -1,6 +1,6 @@ import type { PipelineType } from "./pipelines.js"; import type { WidgetExample } from "./widget-example.js"; -import type { ProcessorConfig, TokenizerConfig } from "./tokenizer-data.js"; +import type { TokenizerConfig } from "./tokenizer-data.js"; /** * Public interface for model metadata From 2750826c4e3a8ccb72556c4f792614c2d6efd2ea Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Tue, 9 Sep 2025 00:44:36 +0200 Subject: [PATCH 6/6] Update packages/tasks/src/tokenizer-data.ts --- packages/tasks/src/tokenizer-data.ts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/packages/tasks/src/tokenizer-data.ts b/packages/tasks/src/tokenizer-data.ts index fb9f8cff71..6be41e8f60 100644 --- a/packages/tasks/src/tokenizer-data.ts +++ b/packages/tasks/src/tokenizer-data.ts @@ -30,10 +30,3 @@ export interface TokenizerConfig extends SpecialTokensMap { use_default_system_prompt?: boolean; chat_template?: string | Array<{ name: string; template: string }>; } - -/** - * Minimalistic interface for ProcessorConfig - */ -export interface ProcessorConfig { - chat_template?: string | Array<{ name: string; template: string }>; -}