From 67621b8787ada408f355fcfc0120eb63b460a4db Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Fri, 3 May 2024 12:00:58 +0200
Subject: [PATCH 1/5] [ConWidget] recreate inferenceClient if apiToken is updated

---
 .../widgets/ConversationalWidget/ConversationalWidget.svelte | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte b/packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte
index a505a66236..74a8eaa24e 100644
--- a/packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte
+++ b/packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte
@@ -51,6 +51,8 @@
 	let inferenceClient: HfInference | undefined = undefined;
 	let abort: AbortController | undefined = undefined;
 
+	$: inferenceClient = new HfInference(apiToken);
+
 	// Check config and compile template
 	onMount(() => {
 		const config = model.config;
@@ -84,8 +86,6 @@
 			error = `Invalid chat template: "${(e as Error).message}"`;
 			return;
 		}
-
-		inferenceClient = new HfInference(apiToken);
 	});
 
 	async function handleNewMessage(): Promise<void> {

From 16a85214a8d36cfc404cd9d22b17f29375bd8497 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Fri, 3 May 2024 12:02:57 +0200
Subject: [PATCH 2/5] [+page.svelte] simulate isLoggedIn as true for default behaviour

---
 packages/widgets/src/routes/+page.svelte | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/packages/widgets/src/routes/+page.svelte b/packages/widgets/src/routes/+page.svelte
index e141f6e158..dcdd1e43da 100644
--- a/packages/widgets/src/routes/+page.svelte
+++ b/packages/widgets/src/routes/+page.svelte
@@ -28,6 +28,8 @@
 			apiToken = token;
 		}
 	}
+
+	isLoggedIn.set(true);
 });
 
 const models: ModelData[] = [

From 5794e83aafc65f39b44540cbf7eaa1b1d143cb32 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Fri, 3 May 2024 12:09:10 +0200
Subject: [PATCH 3/5] use llama as the default test model for Conv Widget

---
 packages/widgets/src/routes/+page.svelte | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/packages/widgets/src/routes/+page.svelte b/packages/widgets/src/routes/+page.svelte
index dcdd1e43da..b964f5d729 100644
--- a/packages/widgets/src/routes/+page.svelte
+++ b/packages/widgets/src/routes/+page.svelte
@@ -34,21 +34,17 @@
 
 	const models: ModelData[] = [
 		{
-			id: "mistralai/Mistral-7B-Instruct-v0.2",
+			id: "meta-llama/Meta-Llama-3-8B-Instruct",
 			pipeline_tag: "text-generation",
 			tags: ["conversational"],
 			inference: InferenceDisplayability.Yes,
 			config: {
-				architectures: ["MistralForCausalLM"],
-				model_type: "mistral",
+				architectures: ["LlamaForCausalLM"],
+				model_type: "llama",
 				tokenizer_config: {
-					chat_template:
-						"{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
-					use_default_system_prompt: false,
-					bos_token: "<s>",
-					eos_token: "</s>",
-					unk_token: "<unk>",
-					pad_token: null,
+					chat_template: "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+					bos_token: "<|begin_of_text|>",
+					eos_token: "<|end_of_text|>",
 				},
 			},
 			widgetData: [
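For reference, the widget compiles `tokenizer_config.chat_template` before use (PATCH 1 shows the "compile template" step). A minimal sketch of how the Llama 3 template above renders a single-turn conversation, assuming the `Template` API from `@huggingface/jinja`; the message and the expected output comment are illustrative:

```ts
import { Template } from "@huggingface/jinja";

// The Llama 3 chat template from the model config in PATCH 3.
const chatTemplate =
	"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}";

const template = new Template(chatTemplate);

// Render one user turn and ask the template to open an assistant turn.
const prompt = template.render({
	messages: [{ role: "user", content: "Hello!" }],
	bos_token: "<|begin_of_text|>",
	eos_token: "<|end_of_text|>",
	add_generation_prompt: true,
});

// Expected result:
// "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nHello!<|eot_id|>
//  <|start_header_id|>assistant<|end_header_id|>\n\n"
```

Note how `bos_token` flows into the rendered prompt, which is why PATCH 3 has to swap the special tokens together with the template string.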
From d5cb9bd0177a756d011ca25d41c44712d67f2859 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Fri, 3 May 2024 12:09:30 +0200
Subject: [PATCH 4/5] format

---
 packages/widgets/src/routes/+page.svelte | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/widgets/src/routes/+page.svelte b/packages/widgets/src/routes/+page.svelte
index b964f5d729..80a7a43489 100644
--- a/packages/widgets/src/routes/+page.svelte
+++ b/packages/widgets/src/routes/+page.svelte
@@ -42,7 +42,8 @@
 				architectures: ["LlamaForCausalLM"],
 				model_type: "llama",
 				tokenizer_config: {
-					chat_template: "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+					chat_template:
+						"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
 					bos_token: "<|begin_of_text|>",
 					eos_token: "<|end_of_text|>",
 				},

From d8bf5044cf9a2d56847f348671a73641aa472db4 Mon Sep 17 00:00:00 2001
From: Mishig
Date: Fri, 3 May 2024 17:48:15 +0200
Subject: [PATCH 5/5] [widgets] ConvWidget set `retry_on_error: false` (#650)

from https://github.com/huggingface/huggingface.js/pull/649#issuecomment-2092760126

> you should pass retry_on_error: false instead of wait_for_model: false in widgets, I think that would solve it
> (but yes we could probably remove retry_on_error as some point and check wait_for_model for false, true or undefined)
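For illustration, a minimal sketch of the request options this produces, assuming the `HfInference` client and `Options` type from `@huggingface/inference`; the token, model id, and input are placeholders:

```ts
import { HfInference, type Options } from "@huggingface/inference";

const hf = new HfInference("hf_***"); // placeholder token

// With retry_on_error: false, a 503 from a model that is still loading is
// surfaced to the caller immediately instead of being retried behind the
// scenes, so the widget can display the error in the UI.
const options = {
	use_cache: false,
	wait_for_model: false,
	retry_on_error: false,
} satisfies Options;

const output = await hf.textGeneration(
	{ model: "microsoft/Phi-3-mini-128k-instruct", inputs: "hi" },
	options
);
console.log(output.generated_text);
```

The actual change in the widget is the single `retry_on_error: false` line in the diff below.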
### on main

from @gary149

> 503 but no error appears in the UI?

https://huggingface.co/microsoft/Phi-3-mini-128k-instruct?text=hi

### this PR/branch

https://github.com/huggingface/huggingface.js/assets/11827707/05cfd37b-fd15-482e-82ae-cdb76961707b
---
 .../ConversationalWidget.svelte               |  1 +
 packages/widgets/src/routes/+page.svelte      | 46 +++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte b/packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte
index 74a8eaa24e..5addaf94a6 100644
--- a/packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte
+++ b/packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte
@@ -165,6 +165,7 @@
 			signal: abort?.signal,
 			use_cache: useCache || !$isLoggedIn,
 			wait_for_model: withModelLoading,
+			retry_on_error: false,
 		} satisfies Options;
 
 		tgiSupportedModels = await getTgiSupportedModels(apiUrl);
diff --git a/packages/widgets/src/routes/+page.svelte b/packages/widgets/src/routes/+page.svelte
index 80a7a43489..ed3a929be5 100644
--- a/packages/widgets/src/routes/+page.svelte
+++ b/packages/widgets/src/routes/+page.svelte
@@ -77,6 +77,52 @@
 			},
 		],
 	},
+	{
+		id: "microsoft/Phi-3-mini-128k-instruct",
+		pipeline_tag: "text-generation",
+		tags: ["conversational"],
+		inference: InferenceDisplayability.Yes,
+		config: {
+			architectures: ["Phi3ForCausalLM"],
+			model_type: "phi3",
+			tokenizer_config: {
+				bos_token: "<s>",
+				chat_template:
+					"{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
+				eos_token: "<|endoftext|>",
+				pad_token: "<|endoftext|>",
+				unk_token: "<unk>",
+			},
+		},
+		widgetData: [
+			{ text: "This is a text-only example", example_title: "Text only" },
+			{
+				messages: [{ content: "Please explain QCD in very few words", role: "user" }],
+				example_title: "Chat messages",
+			},
+			{
+				messages: [{ content: "Please explain QCD in very few words", role: "user" }],
+				output: {
+					text: "QCD is the physics of strong force and small particles.",
+				},
+				example_title: "Chat messages with Output",
+			},
+			{
+				text: "Explain QCD in one short sentence.",
+				output: {
+					text: "QCD is the physics of strong force and small particles.",
+				},
+				example_title: "Text only with Output",
+			},
+			{
+				example_title: "Invalid example - unsupported role",
+				messages: [
+					{ role: "system", content: "This will fail because of the chat template" },
+					{ role: "user", content: "What's your favorite condiment?" },
+				],
+			},
+		],
+	},
 	{
 		id: "google/gemma-7b",
 		pipeline_tag: "text-generation",